/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
                                                        "5.0 (201611)";
#elif OMP_45_ENABLED
                                                        "4.5 (201511)";
#elif OMP_40_ENABLED
                                                        "4.0 (201307)";
#else
                                                        "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
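
// A minimal, self-contained sketch of the gtid lookup strategies used below
// (a dedicated TLS variable, keyed TLS, or a search over registered stack
// ranges).  Not part of the runtime; ThreadRec, g_threads and g_capacity are
// hypothetical stand-ins for kmp_info_t and __kmp_threads.
#if 0 // illustrative sketch only; not compiled
#include <cstddef>

// Hypothetical registry entry describing one thread's stack.
struct ThreadRec {
  char *stack_base;  // highest address of the stack (stacks grow down)
  size_t stack_size; // bytes reserved below stack_base
};

extern ThreadRec *g_threads[]; // analogue of __kmp_threads
extern int g_capacity;         // analogue of __kmp_threads_capacity

static thread_local int tls_gtid = -1; // analogue of the TDATA gtid mode

// Return the index of the registered thread whose stack contains 'addr',
// or -1 if no registered stack covers it.
static int gtid_from_stack_address(char *addr) {
  for (int i = 0; i < g_capacity; ++i) {
    ThreadRec *t = g_threads[i];
    if (!t)
      continue;
    // addr belongs to thread i iff stack_base - stack_size <= addr <= stack_base
    if (addr <= t->stack_base &&
        (size_t)(t->stack_base - addr) <= t->stack_size)
      return i;
  }
  return -1;
}

static int my_gtid() {
  if (tls_gtid >= 0) // fastest path: per-thread variable
    return tls_gtid;
  char probe;        // fallback: locate our own stack
  return gtid_from_stack_address(&probe);
}
#endif
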
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
        non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
        may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}

int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ?
"initial" : "actual"); 280 } 281 } 282 283 /* No point in checking ubermaster threads since they use refinement and 284 * cannot overlap */ 285 gtid = __kmp_gtid_from_thread(th); 286 if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) { 287 KA_TRACE(10, 288 ("__kmp_check_stack_overlap: performing extensive checking\n")); 289 if (stack_beg == NULL) { 290 stack_end = (char *)th->th.th_info.ds.ds_stackbase; 291 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 292 } 293 294 for (f = 0; f < __kmp_threads_capacity; f++) { 295 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); 296 297 if (f_th && f_th != th) { 298 char *other_stack_end = 299 (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); 300 char *other_stack_beg = 301 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); 302 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) || 303 (stack_end > other_stack_beg && stack_end < other_stack_end)) { 304 305 /* Print the other stack values before the abort */ 306 if (__kmp_storage_map) 307 __kmp_print_storage_map_gtid( 308 -1, other_stack_beg, other_stack_end, 309 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), 310 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th)); 311 312 __kmp_msg(kmp_ms_fatal, KMP_MSG(StackOverlap), 313 KMP_HNT(ChangeStackLimit), __kmp_msg_null); 314 } 315 } 316 } 317 } 318 KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n")); 319 } 320 321 /* ------------------------------------------------------------------------ */ 322 323 void __kmp_infinite_loop(void) { 324 static int done = FALSE; 325 326 while (!done) { 327 KMP_YIELD(1); 328 } 329 } 330 331 #define MAX_MESSAGE 512 332 333 void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, 334 char const *format, ...) { 335 char buffer[MAX_MESSAGE]; 336 va_list ap; 337 338 va_start(ap, format); 339 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, 340 p2, (unsigned long)size, format); 341 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 342 __kmp_vprintf(kmp_err, buffer, ap); 343 #if KMP_PRINT_DATA_PLACEMENT 344 int node; 345 if (gtid >= 0) { 346 if (p1 <= p2 && (char *)p2 - (char *)p1 == size) { 347 if (__kmp_storage_map_verbose) { 348 node = __kmp_get_host_node(p1); 349 if (node < 0) /* doesn't work, so don't try this next time */ 350 __kmp_storage_map_verbose = FALSE; 351 else { 352 char *last; 353 int lastNode; 354 int localProc = __kmp_get_cpu_from_gtid(gtid); 355 356 const int page_size = KMP_GET_PAGE_SIZE(); 357 358 p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1)); 359 p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1)); 360 if (localProc >= 0) 361 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, 362 localProc >> 1); 363 else 364 __kmp_printf_no_lock(" GTID %d\n", gtid); 365 #if KMP_USE_PRCTL 366 /* The more elaborate format is disabled for now because of the prctl 367 * hanging bug. */ 368 do { 369 last = p1; 370 lastNode = node; 371 /* This loop collates adjacent pages with the same host node. 
*/ 372 do { 373 (char *)p1 += page_size; 374 } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); 375 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1, 376 lastNode); 377 } while (p1 <= p2); 378 #else 379 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, 380 (char *)p1 + (page_size - 1), 381 __kmp_get_host_node(p1)); 382 if (p1 < p2) { 383 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, 384 (char *)p2 + (page_size - 1), 385 __kmp_get_host_node(p2)); 386 } 387 #endif 388 } 389 } 390 } else 391 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)); 392 } 393 #endif /* KMP_PRINT_DATA_PLACEMENT */ 394 __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 395 } 396 397 void __kmp_warn(char const *format, ...) { 398 char buffer[MAX_MESSAGE]; 399 va_list ap; 400 401 if (__kmp_generate_warnings == kmp_warnings_off) { 402 return; 403 } 404 405 va_start(ap, format); 406 407 KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format); 408 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); 409 __kmp_vprintf(kmp_err, buffer, ap); 410 __kmp_release_bootstrap_lock(&__kmp_stdio_lock); 411 412 va_end(ap); 413 } 414 415 void __kmp_abort_process() { 416 // Later threads may stall here, but that's ok because abort() will kill them. 417 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock); 418 419 if (__kmp_debug_buf) { 420 __kmp_dump_debug_buffer(); 421 }; // if 422 423 if (KMP_OS_WINDOWS) { 424 // Let other threads know of abnormal termination and prevent deadlock 425 // if abort happened during library initialization or shutdown 426 __kmp_global.g.g_abort = SIGABRT; 427 428 /* On Windows* OS by default abort() causes pop-up error box, which stalls 429 nightly testing. Unfortunately, we cannot reliably suppress pop-up error 430 boxes. _set_abort_behavior() works well, but this function is not 431 available in VS7 (this is not problem for DLL, but it is a problem for 432 static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not 433 help, at least in some versions of MS C RTL. 434 435 It seems following sequence is the only way to simulate abort() and 436 avoid pop-up error box. */ 437 raise(SIGABRT); 438 _exit(3); // Just in case, if signal ignored, exit anyway. 439 } else { 440 abort(); 441 }; // if 442 443 __kmp_infinite_loop(); 444 __kmp_release_bootstrap_lock(&__kmp_exit_lock); 445 446 } // __kmp_abort_process 447 448 void __kmp_abort_thread(void) { 449 // TODO: Eliminate g_abort global variable and this function. 450 // In case of abort just call abort(), it will kill all the threads. 451 __kmp_infinite_loop(); 452 } // __kmp_abort_thread 453 454 /* Print out the storage map for the major kmp_info_t thread data structures 455 that are allocated together. 
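
// Sketch of the "storage map" idea used by the routines below: print the
// half-open address range and size of each member of a contiguously laid out
// structure.  Not part of the library; struct Example is hypothetical.
#if 0 // illustrative sketch only; not compiled
#include <cstdio>

struct Example {
  int header[4];
  double payload[8];
  char tail[16];
};

// One "storage map" line per member: start address, end address, size, name,
// similar in spirit to __kmp_print_storage_map_gtid().
static void print_example_map(const Example *e) {
  std::printf("%p %p %zu header\n", (const void *)&e->header,
              (const void *)(&e->header + 1), sizeof(e->header));
  std::printf("%p %p %zu payload\n", (const void *)&e->payload,
              (const void *)(&e->payload + 1), sizeof(e->payload));
  std::printf("%p %p %zu tail\n", (const void *)&e->tail,
              (const void *)(&e->tail + 1), sizeof(e->tail));
}
#endif
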
*/ 456 457 static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) { 458 __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", 459 gtid); 460 461 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team, 462 sizeof(kmp_desc_t), "th_%d.th_info", gtid); 463 464 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head, 465 sizeof(kmp_local_t), "th_%d.th_local", gtid); 466 467 __kmp_print_storage_map_gtid( 468 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], 469 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid); 470 471 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier], 472 &thr->th.th_bar[bs_plain_barrier + 1], 473 sizeof(kmp_balign_t), "th_%d.th_bar[plain]", 474 gtid); 475 476 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier], 477 &thr->th.th_bar[bs_forkjoin_barrier + 1], 478 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", 479 gtid); 480 481 #if KMP_FAST_REDUCTION_BARRIER 482 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier], 483 &thr->th.th_bar[bs_reduction_barrier + 1], 484 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", 485 gtid); 486 #endif // KMP_FAST_REDUCTION_BARRIER 487 } 488 489 /* Print out the storage map for the major kmp_team_t team data structures 490 that are allocated together. */ 491 492 static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, 493 int team_id, int num_thr) { 494 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; 495 __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d", 496 header, team_id); 497 498 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0], 499 &team->t.t_bar[bs_last_barrier], 500 sizeof(kmp_balign_team_t) * bs_last_barrier, 501 "%s_%d.t_bar", header, team_id); 502 503 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier], 504 &team->t.t_bar[bs_plain_barrier + 1], 505 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", 506 header, team_id); 507 508 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier], 509 &team->t.t_bar[bs_forkjoin_barrier + 1], 510 sizeof(kmp_balign_team_t), 511 "%s_%d.t_bar[forkjoin]", header, team_id); 512 513 #if KMP_FAST_REDUCTION_BARRIER 514 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier], 515 &team->t.t_bar[bs_reduction_barrier + 1], 516 sizeof(kmp_balign_team_t), 517 "%s_%d.t_bar[reduction]", header, team_id); 518 #endif // KMP_FAST_REDUCTION_BARRIER 519 520 __kmp_print_storage_map_gtid( 521 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], 522 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id); 523 524 __kmp_print_storage_map_gtid( 525 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], 526 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id); 527 528 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0], 529 &team->t.t_disp_buffer[num_disp_buff], 530 sizeof(dispatch_shared_info_t) * num_disp_buff, 531 "%s_%d.t_disp_buffer", header, team_id); 532 533 __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data, 534 sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, 535 team_id); 536 } 537 538 static void __kmp_init_allocator() {} 539 static void __kmp_fini_allocator() {} 540 541 /* ------------------------------------------------------------------------ */ 542 543 #ifdef KMP_DYNAMIC_LIB 544 #if KMP_OS_WINDOWS 545 546 static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) { 547 // TODO: Change 
to __kmp_break_bootstrap_lock(). 548 __kmp_init_bootstrap_lock(lck); // make the lock released 549 } 550 551 static void __kmp_reset_locks_on_process_detach(int gtid_req) { 552 int i; 553 int thread_count; 554 555 // PROCESS_DETACH is expected to be called by a thread that executes 556 // ProcessExit() or FreeLibrary(). OS terminates other threads (except the one 557 // calling ProcessExit or FreeLibrary). So, it might be safe to access the 558 // __kmp_threads[] without taking the forkjoin_lock. However, in fact, some 559 // threads can be still alive here, although being about to be terminated. The 560 // threads in the array with ds_thread==0 are most suspicious. Actually, it 561 // can be not safe to access the __kmp_threads[]. 562 563 // TODO: does it make sense to check __kmp_roots[] ? 564 565 // Let's check that there are no other alive threads registered with the OMP 566 // lib. 567 while (1) { 568 thread_count = 0; 569 for (i = 0; i < __kmp_threads_capacity; ++i) { 570 if (!__kmp_threads) 571 continue; 572 kmp_info_t *th = __kmp_threads[i]; 573 if (th == NULL) 574 continue; 575 int gtid = th->th.th_info.ds.ds_gtid; 576 if (gtid == gtid_req) 577 continue; 578 if (gtid < 0) 579 continue; 580 DWORD exit_val; 581 int alive = __kmp_is_thread_alive(th, &exit_val); 582 if (alive) { 583 ++thread_count; 584 } 585 } 586 if (thread_count == 0) 587 break; // success 588 } 589 590 // Assume that I'm alone. Now it might be safe to check and reset locks. 591 // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset. 592 __kmp_reset_lock(&__kmp_forkjoin_lock); 593 #ifdef KMP_DEBUG 594 __kmp_reset_lock(&__kmp_stdio_lock); 595 #endif // KMP_DEBUG 596 } 597 598 BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) { 599 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 600 601 switch (fdwReason) { 602 603 case DLL_PROCESS_ATTACH: 604 KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n")); 605 606 return TRUE; 607 608 case DLL_PROCESS_DETACH: 609 KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific())); 610 611 if (lpReserved != NULL) { 612 // lpReserved is used for telling the difference: 613 // lpReserved == NULL when FreeLibrary() was called, 614 // lpReserved != NULL when the process terminates. 615 // When FreeLibrary() is called, worker threads remain alive. So they will 616 // release the forkjoin lock by themselves. When the process terminates, 617 // worker threads disappear triggering the problem of unreleased forkjoin 618 // lock as described below. 619 620 // A worker thread can take the forkjoin lock. The problem comes up if 621 // that worker thread becomes dead before it releases the forkjoin lock. 622 // The forkjoin lock remains taken, while the thread executing 623 // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try 624 // to take the forkjoin lock and will always fail, so that the application 625 // will never finish [normally]. This scenario is possible if 626 // __kmpc_end() has not been executed. It looks like it's not a corner 627 // case, but common cases: 628 // - the main function was compiled by an alternative compiler; 629 // - the main function was compiled by icl but without /Qopenmp 630 // (application with plugins); 631 // - application terminates by calling C exit(), Fortran CALL EXIT() or 632 // Fortran STOP. 633 // - alive foreign thread prevented __kmpc_end from doing cleanup. 634 // 635 // This is a hack to work around the problem. 636 // TODO: !!! figure out something better. 
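
// Sketch of why the lock reset above is needed: if the OS kills the thread
// that holds a spin lock during process shutdown, the lock is never released,
// so the surviving thread simply re-initializes ("breaks") it.  Not part of
// the library; flag_lock is a hypothetical stand-in for kmp_bootstrap_lock_t.
#if 0 // illustrative sketch only; not compiled
#include <atomic>

struct flag_lock {
  std::atomic<int> taken{0};
};

static void flag_lock_acquire(flag_lock *l) {
  while (l->taken.exchange(1, std::memory_order_acquire))
    ; // spin until the previous owner releases
}

static void flag_lock_release(flag_lock *l) {
  l->taken.store(0, std::memory_order_release);
}

// If the owner was terminated while holding the lock, nobody will ever call
// release; forcing the flag back to "free" lets the remaining thread make
// progress, which is the same idea as __kmp_reset_lock() above.
static void flag_lock_break(flag_lock *l) { l->taken.store(0); }
#endif
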
637 __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific()); 638 } 639 640 __kmp_internal_end_library(__kmp_gtid_get_specific()); 641 642 return TRUE; 643 644 case DLL_THREAD_ATTACH: 645 KA_TRACE(10, ("DllMain: THREAD_ATTACH\n")); 646 647 /* if we want to register new siblings all the time here call 648 * __kmp_get_gtid(); */ 649 return TRUE; 650 651 case DLL_THREAD_DETACH: 652 KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific())); 653 654 __kmp_internal_end_thread(__kmp_gtid_get_specific()); 655 return TRUE; 656 } 657 658 return TRUE; 659 } 660 661 #endif /* KMP_OS_WINDOWS */ 662 #endif /* KMP_DYNAMIC_LIB */ 663 664 /* Change the library type to "status" and return the old type */ 665 /* called from within initialization routines where __kmp_initz_lock is held */ 666 int __kmp_change_library(int status) { 667 int old_status; 668 669 old_status = __kmp_yield_init & 670 1; // check whether KMP_LIBRARY=throughput (even init count) 671 672 if (status) { 673 __kmp_yield_init |= 1; // throughput => turnaround (odd init count) 674 } else { 675 __kmp_yield_init &= ~1; // turnaround => throughput (even init count) 676 } 677 678 return old_status; // return previous setting of whether 679 // KMP_LIBRARY=throughput 680 } 681 682 /* __kmp_parallel_deo -- Wait until it's our turn. */ 683 void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { 684 int gtid = *gtid_ref; 685 #ifdef BUILD_PARALLEL_ORDERED 686 kmp_team_t *team = __kmp_team_from_gtid(gtid); 687 #endif /* BUILD_PARALLEL_ORDERED */ 688 689 if (__kmp_env_consistency_check) { 690 if (__kmp_threads[gtid]->th.th_root->r.r_active) 691 #if KMP_USE_DYNAMIC_LOCK 692 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0); 693 #else 694 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL); 695 #endif 696 } 697 #ifdef BUILD_PARALLEL_ORDERED 698 if (!team->t.t_serialized) { 699 KMP_MB(); 700 KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), 701 KMP_EQ, NULL); 702 KMP_MB(); 703 } 704 #endif /* BUILD_PARALLEL_ORDERED */ 705 } 706 707 /* __kmp_parallel_dxo -- Signal the next task. */ 708 void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { 709 int gtid = *gtid_ref; 710 #ifdef BUILD_PARALLEL_ORDERED 711 int tid = __kmp_tid_from_gtid(gtid); 712 kmp_team_t *team = __kmp_team_from_gtid(gtid); 713 #endif /* BUILD_PARALLEL_ORDERED */ 714 715 if (__kmp_env_consistency_check) { 716 if (__kmp_threads[gtid]->th.th_root->r.r_active) 717 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref); 718 } 719 #ifdef BUILD_PARALLEL_ORDERED 720 if (!team->t.t_serialized) { 721 KMP_MB(); /* Flush all pending memory write invalidates. */ 722 723 /* use the tid of the next thread in this team */ 724 /* TODO replace with general release procedure */ 725 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc); 726 727 #if OMPT_SUPPORT && OMPT_BLAME 728 if (ompt_enabled && 729 ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { 730 /* accept blame for "ordered" waiting */ 731 kmp_info_t *this_thread = __kmp_threads[gtid]; 732 ompt_callbacks.ompt_callback(ompt_event_release_ordered)( 733 this_thread->th.ompt_thread_info.wait_id); 734 } 735 #endif 736 737 KMP_MB(); /* Flush all pending memory write invalidates. 
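
// Sketch of the "ordered" turn-taking protocol implemented by
// __kmp_parallel_deo/__kmp_parallel_dxo above: wait until a shared turn value
// equals this thread's team-local id, then pass the turn to (tid+1)%nproc.
// Not part of the library; g_turn stands in for team->t.t_ordered.dt.t_value.
#if 0 // illustrative sketch only; not compiled
#include <atomic>

static std::atomic<int> g_turn{0};

// Enter the ordered region: spin until it is this thread's turn.
static void ordered_enter(int tid) {
  while (g_turn.load(std::memory_order_acquire) != tid)
    ; // in the runtime this is KMP_WAIT_YIELD, which yields while it spins
}

// Leave the ordered region: hand the turn to the next thread in the team.
static void ordered_exit(int tid, int nproc) {
  g_turn.store((tid + 1) % nproc, std::memory_order_release);
}
#endif
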
*/ 738 } 739 #endif /* BUILD_PARALLEL_ORDERED */ 740 } 741 742 /* ------------------------------------------------------------------------ */ 743 /* The BARRIER for a SINGLE process section is always explicit */ 744 745 int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) { 746 int status; 747 kmp_info_t *th; 748 kmp_team_t *team; 749 750 if (!TCR_4(__kmp_init_parallel)) 751 __kmp_parallel_initialize(); 752 753 th = __kmp_threads[gtid]; 754 team = th->th.th_team; 755 status = 0; 756 757 th->th.th_ident = id_ref; 758 759 if (team->t.t_serialized) { 760 status = 1; 761 } else { 762 kmp_int32 old_this = th->th.th_local.this_construct; 763 764 ++th->th.th_local.this_construct; 765 /* try to set team count to thread count--success means thread got the 766 single block */ 767 /* TODO: Should this be acquire or release? */ 768 if (team->t.t_construct == old_this) { 769 status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this, 770 th->th.th_local.this_construct); 771 } 772 #if USE_ITT_BUILD 773 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && 774 KMP_MASTER_GTID(gtid) && 775 #if OMP_40_ENABLED 776 th->th.th_teams_microtask == NULL && 777 #endif 778 team->t.t_active_level == 779 1) { // Only report metadata by master of active team at level 1 780 __kmp_itt_metadata_single(id_ref); 781 } 782 #endif /* USE_ITT_BUILD */ 783 } 784 785 if (__kmp_env_consistency_check) { 786 if (status && push_ws) { 787 __kmp_push_workshare(gtid, ct_psingle, id_ref); 788 } else { 789 __kmp_check_workshare(gtid, ct_psingle, id_ref); 790 } 791 } 792 #if USE_ITT_BUILD 793 if (status) { 794 __kmp_itt_single_start(gtid); 795 } 796 #endif /* USE_ITT_BUILD */ 797 return status; 798 } 799 800 void __kmp_exit_single(int gtid) { 801 #if USE_ITT_BUILD 802 __kmp_itt_single_end(gtid); 803 #endif /* USE_ITT_BUILD */ 804 if (__kmp_env_consistency_check) 805 __kmp_pop_workshare(gtid, ct_psingle, NULL); 806 } 807 808 /* determine if we can go parallel or must use a serialized parallel region and 809 * how many threads we can use 810 * set_nproc is the number of threads requested for the team 811 * returns 0 if we should serialize or only use one thread, 812 * otherwise the number of threads to use 813 * The forkjoin lock is held by the caller. */ 814 static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, 815 int master_tid, int set_nthreads 816 #if OMP_40_ENABLED 817 , 818 int enter_teams 819 #endif /* OMP_40_ENABLED */ 820 ) { 821 int capacity; 822 int new_nthreads; 823 KMP_DEBUG_ASSERT(__kmp_init_serial); 824 KMP_DEBUG_ASSERT(root && parent_team); 825 826 // If dyn-var is set, dynamically adjust the number of desired threads, 827 // according to the method specified by dynamic_mode. 828 new_nthreads = set_nthreads; 829 if (!get__dynamic_2(parent_team, master_tid)) { 830 ; 831 } 832 #ifdef USE_LOAD_BALANCE 833 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) { 834 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads); 835 if (new_nthreads == 1) { 836 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " 837 "reservation to 1 thread\n", 838 master_tid)); 839 return 1; 840 } 841 if (new_nthreads < set_nthreads) { 842 KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " 843 "reservation to %d threads\n", 844 master_tid, new_nthreads)); 845 } 846 } 847 #endif /* USE_LOAD_BALANCE */ 848 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) { 849 new_nthreads = __kmp_avail_proc - __kmp_nth + 850 (root->r.r_active ? 
1 : root->r.r_hot_team->t.t_nproc); 851 if (new_nthreads <= 1) { 852 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " 853 "reservation to 1 thread\n", 854 master_tid)); 855 return 1; 856 } 857 if (new_nthreads < set_nthreads) { 858 KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " 859 "reservation to %d threads\n", 860 master_tid, new_nthreads)); 861 } else { 862 new_nthreads = set_nthreads; 863 } 864 } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) { 865 if (set_nthreads > 2) { 866 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]); 867 new_nthreads = (new_nthreads % set_nthreads) + 1; 868 if (new_nthreads == 1) { 869 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " 870 "reservation to 1 thread\n", 871 master_tid)); 872 return 1; 873 } 874 if (new_nthreads < set_nthreads) { 875 KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " 876 "reservation to %d threads\n", 877 master_tid, new_nthreads)); 878 } 879 } 880 } else { 881 KMP_ASSERT(0); 882 } 883 884 // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT. 885 if (__kmp_nth + new_nthreads - 886 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > 887 __kmp_max_nth) { 888 int tl_nthreads = __kmp_max_nth - __kmp_nth + 889 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); 890 if (tl_nthreads <= 0) { 891 tl_nthreads = 1; 892 } 893 894 // If dyn-var is false, emit a 1-time warning. 895 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { 896 __kmp_reserve_warn = 1; 897 __kmp_msg(kmp_ms_warning, 898 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads), 899 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); 900 } 901 if (tl_nthreads == 1) { 902 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced " 903 "reservation to 1 thread\n", 904 master_tid)); 905 return 1; 906 } 907 KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced " 908 "reservation to %d threads\n", 909 master_tid, tl_nthreads)); 910 new_nthreads = tl_nthreads; 911 } 912 913 // Check if the threads array is large enough, or needs expanding. 914 // See comment in __kmp_register_root() about the adjustment if 915 // __kmp_threads[0] == NULL. 916 capacity = __kmp_threads_capacity; 917 if (TCR_PTR(__kmp_threads[0]) == NULL) { 918 --capacity; 919 } 920 if (__kmp_nth + new_nthreads - 921 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > 922 capacity) { 923 // Expand the threads array. 924 int slotsRequired = __kmp_nth + new_nthreads - 925 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) - 926 capacity; 927 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired); 928 if (slotsAdded < slotsRequired) { 929 // The threads array was not expanded enough. 930 new_nthreads -= (slotsRequired - slotsAdded); 931 KMP_ASSERT(new_nthreads >= 1); 932 933 // If dyn-var is false, emit a 1-time warning. 
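
// Sketch of the capping arithmetic used above when honoring KMP_ALL_THREADS /
// KMP_MAX_THREADS: a new team may only add threads up to the global limit,
// counting workers it can reuse from the hot team.  Not part of the library;
// the parameter names are hypothetical.
#if 0 // illustrative sketch only; not compiled
// 'in_use' is the current global thread count, 'reusable' is the number of
// existing hot-team workers the new team would reuse rather than create
// (1 when only the root itself is reusable).
static int cap_reservation(int requested, int in_use, int reusable,
                           int max_nth) {
  int limit = max_nth - in_use + reusable;
  if (limit <= 0)
    limit = 1; // always allow at least the master thread
  return requested < limit ? requested : limit;
}
#endif
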
934 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { 935 __kmp_reserve_warn = 1; 936 if (__kmp_tp_cached) { 937 __kmp_msg(kmp_ms_warning, 938 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads), 939 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), 940 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); 941 } else { 942 __kmp_msg(kmp_ms_warning, 943 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads), 944 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null); 945 } 946 } 947 } 948 } 949 950 #ifdef KMP_DEBUG 951 if (new_nthreads == 1) { 952 KC_TRACE(10, 953 ("__kmp_reserve_threads: T#%d serializing team after reclaiming " 954 "dead roots and rechecking; requested %d threads\n", 955 __kmp_get_gtid(), set_nthreads)); 956 } else { 957 KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested" 958 " %d threads\n", 959 __kmp_get_gtid(), new_nthreads, set_nthreads)); 960 } 961 #endif // KMP_DEBUG 962 return new_nthreads; 963 } 964 965 /* Allocate threads from the thread pool and assign them to the new team. We are 966 assured that there are enough threads available, because we checked on that 967 earlier within critical section forkjoin */ 968 static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, 969 kmp_info_t *master_th, int master_gtid) { 970 int i; 971 int use_hot_team; 972 973 KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc)); 974 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid()); 975 KMP_MB(); 976 977 /* first, let's setup the master thread */ 978 master_th->th.th_info.ds.ds_tid = 0; 979 master_th->th.th_team = team; 980 master_th->th.th_team_nproc = team->t.t_nproc; 981 master_th->th.th_team_master = master_th; 982 master_th->th.th_team_serialized = FALSE; 983 master_th->th.th_dispatch = &team->t.t_dispatch[0]; 984 985 /* make sure we are not the optimized hot team */ 986 #if KMP_NESTED_HOT_TEAMS 987 use_hot_team = 0; 988 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams; 989 if (hot_teams) { // hot teams array is not allocated if 990 // KMP_HOT_TEAMS_MAX_LEVEL=0 991 int level = team->t.t_active_level - 1; // index in array of hot teams 992 if (master_th->th.th_teams_microtask) { // are we inside the teams? 
993 if (master_th->th.th_teams_size.nteams > 1) { 994 ++level; // level was not increased in teams construct for 995 // team_of_masters 996 } 997 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && 998 master_th->th.th_teams_level == team->t.t_level) { 999 ++level; // level was not increased in teams construct for 1000 // team_of_workers before the parallel 1001 } // team->t.t_level will be increased inside parallel 1002 } 1003 if (level < __kmp_hot_teams_max_level) { 1004 if (hot_teams[level].hot_team) { 1005 // hot team has already been allocated for given level 1006 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); 1007 use_hot_team = 1; // the team is ready to use 1008 } else { 1009 use_hot_team = 0; // AC: threads are not allocated yet 1010 hot_teams[level].hot_team = team; // remember new hot team 1011 hot_teams[level].hot_team_nth = team->t.t_nproc; 1012 } 1013 } else { 1014 use_hot_team = 0; 1015 } 1016 } 1017 #else 1018 use_hot_team = team == root->r.r_hot_team; 1019 #endif 1020 if (!use_hot_team) { 1021 1022 /* install the master thread */ 1023 team->t.t_threads[0] = master_th; 1024 __kmp_initialize_info(master_th, team, 0, master_gtid); 1025 1026 /* now, install the worker threads */ 1027 for (i = 1; i < team->t.t_nproc; i++) { 1028 1029 /* fork or reallocate a new thread and install it in team */ 1030 kmp_info_t *thr = __kmp_allocate_thread(root, team, i); 1031 team->t.t_threads[i] = thr; 1032 KMP_DEBUG_ASSERT(thr); 1033 KMP_DEBUG_ASSERT(thr->th.th_team == team); 1034 /* align team and thread arrived states */ 1035 KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived " 1036 "T#%d(%d:%d) join =%llu, plain=%llu\n", 1037 __kmp_gtid_from_tid(0, team), team->t.t_id, 0, 1038 __kmp_gtid_from_tid(i, team), team->t.t_id, i, 1039 team->t.t_bar[bs_forkjoin_barrier].b_arrived, 1040 team->t.t_bar[bs_plain_barrier].b_arrived)); 1041 #if OMP_40_ENABLED 1042 thr->th.th_teams_microtask = master_th->th.th_teams_microtask; 1043 thr->th.th_teams_level = master_th->th.th_teams_level; 1044 thr->th.th_teams_size = master_th->th.th_teams_size; 1045 #endif 1046 { // Initialize threads' barrier data. 1047 int b; 1048 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar; 1049 for (b = 0; b < bs_last_barrier; ++b) { 1050 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 1051 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 1052 #if USE_DEBUGGER 1053 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 1054 #endif 1055 }; // for b 1056 } 1057 } 1058 1059 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 1060 __kmp_partition_places(team); 1061 #endif 1062 } 1063 1064 KMP_MB(); 1065 } 1066 1067 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1068 // Propagate any changes to the floating point control registers out to the team 1069 // We try to avoid unnecessary writes to the relevant cache line in the team 1070 // structure, so we don't make changes unless they are needed. 1071 inline static void propagateFPControl(kmp_team_t *team) { 1072 if (__kmp_inherit_fp_control) { 1073 kmp_int16 x87_fpu_control_word; 1074 kmp_uint32 mxcsr; 1075 1076 // Get master values of FPU control flags (both X87 and vector) 1077 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); 1078 __kmp_store_mxcsr(&mxcsr); 1079 mxcsr &= KMP_X86_MXCSR_MASK; 1080 1081 // There is no point looking at t_fp_control_saved here. 1082 // If it is TRUE, we still have to update the values if they are different from 1083 // those we now have. 
1084 // If it is FALSE we didn't save anything yet, but our objective is the same. We 1085 // have to ensure that the values in the team are the same as those we have. 1086 // So, this code achieves what we need whether or not t_fp_control_saved is 1087 // true. By checking whether the value needs updating we avoid unnecessary 1088 // writes that would put the cache-line into a written state, causing all 1089 // threads in the team to have to read it again. 1090 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word); 1091 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr); 1092 // Although we don't use this value, other code in the runtime wants to know 1093 // whether it should restore them. So we must ensure it is correct. 1094 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE); 1095 } else { 1096 // Similarly here. Don't write to this cache-line in the team structure 1097 // unless we have to. 1098 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE); 1099 } 1100 } 1101 1102 // Do the opposite, setting the hardware registers to the updated values from 1103 // the team. 1104 inline static void updateHWFPControl(kmp_team_t *team) { 1105 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) { 1106 // Only reset the fp control regs if they have been changed in the team. 1107 // the parallel region that we are exiting. 1108 kmp_int16 x87_fpu_control_word; 1109 kmp_uint32 mxcsr; 1110 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); 1111 __kmp_store_mxcsr(&mxcsr); 1112 mxcsr &= KMP_X86_MXCSR_MASK; 1113 1114 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) { 1115 __kmp_clear_x87_fpu_status_word(); 1116 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word); 1117 } 1118 1119 if (team->t.t_mxcsr != mxcsr) { 1120 __kmp_load_mxcsr(&team->t.t_mxcsr); 1121 } 1122 } 1123 } 1124 #else 1125 #define propagateFPControl(x) ((void)0) 1126 #define updateHWFPControl(x) ((void)0) 1127 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1128 1129 static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, 1130 int realloc); // forward declaration 1131 1132 /* Run a parallel region that has been serialized, so runs only in a team of the 1133 single master thread. 
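
// Sketch of the write-avoidance idiom behind KMP_CHECK_UPDATE as used by
// propagateFPControl above: skip the store when the destination already holds
// the desired value so the containing cache line is not needlessly dirtied
// and other threads keep their shared copy.  Not part of the library.
#if 0 // illustrative sketch only; not compiled
template <typename T> static inline void check_update(T &dst, const T &src) {
  if (dst != src)
    dst = src;
}
#endif
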
*/ 1134 void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 1135 kmp_info_t *this_thr; 1136 kmp_team_t *serial_team; 1137 1138 KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid)); 1139 1140 /* Skip all this code for autopar serialized loops since it results in 1141 unacceptable overhead */ 1142 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 1143 return; 1144 1145 if (!TCR_4(__kmp_init_parallel)) 1146 __kmp_parallel_initialize(); 1147 1148 this_thr = __kmp_threads[global_tid]; 1149 serial_team = this_thr->th.th_serial_team; 1150 1151 /* utilize the serialized team held by this thread */ 1152 KMP_DEBUG_ASSERT(serial_team); 1153 KMP_MB(); 1154 1155 if (__kmp_tasking_mode != tskm_immediate_exec) { 1156 KMP_DEBUG_ASSERT( 1157 this_thr->th.th_task_team == 1158 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); 1159 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] == 1160 NULL); 1161 KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / " 1162 "team %p, new task_team = NULL\n", 1163 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 1164 this_thr->th.th_task_team = NULL; 1165 } 1166 1167 #if OMP_40_ENABLED 1168 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; 1169 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 1170 proc_bind = proc_bind_false; 1171 } else if (proc_bind == proc_bind_default) { 1172 // No proc_bind clause was specified, so use the current value 1173 // of proc-bind-var for this parallel region. 1174 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; 1175 } 1176 // Reset for next parallel region 1177 this_thr->th.th_set_proc_bind = proc_bind_default; 1178 #endif /* OMP_40_ENABLED */ 1179 1180 if (this_thr->th.th_team != serial_team) { 1181 // Nested level will be an index in the nested nthreads array 1182 int level = this_thr->th.th_team->t.t_level; 1183 1184 if (serial_team->t.t_serialized) { 1185 /* this serial team was already used 1186 TODO increase performance by making this locks more specific */ 1187 kmp_team_t *new_team; 1188 1189 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 1190 1191 #if OMPT_SUPPORT 1192 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); 1193 #endif 1194 1195 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1, 1196 #if OMPT_SUPPORT 1197 ompt_parallel_id, 1198 #endif 1199 #if OMP_40_ENABLED 1200 proc_bind, 1201 #endif 1202 &this_thr->th.th_current_task->td_icvs, 1203 0 USE_NESTED_HOT_ARG(NULL)); 1204 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 1205 KMP_ASSERT(new_team); 1206 1207 /* setup new serialized team and install it */ 1208 new_team->t.t_threads[0] = this_thr; 1209 new_team->t.t_parent = this_thr->th.th_team; 1210 serial_team = new_team; 1211 this_thr->th.th_serial_team = serial_team; 1212 1213 KF_TRACE( 1214 10, 1215 ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", 1216 global_tid, serial_team)); 1217 1218 /* TODO the above breaks the requirement that if we run out of resources, 1219 then we can still guarantee that serialized teams are ok, since we may 1220 need to allocate a new one */ 1221 } else { 1222 KF_TRACE( 1223 10, 1224 ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", 1225 global_tid, serial_team)); 1226 } 1227 1228 /* we have to initialize this serial team */ 1229 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 1230 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 1231 
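
// Sketch of how nested serialized regions keep per-level dispatch state: each
// additional level pushes one buffer onto a singly linked stack (see the
// th_disp_buffer handling further below).  Not part of the library;
// disp_buffer is a hypothetical stand-in for dispatch_private_info_t.
#if 0 // illustrative sketch only; not compiled
#include <cstdlib>

struct disp_buffer {
  disp_buffer *next;
  // ...per-level dispatch state would live here...
};

static void push_level(disp_buffer **top) {
  disp_buffer *b = (disp_buffer *)std::calloc(1, sizeof(disp_buffer));
  if (!b)
    return;
  b->next = *top; // new level links to the enclosing one
  *top = b;
}

static void pop_level(disp_buffer **top) {
  disp_buffer *b = *top;
  *top = b->next; // restore the enclosing level's buffer
  std::free(b);
}
#endif
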
KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team); 1232 serial_team->t.t_ident = loc; 1233 serial_team->t.t_serialized = 1; 1234 serial_team->t.t_nproc = 1; 1235 serial_team->t.t_parent = this_thr->th.th_team; 1236 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched; 1237 this_thr->th.th_team = serial_team; 1238 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; 1239 1240 KF_TRACE(10, ("__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid, 1241 this_thr->th.th_current_task)); 1242 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1); 1243 this_thr->th.th_current_task->td_flags.executing = 0; 1244 1245 __kmp_push_current_task_to_thread(this_thr, serial_team, 0); 1246 1247 /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an 1248 implicit task for each serialized task represented by 1249 team->t.t_serialized? */ 1250 copy_icvs(&this_thr->th.th_current_task->td_icvs, 1251 &this_thr->th.th_current_task->td_parent->td_icvs); 1252 1253 // Thread value exists in the nested nthreads array for the next nested 1254 // level 1255 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { 1256 this_thr->th.th_current_task->td_icvs.nproc = 1257 __kmp_nested_nth.nth[level + 1]; 1258 } 1259 1260 #if OMP_40_ENABLED 1261 if (__kmp_nested_proc_bind.used && 1262 (level + 1 < __kmp_nested_proc_bind.used)) { 1263 this_thr->th.th_current_task->td_icvs.proc_bind = 1264 __kmp_nested_proc_bind.bind_types[level + 1]; 1265 } 1266 #endif /* OMP_40_ENABLED */ 1267 1268 #if USE_DEBUGGER 1269 serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger. 1270 #endif 1271 this_thr->th.th_info.ds.ds_tid = 0; 1272 1273 /* set thread cache values */ 1274 this_thr->th.th_team_nproc = 1; 1275 this_thr->th.th_team_master = this_thr; 1276 this_thr->th.th_team_serialized = 1; 1277 1278 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; 1279 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; 1280 1281 propagateFPControl(serial_team); 1282 1283 /* check if we need to allocate dispatch buffers stack */ 1284 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); 1285 if (!serial_team->t.t_dispatch->th_disp_buffer) { 1286 serial_team->t.t_dispatch->th_disp_buffer = 1287 (dispatch_private_info_t *)__kmp_allocate( 1288 sizeof(dispatch_private_info_t)); 1289 } 1290 this_thr->th.th_dispatch = serial_team->t.t_dispatch; 1291 1292 #if OMPT_SUPPORT 1293 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); 1294 __ompt_team_assign_id(serial_team, ompt_parallel_id); 1295 #endif 1296 1297 KMP_MB(); 1298 1299 } else { 1300 /* this serialized team is already being used, 1301 * that's fine, just add another nested level */ 1302 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 1303 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 1304 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 1305 ++serial_team->t.t_serialized; 1306 this_thr->th.th_team_serialized = serial_team->t.t_serialized; 1307 1308 // Nested level will be an index in the nested nthreads array 1309 int level = this_thr->th.th_team->t.t_level; 1310 // Thread value exists in the nested nthreads array for the next nested 1311 // level 1312 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { 1313 this_thr->th.th_current_task->td_icvs.nproc = 1314 __kmp_nested_nth.nth[level + 1]; 1315 } 1316 serial_team->t.t_level++; 1317 KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level " 1318 "of serial team %p to %d\n", 1319 global_tid, 
serial_team, serial_team->t.t_level)); 1320 1321 /* allocate/push dispatch buffers stack */ 1322 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); 1323 { 1324 dispatch_private_info_t *disp_buffer = 1325 (dispatch_private_info_t *)__kmp_allocate( 1326 sizeof(dispatch_private_info_t)); 1327 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; 1328 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; 1329 } 1330 this_thr->th.th_dispatch = serial_team->t.t_dispatch; 1331 1332 KMP_MB(); 1333 } 1334 #if OMP_40_ENABLED 1335 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq); 1336 #endif 1337 1338 if (__kmp_env_consistency_check) 1339 __kmp_push_parallel(global_tid, NULL); 1340 } 1341 1342 /* most of the work for a fork */ 1343 /* return true if we really went parallel, false if serialized */ 1344 int __kmp_fork_call(ident_t *loc, int gtid, 1345 enum fork_context_e call_context, // Intel, GNU, ... 1346 kmp_int32 argc, 1347 #if OMPT_SUPPORT 1348 void *unwrapped_task, 1349 #endif 1350 microtask_t microtask, launch_t invoker, 1351 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 1352 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1353 va_list *ap 1354 #else 1355 va_list ap 1356 #endif 1357 ) { 1358 void **argv; 1359 int i; 1360 int master_tid; 1361 int master_this_cons; 1362 kmp_team_t *team; 1363 kmp_team_t *parent_team; 1364 kmp_info_t *master_th; 1365 kmp_root_t *root; 1366 int nthreads; 1367 int master_active; 1368 int master_set_numthreads; 1369 int level; 1370 #if OMP_40_ENABLED 1371 int active_level; 1372 int teams_level; 1373 #endif 1374 #if KMP_NESTED_HOT_TEAMS 1375 kmp_hot_team_ptr_t **p_hot_teams; 1376 #endif 1377 { // KMP_TIME_BLOCK 1378 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); 1379 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); 1380 1381 KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid)); 1382 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) { 1383 /* Some systems prefer the stack for the root thread(s) to start with */ 1384 /* some gap from the parent stack to prevent false sharing. 
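
// Sketch of the stack-gap trick described above: shift each root thread's
// frames by a thread-specific amount so their hot locals do not land on the
// same cache lines.  Not part of the library; assumes a platform that
// provides alloca() (Linux shown; other platforms differ).
#if 0 // illustrative sketch only; not compiled
#include <alloca.h>
#include <cstddef>

static void run_with_stack_gap(size_t pad, void (*work)(void)) {
  volatile char *gap = (volatile char *)alloca(pad);
  gap[0] = 0; // touch the block so the allocation is really materialized
  work();     // everything called from here sits 'pad' bytes lower
}
#endif
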
*/ 1385 void *dummy = KMP_ALLOCA(__kmp_stkpadding); 1386 /* These 2 lines below are so this does not get optimized out */ 1387 if (__kmp_stkpadding > KMP_MAX_STKPADDING) 1388 __kmp_stkpadding += (short)((kmp_int64)dummy); 1389 } 1390 1391 /* initialize if needed */ 1392 KMP_DEBUG_ASSERT( 1393 __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown 1394 if (!TCR_4(__kmp_init_parallel)) 1395 __kmp_parallel_initialize(); 1396 1397 /* setup current data */ 1398 master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with 1399 // shutdown 1400 parent_team = master_th->th.th_team; 1401 master_tid = master_th->th.th_info.ds.ds_tid; 1402 master_this_cons = master_th->th.th_local.this_construct; 1403 root = master_th->th.th_root; 1404 master_active = root->r.r_active; 1405 master_set_numthreads = master_th->th.th_set_nproc; 1406 1407 #if OMPT_SUPPORT 1408 ompt_parallel_id_t ompt_parallel_id; 1409 ompt_task_id_t ompt_task_id; 1410 ompt_frame_t *ompt_frame; 1411 ompt_task_id_t my_task_id; 1412 ompt_parallel_id_t my_parallel_id; 1413 1414 if (ompt_enabled) { 1415 ompt_parallel_id = __ompt_parallel_id_new(gtid); 1416 ompt_task_id = __ompt_get_task_id_internal(0); 1417 ompt_frame = __ompt_get_task_frame_internal(0); 1418 } 1419 #endif 1420 1421 // Nested level will be an index in the nested nthreads array 1422 level = parent_team->t.t_level; 1423 // used to launch non-serial teams even if nested is not allowed 1424 active_level = parent_team->t.t_active_level; 1425 #if OMP_40_ENABLED 1426 // needed to check nesting inside the teams 1427 teams_level = master_th->th.th_teams_level; 1428 #endif 1429 #if KMP_NESTED_HOT_TEAMS 1430 p_hot_teams = &master_th->th.th_hot_teams; 1431 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) { 1432 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate( 1433 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); 1434 (*p_hot_teams)[0].hot_team = root->r.r_hot_team; 1435 // it is either actual or not needed (when active_level > 0) 1436 (*p_hot_teams)[0].hot_team_nth = 1; 1437 } 1438 #endif 1439 1440 #if OMPT_SUPPORT 1441 if (ompt_enabled && 1442 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { 1443 int team_size = master_set_numthreads; 1444 1445 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( 1446 ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task, 1447 OMPT_INVOKER(call_context)); 1448 } 1449 #endif 1450 1451 master_th->th.th_ident = loc; 1452 1453 #if OMP_40_ENABLED 1454 if (master_th->th.th_teams_microtask && ap && 1455 microtask != (microtask_t)__kmp_teams_master && level == teams_level) { 1456 // AC: This is start of parallel that is nested inside teams construct. 1457 // The team is actual (hot), all workers are ready at the fork barrier. 1458 // No lock needed to initialize the team a bit, then free workers. 
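
// Sketch of how the fork path below transfers its variadic arguments into a
// flat argv array for the microtask.  Not part of the library; fork_like and
// its fixed bound of 16 arguments are hypothetical.
#if 0 // illustrative sketch only; not compiled
#include <cstdarg>

// Copy 'argc' pointer arguments from a va_list into 'argv', the way the code
// below fills team->t.t_argv from the caller's argument list.
static void collect_args(void **argv, int argc, va_list ap) {
  for (int i = 0; i < argc; ++i)
    argv[i] = va_arg(ap, void *);
}

static void fork_like(int argc, ...) {
  void *argv[16]; // small fixed bound, enough for the sketch
  if (argc > 16)
    argc = 16;
  va_list ap;
  va_start(ap, argc);
  collect_args(argv, argc, ap);
  va_end(ap);
  // ...argv[0..argc-1] would now be handed to the microtask...
}
#endif
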
1459 parent_team->t.t_ident = loc; 1460 __kmp_alloc_argv_entries(argc, parent_team, TRUE); 1461 parent_team->t.t_argc = argc; 1462 argv = (void **)parent_team->t.t_argv; 1463 for (i = argc - 1; i >= 0; --i) 1464 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 1465 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1466 *argv++ = va_arg(*ap, void *); 1467 #else 1468 *argv++ = va_arg(ap, void *); 1469 #endif 1470 // Increment our nested depth levels, but not increase the serialization 1471 if (parent_team == master_th->th.th_serial_team) { 1472 // AC: we are in serialized parallel 1473 __kmpc_serialized_parallel(loc, gtid); 1474 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); 1475 // AC: need this in order enquiry functions work 1476 // correctly, will restore at join time 1477 parent_team->t.t_serialized--; 1478 #if OMPT_SUPPORT 1479 void *dummy; 1480 void **exit_runtime_p; 1481 1482 ompt_lw_taskteam_t lw_taskteam; 1483 1484 if (ompt_enabled) { 1485 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task, 1486 ompt_parallel_id); 1487 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1488 exit_runtime_p = 1489 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1490 1491 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1492 1493 #if OMPT_TRACE 1494 /* OMPT implicit task begin */ 1495 my_task_id = lw_taskteam.ompt_task_info.task_id; 1496 my_parallel_id = parent_team->t.ompt_team_info.parallel_id; 1497 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1498 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1499 my_parallel_id, my_task_id); 1500 } 1501 #endif 1502 1503 /* OMPT state */ 1504 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1505 } else { 1506 exit_runtime_p = &dummy; 1507 } 1508 #endif 1509 1510 { 1511 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1512 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1513 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv 1514 #if OMPT_SUPPORT 1515 , 1516 exit_runtime_p 1517 #endif 1518 ); 1519 } 1520 1521 #if OMPT_SUPPORT 1522 *exit_runtime_p = NULL; 1523 if (ompt_enabled) { 1524 #if OMPT_TRACE 1525 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1526 1527 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1528 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1529 ompt_parallel_id, ompt_task_id); 1530 } 1531 1532 __ompt_lw_taskteam_unlink(master_th); 1533 // reset clear the task id only after unlinking the task 1534 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1535 #endif 1536 1537 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1538 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1539 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); 1540 } 1541 master_th->th.ompt_thread_info.state = ompt_state_overhead; 1542 } 1543 #endif 1544 return TRUE; 1545 } 1546 1547 parent_team->t.t_pkfn = microtask; 1548 #if OMPT_SUPPORT 1549 parent_team->t.ompt_team_info.microtask = unwrapped_task; 1550 #endif 1551 parent_team->t.t_invoke = invoker; 1552 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel); 1553 parent_team->t.t_active_level++; 1554 parent_team->t.t_level++; 1555 1556 /* Change number of threads in the team if requested */ 1557 if (master_set_numthreads) { // The parallel has num_threads clause 1558 if (master_set_numthreads < master_th->th.th_teams_size.nth) { 1559 // AC: only can reduce number of threads dynamically, can't increase 1560 
kmp_info_t **other_threads = parent_team->t.t_threads; 1561 parent_team->t.t_nproc = master_set_numthreads; 1562 for (i = 0; i < master_set_numthreads; ++i) { 1563 other_threads[i]->th.th_team_nproc = master_set_numthreads; 1564 } 1565 // Keep extra threads hot in the team for possible next parallels 1566 } 1567 master_th->th.th_set_nproc = 0; 1568 } 1569 1570 #if USE_DEBUGGER 1571 if (__kmp_debugging) { // Let debugger override number of threads. 1572 int nth = __kmp_omp_num_threads(loc); 1573 if (nth > 0) { // 0 means debugger doesn't want to change num threads 1574 master_set_numthreads = nth; 1575 }; // if 1576 }; // if 1577 #endif 1578 1579 KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, " 1580 "master_th=%p, gtid=%d\n", 1581 root, parent_team, master_th, gtid)); 1582 __kmp_internal_fork(loc, gtid, parent_team); 1583 KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, " 1584 "master_th=%p, gtid=%d\n", 1585 root, parent_team, master_th, gtid)); 1586 1587 /* Invoke microtask for MASTER thread */ 1588 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 1589 parent_team->t.t_id, parent_team->t.t_pkfn)); 1590 1591 { 1592 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1593 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1594 if (!parent_team->t.t_invoke(gtid)) { 1595 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread"); 1596 } 1597 } 1598 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 1599 parent_team->t.t_id, parent_team->t.t_pkfn)); 1600 KMP_MB(); /* Flush all pending memory write invalidates. */ 1601 1602 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 1603 1604 return TRUE; 1605 } // Parallel closely nested in teams construct 1606 #endif /* OMP_40_ENABLED */ 1607 1608 #if KMP_DEBUG 1609 if (__kmp_tasking_mode != tskm_immediate_exec) { 1610 KMP_DEBUG_ASSERT(master_th->th.th_task_team == 1611 parent_team->t.t_task_team[master_th->th.th_task_state]); 1612 } 1613 #endif 1614 1615 if (parent_team->t.t_active_level >= 1616 master_th->th.th_current_task->td_icvs.max_active_levels) { 1617 nthreads = 1; 1618 } else { 1619 #if OMP_40_ENABLED 1620 int enter_teams = ((ap == NULL && active_level == 0) || 1621 (ap && teams_level > 0 && teams_level == level)); 1622 #endif 1623 nthreads = 1624 master_set_numthreads 1625 ? master_set_numthreads 1626 : get__nproc_2( 1627 parent_team, 1628 master_tid); // TODO: get nproc directly from current task 1629 1630 // Check if we need to take forkjoin lock? (no need for serialized 1631 // parallel out of teams construct). This code moved here from 1632 // __kmp_reserve_threads() to speedup nested serialized parallels. 1633 if (nthreads > 1) { 1634 if ((!get__nested(master_th) && (root->r.r_in_parallel 1635 #if OMP_40_ENABLED 1636 && !enter_teams 1637 #endif /* OMP_40_ENABLED */ 1638 )) || 1639 (__kmp_library == library_serial)) { 1640 KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d" 1641 " threads\n", 1642 gtid, nthreads)); 1643 nthreads = 1; 1644 } 1645 } 1646 if (nthreads > 1) { 1647 /* determine how many new threads we can use */ 1648 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 1649 nthreads = __kmp_reserve_threads( 1650 root, parent_team, master_tid, nthreads 1651 #if OMP_40_ENABLED 1652 /* AC: If we execute teams from parallel region (on host), then 1653 teams should be created but each can only have 1 thread if 1654 nesting is disabled. 
If teams called from serial region, then 1655 teams and their threads should be created regardless of the 1656 nesting setting. */ 1657 , 1658 enter_teams 1659 #endif /* OMP_40_ENABLED */ 1660 ); 1661 if (nthreads == 1) { 1662 // Free lock for single thread execution here; for multi-thread 1663 // execution it will be freed later after team of threads created 1664 // and initialized 1665 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 1666 } 1667 } 1668 } 1669 KMP_DEBUG_ASSERT(nthreads > 0); 1670 1671 // If we temporarily changed the set number of threads then restore it now 1672 master_th->th.th_set_nproc = 0; 1673 1674 /* create a serialized parallel region? */ 1675 if (nthreads == 1) { 1676 /* josh todo: hypothetical question: what do we do for OS X*? */ 1677 #if KMP_OS_LINUX && \ 1678 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 1679 void *args[argc]; 1680 #else 1681 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); 1682 #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \ 1683 KMP_ARCH_AARCH64) */ 1684 1685 KA_TRACE(20, 1686 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid)); 1687 1688 __kmpc_serialized_parallel(loc, gtid); 1689 1690 if (call_context == fork_context_intel) { 1691 /* TODO this sucks, use the compiler itself to pass args! :) */ 1692 master_th->th.th_serial_team->t.t_ident = loc; 1693 #if OMP_40_ENABLED 1694 if (!ap) { 1695 // revert change made in __kmpc_serialized_parallel() 1696 master_th->th.th_serial_team->t.t_level--; 1697 // Get args from parent team for teams construct 1698 1699 #if OMPT_SUPPORT 1700 void *dummy; 1701 void **exit_runtime_p; 1702 1703 ompt_lw_taskteam_t lw_taskteam; 1704 1705 if (ompt_enabled) { 1706 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1707 unwrapped_task, ompt_parallel_id); 1708 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1709 exit_runtime_p = 1710 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1711 1712 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1713 1714 #if OMPT_TRACE 1715 my_task_id = lw_taskteam.ompt_task_info.task_id; 1716 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1717 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1718 ompt_parallel_id, my_task_id); 1719 } 1720 #endif 1721 1722 /* OMPT state */ 1723 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1724 } else { 1725 exit_runtime_p = &dummy; 1726 } 1727 #endif 1728 1729 { 1730 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1731 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1732 __kmp_invoke_microtask(microtask, gtid, 0, argc, 1733 parent_team->t.t_argv 1734 #if OMPT_SUPPORT 1735 , 1736 exit_runtime_p 1737 #endif 1738 ); 1739 } 1740 1741 #if OMPT_SUPPORT 1742 *exit_runtime_p = NULL; 1743 if (ompt_enabled) { 1744 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1745 1746 #if OMPT_TRACE 1747 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1748 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1749 ompt_parallel_id, ompt_task_id); 1750 } 1751 #endif 1752 1753 __ompt_lw_taskteam_unlink(master_th); 1754 // reset clear the task id only after unlinking the task 1755 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1756 1757 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1758 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1759 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); 1760 } 1761 master_th->th.ompt_thread_info.state = 
ompt_state_overhead; 1762 } 1763 #endif 1764 } else if (microtask == (microtask_t)__kmp_teams_master) { 1765 KMP_DEBUG_ASSERT(master_th->th.th_team == 1766 master_th->th.th_serial_team); 1767 team = master_th->th.th_team; 1768 // team->t.t_pkfn = microtask; 1769 team->t.t_invoke = invoker; 1770 __kmp_alloc_argv_entries(argc, team, TRUE); 1771 team->t.t_argc = argc; 1772 argv = (void **)team->t.t_argv; 1773 if (ap) { 1774 for (i = argc - 1; i >= 0; --i) 1775 // TODO: revert workaround for Intel(R) 64 tracker #96 1776 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1777 *argv++ = va_arg(*ap, void *); 1778 #else 1779 *argv++ = va_arg(ap, void *); 1780 #endif 1781 } else { 1782 for (i = 0; i < argc; ++i) 1783 // Get args from parent team for teams construct 1784 argv[i] = parent_team->t.t_argv[i]; 1785 } 1786 // AC: revert change made in __kmpc_serialized_parallel() 1787 // because initial code in teams should have level=0 1788 team->t.t_level--; 1789 // AC: call special invoker for outer "parallel" of teams construct 1790 { 1791 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1792 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1793 invoker(gtid); 1794 } 1795 } else { 1796 #endif /* OMP_40_ENABLED */ 1797 argv = args; 1798 for (i = argc - 1; i >= 0; --i) 1799 // TODO: revert workaround for Intel(R) 64 tracker #96 1800 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1801 *argv++ = va_arg(*ap, void *); 1802 #else 1803 *argv++ = va_arg(ap, void *); 1804 #endif 1805 KMP_MB(); 1806 1807 #if OMPT_SUPPORT 1808 void *dummy; 1809 void **exit_runtime_p; 1810 1811 ompt_lw_taskteam_t lw_taskteam; 1812 1813 if (ompt_enabled) { 1814 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1815 unwrapped_task, ompt_parallel_id); 1816 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1817 exit_runtime_p = 1818 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1819 1820 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1821 1822 #if OMPT_TRACE 1823 /* OMPT implicit task begin */ 1824 my_task_id = lw_taskteam.ompt_task_info.task_id; 1825 my_parallel_id = ompt_parallel_id; 1826 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1827 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1828 my_parallel_id, my_task_id); 1829 } 1830 #endif 1831 1832 /* OMPT state */ 1833 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1834 } else { 1835 exit_runtime_p = &dummy; 1836 } 1837 #endif 1838 1839 { 1840 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1841 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1842 __kmp_invoke_microtask(microtask, gtid, 0, argc, args 1843 #if OMPT_SUPPORT 1844 , 1845 exit_runtime_p 1846 #endif 1847 ); 1848 } 1849 1850 #if OMPT_SUPPORT 1851 *exit_runtime_p = NULL; 1852 if (ompt_enabled) { 1853 #if OMPT_TRACE 1854 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1855 1856 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1857 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1858 my_parallel_id, my_task_id); 1859 } 1860 #endif 1861 1862 __ompt_lw_taskteam_unlink(master_th); 1863 // reset clear the task id only after unlinking the task 1864 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1865 1866 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1867 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1868 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); 1869 } 1870 master_th->th.ompt_thread_info.state = ompt_state_overhead; 1871 } 1872 
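        // Note (editorial): in this serialized branch no real team is built; a
        // stack-allocated ompt_lw_taskteam_t is linked around the microtask
        // call purely so a tool sees a well-formed event sequence, roughly:
        //   implicit_task_begin(parallel_id, task_id)
        //     __kmp_invoke_microtask(...)        // the user outlined body
        //   implicit_task_end(parallel_id, task_id)
        //   (lw taskteam unlinked, its task id cleared)
        //   parallel_end(parallel_id, task_id, OMPT_INVOKER(call_context))
        // after which the master's OMPT state is set back to
        // ompt_state_overhead.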
#endif 1873 #if OMP_40_ENABLED 1874 } 1875 #endif /* OMP_40_ENABLED */ 1876 } else if (call_context == fork_context_gnu) { 1877 #if OMPT_SUPPORT 1878 ompt_lw_taskteam_t *lwt = 1879 (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); 1880 __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task, 1881 ompt_parallel_id); 1882 1883 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); 1884 lwt->ompt_task_info.frame.exit_runtime_frame = NULL; 1885 __ompt_lw_taskteam_link(lwt, master_th); 1886 #endif 1887 1888 // we were called from GNU native code 1889 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 1890 return FALSE; 1891 } 1892 else { 1893 KMP_ASSERT2(call_context < fork_context_last, 1894 "__kmp_fork_call: unknown fork_context parameter"); 1895 } 1896 1897 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 1898 KMP_MB(); 1899 return FALSE; 1900 } 1901 1902 // GEH: only modify the executing flag in the case when not serialized 1903 // serialized case is handled in kmpc_serialized_parallel 1904 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " 1905 "curtask=%p, curtask_max_aclevel=%d\n", 1906 parent_team->t.t_active_level, master_th, 1907 master_th->th.th_current_task, 1908 master_th->th.th_current_task->td_icvs.max_active_levels)); 1909 // TODO: GEH - cannot do this assertion because root thread not set up as 1910 // executing 1911 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); 1912 master_th->th.th_current_task->td_flags.executing = 0; 1913 1914 #if OMP_40_ENABLED 1915 if (!master_th->th.th_teams_microtask || level > teams_level) 1916 #endif /* OMP_40_ENABLED */ 1917 { 1918 /* Increment our nested depth level */ 1919 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel); 1920 } 1921 1922 // See if we need to make a copy of the ICVs. 1923 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; 1924 if ((level + 1 < __kmp_nested_nth.used) && 1925 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { 1926 nthreads_icv = __kmp_nested_nth.nth[level + 1]; 1927 } else { 1928 nthreads_icv = 0; // don't update 1929 } 1930 1931 #if OMP_40_ENABLED 1932 // Figure out the proc_bind_policy for the new team. 1933 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 1934 kmp_proc_bind_t proc_bind_icv = 1935 proc_bind_default; // proc_bind_default means don't update 1936 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 1937 proc_bind = proc_bind_false; 1938 } else { 1939 if (proc_bind == proc_bind_default) { 1940 // No proc_bind clause specified; use current proc-bind-var for this 1941 // parallel region 1942 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 1943 } 1944 /* else: The proc_bind policy was specified explicitly on parallel clause. 1945 This overrides proc-bind-var for this parallel region, but does not 1946 change proc-bind-var. */ 1947 // Figure the value of proc-bind-var for the child threads. 
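  // Note: __kmp_nested_proc_bind normally holds one bind policy per nesting
  // level (e.g. parsed from a comma-separated OMP_PROC_BIND list such as
  // "spread,close"); the check below forwards an updated proc-bind-var to the
  // children only when the entry for level+1 differs from the master's current
  // value. With that example list, a top-level parallel binds its threads with
  // 'spread' while handing them 'close' as the value any nested parallel will
  // use. (The environment value is an illustrative example, not something read
  // in this file.)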
1948 if ((level + 1 < __kmp_nested_proc_bind.used) && 1949 (__kmp_nested_proc_bind.bind_types[level + 1] != 1950 master_th->th.th_current_task->td_icvs.proc_bind)) { 1951 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 1952 } 1953 } 1954 1955 // Reset for next parallel region 1956 master_th->th.th_set_proc_bind = proc_bind_default; 1957 #endif /* OMP_40_ENABLED */ 1958 1959 if ((nthreads_icv > 0) 1960 #if OMP_40_ENABLED 1961 || (proc_bind_icv != proc_bind_default) 1962 #endif /* OMP_40_ENABLED */ 1963 ) { 1964 kmp_internal_control_t new_icvs; 1965 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); 1966 new_icvs.next = NULL; 1967 if (nthreads_icv > 0) { 1968 new_icvs.nproc = nthreads_icv; 1969 } 1970 1971 #if OMP_40_ENABLED 1972 if (proc_bind_icv != proc_bind_default) { 1973 new_icvs.proc_bind = proc_bind_icv; 1974 } 1975 #endif /* OMP_40_ENABLED */ 1976 1977 /* allocate a new parallel team */ 1978 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 1979 team = __kmp_allocate_team(root, nthreads, nthreads, 1980 #if OMPT_SUPPORT 1981 ompt_parallel_id, 1982 #endif 1983 #if OMP_40_ENABLED 1984 proc_bind, 1985 #endif 1986 &new_icvs, argc USE_NESTED_HOT_ARG(master_th)); 1987 } else { 1988 /* allocate a new parallel team */ 1989 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 1990 team = __kmp_allocate_team(root, nthreads, nthreads, 1991 #if OMPT_SUPPORT 1992 ompt_parallel_id, 1993 #endif 1994 #if OMP_40_ENABLED 1995 proc_bind, 1996 #endif 1997 &master_th->th.th_current_task->td_icvs, 1998 argc USE_NESTED_HOT_ARG(master_th)); 1999 } 2000 KF_TRACE( 2001 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); 2002 2003 /* setup the new team */ 2004 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); 2005 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); 2006 KMP_CHECK_UPDATE(team->t.t_ident, loc); 2007 KMP_CHECK_UPDATE(team->t.t_parent, parent_team); 2008 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); 2009 #if OMPT_SUPPORT 2010 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task); 2011 #endif 2012 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe 2013 // TODO: parent_team->t.t_level == INT_MAX ??? 2014 #if OMP_40_ENABLED 2015 if (!master_th->th.th_teams_microtask || level > teams_level) { 2016 #endif /* OMP_40_ENABLED */ 2017 int new_level = parent_team->t.t_level + 1; 2018 KMP_CHECK_UPDATE(team->t.t_level, new_level); 2019 new_level = parent_team->t.t_active_level + 1; 2020 KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 2021 #if OMP_40_ENABLED 2022 } else { 2023 // AC: Do not increase parallel level at start of the teams construct 2024 int new_level = parent_team->t.t_level; 2025 KMP_CHECK_UPDATE(team->t.t_level, new_level); 2026 new_level = parent_team->t.t_active_level; 2027 KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 2028 } 2029 #endif /* OMP_40_ENABLED */ 2030 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); 2031 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || 2032 team->t.t_sched.chunk != new_sched.chunk) 2033 team->t.t_sched = 2034 new_sched; // set master's schedule as new run-time schedule 2035 2036 #if OMP_40_ENABLED 2037 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); 2038 #endif 2039 2040 // Update the floating point rounding in the team if required. 2041 propagateFPControl(team); 2042 2043 if (__kmp_tasking_mode != tskm_immediate_exec) { 2044 // Set master's task team to team's task team. 
Unless this is hot team, it 2045 // should be NULL. 2046 KMP_DEBUG_ASSERT(master_th->th.th_task_team == 2047 parent_team->t.t_task_team[master_th->th.th_task_state]); 2048 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team " 2049 "%p, new task_team %p / team %p\n", 2050 __kmp_gtid_from_thread(master_th), 2051 master_th->th.th_task_team, parent_team, 2052 team->t.t_task_team[master_th->th.th_task_state], team)); 2053 2054 if (active_level || master_th->th.th_task_team) { 2055 // Take a memo of master's task_state 2056 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 2057 if (master_th->th.th_task_state_top >= 2058 master_th->th.th_task_state_stack_sz) { // increase size 2059 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz; 2060 kmp_uint8 *old_stack, *new_stack; 2061 kmp_uint32 i; 2062 new_stack = (kmp_uint8 *)__kmp_allocate(new_size); 2063 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) { 2064 new_stack[i] = master_th->th.th_task_state_memo_stack[i]; 2065 } 2066 for (i = master_th->th.th_task_state_stack_sz; i < new_size; 2067 ++i) { // zero-init rest of stack 2068 new_stack[i] = 0; 2069 } 2070 old_stack = master_th->th.th_task_state_memo_stack; 2071 master_th->th.th_task_state_memo_stack = new_stack; 2072 master_th->th.th_task_state_stack_sz = new_size; 2073 __kmp_free(old_stack); 2074 } 2075 // Store master's task_state on stack 2076 master_th->th 2077 .th_task_state_memo_stack[master_th->th.th_task_state_top] = 2078 master_th->th.th_task_state; 2079 master_th->th.th_task_state_top++; 2080 #if KMP_NESTED_HOT_TEAMS 2081 if (team == master_th->th.th_hot_teams[active_level].hot_team) { 2082 // Restore master's nested state if nested hot team 2083 master_th->th.th_task_state = 2084 master_th->th 2085 .th_task_state_memo_stack[master_th->th.th_task_state_top]; 2086 } else { 2087 #endif 2088 master_th->th.th_task_state = 0; 2089 #if KMP_NESTED_HOT_TEAMS 2090 } 2091 #endif 2092 } 2093 #if !KMP_NESTED_HOT_TEAMS 2094 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || 2095 (team == root->r.r_hot_team)); 2096 #endif 2097 } 2098 2099 KA_TRACE( 2100 20, 2101 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", 2102 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, 2103 team->t.t_nproc)); 2104 KMP_DEBUG_ASSERT(team != root->r.r_hot_team || 2105 (team->t.t_master_tid == 0 && 2106 (team->t.t_parent == root->r.r_root_team || 2107 team->t.t_parent->t.t_serialized))); 2108 KMP_MB(); 2109 2110 /* now, setup the arguments */ 2111 argv = (void **)team->t.t_argv; 2112 #if OMP_40_ENABLED 2113 if (ap) { 2114 #endif /* OMP_40_ENABLED */ 2115 for (i = argc - 1; i >= 0; --i) { 2116 // TODO: revert workaround for Intel(R) 64 tracker #96 2117 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 2118 void *new_argv = va_arg(*ap, void *); 2119 #else 2120 void *new_argv = va_arg(ap, void *); 2121 #endif 2122 KMP_CHECK_UPDATE(*argv, new_argv); 2123 argv++; 2124 } 2125 #if OMP_40_ENABLED 2126 } else { 2127 for (i = 0; i < argc; ++i) { 2128 // Get args from parent team for teams construct 2129 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); 2130 } 2131 } 2132 #endif /* OMP_40_ENABLED */ 2133 2134 /* now actually fork the threads */ 2135 KMP_CHECK_UPDATE(team->t.t_master_active, master_active); 2136 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong 2137 root->r.r_active = TRUE; 2138 2139 __kmp_fork_team_threads(root, team, master_th, gtid); 2140 __kmp_setup_icv_copy(team, 
nthreads, 2141 &master_th->th.th_current_task->td_icvs, loc); 2142 2143 #if OMPT_SUPPORT 2144 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 2145 #endif 2146 2147 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 2148 2149 #if USE_ITT_BUILD 2150 if (team->t.t_active_level == 1 // only report frames at level 1 2151 #if OMP_40_ENABLED 2152 && !master_th->th.th_teams_microtask // not in teams construct 2153 #endif /* OMP_40_ENABLED */ 2154 ) { 2155 #if USE_ITT_NOTIFY 2156 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && 2157 (__kmp_forkjoin_frames_mode == 3 || 2158 __kmp_forkjoin_frames_mode == 1)) { 2159 kmp_uint64 tmp_time = 0; 2160 if (__itt_get_timestamp_ptr) 2161 tmp_time = __itt_get_timestamp(); 2162 // Internal fork - report frame begin 2163 master_th->th.th_frame_time = tmp_time; 2164 if (__kmp_forkjoin_frames_mode == 3) 2165 team->t.t_region_time = tmp_time; 2166 } else 2167 // only one notification scheme (either "submit" or "forking/joined", not both) 2168 #endif /* USE_ITT_NOTIFY */ 2169 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) && 2170 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) { 2171 // Mark start of "parallel" region for VTune. 2172 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); 2173 } 2174 } 2175 #endif /* USE_ITT_BUILD */ 2176 2177 /* now go on and do the work */ 2178 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team); 2179 KMP_MB(); 2180 KF_TRACE(10, 2181 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", 2182 root, team, master_th, gtid)); 2183 2184 #if USE_ITT_BUILD 2185 if (__itt_stack_caller_create_ptr) { 2186 team->t.t_stack_id = 2187 __kmp_itt_stack_caller_create(); // create new stack stitching id 2188 // before entering fork barrier 2189 } 2190 #endif /* USE_ITT_BUILD */ 2191 2192 #if OMP_40_ENABLED 2193 // AC: skip __kmp_internal_fork at teams construct, let only master 2194 // threads execute 2195 if (ap) 2196 #endif /* OMP_40_ENABLED */ 2197 { 2198 __kmp_internal_fork(loc, gtid, team); 2199 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, " 2200 "master_th=%p, gtid=%d\n", 2201 root, team, master_th, gtid)); 2202 } 2203 2204 if (call_context == fork_context_gnu) { 2205 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 2206 return TRUE; 2207 } 2208 2209 /* Invoke microtask for MASTER thread */ 2210 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 2211 team->t.t_id, team->t.t_pkfn)); 2212 } // END of timer KMP_fork_call block 2213 2214 { 2215 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 2216 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 2217 if (!team->t.t_invoke(gtid)) { 2218 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread"); 2219 } 2220 } 2221 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 2222 team->t.t_id, team->t.t_pkfn)); 2223 KMP_MB(); /* Flush all pending memory write invalidates. */ 2224 2225 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 2226 2227 #if OMPT_SUPPORT 2228 if (ompt_enabled) { 2229 master_th->th.ompt_thread_info.state = ompt_state_overhead; 2230 } 2231 #endif 2232 2233 return TRUE; 2234 } 2235 2236 #if OMPT_SUPPORT 2237 static inline void __kmp_join_restore_state(kmp_info_t *thread, 2238 kmp_team_t *team) { 2239 // restore state outside the region 2240 thread->th.ompt_thread_info.state = 2241 ((team->t.t_serialized) ? 
ompt_state_work_serial 2242 : ompt_state_work_parallel); 2243 } 2244 2245 static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team, 2246 ompt_parallel_id_t parallel_id, 2247 fork_context_e fork_context) { 2248 ompt_task_info_t *task_info = __ompt_get_taskinfo(0); 2249 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 2250 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 2251 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); 2252 } 2253 2254 task_info->frame.reenter_runtime_frame = NULL; 2255 __kmp_join_restore_state(thread, team); 2256 } 2257 #endif 2258 2259 void __kmp_join_call(ident_t *loc, int gtid 2260 #if OMPT_SUPPORT 2261 , 2262 enum fork_context_e fork_context 2263 #endif 2264 #if OMP_40_ENABLED 2265 , 2266 int exit_teams 2267 #endif /* OMP_40_ENABLED */ 2268 ) { 2269 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call); 2270 kmp_team_t *team; 2271 kmp_team_t *parent_team; 2272 kmp_info_t *master_th; 2273 kmp_root_t *root; 2274 int master_active; 2275 int i; 2276 2277 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid)); 2278 2279 /* setup current data */ 2280 master_th = __kmp_threads[gtid]; 2281 root = master_th->th.th_root; 2282 team = master_th->th.th_team; 2283 parent_team = team->t.t_parent; 2284 2285 master_th->th.th_ident = loc; 2286 2287 #if OMPT_SUPPORT 2288 if (ompt_enabled) { 2289 master_th->th.ompt_thread_info.state = ompt_state_overhead; 2290 } 2291 #endif 2292 2293 #if KMP_DEBUG 2294 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) { 2295 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, " 2296 "th_task_team = %p\n", 2297 __kmp_gtid_from_thread(master_th), team, 2298 team->t.t_task_team[master_th->th.th_task_state], 2299 master_th->th.th_task_team)); 2300 KMP_DEBUG_ASSERT(master_th->th.th_task_team == 2301 team->t.t_task_team[master_th->th.th_task_state]); 2302 } 2303 #endif 2304 2305 if (team->t.t_serialized) { 2306 #if OMP_40_ENABLED 2307 if (master_th->th.th_teams_microtask) { 2308 // We are in teams construct 2309 int level = team->t.t_level; 2310 int tlevel = master_th->th.th_teams_level; 2311 if (level == tlevel) { 2312 // AC: we haven't incremented it earlier at start of teams construct, 2313 // so do it here - at the end of teams construct 2314 team->t.t_level++; 2315 } else if (level == tlevel + 1) { 2316 // AC: we are exiting parallel inside teams, need to increment 2317 // serialization in order to restore it in the next call to 2318 // __kmpc_end_serialized_parallel 2319 team->t.t_serialized++; 2320 } 2321 } 2322 #endif /* OMP_40_ENABLED */ 2323 __kmpc_end_serialized_parallel(loc, gtid); 2324 2325 #if OMPT_SUPPORT 2326 if (ompt_enabled) { 2327 __kmp_join_restore_state(master_th, parent_team); 2328 } 2329 #endif 2330 2331 return; 2332 } 2333 2334 master_active = team->t.t_master_active; 2335 2336 #if OMP_40_ENABLED 2337 if (!exit_teams) 2338 #endif /* OMP_40_ENABLED */ 2339 { 2340 // AC: No barrier for internal teams at exit from teams construct. 2341 // But there is barrier for external team (league). 
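    // Note: __kmp_internal_join parks the master on the join barrier until
    // every worker of the team has arrived, so the code after this point can
    // safely inspect and recycle the team's data structures. In the exit_teams
    // case handled below the nested team skips that barrier and only the
    // master's task state is cleared; the league-level (external) team still
    // synchronizes through its own join.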
2342 __kmp_internal_join(loc, gtid, team); 2343 } 2344 #if OMP_40_ENABLED 2345 else { 2346 master_th->th.th_task_state = 2347 0; // AC: no tasking in teams (out of any parallel) 2348 } 2349 #endif /* OMP_40_ENABLED */ 2350 2351 KMP_MB(); 2352 2353 #if OMPT_SUPPORT 2354 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; 2355 #endif 2356 2357 #if USE_ITT_BUILD 2358 if (__itt_stack_caller_create_ptr) { 2359 __kmp_itt_stack_caller_destroy( 2360 (__itt_caller)team->t 2361 .t_stack_id); // destroy the stack stitching id after join barrier 2362 } 2363 2364 // Mark end of "parallel" region for VTune. 2365 if (team->t.t_active_level == 1 2366 #if OMP_40_ENABLED 2367 && !master_th->th.th_teams_microtask /* not in teams construct */ 2368 #endif /* OMP_40_ENABLED */ 2369 ) { 2370 master_th->th.th_ident = loc; 2371 // only one notification scheme (either "submit" or "forking/joined", not 2372 // both) 2373 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && 2374 __kmp_forkjoin_frames_mode == 3) 2375 __kmp_itt_frame_submit(gtid, team->t.t_region_time, 2376 master_th->th.th_frame_time, 0, loc, 2377 master_th->th.th_team_nproc, 1); 2378 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) && 2379 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames) 2380 __kmp_itt_region_joined(gtid); 2381 } // active_level == 1 2382 #endif /* USE_ITT_BUILD */ 2383 2384 #if OMP_40_ENABLED 2385 if (master_th->th.th_teams_microtask && !exit_teams && 2386 team->t.t_pkfn != (microtask_t)__kmp_teams_master && 2387 team->t.t_level == master_th->th.th_teams_level + 1) { 2388 // AC: We need to leave the team structure intact at the end of parallel 2389 // inside the teams construct, so that at the next parallel same (hot) team 2390 // works, only adjust nesting levels 2391 2392 /* Decrement our nested depth level */ 2393 team->t.t_level--; 2394 team->t.t_active_level--; 2395 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel); 2396 2397 /* Restore number of threads in the team if needed */ 2398 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) { 2399 int old_num = master_th->th.th_team_nproc; 2400 int new_num = master_th->th.th_teams_size.nth; 2401 kmp_info_t **other_threads = team->t.t_threads; 2402 team->t.t_nproc = new_num; 2403 for (i = 0; i < old_num; ++i) { 2404 other_threads[i]->th.th_team_nproc = new_num; 2405 } 2406 // Adjust states of non-used threads of the team 2407 for (i = old_num; i < new_num; ++i) { 2408 // Re-initialize thread's barrier data. 
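          // Note: these threads sat out the just-finished inner parallel, so
          // their per-barrier arrival counters are stale; copying the team's
          // current b_arrived for every barrier type keeps them in phase and
          // prevents the next fork/join barrier from releasing or blocking
          // them spuriously (th_task_state is synchronized below for the same
          // reason when tasking is enabled).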
2409 int b; 2410 kmp_balign_t *balign = other_threads[i]->th.th_bar; 2411 for (b = 0; b < bs_last_barrier; ++b) { 2412 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 2413 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 2414 #if USE_DEBUGGER 2415 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 2416 #endif 2417 } 2418 if (__kmp_tasking_mode != tskm_immediate_exec) { 2419 // Synchronize thread's task state 2420 other_threads[i]->th.th_task_state = master_th->th.th_task_state; 2421 } 2422 } 2423 } 2424 2425 #if OMPT_SUPPORT 2426 if (ompt_enabled) { 2427 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); 2428 } 2429 #endif 2430 2431 return; 2432 } 2433 #endif /* OMP_40_ENABLED */ 2434 2435 /* do cleanup and restore the parent team */ 2436 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid; 2437 master_th->th.th_local.this_construct = team->t.t_master_this_cons; 2438 2439 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid]; 2440 2441 /* jc: The following lock has instructions with REL and ACQ semantics, 2442 separating the parallel user code called in this parallel region 2443 from the serial user code called after this function returns. */ 2444 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 2445 2446 #if OMP_40_ENABLED 2447 if (!master_th->th.th_teams_microtask || 2448 team->t.t_level > master_th->th.th_teams_level) 2449 #endif /* OMP_40_ENABLED */ 2450 { 2451 /* Decrement our nested depth level */ 2452 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel); 2453 } 2454 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0); 2455 2456 #if OMPT_SUPPORT && OMPT_TRACE 2457 if (ompt_enabled) { 2458 ompt_task_info_t *task_info = __ompt_get_taskinfo(0); 2459 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 2460 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 2461 parallel_id, task_info->task_id); 2462 } 2463 task_info->frame.exit_runtime_frame = NULL; 2464 task_info->task_id = 0; 2465 } 2466 #endif 2467 2468 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0, 2469 master_th, team)); 2470 __kmp_pop_current_task_from_thread(master_th); 2471 2472 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 2473 // Restore master thread's partition. 2474 master_th->th.th_first_place = team->t.t_first_place; 2475 master_th->th.th_last_place = team->t.t_last_place; 2476 #endif /* OMP_40_ENABLED */ 2477 2478 updateHWFPControl(team); 2479 2480 if (root->r.r_active != master_active) 2481 root->r.r_active = master_active; 2482 2483 __kmp_free_team(root, team USE_NESTED_HOT_ARG( 2484 master_th)); // this will free worker threads 2485 2486 /* this race was fun to find. make sure the following is in the critical 2487 region otherwise assertions may fail occasionally since the old team may be 2488 reallocated and the hierarchy appears inconsistent. it is actually safe to 2489 run and won't cause any bugs, but will cause those assertion failures. 
   it's only one deref&assign so might as well put this in the critical region */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember master's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the master thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  // TODO: GEH - cannot do this assertion because root thread not set up as
  // executing
  // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  if (ompt_enabled) {
    __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}

/* Check whether we should push an internal control record onto the
   serial team stack. If so, do it.
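   (Editorial note) The pushed record snapshots the current ICVs together with
   the serialized nesting depth at which they were saved, so a change made from
   inside a nested, serialized parallel, for example an
       omp_set_num_threads(3);
   call issued at that depth, can be rolled back when the corresponding
   serialized level ends instead of leaking into the enclosing level. Only the
   first ICV change at a given depth pushes a record; later changes at the same
   depth reuse it. The call shown is an ordinary OpenMP API routine used here
   purely as an illustration.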
*/ 2546 void __kmp_save_internal_controls(kmp_info_t *thread) { 2547 2548 if (thread->th.th_team != thread->th.th_serial_team) { 2549 return; 2550 } 2551 if (thread->th.th_team->t.t_serialized > 1) { 2552 int push = 0; 2553 2554 if (thread->th.th_team->t.t_control_stack_top == NULL) { 2555 push = 1; 2556 } else { 2557 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level != 2558 thread->th.th_team->t.t_serialized) { 2559 push = 1; 2560 } 2561 } 2562 if (push) { /* push a record on the serial team's stack */ 2563 kmp_internal_control_t *control = 2564 (kmp_internal_control_t *)__kmp_allocate( 2565 sizeof(kmp_internal_control_t)); 2566 2567 copy_icvs(control, &thread->th.th_current_task->td_icvs); 2568 2569 control->serial_nesting_level = thread->th.th_team->t.t_serialized; 2570 2571 control->next = thread->th.th_team->t.t_control_stack_top; 2572 thread->th.th_team->t.t_control_stack_top = control; 2573 } 2574 } 2575 } 2576 2577 /* Changes set_nproc */ 2578 void __kmp_set_num_threads(int new_nth, int gtid) { 2579 kmp_info_t *thread; 2580 kmp_root_t *root; 2581 2582 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth)); 2583 KMP_DEBUG_ASSERT(__kmp_init_serial); 2584 2585 if (new_nth < 1) 2586 new_nth = 1; 2587 else if (new_nth > __kmp_max_nth) 2588 new_nth = __kmp_max_nth; 2589 2590 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); 2591 thread = __kmp_threads[gtid]; 2592 2593 __kmp_save_internal_controls(thread); 2594 2595 set__nproc(thread, new_nth); 2596 2597 // If this omp_set_num_threads() call will cause the hot team size to be 2598 // reduced (in the absence of a num_threads clause), then reduce it now, 2599 // rather than waiting for the next parallel region. 2600 root = thread->th.th_root; 2601 if (__kmp_init_parallel && (!root->r.r_active) && 2602 (root->r.r_hot_team->t.t_nproc > new_nth) 2603 #if KMP_NESTED_HOT_TEAMS 2604 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode 2605 #endif 2606 ) { 2607 kmp_team_t *hot_team = root->r.r_hot_team; 2608 int f; 2609 2610 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 2611 2612 // Release the extra threads we don't need any more. 2613 for (f = new_nth; f < hot_team->t.t_nproc; f++) { 2614 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); 2615 if (__kmp_tasking_mode != tskm_immediate_exec) { 2616 // When decreasing team size, threads no longer in the team should unref 2617 // task team. 2618 hot_team->t.t_threads[f]->th.th_task_team = NULL; 2619 } 2620 __kmp_free_thread(hot_team->t.t_threads[f]); 2621 hot_team->t.t_threads[f] = NULL; 2622 } 2623 hot_team->t.t_nproc = new_nth; 2624 #if KMP_NESTED_HOT_TEAMS 2625 if (thread->th.th_hot_teams) { 2626 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team); 2627 thread->th.th_hot_teams[0].hot_team_nth = new_nth; 2628 } 2629 #endif 2630 2631 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 2632 2633 // Update the t_nproc field in the threads that are still active. 
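    // (The loop below does exactly that for the surviving threads.) Note also
    // that trimming the hot team here, rather than lazily at the next fork,
    // means a sequence like the following releases the surplus workers back to
    // the pool as soon as the API call returns. Illustrative user code, not
    // part of this file, and it assumes the default team size is larger than 2:
#if 0
    #include <omp.h>
    int main(void) {
      #pragma omp parallel      // hot team created at the default size
      { /* ... */ }
      omp_set_num_threads(2);   // reaches __kmp_set_num_threads(2, gtid) and
                                // trims the idle hot team to 2 threads now
      #pragma omp parallel      // next region starts from the smaller team
      { /* ... */ }
      return 0;
    }
#endif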
2634 for (f = 0; f < new_nth; f++) { 2635 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); 2636 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; 2637 } 2638 // Special flag in case omp_set_num_threads() call 2639 hot_team->t.t_size_changed = -1; 2640 } 2641 } 2642 2643 /* Changes max_active_levels */ 2644 void __kmp_set_max_active_levels(int gtid, int max_active_levels) { 2645 kmp_info_t *thread; 2646 2647 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread " 2648 "%d = (%d)\n", 2649 gtid, max_active_levels)); 2650 KMP_DEBUG_ASSERT(__kmp_init_serial); 2651 2652 // validate max_active_levels 2653 if (max_active_levels < 0) { 2654 KMP_WARNING(ActiveLevelsNegative, max_active_levels); 2655 // We ignore this call if the user has specified a negative value. 2656 // The current setting won't be changed. The last valid setting will be 2657 // used. A warning will be issued (if warnings are allowed as controlled by 2658 // the KMP_WARNINGS env var). 2659 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new " 2660 "max_active_levels for thread %d = (%d)\n", 2661 gtid, max_active_levels)); 2662 return; 2663 } 2664 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) { 2665 // it's OK, the max_active_levels is within the valid range: [ 0; 2666 // KMP_MAX_ACTIVE_LEVELS_LIMIT ] 2667 // We allow a zero value. (implementation defined behavior) 2668 } else { 2669 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels, 2670 KMP_MAX_ACTIVE_LEVELS_LIMIT); 2671 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; 2672 // Current upper limit is MAX_INT. (implementation defined behavior) 2673 // If the input exceeds the upper limit, we correct the input to be the 2674 // upper limit. (implementation defined behavior) 2675 // Actually, the flow should never get here until we use MAX_INT limit. 2676 } 2677 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new " 2678 "max_active_levels for thread %d = (%d)\n", 2679 gtid, max_active_levels)); 2680 2681 thread = __kmp_threads[gtid]; 2682 2683 __kmp_save_internal_controls(thread); 2684 2685 set__max_active_levels(thread, max_active_levels); 2686 } 2687 2688 /* Gets max_active_levels */ 2689 int __kmp_get_max_active_levels(int gtid) { 2690 kmp_info_t *thread; 2691 2692 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid)); 2693 KMP_DEBUG_ASSERT(__kmp_init_serial); 2694 2695 thread = __kmp_threads[gtid]; 2696 KMP_DEBUG_ASSERT(thread->th.th_current_task); 2697 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, " 2698 "curtask_maxaclevel=%d\n", 2699 gtid, thread->th.th_current_task, 2700 thread->th.th_current_task->td_icvs.max_active_levels)); 2701 return thread->th.th_current_task->td_icvs.max_active_levels; 2702 } 2703 2704 /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ 2705 void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) { 2706 kmp_info_t *thread; 2707 // kmp_team_t *team; 2708 2709 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", 2710 gtid, (int)kind, chunk)); 2711 KMP_DEBUG_ASSERT(__kmp_init_serial); 2712 2713 // Check if the kind parameter is valid, correct if needed. 
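  // Note: this routine is the runtime side of the standard omp_set_schedule()
  // API; kmp_sched_t mirrors the omp_sched_t values and adds Intel extensions
  // in a second, "extended" interval (described just below). Illustrative
  // calls, user code rather than anything in this file:
#if 0
  #include <omp.h>
  void example(void) {
    omp_set_schedule(omp_sched_dynamic, 4); // valid: run-time schedule becomes
                                            // dynamic with chunk 4
    omp_set_schedule((omp_sched_t)999, 4);  // out of range: a warning is issued
                                            // and the default "static, no
                                            // chunk" schedule is used instead
  }
#endif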
2714 // Valid parameters should fit in one of two intervals - standard or extended: 2715 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper> 2716 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 2717 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper || 2718 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) { 2719 // TODO: Hint needs attention in case we change the default schedule. 2720 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind), 2721 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"), 2722 __kmp_msg_null); 2723 kind = kmp_sched_default; 2724 chunk = 0; // ignore chunk value in case of bad kind 2725 } 2726 2727 thread = __kmp_threads[gtid]; 2728 2729 __kmp_save_internal_controls(thread); 2730 2731 if (kind < kmp_sched_upper_std) { 2732 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) { 2733 // differ static chunked vs. unchunked: chunk should be invalid to 2734 // indicate unchunked schedule (which is the default) 2735 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; 2736 } else { 2737 thread->th.th_current_task->td_icvs.sched.r_sched_type = 2738 __kmp_sch_map[kind - kmp_sched_lower - 1]; 2739 } 2740 } else { 2741 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - 2742 // kmp_sched_lower - 2 ]; 2743 thread->th.th_current_task->td_icvs.sched.r_sched_type = 2744 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std - 2745 kmp_sched_lower - 2]; 2746 } 2747 if (kind == kmp_sched_auto || chunk < 1) { 2748 // ignore parameter chunk for schedule auto 2749 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; 2750 } else { 2751 thread->th.th_current_task->td_icvs.sched.chunk = chunk; 2752 } 2753 } 2754 2755 /* Gets def_sched_var ICV values */ 2756 void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) { 2757 kmp_info_t *thread; 2758 enum sched_type th_type; 2759 2760 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid)); 2761 KMP_DEBUG_ASSERT(__kmp_init_serial); 2762 2763 thread = __kmp_threads[gtid]; 2764 2765 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; 2766 2767 switch (th_type) { 2768 case kmp_sch_static: 2769 case kmp_sch_static_greedy: 2770 case kmp_sch_static_balanced: 2771 *kind = kmp_sched_static; 2772 *chunk = 0; // chunk was not set, try to show this fact via zero value 2773 return; 2774 case kmp_sch_static_chunked: 2775 *kind = kmp_sched_static; 2776 break; 2777 case kmp_sch_dynamic_chunked: 2778 *kind = kmp_sched_dynamic; 2779 break; 2780 case kmp_sch_guided_chunked: 2781 case kmp_sch_guided_iterative_chunked: 2782 case kmp_sch_guided_analytical_chunked: 2783 *kind = kmp_sched_guided; 2784 break; 2785 case kmp_sch_auto: 2786 *kind = kmp_sched_auto; 2787 break; 2788 case kmp_sch_trapezoidal: 2789 *kind = kmp_sched_trapezoidal; 2790 break; 2791 #if KMP_STATIC_STEAL_ENABLED 2792 case kmp_sch_static_steal: 2793 *kind = kmp_sched_static_steal; 2794 break; 2795 #endif 2796 default: 2797 KMP_FATAL(UnknownSchedulingType, th_type); 2798 } 2799 2800 *chunk = thread->th.th_current_task->td_icvs.sched.chunk; 2801 } 2802 2803 int __kmp_get_ancestor_thread_num(int gtid, int level) { 2804 2805 int ii, dd; 2806 kmp_team_t *team; 2807 kmp_info_t *thr; 2808 2809 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level)); 2810 KMP_DEBUG_ASSERT(__kmp_init_serial); 2811 2812 // validate level 2813 if (level == 0) 2814 return 0; 2815 if (level < 0) 2816 return -1; 2817 thr = __kmp_threads[gtid]; 2818 team = thr->th.th_team; 2819 ii 
= team->t.t_level; 2820 if (level > ii) 2821 return -1; 2822 2823 #if OMP_40_ENABLED 2824 if (thr->th.th_teams_microtask) { 2825 // AC: we are in teams region where multiple nested teams have same level 2826 int tlevel = thr->th.th_teams_level; // the level of the teams construct 2827 if (level <= 2828 tlevel) { // otherwise usual algorithm works (will not touch the teams) 2829 KMP_DEBUG_ASSERT(ii >= tlevel); 2830 // AC: As we need to pass by the teams league, we need to artificially 2831 // increase ii 2832 if (ii == tlevel) { 2833 ii += 2; // three teams have same level 2834 } else { 2835 ii++; // two teams have same level 2836 } 2837 } 2838 } 2839 #endif 2840 2841 if (ii == level) 2842 return __kmp_tid_from_gtid(gtid); 2843 2844 dd = team->t.t_serialized; 2845 level++; 2846 while (ii > level) { 2847 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { 2848 } 2849 if ((team->t.t_serialized) && (!dd)) { 2850 team = team->t.t_parent; 2851 continue; 2852 } 2853 if (ii > level) { 2854 team = team->t.t_parent; 2855 dd = team->t.t_serialized; 2856 ii--; 2857 } 2858 } 2859 2860 return (dd > 1) ? (0) : (team->t.t_master_tid); 2861 } 2862 2863 int __kmp_get_team_size(int gtid, int level) { 2864 2865 int ii, dd; 2866 kmp_team_t *team; 2867 kmp_info_t *thr; 2868 2869 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level)); 2870 KMP_DEBUG_ASSERT(__kmp_init_serial); 2871 2872 // validate level 2873 if (level == 0) 2874 return 1; 2875 if (level < 0) 2876 return -1; 2877 thr = __kmp_threads[gtid]; 2878 team = thr->th.th_team; 2879 ii = team->t.t_level; 2880 if (level > ii) 2881 return -1; 2882 2883 #if OMP_40_ENABLED 2884 if (thr->th.th_teams_microtask) { 2885 // AC: we are in teams region where multiple nested teams have same level 2886 int tlevel = thr->th.th_teams_level; // the level of the teams construct 2887 if (level <= 2888 tlevel) { // otherwise usual algorithm works (will not touch the teams) 2889 KMP_DEBUG_ASSERT(ii >= tlevel); 2890 // AC: As we need to pass by the teams league, we need to artificially 2891 // increase ii 2892 if (ii == tlevel) { 2893 ii += 2; // three teams have same level 2894 } else { 2895 ii++; // two teams have same level 2896 } 2897 } 2898 } 2899 #endif 2900 2901 while (ii > level) { 2902 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { 2903 } 2904 if (team->t.t_serialized && (!dd)) { 2905 team = team->t.t_parent; 2906 continue; 2907 } 2908 if (ii > level) { 2909 team = team->t.t_parent; 2910 ii--; 2911 } 2912 } 2913 2914 return team->t.t_nproc; 2915 } 2916 2917 kmp_r_sched_t __kmp_get_schedule_global() { 2918 // This routine created because pairs (__kmp_sched, __kmp_chunk) and 2919 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults 2920 // independently. So one can get the updated schedule here. 2921 2922 kmp_r_sched_t r_sched; 2923 2924 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, 2925 // __kmp_guided. 
__kmp_sched should keep original value, so that user can set 2926 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in 2927 // different roots (even in OMP 2.5) 2928 if (__kmp_sched == kmp_sch_static) { 2929 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed 2930 // schedule (balanced or greedy) 2931 } else if (__kmp_sched == kmp_sch_guided_chunked) { 2932 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed 2933 // schedule (iterative or analytical) 2934 } else { 2935 r_sched.r_sched_type = 2936 __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other 2937 } 2938 2939 if (__kmp_chunk < KMP_DEFAULT_CHUNK) { // __kmp_chunk may be wrong here (if it 2940 // was not ever set) 2941 r_sched.chunk = KMP_DEFAULT_CHUNK; 2942 } else { 2943 r_sched.chunk = __kmp_chunk; 2944 } 2945 2946 return r_sched; 2947 } 2948 2949 /* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE) 2950 at least argc number of *t_argv entries for the requested team. */ 2951 static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) { 2952 2953 KMP_DEBUG_ASSERT(team); 2954 if (!realloc || argc > team->t.t_max_argc) { 2955 2956 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, " 2957 "current entries=%d\n", 2958 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0)); 2959 /* if previously allocated heap space for args, free them */ 2960 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0]) 2961 __kmp_free((void *)team->t.t_argv); 2962 2963 if (argc <= KMP_INLINE_ARGV_ENTRIES) { 2964 /* use unused space in the cache line for arguments */ 2965 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; 2966 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d " 2967 "argv entries\n", 2968 team->t.t_id, team->t.t_max_argc)); 2969 team->t.t_argv = &team->t.t_inline_argv[0]; 2970 if (__kmp_storage_map) { 2971 __kmp_print_storage_map_gtid( 2972 -1, &team->t.t_inline_argv[0], 2973 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], 2974 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv", 2975 team->t.t_id); 2976 } 2977 } else { 2978 /* allocate space for arguments in the heap */ 2979 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1)) 2980 ? KMP_MIN_MALLOC_ARGV_ENTRIES 2981 : 2 * argc; 2982 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d " 2983 "argv entries\n", 2984 team->t.t_id, team->t.t_max_argc)); 2985 team->t.t_argv = 2986 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc); 2987 if (__kmp_storage_map) { 2988 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0], 2989 &team->t.t_argv[team->t.t_max_argc], 2990 sizeof(void *) * team->t.t_max_argc, 2991 "team_%d.t_argv", team->t.t_id); 2992 } 2993 } 2994 } 2995 } 2996 2997 static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) { 2998 int i; 2999 int num_disp_buff = max_nth > 1 ? 
__kmp_dispatch_num_buffers : 2; 3000 team->t.t_threads = 3001 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth); 3002 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate( 3003 sizeof(dispatch_shared_info_t) * num_disp_buff); 3004 team->t.t_dispatch = 3005 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth); 3006 team->t.t_implicit_task_taskdata = 3007 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth); 3008 team->t.t_max_nproc = max_nth; 3009 3010 /* setup dispatch buffers */ 3011 for (i = 0; i < num_disp_buff; ++i) { 3012 team->t.t_disp_buffer[i].buffer_index = i; 3013 #if OMP_45_ENABLED 3014 team->t.t_disp_buffer[i].doacross_buf_idx = i; 3015 #endif 3016 } 3017 } 3018 3019 static void __kmp_free_team_arrays(kmp_team_t *team) { 3020 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */ 3021 int i; 3022 for (i = 0; i < team->t.t_max_nproc; ++i) { 3023 if (team->t.t_dispatch[i].th_disp_buffer != NULL) { 3024 __kmp_free(team->t.t_dispatch[i].th_disp_buffer); 3025 team->t.t_dispatch[i].th_disp_buffer = NULL; 3026 }; // if 3027 }; // for 3028 __kmp_free(team->t.t_threads); 3029 __kmp_free(team->t.t_disp_buffer); 3030 __kmp_free(team->t.t_dispatch); 3031 __kmp_free(team->t.t_implicit_task_taskdata); 3032 team->t.t_threads = NULL; 3033 team->t.t_disp_buffer = NULL; 3034 team->t.t_dispatch = NULL; 3035 team->t.t_implicit_task_taskdata = 0; 3036 } 3037 3038 static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { 3039 kmp_info_t **oldThreads = team->t.t_threads; 3040 3041 __kmp_free(team->t.t_disp_buffer); 3042 __kmp_free(team->t.t_dispatch); 3043 __kmp_free(team->t.t_implicit_task_taskdata); 3044 __kmp_allocate_team_arrays(team, max_nth); 3045 3046 KMP_MEMCPY(team->t.t_threads, oldThreads, 3047 team->t.t_nproc * sizeof(kmp_info_t *)); 3048 3049 __kmp_free(oldThreads); 3050 } 3051 3052 static kmp_internal_control_t __kmp_get_global_icvs(void) { 3053 3054 kmp_r_sched_t r_sched = 3055 __kmp_get_schedule_global(); // get current state of scheduling globals 3056 3057 #if OMP_40_ENABLED 3058 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0); 3059 #endif /* OMP_40_ENABLED */ 3060 3061 kmp_internal_control_t g_icvs = { 3062 0, // int serial_nesting_level; //corresponds to value of th_team_serialized 3063 (kmp_int8)__kmp_dflt_nested, // int nested; //internal control 3064 // for nested parallelism (per thread) 3065 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic 3066 // adjustment of threads (per thread) 3067 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for 3068 // whether blocktime is explicitly set 3069 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime 3070 #if KMP_USE_MONITOR 3071 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime 3072 // intervals 3073 #endif 3074 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for 3075 // next parallel region (per thread) 3076 // (use a max ub on value if __kmp_parallel_initialize not called yet) 3077 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control 3078 // for max_active_levels 3079 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule 3080 // {sched,chunk} pair 3081 #if OMP_40_ENABLED 3082 __kmp_nested_proc_bind.bind_types[0], 3083 __kmp_default_device, 3084 #endif /* OMP_40_ENABLED */ 3085 NULL // struct kmp_internal_control *next; 3086 }; 3087 3088 return g_icvs; 3089 } 3090 3091 static kmp_internal_control_t 
__kmp_get_x_global_icvs(const kmp_team_t *team) { 3092 3093 kmp_internal_control_t gx_icvs; 3094 gx_icvs.serial_nesting_level = 3095 0; // probably =team->t.t_serial like in save_inter_controls 3096 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs); 3097 gx_icvs.next = NULL; 3098 3099 return gx_icvs; 3100 } 3101 3102 static void __kmp_initialize_root(kmp_root_t *root) { 3103 int f; 3104 kmp_team_t *root_team; 3105 kmp_team_t *hot_team; 3106 int hot_team_max_nth; 3107 kmp_r_sched_t r_sched = 3108 __kmp_get_schedule_global(); // get current state of scheduling globals 3109 kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 3110 KMP_DEBUG_ASSERT(root); 3111 KMP_ASSERT(!root->r.r_begin); 3112 3113 /* setup the root state structure */ 3114 __kmp_init_lock(&root->r.r_begin_lock); 3115 root->r.r_begin = FALSE; 3116 root->r.r_active = FALSE; 3117 root->r.r_in_parallel = 0; 3118 root->r.r_blocktime = __kmp_dflt_blocktime; 3119 root->r.r_nested = __kmp_dflt_nested; 3120 3121 /* setup the root team for this task */ 3122 /* allocate the root team structure */ 3123 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n")); 3124 3125 root_team = 3126 __kmp_allocate_team(root, 3127 1, // new_nproc 3128 1, // max_nproc 3129 #if OMPT_SUPPORT 3130 0, // root parallel id 3131 #endif 3132 #if OMP_40_ENABLED 3133 __kmp_nested_proc_bind.bind_types[0], 3134 #endif 3135 &r_icvs, 3136 0 // argc 3137 USE_NESTED_HOT_ARG(NULL) // master thread is unknown 3138 ); 3139 #if USE_DEBUGGER 3140 // Non-NULL value should be assigned to make the debugger display the root 3141 // team. 3142 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0)); 3143 #endif 3144 3145 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team)); 3146 3147 root->r.r_root_team = root_team; 3148 root_team->t.t_control_stack_top = NULL; 3149 3150 /* initialize root team */ 3151 root_team->t.t_threads[0] = NULL; 3152 root_team->t.t_nproc = 1; 3153 root_team->t.t_serialized = 1; 3154 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 3155 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type; 3156 root_team->t.t_sched.chunk = r_sched.chunk; 3157 KA_TRACE( 3158 20, 3159 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", 3160 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 3161 3162 /* setup the hot team for this task */ 3163 /* allocate the hot team structure */ 3164 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n")); 3165 3166 hot_team = 3167 __kmp_allocate_team(root, 3168 1, // new_nproc 3169 __kmp_dflt_team_nth_ub * 2, // max_nproc 3170 #if OMPT_SUPPORT 3171 0, // root parallel id 3172 #endif 3173 #if OMP_40_ENABLED 3174 __kmp_nested_proc_bind.bind_types[0], 3175 #endif 3176 &r_icvs, 3177 0 // argc 3178 USE_NESTED_HOT_ARG(NULL) // master thread is unknown 3179 ); 3180 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team)); 3181 3182 root->r.r_hot_team = hot_team; 3183 root_team->t.t_control_stack_top = NULL; 3184 3185 /* first-time initialization */ 3186 hot_team->t.t_parent = root_team; 3187 3188 /* initialize hot team */ 3189 hot_team_max_nth = hot_team->t.t_max_nproc; 3190 for (f = 0; f < hot_team_max_nth; ++f) { 3191 hot_team->t.t_threads[f] = NULL; 3192 }; // for 3193 hot_team->t.t_nproc = 1; 3194 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 3195 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type; 3196 hot_team->t.t_sched.chunk = r_sched.chunk; 3197 
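  // Note: t_size_changed is initialized to 0 below ("size not changed yet");
  // __kmp_set_num_threads() later stores -1 here as a special marker when an
  // explicit omp_set_num_threads() call shrinks the hot team, and a positive
  // value is used elsewhere in the runtime to mark an ordinary resize at fork
  // time.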
hot_team->t.t_size_changed = 0; 3198 } 3199 3200 #ifdef KMP_DEBUG 3201 3202 typedef struct kmp_team_list_item { 3203 kmp_team_p const *entry; 3204 struct kmp_team_list_item *next; 3205 } kmp_team_list_item_t; 3206 typedef kmp_team_list_item_t *kmp_team_list_t; 3207 3208 static void __kmp_print_structure_team_accum( // Add team to list of teams. 3209 kmp_team_list_t list, // List of teams. 3210 kmp_team_p const *team // Team to add. 3211 ) { 3212 3213 // List must terminate with item where both entry and next are NULL. 3214 // Team is added to the list only once. 3215 // List is sorted in ascending order by team id. 3216 // Team id is *not* a key. 3217 3218 kmp_team_list_t l; 3219 3220 KMP_DEBUG_ASSERT(list != NULL); 3221 if (team == NULL) { 3222 return; 3223 }; // if 3224 3225 __kmp_print_structure_team_accum(list, team->t.t_parent); 3226 __kmp_print_structure_team_accum(list, team->t.t_next_pool); 3227 3228 // Search list for the team. 3229 l = list; 3230 while (l->next != NULL && l->entry != team) { 3231 l = l->next; 3232 }; // while 3233 if (l->next != NULL) { 3234 return; // Team has been added before, exit. 3235 }; // if 3236 3237 // Team is not found. Search list again for insertion point. 3238 l = list; 3239 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) { 3240 l = l->next; 3241 }; // while 3242 3243 // Insert team. 3244 { 3245 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( 3246 sizeof(kmp_team_list_item_t)); 3247 *item = *l; 3248 l->entry = team; 3249 l->next = item; 3250 } 3251 } 3252 3253 static void __kmp_print_structure_team(char const *title, kmp_team_p const *team 3254 3255 ) { 3256 __kmp_printf("%s", title); 3257 if (team != NULL) { 3258 __kmp_printf("%2x %p\n", team->t.t_id, team); 3259 } else { 3260 __kmp_printf(" - (nil)\n"); 3261 }; // if 3262 } 3263 3264 static void __kmp_print_structure_thread(char const *title, 3265 kmp_info_p const *thread) { 3266 __kmp_printf("%s", title); 3267 if (thread != NULL) { 3268 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread); 3269 } else { 3270 __kmp_printf(" - (nil)\n"); 3271 }; // if 3272 } 3273 3274 void __kmp_print_structure(void) { 3275 3276 kmp_team_list_t list; 3277 3278 // Initialize list of teams. 3279 list = 3280 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t)); 3281 list->entry = NULL; 3282 list->next = NULL; 3283 3284 __kmp_printf("\n------------------------------\nGlobal Thread " 3285 "Table\n------------------------------\n"); 3286 { 3287 int gtid; 3288 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 3289 __kmp_printf("%2d", gtid); 3290 if (__kmp_threads != NULL) { 3291 __kmp_printf(" %p", __kmp_threads[gtid]); 3292 }; // if 3293 if (__kmp_root != NULL) { 3294 __kmp_printf(" %p", __kmp_root[gtid]); 3295 }; // if 3296 __kmp_printf("\n"); 3297 }; // for gtid 3298 } 3299 3300 // Print out __kmp_threads array. 
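  // Note: for each registered thread the loop that follows emits a small block
  // of roughly this shape (values are purely illustrative):
  //   GTID  2 0x7f...:
  //       Our Root:    0x7f...
  //       Our Team:     1 0x7f...
  //       Serial Team:  2 0x7f...
  //       Threads:      4
  //       Master:       0 0x7f...
  //       ...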
3301 __kmp_printf("\n------------------------------\nThreads\n--------------------" 3302 "----------\n"); 3303 if (__kmp_threads != NULL) { 3304 int gtid; 3305 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 3306 kmp_info_t const *thread = __kmp_threads[gtid]; 3307 if (thread != NULL) { 3308 __kmp_printf("GTID %2d %p:\n", gtid, thread); 3309 __kmp_printf(" Our Root: %p\n", thread->th.th_root); 3310 __kmp_print_structure_team(" Our Team: ", thread->th.th_team); 3311 __kmp_print_structure_team(" Serial Team: ", 3312 thread->th.th_serial_team); 3313 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc); 3314 __kmp_print_structure_thread(" Master: ", 3315 thread->th.th_team_master); 3316 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized); 3317 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc); 3318 #if OMP_40_ENABLED 3319 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind); 3320 #endif 3321 __kmp_print_structure_thread(" Next in pool: ", 3322 thread->th.th_next_pool); 3323 __kmp_printf("\n"); 3324 __kmp_print_structure_team_accum(list, thread->th.th_team); 3325 __kmp_print_structure_team_accum(list, thread->th.th_serial_team); 3326 }; // if 3327 }; // for gtid 3328 } else { 3329 __kmp_printf("Threads array is not allocated.\n"); 3330 }; // if 3331 3332 // Print out __kmp_root array. 3333 __kmp_printf("\n------------------------------\nUbers\n----------------------" 3334 "--------\n"); 3335 if (__kmp_root != NULL) { 3336 int gtid; 3337 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 3338 kmp_root_t const *root = __kmp_root[gtid]; 3339 if (root != NULL) { 3340 __kmp_printf("GTID %2d %p:\n", gtid, root); 3341 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team); 3342 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team); 3343 __kmp_print_structure_thread(" Uber Thread: ", 3344 root->r.r_uber_thread); 3345 __kmp_printf(" Active?: %2d\n", root->r.r_active); 3346 __kmp_printf(" Nested?: %2d\n", root->r.r_nested); 3347 __kmp_printf(" In Parallel: %2d\n", root->r.r_in_parallel); 3348 __kmp_printf("\n"); 3349 __kmp_print_structure_team_accum(list, root->r.r_root_team); 3350 __kmp_print_structure_team_accum(list, root->r.r_hot_team); 3351 }; // if 3352 }; // for gtid 3353 } else { 3354 __kmp_printf("Ubers array is not allocated.\n"); 3355 }; // if 3356 3357 __kmp_printf("\n------------------------------\nTeams\n----------------------" 3358 "--------\n"); 3359 while (list->next != NULL) { 3360 kmp_team_p const *team = list->entry; 3361 int i; 3362 __kmp_printf("Team %2x %p:\n", team->t.t_id, team); 3363 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent); 3364 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid); 3365 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc); 3366 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized); 3367 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc); 3368 for (i = 0; i < team->t.t_nproc; ++i) { 3369 __kmp_printf(" Thread %2d: ", i); 3370 __kmp_print_structure_thread("", team->t.t_threads[i]); 3371 }; // for i 3372 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool); 3373 __kmp_printf("\n"); 3374 list = list->next; 3375 }; // while 3376 3377 // Print out __kmp_thread_pool and __kmp_team_pool. 
3378 __kmp_printf("\n------------------------------\nPools\n----------------------" 3379 "--------\n"); 3380 __kmp_print_structure_thread("Thread pool: ", 3381 (kmp_info_t *)__kmp_thread_pool); 3382 __kmp_print_structure_team("Team pool: ", 3383 (kmp_team_t *)__kmp_team_pool); 3384 __kmp_printf("\n"); 3385 3386 // Free team list. 3387 while (list != NULL) { 3388 kmp_team_list_item_t *item = list; 3389 list = list->next; 3390 KMP_INTERNAL_FREE(item); 3391 }; // while 3392 } 3393 3394 #endif 3395 3396 //--------------------------------------------------------------------------- 3397 // Stuff for per-thread fast random number generator 3398 // Table of primes 3399 static const unsigned __kmp_primes[] = { 3400 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877, 3401 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, 3402 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201, 3403 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, 3404 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7, 3405 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, 3406 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45, 3407 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, 3408 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363, 3409 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, 3410 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f}; 3411 3412 //--------------------------------------------------------------------------- 3413 // __kmp_get_random: Get a random number using a linear congruential method. 3414 unsigned short __kmp_get_random(kmp_info_t *thread) { 3415 unsigned x = thread->th.th_x; 3416 unsigned short r = x >> 16; 3417 3418 thread->th.th_x = x * thread->th.th_a + 1; 3419 3420 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n", 3421 thread->th.th_info.ds.ds_tid, r)); 3422 3423 return r; 3424 } 3425 //-------------------------------------------------------- 3426 // __kmp_init_random: Initialize a random number generator 3427 void __kmp_init_random(kmp_info_t *thread) { 3428 unsigned seed = thread->th.th_info.ds.ds_tid; 3429 3430 thread->th.th_a = 3431 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))]; 3432 thread->th.th_x = (seed + 1) * thread->th.th_a + 1; 3433 KA_TRACE(30, 3434 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a)); 3435 } 3436 3437 #if KMP_OS_WINDOWS 3438 /* reclaim array entries for root threads that are already dead, returns number 3439 * reclaimed */ 3440 static int __kmp_reclaim_dead_roots(void) { 3441 int i, r = 0; 3442 3443 for (i = 0; i < __kmp_threads_capacity; ++i) { 3444 if (KMP_UBER_GTID(i) && 3445 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && 3446 !__kmp_root[i] 3447 ->r.r_active) { // AC: reclaim only roots died in non-active state 3448 r += __kmp_unregister_root_other_thread(i); 3449 } 3450 } 3451 return r; 3452 } 3453 #endif 3454 3455 /* This function attempts to create free entries in __kmp_threads and 3456 __kmp_root, and returns the number of free entries generated. 3457 3458 For Windows* OS static library, the first mechanism used is to reclaim array 3459 entries for root threads that are already dead. 3460 3461 On all platforms, expansion is attempted on the arrays __kmp_threads_ and 3462 __kmp_root, with appropriate update to __kmp_threads_capacity. 
   Array capacity is increased by doubling with clipping to __kmp_tp_capacity,
   if the threadprivate cache array has been created. Synchronization with
   __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.

   After any dead root reclamation, if the clipping value allows array expansion
   to result in the generation of a total of nWish free slots, the function does
   that expansion. If not, but the clipping value allows array expansion to
   result in the generation of a total of nNeed free slots, the function does
   that expansion. Otherwise, nothing is done beyond the possible initial root
   thread reclamation. However, if nNeed is zero, a best-effort attempt is made
   to fulfil nWish as far as possible, i.e. the function will attempt to create
   as many free slots as possible up to nWish.

   If any argument is negative, the behavior is undefined. */
static int __kmp_expand_threads(int nWish, int nNeed) {
  int added = 0;
  int old_tp_cached;
  int __kmp_actual_max_nth;

  if (nNeed > nWish) /* normalize the arguments */
    nWish = nNeed;
#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
  /* only for Windows static library */
  /* reclaim array entries for root threads that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
  if (nWish) {
    nWish -= added;
    if (nWish < 0)
      nWish = 0;
  }
#endif
  if (nWish <= 0)
    return added;

  while (1) {
    int nTarget;
    int minimumRequiredCapacity;
    int newCapacity;
    kmp_info_t **newThreads;
    kmp_root_t **newRoot;

    // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
    // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
    // user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may become
    // > __kmp_max_nth in one of two ways:
    //
    // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
    //    may not be reused by another thread, so we may need to increase
    //    __kmp_threads_capacity to __kmp_max_nth + 1.
    //
    // 2) New foreign root(s) are encountered. We always register new foreign
    //    roots. This may cause a smaller # of threads to be allocated at
    //    subsequent parallel regions, but the worker threads hang around (and
    //    eventually go to sleep) and need slots in the __kmp_threads[] array.
    //
    // Anyway, that is the reason for moving the check to see if
    // __kmp_max_nth was exceeded into __kmp_reserve_threads()
    // instead of having it performed here. -BB
    old_tp_cached = __kmp_tp_cached;
    __kmp_actual_max_nth =
        old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
    KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);

    /* compute expansion headroom to check if we can expand and whether to aim
       for nWish or nNeed */
    nTarget = nWish;
    if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
      /* can't fulfil nWish, so try nNeed */
      if (nNeed) {
        nTarget = nNeed;
        if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
          /* possible expansion too small -- give up */
          break;
        }
      } else {
        /* best-effort */
        nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
        if (!nTarget) {
          /* can't expand at all -- give up */
          break;
        }
      }
    }
    minimumRequiredCapacity = __kmp_threads_capacity + nTarget;

    newCapacity = __kmp_threads_capacity;
    do {
      newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1)
                        ? (newCapacity << 1)
                        : __kmp_actual_max_nth;
    } while (newCapacity < minimumRequiredCapacity);
    newThreads = (kmp_info_t **)__kmp_allocate(
        (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity +
        CACHE_LINE);
    newRoot = (kmp_root_t **)((char *)newThreads +
                              sizeof(kmp_info_t *) * newCapacity);
    KMP_MEMCPY(newThreads, __kmp_threads,
               __kmp_threads_capacity * sizeof(kmp_info_t *));
    KMP_MEMCPY(newRoot, __kmp_root,
               __kmp_threads_capacity * sizeof(kmp_root_t *));
    memset(newThreads + __kmp_threads_capacity, 0,
           (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t *));
    memset(newRoot + __kmp_threads_capacity, 0,
           (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t *));

    if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has
         allocated a threadprivate cache while we were allocating the expanded
         array, and our new capacity is larger than the threadprivate cache
         capacity, so we should deallocate the expanded arrays and try again.
         This is the first check of a double-check pair. */
      __kmp_free(newThreads);
      continue; /* start over and try again */
    }
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      /* Same check as above, but this time with the lock held so we can be
         certain whether we can succeed. */
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
      __kmp_free(newThreads);
      continue; /* start over and try again */
    } else {
      /* success */
      // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be
      // investigated.
      *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
      *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
      added += newCapacity - __kmp_threads_capacity;
      *(volatile int *)&__kmp_threads_capacity = newCapacity;
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
      break; /* succeeded, so we can exit the loop */
    }
  }
  return added;
}
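// A minimal, stand-alone sketch of the growth policy used above: double the
// current capacity, clipping to the allowed maximum, until the requested
// number of slots fits. The name grow_capacity and the sample numbers are
// illustrative only; the block is excluded from the build.
#if 0
// Assumes current >= 1. Returns the new capacity, or the old one when the
// request cannot be met within max_capacity (the caller then falls back from
// nWish to nNeed, or gives up, as in the nTarget checks above).
static int grow_capacity(int current, int required, int max_capacity) {
  if (required > max_capacity)
    return current;
  int new_capacity = current;
  do {
    // double with clipping, mirroring the do/while loop above
    new_capacity = (new_capacity <= max_capacity / 2) ? new_capacity * 2
                                                      : max_capacity;
  } while (new_capacity < required);
  return new_capacity;
}
// Example: current = 32, required = 70, max = 512 gives 64 then 128, so the
// arrays would be reallocated with 128 slots; current = 300, required = 400,
// max = 512 clips straight to 512.
#endif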
/* Register the current thread as a root thread and obtain our gtid. We must
   have the __kmp_initz_lock held at this point. Argument TRUE only if we are
   the thread that calls from __kmp_do_serial_initialize() */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
  KMP_MB();

  /* 2007-03-02:
     If the initial thread did not invoke the OpenMP RTL yet, and this thread
     is not an initial one, the "__kmp_all_nth >= __kmp_threads_capacity"
     condition does not work as expected -- it may return false (meaning there
     is at least one empty slot in the __kmp_threads array), but it is possible
     that the only free slot is #0, which is reserved for the initial thread
     and so cannot be used for this one. The following code works around this
     bug.

     However, the right solution seems to be not reserving slot #0 for the
     initial thread because:
     (1) there is no magic in slot #0,
     (2) we cannot detect the initial thread reliably (the first thread which
         does serial initialization may not be a real initial thread).
  */
  capacity = __kmp_threads_capacity;
  if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }; // if

  /* see if there are too many threads */
  if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1, 1)) {
    if (__kmp_tp_cached) {
      __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread),
                KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
    } else {
      __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread),
                KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
    }
  }; // if

  /* find an available thread slot */
  /* Don't reassign the zero slot since we need that to only be used by the
     initial thread */
  for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
       gtid++)
    ;
  KA_TRACE(1,
           ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
  KMP_ASSERT(gtid < __kmp_threads_capacity);

  /* update global accounting */
  __kmp_all_nth++;
  TCW_4(__kmp_nth, __kmp_nth + 1);

  // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for
  // low numbers of procs, and method #2 (keyed API call) for higher numbers.
  if (__kmp_adjust_gtid_mode) {
    if (__kmp_all_nth >= __kmp_tls_gtid_min) {
      if (TCR_4(__kmp_gtid_mode) != 2) {
        TCW_4(__kmp_gtid_mode, 2);
      }
    } else {
      if (TCR_4(__kmp_gtid_mode) != 1) {
        TCW_4(__kmp_gtid_mode, 1);
      }
    }
  }

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary */
  /* Middle initialization might not have occurred yet */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* setup this new hierarchy */
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
    KMP_DEBUG_ASSERT(!root->r.r_root_team);
  }

#if KMP_STATS_ENABLED
  // Initialize stats as soon as possible (right after gtid assignment).
3694 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); 3695 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); 3696 KMP_SET_THREAD_STATE(SERIAL_REGION); 3697 KMP_INIT_PARTITIONED_TIMERS(OMP_serial); 3698 #endif 3699 __kmp_initialize_root(root); 3700 3701 /* setup new root thread structure */ 3702 if (root->r.r_uber_thread) { 3703 root_thread = root->r.r_uber_thread; 3704 } else { 3705 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); 3706 if (__kmp_storage_map) { 3707 __kmp_print_thread_storage_map(root_thread, gtid); 3708 } 3709 root_thread->th.th_info.ds.ds_gtid = gtid; 3710 root_thread->th.th_root = root; 3711 if (__kmp_env_consistency_check) { 3712 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); 3713 } 3714 #if USE_FAST_MEMORY 3715 __kmp_initialize_fast_memory(root_thread); 3716 #endif /* USE_FAST_MEMORY */ 3717 3718 #if KMP_USE_BGET 3719 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL); 3720 __kmp_initialize_bget(root_thread); 3721 #endif 3722 __kmp_init_random(root_thread); // Initialize random number generator 3723 } 3724 3725 /* setup the serial team held in reserve by the root thread */ 3726 if (!root_thread->th.th_serial_team) { 3727 kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 3728 KF_TRACE(10, ("__kmp_register_root: before serial_team\n")); 3729 root_thread->th.th_serial_team = 3730 __kmp_allocate_team(root, 1, 1, 3731 #if OMPT_SUPPORT 3732 0, // root parallel id 3733 #endif 3734 #if OMP_40_ENABLED 3735 proc_bind_default, 3736 #endif 3737 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); 3738 } 3739 KMP_ASSERT(root_thread->th.th_serial_team); 3740 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n", 3741 root_thread->th.th_serial_team)); 3742 3743 /* drop root_thread into place */ 3744 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); 3745 3746 root->r.r_root_team->t.t_threads[0] = root_thread; 3747 root->r.r_hot_team->t.t_threads[0] = root_thread; 3748 root_thread->th.th_serial_team->t.t_threads[0] = root_thread; 3749 // AC: the team created in reserve, not for execution (it is unused for now). 3750 root_thread->th.th_serial_team->t.t_serialized = 0; 3751 root->r.r_uber_thread = root_thread; 3752 3753 /* initialize the thread, get it ready to go */ 3754 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid); 3755 TCW_4(__kmp_init_gtid, TRUE); 3756 3757 /* prepare the master thread for get_gtid() */ 3758 __kmp_gtid_set_specific(gtid); 3759 3760 #if USE_ITT_BUILD 3761 __kmp_itt_thread_name(gtid); 3762 #endif /* USE_ITT_BUILD */ 3763 3764 #ifdef KMP_TDATA_GTID 3765 __kmp_gtid = gtid; 3766 #endif 3767 __kmp_create_worker(gtid, root_thread, __kmp_stksize); 3768 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid); 3769 3770 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 3771 "plain=%u\n", 3772 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team), 3773 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, 3774 KMP_INIT_BARRIER_STATE)); 3775 { // Initialize barrier data. 
3776 int b; 3777 for (b = 0; b < bs_last_barrier; ++b) { 3778 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE; 3779 #if USE_DEBUGGER 3780 root_thread->th.th_bar[b].bb.b_worker_arrived = 0; 3781 #endif 3782 }; // for 3783 } 3784 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 3785 KMP_INIT_BARRIER_STATE); 3786 3787 #if KMP_AFFINITY_SUPPORTED 3788 #if OMP_40_ENABLED 3789 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; 3790 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; 3791 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; 3792 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; 3793 #endif 3794 3795 if (TCR_4(__kmp_init_middle)) { 3796 __kmp_affinity_set_init_mask(gtid, TRUE); 3797 } 3798 #endif /* KMP_AFFINITY_SUPPORTED */ 3799 3800 __kmp_root_counter++; 3801 3802 KMP_MB(); 3803 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 3804 3805 return gtid; 3806 } 3807 3808 #if KMP_NESTED_HOT_TEAMS 3809 static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level, 3810 const int max_level) { 3811 int i, n, nth; 3812 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; 3813 if (!hot_teams || !hot_teams[level].hot_team) { 3814 return 0; 3815 } 3816 KMP_DEBUG_ASSERT(level < max_level); 3817 kmp_team_t *team = hot_teams[level].hot_team; 3818 nth = hot_teams[level].hot_team_nth; 3819 n = nth - 1; // master is not freed 3820 if (level < max_level - 1) { 3821 for (i = 0; i < nth; ++i) { 3822 kmp_info_t *th = team->t.t_threads[i]; 3823 n += __kmp_free_hot_teams(root, th, level + 1, max_level); 3824 if (i > 0 && th->th.th_hot_teams) { 3825 __kmp_free(th->th.th_hot_teams); 3826 th->th.th_hot_teams = NULL; 3827 } 3828 } 3829 } 3830 __kmp_free_team(root, team, NULL); 3831 return n; 3832 } 3833 #endif 3834 3835 // Resets a root thread and clear its root and hot teams. 3836 // Returns the number of __kmp_threads entries directly and indirectly freed. 3837 static int __kmp_reset_root(int gtid, kmp_root_t *root) { 3838 kmp_team_t *root_team = root->r.r_root_team; 3839 kmp_team_t *hot_team = root->r.r_hot_team; 3840 int n = hot_team->t.t_nproc; 3841 int i; 3842 3843 KMP_DEBUG_ASSERT(!root->r.r_active); 3844 3845 root->r.r_root_team = NULL; 3846 root->r.r_hot_team = NULL; 3847 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team 3848 // before call to __kmp_free_team(). 3849 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL)); 3850 #if KMP_NESTED_HOT_TEAMS 3851 if (__kmp_hot_teams_max_level > 3852 0) { // need to free nested hot teams and their threads if any 3853 for (i = 0; i < hot_team->t.t_nproc; ++i) { 3854 kmp_info_t *th = hot_team->t.t_threads[i]; 3855 if (__kmp_hot_teams_max_level > 1) { 3856 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level); 3857 } 3858 if (th->th.th_hot_teams) { 3859 __kmp_free(th->th.th_hot_teams); 3860 th->th.th_hot_teams = NULL; 3861 } 3862 } 3863 } 3864 #endif 3865 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL)); 3866 3867 // Before we can reap the thread, we need to make certain that all other 3868 // threads in the teams that had this root as ancestor have stopped trying to 3869 // steal tasks. 
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPT_SUPPORT
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
    int gtid = __kmp_get_gtid();
    __ompt_thread_end(ompt_thread_initial, gtid);
  }
#endif

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put the root thread into __kmp_thread_pool, so we have to reap
  // it instead of freeing it.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}

void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* This lock should be ok, since unregister_root_current_thread is never
     called during an abort, only during a normal close. Furthermore, if you
     have the forkjoin lock, you should never try to get the initz lock. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  KMP_MB();

#if OMP_45_ENABLED
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
    // the runtime is shutting down, so we won't report any events
    thread->th.ompt_thread_info.state = ompt_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }
#endif

  __kmp_reset_root(gtid, root);

  /* free up this thread slot */
  __kmp_gtid_set_specific(KMP_GTID_DNE);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_DNE;
#endif

  KMP_MB();
  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}

#if KMP_OS_WINDOWS
/* __kmp_forkjoin_lock must be already held
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result.
*/ 3960 static int __kmp_unregister_root_other_thread(int gtid) { 3961 kmp_root_t *root = __kmp_root[gtid]; 3962 int r; 3963 3964 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid)); 3965 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); 3966 KMP_ASSERT(KMP_UBER_GTID(gtid)); 3967 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root); 3968 KMP_ASSERT(root->r.r_active == FALSE); 3969 3970 r = __kmp_reset_root(gtid, root); 3971 KC_TRACE(10, 3972 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid)); 3973 return r; 3974 } 3975 #endif 3976 3977 #if KMP_DEBUG 3978 void __kmp_task_info() { 3979 3980 kmp_int32 gtid = __kmp_entry_gtid(); 3981 kmp_int32 tid = __kmp_tid_from_gtid(gtid); 3982 kmp_info_t *this_thr = __kmp_threads[gtid]; 3983 kmp_team_t *steam = this_thr->th.th_serial_team; 3984 kmp_team_t *team = this_thr->th.th_team; 3985 3986 __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p " 3987 "ptask=%p\n", 3988 gtid, tid, this_thr, team, this_thr->th.th_current_task, 3989 team->t.t_implicit_task_taskdata[tid].td_parent); 3990 } 3991 #endif // KMP_DEBUG 3992 3993 /* TODO optimize with one big memclr, take out what isn't needed, split 3994 responsibility to workers as much as possible, and delay initialization of 3995 features as much as possible */ 3996 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, 3997 int tid, int gtid) { 3998 /* this_thr->th.th_info.ds.ds_gtid is setup in 3999 kmp_allocate_thread/create_worker. 4000 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ 4001 kmp_info_t *master = team->t.t_threads[0]; 4002 KMP_DEBUG_ASSERT(this_thr != NULL); 4003 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team); 4004 KMP_DEBUG_ASSERT(team); 4005 KMP_DEBUG_ASSERT(team->t.t_threads); 4006 KMP_DEBUG_ASSERT(team->t.t_dispatch); 4007 KMP_DEBUG_ASSERT(master); 4008 KMP_DEBUG_ASSERT(master->th.th_root); 4009 4010 KMP_MB(); 4011 4012 TCW_SYNC_PTR(this_thr->th.th_team, team); 4013 4014 this_thr->th.th_info.ds.ds_tid = tid; 4015 this_thr->th.th_set_nproc = 0; 4016 if (__kmp_tasking_mode != tskm_immediate_exec) 4017 // When tasking is possible, threads are not safe to reap until they are 4018 // done tasking; this will be set when tasking code is exited in wait 4019 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; 4020 else // no tasking --> always safe to reap 4021 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; 4022 #if OMP_40_ENABLED 4023 this_thr->th.th_set_proc_bind = proc_bind_default; 4024 #if KMP_AFFINITY_SUPPORTED 4025 this_thr->th.th_new_place = this_thr->th.th_current_place; 4026 #endif 4027 #endif 4028 this_thr->th.th_root = master->th.th_root; 4029 4030 /* setup the thread's cache of the team structure */ 4031 this_thr->th.th_team_nproc = team->t.t_nproc; 4032 this_thr->th.th_team_master = master; 4033 this_thr->th.th_team_serialized = team->t.t_serialized; 4034 TCW_PTR(this_thr->th.th_sleep_loc, NULL); 4035 4036 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata); 4037 4038 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", 4039 tid, gtid, this_thr, this_thr->th.th_current_task)); 4040 4041 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr, 4042 team, tid, TRUE); 4043 4044 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", 4045 tid, gtid, this_thr, this_thr->th.th_current_task)); 4046 // TODO: Initialize ICVs from parent; GEH - isn't that already done in 4047 // __kmp_initialize_team()? 
4048 4049 /* TODO no worksharing in speculative threads */ 4050 this_thr->th.th_dispatch = &team->t.t_dispatch[tid]; 4051 4052 this_thr->th.th_local.this_construct = 0; 4053 4054 #ifdef BUILD_TV 4055 this_thr->th.th_local.tv_data = 0; 4056 #endif 4057 4058 if (!this_thr->th.th_pri_common) { 4059 this_thr->th.th_pri_common = 4060 (struct common_table *)__kmp_allocate(sizeof(struct common_table)); 4061 if (__kmp_storage_map) { 4062 __kmp_print_storage_map_gtid( 4063 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, 4064 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid); 4065 }; // if 4066 this_thr->th.th_pri_head = NULL; 4067 }; // if 4068 4069 /* Initialize dynamic dispatch */ 4070 { 4071 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; 4072 // Use team max_nproc since this will never change for the team. 4073 size_t disp_size = 4074 sizeof(dispatch_private_info_t) * 4075 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers); 4076 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, 4077 team->t.t_max_nproc)); 4078 KMP_ASSERT(dispatch); 4079 KMP_DEBUG_ASSERT(team->t.t_dispatch); 4080 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]); 4081 4082 dispatch->th_disp_index = 0; 4083 #if OMP_45_ENABLED 4084 dispatch->th_doacross_buf_idx = 0; 4085 #endif 4086 if (!dispatch->th_disp_buffer) { 4087 dispatch->th_disp_buffer = 4088 (dispatch_private_info_t *)__kmp_allocate(disp_size); 4089 4090 if (__kmp_storage_map) { 4091 __kmp_print_storage_map_gtid( 4092 gtid, &dispatch->th_disp_buffer[0], 4093 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1 4094 ? 1 4095 : __kmp_dispatch_num_buffers], 4096 disp_size, "th_%d.th_dispatch.th_disp_buffer " 4097 "(team_%d.t_dispatch[%d].th_disp_buffer)", 4098 gtid, team->t.t_id, gtid); 4099 } 4100 } else { 4101 memset(&dispatch->th_disp_buffer[0], '\0', disp_size); 4102 } 4103 4104 dispatch->th_dispatch_pr_current = 0; 4105 dispatch->th_dispatch_sh_current = 0; 4106 4107 dispatch->th_deo_fcn = 0; /* ORDERED */ 4108 dispatch->th_dxo_fcn = 0; /* END ORDERED */ 4109 } 4110 4111 this_thr->th.th_next_pool = NULL; 4112 4113 if (!this_thr->th.th_task_state_memo_stack) { 4114 size_t i; 4115 this_thr->th.th_task_state_memo_stack = 4116 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8)); 4117 this_thr->th.th_task_state_top = 0; 4118 this_thr->th.th_task_state_stack_sz = 4; 4119 for (i = 0; i < this_thr->th.th_task_state_stack_sz; 4120 ++i) // zero init the stack 4121 this_thr->th.th_task_state_memo_stack[i] = 0; 4122 } 4123 4124 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 4125 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 4126 4127 KMP_MB(); 4128 } 4129 4130 /* allocate a new thread for the requesting team. this is only called from 4131 within a forkjoin critical section. we will first try to get an available 4132 thread from the thread pool. if none is available, we will fork a new one 4133 assuming we are able to create a new one. this should be assured, as the 4134 caller should check on this first. 
*/ 4135 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, 4136 int new_tid) { 4137 kmp_team_t *serial_team; 4138 kmp_info_t *new_thr; 4139 int new_gtid; 4140 4141 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid())); 4142 KMP_DEBUG_ASSERT(root && team); 4143 #if !KMP_NESTED_HOT_TEAMS 4144 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid())); 4145 #endif 4146 KMP_MB(); 4147 4148 /* first, try to get one from the thread pool */ 4149 if (__kmp_thread_pool) { 4150 4151 new_thr = (kmp_info_t *)__kmp_thread_pool; 4152 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool; 4153 if (new_thr == __kmp_thread_pool_insert_pt) { 4154 __kmp_thread_pool_insert_pt = NULL; 4155 } 4156 TCW_4(new_thr->th.th_in_pool, FALSE); 4157 // Don't touch th_active_in_pool or th_active. 4158 // The worker thread adjusts those flags as it sleeps/awakens. 4159 __kmp_thread_pool_nth--; 4160 4161 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n", 4162 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid)); 4163 KMP_ASSERT(!new_thr->th.th_team); 4164 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity); 4165 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0); 4166 4167 /* setup the thread structure */ 4168 __kmp_initialize_info(new_thr, team, new_tid, 4169 new_thr->th.th_info.ds.ds_gtid); 4170 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team); 4171 4172 TCW_4(__kmp_nth, __kmp_nth + 1); 4173 4174 new_thr->th.th_task_state = 0; 4175 new_thr->th.th_task_state_top = 0; 4176 new_thr->th.th_task_state_stack_sz = 4; 4177 4178 #ifdef KMP_ADJUST_BLOCKTIME 4179 /* Adjust blocktime back to zero if necessary */ 4180 /* Middle initialization might not have occurred yet */ 4181 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 4182 if (__kmp_nth > __kmp_avail_proc) { 4183 __kmp_zero_bt = TRUE; 4184 } 4185 } 4186 #endif /* KMP_ADJUST_BLOCKTIME */ 4187 4188 #if KMP_DEBUG 4189 // If thread entered pool via __kmp_free_thread, wait_flag should != 4190 // KMP_BARRIER_PARENT_FLAG. 4191 int b; 4192 kmp_balign_t *balign = new_thr->th.th_bar; 4193 for (b = 0; b < bs_last_barrier; ++b) 4194 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4195 #endif 4196 4197 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n", 4198 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid)); 4199 4200 KMP_MB(); 4201 return new_thr; 4202 } 4203 4204 /* no, well fork a new one */ 4205 KMP_ASSERT(__kmp_nth == __kmp_all_nth); 4206 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity); 4207 4208 #if KMP_USE_MONITOR 4209 // If this is the first worker thread the RTL is creating, then also 4210 // launch the monitor thread. We try to do this as early as possible. 4211 if (!TCR_4(__kmp_init_monitor)) { 4212 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); 4213 if (!TCR_4(__kmp_init_monitor)) { 4214 KF_TRACE(10, ("before __kmp_create_monitor\n")); 4215 TCW_4(__kmp_init_monitor, 1); 4216 __kmp_create_monitor(&__kmp_monitor); 4217 KF_TRACE(10, ("after __kmp_create_monitor\n")); 4218 #if KMP_OS_WINDOWS 4219 // AC: wait until monitor has started. This is a fix for CQ232808. 4220 // The reason is that if the library is loaded/unloaded in a loop with 4221 // small (parallel) work in between, then there is high probability that 4222 // monitor thread started after the library shutdown. 
At shutdown it is 4223 // too late to cope with the problem, because when the master is in 4224 // DllMain (process detach) the monitor has no chances to start (it is 4225 // blocked), and master has no means to inform the monitor that the 4226 // library has gone, because all the memory which the monitor can access 4227 // is going to be released/reset. 4228 while (TCR_4(__kmp_init_monitor) < 2) { 4229 KMP_YIELD(TRUE); 4230 } 4231 KF_TRACE(10, ("after monitor thread has started\n")); 4232 #endif 4233 } 4234 __kmp_release_bootstrap_lock(&__kmp_monitor_lock); 4235 } 4236 #endif 4237 4238 KMP_MB(); 4239 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) { 4240 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity); 4241 } 4242 4243 /* allocate space for it. */ 4244 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); 4245 4246 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr); 4247 4248 if (__kmp_storage_map) { 4249 __kmp_print_thread_storage_map(new_thr, new_gtid); 4250 } 4251 4252 // add the reserve serialized team, initialized from the team's master thread 4253 { 4254 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team); 4255 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n")); 4256 new_thr->th.th_serial_team = serial_team = 4257 (kmp_team_t *)__kmp_allocate_team(root, 1, 1, 4258 #if OMPT_SUPPORT 4259 0, // root parallel id 4260 #endif 4261 #if OMP_40_ENABLED 4262 proc_bind_default, 4263 #endif 4264 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); 4265 } 4266 KMP_ASSERT(serial_team); 4267 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for 4268 // execution (it is unused for now). 4269 serial_team->t.t_threads[0] = new_thr; 4270 KF_TRACE(10, 4271 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", 4272 new_thr)); 4273 4274 /* setup the thread structures */ 4275 __kmp_initialize_info(new_thr, team, new_tid, new_gtid); 4276 4277 #if USE_FAST_MEMORY 4278 __kmp_initialize_fast_memory(new_thr); 4279 #endif /* USE_FAST_MEMORY */ 4280 4281 #if KMP_USE_BGET 4282 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL); 4283 __kmp_initialize_bget(new_thr); 4284 #endif 4285 4286 __kmp_init_random(new_thr); // Initialize random number generator 4287 4288 /* Initialize these only once when thread is grabbed for a team allocation */ 4289 KA_TRACE(20, 4290 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n", 4291 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 4292 4293 int b; 4294 kmp_balign_t *balign = new_thr->th.th_bar; 4295 for (b = 0; b < bs_last_barrier; ++b) { 4296 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE; 4297 balign[b].bb.team = NULL; 4298 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING; 4299 balign[b].bb.use_oncore_barrier = 0; 4300 } 4301 4302 new_thr->th.th_spin_here = FALSE; 4303 new_thr->th.th_next_waiting = 0; 4304 4305 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4306 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED; 4307 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED; 4308 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED; 4309 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED; 4310 #endif 4311 4312 TCW_4(new_thr->th.th_in_pool, FALSE); 4313 new_thr->th.th_active_in_pool = FALSE; 4314 TCW_4(new_thr->th.th_active, TRUE); 4315 4316 /* adjust the global counters */ 4317 __kmp_all_nth++; 4318 __kmp_nth++; 4319 4320 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low 4321 // numbers of procs, and method #2 (keyed API call) for higher numbers. 
4322 if (__kmp_adjust_gtid_mode) { 4323 if (__kmp_all_nth >= __kmp_tls_gtid_min) { 4324 if (TCR_4(__kmp_gtid_mode) != 2) { 4325 TCW_4(__kmp_gtid_mode, 2); 4326 } 4327 } else { 4328 if (TCR_4(__kmp_gtid_mode) != 1) { 4329 TCW_4(__kmp_gtid_mode, 1); 4330 } 4331 } 4332 } 4333 4334 #ifdef KMP_ADJUST_BLOCKTIME 4335 /* Adjust blocktime back to zero if necessary */ 4336 /* Middle initialization might not have occurred yet */ 4337 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 4338 if (__kmp_nth > __kmp_avail_proc) { 4339 __kmp_zero_bt = TRUE; 4340 } 4341 } 4342 #endif /* KMP_ADJUST_BLOCKTIME */ 4343 4344 /* actually fork it and create the new worker thread */ 4345 KF_TRACE( 4346 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr)); 4347 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize); 4348 KF_TRACE(10, 4349 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr)); 4350 4351 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), 4352 new_gtid)); 4353 KMP_MB(); 4354 return new_thr; 4355 } 4356 4357 /* Reinitialize team for reuse. 4358 The hot team code calls this case at every fork barrier, so EPCC barrier 4359 test are extremely sensitive to changes in it, esp. writes to the team 4360 struct, which cause a cache invalidation in all threads. 4361 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */ 4362 static void __kmp_reinitialize_team(kmp_team_t *team, 4363 kmp_internal_control_t *new_icvs, 4364 ident_t *loc) { 4365 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n", 4366 team->t.t_threads[0], team)); 4367 KMP_DEBUG_ASSERT(team && new_icvs); 4368 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); 4369 KMP_CHECK_UPDATE(team->t.t_ident, loc); 4370 4371 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); 4372 // Copy ICVs to the master thread's implicit taskdata 4373 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE); 4374 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); 4375 4376 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n", 4377 team->t.t_threads[0], team)); 4378 } 4379 4380 /* Initialize the team data structure. 4381 This assumes the t_threads and t_max_nproc are already set. 4382 Also, we don't touch the arguments */ 4383 static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, 4384 kmp_internal_control_t *new_icvs, 4385 ident_t *loc) { 4386 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team)); 4387 4388 /* verify */ 4389 KMP_DEBUG_ASSERT(team); 4390 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc); 4391 KMP_DEBUG_ASSERT(team->t.t_threads); 4392 KMP_MB(); 4393 4394 team->t.t_master_tid = 0; /* not needed */ 4395 /* team->t.t_master_bar; not needed */ 4396 team->t.t_serialized = new_nproc > 1 ? 
0 : 1; 4397 team->t.t_nproc = new_nproc; 4398 4399 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */ 4400 team->t.t_next_pool = NULL; 4401 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess 4402 * up hot team */ 4403 4404 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */ 4405 team->t.t_invoke = NULL; /* not needed */ 4406 4407 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4408 team->t.t_sched = new_icvs->sched; 4409 4410 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 4411 team->t.t_fp_control_saved = FALSE; /* not needed */ 4412 team->t.t_x87_fpu_control_word = 0; /* not needed */ 4413 team->t.t_mxcsr = 0; /* not needed */ 4414 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 4415 4416 team->t.t_construct = 0; 4417 __kmp_init_lock(&team->t.t_single_lock); 4418 4419 team->t.t_ordered.dt.t_value = 0; 4420 team->t.t_master_active = FALSE; 4421 4422 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t)); 4423 4424 #ifdef KMP_DEBUG 4425 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */ 4426 #endif 4427 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */ 4428 4429 team->t.t_control_stack_top = NULL; 4430 4431 __kmp_reinitialize_team(team, new_icvs, loc); 4432 4433 KMP_MB(); 4434 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team)); 4435 } 4436 4437 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4438 /* Sets full mask for thread and returns old mask, no changes to structures. */ 4439 static void 4440 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) { 4441 if (KMP_AFFINITY_CAPABLE()) { 4442 int status; 4443 if (old_mask != NULL) { 4444 status = __kmp_get_system_affinity(old_mask, TRUE); 4445 int error = errno; 4446 if (status != 0) { 4447 __kmp_msg(kmp_ms_fatal, KMP_MSG(ChangeThreadAffMaskError), 4448 KMP_ERR(error), __kmp_msg_null); 4449 } 4450 } 4451 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE); 4452 } 4453 } 4454 #endif 4455 4456 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4457 4458 // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 4459 // It calculats the worker + master thread's partition based upon the parent 4460 // thread's partition, and binds each worker to a thread in their partition. 4461 // The master thread's partition should already include its current binding. 4462 static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { 4463 // Copy the master thread's place partion to the team struct 4464 kmp_info_t *master_th = team->t.t_threads[0]; 4465 KMP_DEBUG_ASSERT(master_th != NULL); 4466 kmp_proc_bind_t proc_bind = team->t.t_proc_bind; 4467 int first_place = master_th->th.th_first_place; 4468 int last_place = master_th->th.th_last_place; 4469 int masters_place = master_th->th.th_current_place; 4470 team->t.t_first_place = first_place; 4471 team->t.t_last_place = last_place; 4472 4473 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 4474 "bound to place %d partition = [%d,%d]\n", 4475 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]), 4476 team->t.t_id, masters_place, first_place, last_place)); 4477 4478 switch (proc_bind) { 4479 4480 case proc_bind_default: 4481 // serial teams might have the proc_bind policy set to proc_bind_default. 
It 4482 // doesn't matter, as we don't rebind master thread for any proc_bind policy 4483 KMP_DEBUG_ASSERT(team->t.t_nproc == 1); 4484 break; 4485 4486 case proc_bind_master: { 4487 int f; 4488 int n_th = team->t.t_nproc; 4489 for (f = 1; f < n_th; f++) { 4490 kmp_info_t *th = team->t.t_threads[f]; 4491 KMP_DEBUG_ASSERT(th != NULL); 4492 th->th.th_first_place = first_place; 4493 th->th.th_last_place = last_place; 4494 th->th.th_new_place = masters_place; 4495 4496 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d " 4497 "partition = [%d,%d]\n", 4498 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 4499 f, masters_place, first_place, last_place)); 4500 } 4501 } break; 4502 4503 case proc_bind_close: { 4504 int f; 4505 int n_th = team->t.t_nproc; 4506 int n_places; 4507 if (first_place <= last_place) { 4508 n_places = last_place - first_place + 1; 4509 } else { 4510 n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 4511 } 4512 if (n_th <= n_places) { 4513 int place = masters_place; 4514 for (f = 1; f < n_th; f++) { 4515 kmp_info_t *th = team->t.t_threads[f]; 4516 KMP_DEBUG_ASSERT(th != NULL); 4517 4518 if (place == last_place) { 4519 place = first_place; 4520 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4521 place = 0; 4522 } else { 4523 place++; 4524 } 4525 th->th.th_first_place = first_place; 4526 th->th.th_last_place = last_place; 4527 th->th.th_new_place = place; 4528 4529 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 4530 "partition = [%d,%d]\n", 4531 __kmp_gtid_from_thread(team->t.t_threads[f]), 4532 team->t.t_id, f, place, first_place, last_place)); 4533 } 4534 } else { 4535 int S, rem, gap, s_count; 4536 S = n_th / n_places; 4537 s_count = 0; 4538 rem = n_th - (S * n_places); 4539 gap = rem > 0 ? 
n_places / rem : n_places; 4540 int place = masters_place; 4541 int gap_ct = gap; 4542 for (f = 0; f < n_th; f++) { 4543 kmp_info_t *th = team->t.t_threads[f]; 4544 KMP_DEBUG_ASSERT(th != NULL); 4545 4546 th->th.th_first_place = first_place; 4547 th->th.th_last_place = last_place; 4548 th->th.th_new_place = place; 4549 s_count++; 4550 4551 if ((s_count == S) && rem && (gap_ct == gap)) { 4552 // do nothing, add an extra thread to place on next iteration 4553 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 4554 // we added an extra thread to this place; move to next place 4555 if (place == last_place) { 4556 place = first_place; 4557 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4558 place = 0; 4559 } else { 4560 place++; 4561 } 4562 s_count = 0; 4563 gap_ct = 1; 4564 rem--; 4565 } else if (s_count == S) { // place full; don't add extra 4566 if (place == last_place) { 4567 place = first_place; 4568 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4569 place = 0; 4570 } else { 4571 place++; 4572 } 4573 gap_ct++; 4574 s_count = 0; 4575 } 4576 4577 KA_TRACE(100, 4578 ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 4579 "partition = [%d,%d]\n", 4580 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f, 4581 th->th.th_new_place, first_place, last_place)); 4582 } 4583 KMP_DEBUG_ASSERT(place == masters_place); 4584 } 4585 } break; 4586 4587 case proc_bind_spread: { 4588 int f; 4589 int n_th = team->t.t_nproc; 4590 int n_places; 4591 int thidx; 4592 if (first_place <= last_place) { 4593 n_places = last_place - first_place + 1; 4594 } else { 4595 n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 4596 } 4597 if (n_th <= n_places) { 4598 int place = masters_place; 4599 int S = n_places / n_th; 4600 int s_count, rem, gap, gap_ct; 4601 rem = n_places - n_th * S; 4602 gap = rem ? n_th / rem : 1; 4603 gap_ct = gap; 4604 thidx = n_th; 4605 if (update_master_only == 1) 4606 thidx = 1; 4607 for (f = 0; f < thidx; f++) { 4608 kmp_info_t *th = team->t.t_threads[f]; 4609 KMP_DEBUG_ASSERT(th != NULL); 4610 4611 th->th.th_first_place = place; 4612 th->th.th_new_place = place; 4613 s_count = 1; 4614 while (s_count < S) { 4615 if (place == last_place) { 4616 place = first_place; 4617 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4618 place = 0; 4619 } else { 4620 place++; 4621 } 4622 s_count++; 4623 } 4624 if (rem && (gap_ct == gap)) { 4625 if (place == last_place) { 4626 place = first_place; 4627 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4628 place = 0; 4629 } else { 4630 place++; 4631 } 4632 rem--; 4633 gap_ct = 0; 4634 } 4635 th->th.th_last_place = place; 4636 gap_ct++; 4637 4638 if (place == last_place) { 4639 place = first_place; 4640 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4641 place = 0; 4642 } else { 4643 place++; 4644 } 4645 4646 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4647 "partition = [%d,%d]\n", 4648 __kmp_gtid_from_thread(team->t.t_threads[f]), 4649 team->t.t_id, f, th->th.th_new_place, 4650 th->th.th_first_place, th->th.th_last_place)); 4651 } 4652 KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 4653 } else { 4654 int S, rem, gap, s_count; 4655 S = n_th / n_places; 4656 s_count = 0; 4657 rem = n_th - (S * n_places); 4658 gap = rem > 0 ? 
n_places / rem : n_places; 4659 int place = masters_place; 4660 int gap_ct = gap; 4661 thidx = n_th; 4662 if (update_master_only == 1) 4663 thidx = 1; 4664 for (f = 0; f < thidx; f++) { 4665 kmp_info_t *th = team->t.t_threads[f]; 4666 KMP_DEBUG_ASSERT(th != NULL); 4667 4668 th->th.th_first_place = place; 4669 th->th.th_last_place = place; 4670 th->th.th_new_place = place; 4671 s_count++; 4672 4673 if ((s_count == S) && rem && (gap_ct == gap)) { 4674 // do nothing, add an extra thread to place on next iteration 4675 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 4676 // we added an extra thread to this place; move on to next place 4677 if (place == last_place) { 4678 place = first_place; 4679 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4680 place = 0; 4681 } else { 4682 place++; 4683 } 4684 s_count = 0; 4685 gap_ct = 1; 4686 rem--; 4687 } else if (s_count == S) { // place is full; don't add extra thread 4688 if (place == last_place) { 4689 place = first_place; 4690 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4691 place = 0; 4692 } else { 4693 place++; 4694 } 4695 gap_ct++; 4696 s_count = 0; 4697 } 4698 4699 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4700 "partition = [%d,%d]\n", 4701 __kmp_gtid_from_thread(team->t.t_threads[f]), 4702 team->t.t_id, f, th->th.th_new_place, 4703 th->th.th_first_place, th->th.th_last_place)); 4704 } 4705 KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 4706 } 4707 } break; 4708 4709 default: 4710 break; 4711 } 4712 4713 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id)); 4714 } 4715 4716 #endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */ 4717 4718 /* allocate a new team data structure to use. take one off of the free pool if 4719 available */ 4720 kmp_team_t * 4721 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, 4722 #if OMPT_SUPPORT 4723 ompt_parallel_id_t ompt_parallel_id, 4724 #endif 4725 #if OMP_40_ENABLED 4726 kmp_proc_bind_t new_proc_bind, 4727 #endif 4728 kmp_internal_control_t *new_icvs, 4729 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) { 4730 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); 4731 int f; 4732 kmp_team_t *team; 4733 int use_hot_team = !root->r.r_active; 4734 int level = 0; 4735 4736 KA_TRACE(20, ("__kmp_allocate_team: called\n")); 4737 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0); 4738 KMP_DEBUG_ASSERT(max_nproc >= new_nproc); 4739 KMP_MB(); 4740 4741 #if KMP_NESTED_HOT_TEAMS 4742 kmp_hot_team_ptr_t *hot_teams; 4743 if (master) { 4744 team = master->th.th_team; 4745 level = team->t.t_active_level; 4746 if (master->th.th_teams_microtask) { // in teams construct? 
4747 if (master->th.th_teams_size.nteams > 1 && 4748 ( // #teams > 1 4749 team->t.t_pkfn == 4750 (microtask_t)__kmp_teams_master || // inner fork of the teams 4751 master->th.th_teams_level < 4752 team->t.t_level)) { // or nested parallel inside the teams 4753 ++level; // not increment if #teams==1, or for outer fork of the teams; 4754 // increment otherwise 4755 } 4756 } 4757 hot_teams = master->th.th_hot_teams; 4758 if (level < __kmp_hot_teams_max_level && hot_teams && 4759 hot_teams[level] 4760 .hot_team) { // hot team has already been allocated for given level 4761 use_hot_team = 1; 4762 } else { 4763 use_hot_team = 0; 4764 } 4765 } 4766 #endif 4767 // Optimization to use a "hot" team 4768 if (use_hot_team && new_nproc > 1) { 4769 KMP_DEBUG_ASSERT(new_nproc == max_nproc); 4770 #if KMP_NESTED_HOT_TEAMS 4771 team = hot_teams[level].hot_team; 4772 #else 4773 team = root->r.r_hot_team; 4774 #endif 4775 #if KMP_DEBUG 4776 if (__kmp_tasking_mode != tskm_immediate_exec) { 4777 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 4778 "task_team[1] = %p before reinit\n", 4779 team->t.t_task_team[0], team->t.t_task_team[1])); 4780 } 4781 #endif 4782 4783 // Has the number of threads changed? 4784 /* Let's assume the most common case is that the number of threads is 4785 unchanged, and put that case first. */ 4786 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads 4787 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n")); 4788 // This case can mean that omp_set_num_threads() was called and the hot 4789 // team size was already reduced, so we check the special flag 4790 if (team->t.t_size_changed == -1) { 4791 team->t.t_size_changed = 1; 4792 } else { 4793 KMP_CHECK_UPDATE(team->t.t_size_changed, 0); 4794 } 4795 4796 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4797 kmp_r_sched_t new_sched = new_icvs->sched; 4798 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || 4799 team->t.t_sched.chunk != new_sched.chunk) 4800 team->t.t_sched = 4801 new_sched; // set master's schedule as new run-time schedule 4802 4803 __kmp_reinitialize_team(team, new_icvs, 4804 root->r.r_uber_thread->th.th_ident); 4805 4806 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0, 4807 team->t.t_threads[0], team)); 4808 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 4809 4810 #if OMP_40_ENABLED 4811 #if KMP_AFFINITY_SUPPORTED 4812 if ((team->t.t_size_changed == 0) && 4813 (team->t.t_proc_bind == new_proc_bind)) { 4814 if (new_proc_bind == proc_bind_spread) { 4815 __kmp_partition_places( 4816 team, 1); // add flag to update only master for spread 4817 } 4818 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: " 4819 "proc_bind = %d, partition = [%d,%d]\n", 4820 team->t.t_id, new_proc_bind, team->t.t_first_place, 4821 team->t.t_last_place)); 4822 } else { 4823 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4824 __kmp_partition_places(team); 4825 } 4826 #else 4827 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4828 #endif /* KMP_AFFINITY_SUPPORTED */ 4829 #endif /* OMP_40_ENABLED */ 4830 } else if (team->t.t_nproc > new_nproc) { 4831 KA_TRACE(20, 4832 ("__kmp_allocate_team: decreasing hot team thread count to %d\n", 4833 new_nproc)); 4834 4835 team->t.t_size_changed = 1; 4836 #if KMP_NESTED_HOT_TEAMS 4837 if (__kmp_hot_teams_mode == 0) { 4838 // AC: saved number of threads should correspond to team's value in this 4839 // mode, can be bigger in mode 1, when hot team has threads in reserve 4840 
KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); 4841 hot_teams[level].hot_team_nth = new_nproc; 4842 #endif // KMP_NESTED_HOT_TEAMS 4843 /* release the extra threads we don't need any more */ 4844 for (f = new_nproc; f < team->t.t_nproc; f++) { 4845 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 4846 if (__kmp_tasking_mode != tskm_immediate_exec) { 4847 // When decreasing team size, threads no longer in the team should 4848 // unref task team. 4849 team->t.t_threads[f]->th.th_task_team = NULL; 4850 } 4851 __kmp_free_thread(team->t.t_threads[f]); 4852 team->t.t_threads[f] = NULL; 4853 } 4854 #if KMP_NESTED_HOT_TEAMS 4855 } // (__kmp_hot_teams_mode == 0) 4856 else { 4857 // When keeping extra threads in team, switch threads to wait on own 4858 // b_go flag 4859 for (f = new_nproc; f < team->t.t_nproc; ++f) { 4860 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 4861 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; 4862 for (int b = 0; b < bs_last_barrier; ++b) { 4863 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { 4864 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 4865 } 4866 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); 4867 } 4868 } 4869 } 4870 #endif // KMP_NESTED_HOT_TEAMS 4871 team->t.t_nproc = new_nproc; 4872 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4873 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type || 4874 team->t.t_sched.chunk != new_icvs->sched.chunk) 4875 team->t.t_sched = new_icvs->sched; 4876 __kmp_reinitialize_team(team, new_icvs, 4877 root->r.r_uber_thread->th.th_ident); 4878 4879 /* update the remaining threads */ 4880 for (f = 0; f < new_nproc; ++f) { 4881 team->t.t_threads[f]->th.th_team_nproc = new_nproc; 4882 } 4883 // restore the current task state of the master thread: should be the 4884 // implicit task 4885 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0, 4886 team->t.t_threads[0], team)); 4887 4888 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 4889 4890 #ifdef KMP_DEBUG 4891 for (f = 0; f < team->t.t_nproc; f++) { 4892 KMP_DEBUG_ASSERT(team->t.t_threads[f] && 4893 team->t.t_threads[f]->th.th_team_nproc == 4894 team->t.t_nproc); 4895 } 4896 #endif 4897 4898 #if OMP_40_ENABLED 4899 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4900 #if KMP_AFFINITY_SUPPORTED 4901 __kmp_partition_places(team); 4902 #endif 4903 #endif 4904 } else { // team->t.t_nproc < new_nproc 4905 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4906 kmp_affin_mask_t *old_mask; 4907 if (KMP_AFFINITY_CAPABLE()) { 4908 KMP_CPU_ALLOC(old_mask); 4909 } 4910 #endif 4911 4912 KA_TRACE(20, 4913 ("__kmp_allocate_team: increasing hot team thread count to %d\n", 4914 new_nproc)); 4915 4916 team->t.t_size_changed = 1; 4917 4918 #if KMP_NESTED_HOT_TEAMS 4919 int avail_threads = hot_teams[level].hot_team_nth; 4920 if (new_nproc < avail_threads) 4921 avail_threads = new_nproc; 4922 kmp_info_t **other_threads = team->t.t_threads; 4923 for (f = team->t.t_nproc; f < avail_threads; ++f) { 4924 // Adjust barrier data of reserved threads (if any) of the team 4925 // Other data will be set in __kmp_initialize_info() below. 
4926 int b; 4927 kmp_balign_t *balign = other_threads[f]->th.th_bar; 4928 for (b = 0; b < bs_last_barrier; ++b) { 4929 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 4930 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4931 #if USE_DEBUGGER 4932 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 4933 #endif 4934 } 4935 } 4936 if (hot_teams[level].hot_team_nth >= new_nproc) { 4937 // we have all needed threads in reserve, no need to allocate any 4938 // this only possible in mode 1, cannot have reserved threads in mode 0 4939 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); 4940 team->t.t_nproc = new_nproc; // just get reserved threads involved 4941 } else { 4942 // we may have some threads in reserve, but not enough 4943 team->t.t_nproc = 4944 hot_teams[level] 4945 .hot_team_nth; // get reserved threads involved if any 4946 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size 4947 #endif // KMP_NESTED_HOT_TEAMS 4948 if (team->t.t_max_nproc < new_nproc) { 4949 /* reallocate larger arrays */ 4950 __kmp_reallocate_team_arrays(team, new_nproc); 4951 __kmp_reinitialize_team(team, new_icvs, NULL); 4952 } 4953 4954 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4955 /* Temporarily set full mask for master thread before creation of 4956 workers. The reason is that workers inherit the affinity from master, 4957 so if a lot of workers are created on the single core quickly, they 4958 don't get a chance to set their own affinity for a long time. */ 4959 __kmp_set_thread_affinity_mask_full_tmp(old_mask); 4960 #endif 4961 4962 /* allocate new threads for the hot team */ 4963 for (f = team->t.t_nproc; f < new_nproc; f++) { 4964 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f); 4965 KMP_DEBUG_ASSERT(new_worker); 4966 team->t.t_threads[f] = new_worker; 4967 4968 KA_TRACE(20, 4969 ("__kmp_allocate_team: team %d init T#%d arrived: " 4970 "join=%llu, plain=%llu\n", 4971 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f, 4972 team->t.t_bar[bs_forkjoin_barrier].b_arrived, 4973 team->t.t_bar[bs_plain_barrier].b_arrived)); 4974 4975 { // Initialize barrier data for new threads. 4976 int b; 4977 kmp_balign_t *balign = new_worker->th.th_bar; 4978 for (b = 0; b < bs_last_barrier; ++b) { 4979 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 4980 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != 4981 KMP_BARRIER_PARENT_FLAG); 4982 #if USE_DEBUGGER 4983 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 4984 #endif 4985 } 4986 } 4987 } 4988 4989 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4990 if (KMP_AFFINITY_CAPABLE()) { 4991 /* Restore initial master thread's affinity mask */ 4992 __kmp_set_system_affinity(old_mask, TRUE); 4993 KMP_CPU_FREE(old_mask); 4994 } 4995 #endif 4996 #if KMP_NESTED_HOT_TEAMS 4997 } // end of check of t_nproc vs. new_nproc vs. 
hot_team_nth 4998 #endif // KMP_NESTED_HOT_TEAMS 4999 /* make sure everyone is syncronized */ 5000 int old_nproc = team->t.t_nproc; // save old value and use to update only 5001 // new threads below 5002 __kmp_initialize_team(team, new_nproc, new_icvs, 5003 root->r.r_uber_thread->th.th_ident); 5004 5005 /* reinitialize the threads */ 5006 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); 5007 for (f = 0; f < team->t.t_nproc; ++f) 5008 __kmp_initialize_info(team->t.t_threads[f], team, f, 5009 __kmp_gtid_from_tid(f, team)); 5010 if (level) { // set th_task_state for new threads in nested hot team 5011 // __kmp_initialize_info() no longer zeroes th_task_state, so we should 5012 // only need to set the th_task_state for the new threads. th_task_state 5013 // for master thread will not be accurate until after this in 5014 // __kmp_fork_call(), so we look to the master's memo_stack to get the 5015 // correct value. 5016 for (f = old_nproc; f < team->t.t_nproc; ++f) 5017 team->t.t_threads[f]->th.th_task_state = 5018 team->t.t_threads[0]->th.th_task_state_memo_stack[level]; 5019 } else { // set th_task_state for new threads in non-nested hot team 5020 int old_state = 5021 team->t.t_threads[0]->th.th_task_state; // copy master's state 5022 for (f = old_nproc; f < team->t.t_nproc; ++f) 5023 team->t.t_threads[f]->th.th_task_state = old_state; 5024 } 5025 5026 #ifdef KMP_DEBUG 5027 for (f = 0; f < team->t.t_nproc; ++f) { 5028 KMP_DEBUG_ASSERT(team->t.t_threads[f] && 5029 team->t.t_threads[f]->th.th_team_nproc == 5030 team->t.t_nproc); 5031 } 5032 #endif 5033 5034 #if OMP_40_ENABLED 5035 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 5036 #if KMP_AFFINITY_SUPPORTED 5037 __kmp_partition_places(team); 5038 #endif 5039 #endif 5040 } // Check changes in number of threads 5041 5042 #if OMP_40_ENABLED 5043 kmp_info_t *master = team->t.t_threads[0]; 5044 if (master->th.th_teams_microtask) { 5045 for (f = 1; f < new_nproc; ++f) { 5046 // propagate teams construct specific info to workers 5047 kmp_info_t *thr = team->t.t_threads[f]; 5048 thr->th.th_teams_microtask = master->th.th_teams_microtask; 5049 thr->th.th_teams_level = master->th.th_teams_level; 5050 thr->th.th_teams_size = master->th.th_teams_size; 5051 } 5052 } 5053 #endif /* OMP_40_ENABLED */ 5054 #if KMP_NESTED_HOT_TEAMS 5055 if (level) { 5056 // Sync barrier state for nested hot teams, not needed for outermost hot 5057 // team. 5058 for (f = 1; f < new_nproc; ++f) { 5059 kmp_info_t *thr = team->t.t_threads[f]; 5060 int b; 5061 kmp_balign_t *balign = thr->th.th_bar; 5062 for (b = 0; b < bs_last_barrier; ++b) { 5063 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 5064 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 5065 #if USE_DEBUGGER 5066 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 5067 #endif 5068 } 5069 } 5070 } 5071 #endif // KMP_NESTED_HOT_TEAMS 5072 5073 /* reallocate space for arguments if necessary */ 5074 __kmp_alloc_argv_entries(argc, team, TRUE); 5075 KMP_CHECK_UPDATE(team->t.t_argc, argc); 5076 // The hot team re-uses the previous task team, 5077 // if untouched during the previous release->gather phase. 
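// Descriptive note: by this point the three resize branches above have run.
// An unchanged hot team is reused as-is; a shrinking team either releases its
// surplus workers or parks them on their own b_go flag (nested hot teams,
// mode 1); a growing team wires reserved or newly allocated workers into the
// barrier state. In all cases the ICVs, the proc_bind partition and the argv
// storage have been brought up to date before the team is returned.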
5078 5079 KF_TRACE(10, (" hot_team = %p\n", team)); 5080 5081 #if KMP_DEBUG 5082 if (__kmp_tasking_mode != tskm_immediate_exec) { 5083 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 5084 "task_team[1] = %p after reinit\n", 5085 team->t.t_task_team[0], team->t.t_task_team[1])); 5086 } 5087 #endif 5088 5089 #if OMPT_SUPPORT 5090 __ompt_team_assign_id(team, ompt_parallel_id); 5091 #endif 5092 5093 KMP_MB(); 5094 5095 return team; 5096 } 5097 5098 /* next, let's try to take one from the team pool */ 5099 KMP_MB(); 5100 for (team = (kmp_team_t *)__kmp_team_pool; (team);) { 5101 /* TODO: consider resizing undersized teams instead of reaping them, now 5102 that we have a resizing mechanism */ 5103 if (team->t.t_max_nproc >= max_nproc) { 5104 /* take this team from the team pool */ 5105 __kmp_team_pool = team->t.t_next_pool; 5106 5107 /* setup the team for fresh use */ 5108 __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 5109 5110 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and " 5111 "task_team[1] %p to NULL\n", 5112 &team->t.t_task_team[0], &team->t.t_task_team[1])); 5113 team->t.t_task_team[0] = NULL; 5114 team->t.t_task_team[1] = NULL; 5115 5116 /* reallocate space for arguments if necessary */ 5117 __kmp_alloc_argv_entries(argc, team, TRUE); 5118 KMP_CHECK_UPDATE(team->t.t_argc, argc); 5119 5120 KA_TRACE( 5121 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 5122 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 5123 { // Initialize barrier data. 5124 int b; 5125 for (b = 0; b < bs_last_barrier; ++b) { 5126 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 5127 #if USE_DEBUGGER 5128 team->t.t_bar[b].b_master_arrived = 0; 5129 team->t.t_bar[b].b_team_arrived = 0; 5130 #endif 5131 } 5132 } 5133 5134 #if OMP_40_ENABLED 5135 team->t.t_proc_bind = new_proc_bind; 5136 #endif 5137 5138 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n", 5139 team->t.t_id)); 5140 5141 #if OMPT_SUPPORT 5142 __ompt_team_assign_id(team, ompt_parallel_id); 5143 #endif 5144 5145 KMP_MB(); 5146 5147 return team; 5148 } 5149 5150 /* reap team if it is too small, then loop back and check the next one */ 5151 // not sure if this is wise, but, will be redone during the hot-teams rewrite. 5152 /* TODO: Use technique to find the right size hot-team, don't reap them */ 5153 team = __kmp_reap_team(team); 5154 __kmp_team_pool = team; 5155 } 5156 5157 /* nothing available in the pool, no matter, make a new team! 
*/ 5158 KMP_MB(); 5159 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t)); 5160 5161 /* and set it up */ 5162 team->t.t_max_nproc = max_nproc; 5163 /* NOTE well, for some reason allocating one big buffer and dividing it up 5164 seems to really hurt performance a lot on the P4, so, let's not use this */ 5165 __kmp_allocate_team_arrays(team, max_nproc); 5166 5167 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n")); 5168 __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 5169 5170 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] " 5171 "%p to NULL\n", 5172 &team->t.t_task_team[0], &team->t.t_task_team[1])); 5173 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes 5174 // memory, no need to duplicate 5175 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes 5176 // memory, no need to duplicate 5177 5178 if (__kmp_storage_map) { 5179 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc); 5180 } 5181 5182 /* allocate space for arguments */ 5183 __kmp_alloc_argv_entries(argc, team, FALSE); 5184 team->t.t_argc = argc; 5185 5186 KA_TRACE(20, 5187 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 5188 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 5189 { // Initialize barrier data. 5190 int b; 5191 for (b = 0; b < bs_last_barrier; ++b) { 5192 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 5193 #if USE_DEBUGGER 5194 team->t.t_bar[b].b_master_arrived = 0; 5195 team->t.t_bar[b].b_team_arrived = 0; 5196 #endif 5197 } 5198 } 5199 5200 #if OMP_40_ENABLED 5201 team->t.t_proc_bind = new_proc_bind; 5202 #endif 5203 5204 #if OMPT_SUPPORT 5205 __ompt_team_assign_id(team, ompt_parallel_id); 5206 team->t.ompt_serialized_team_info = NULL; 5207 #endif 5208 5209 KMP_MB(); 5210 5211 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", 5212 team->t.t_id)); 5213 5214 return team; 5215 } 5216 5217 /* TODO implement hot-teams at all levels */ 5218 /* TODO implement lazy thread release on demand (disband request) */ 5219 5220 /* free the team. return it to the team pool. release all the threads 5221 * associated with it */ 5222 void __kmp_free_team(kmp_root_t *root, 5223 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) { 5224 int f; 5225 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), 5226 team->t.t_id)); 5227 5228 /* verify state */ 5229 KMP_DEBUG_ASSERT(root); 5230 KMP_DEBUG_ASSERT(team); 5231 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc); 5232 KMP_DEBUG_ASSERT(team->t.t_threads); 5233 5234 int use_hot_team = team == root->r.r_hot_team; 5235 #if KMP_NESTED_HOT_TEAMS 5236 int level; 5237 kmp_hot_team_ptr_t *hot_teams; 5238 if (master) { 5239 level = team->t.t_active_level - 1; 5240 if (master->th.th_teams_microtask) { // in teams construct? 
5241 if (master->th.th_teams_size.nteams > 1) { 5242 ++level; // level was not increased in teams construct for 5243 // team_of_masters 5244 } 5245 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && 5246 master->th.th_teams_level == team->t.t_level) { 5247 ++level; // level was not increased in teams construct for 5248 // team_of_workers before the parallel 5249 } // team->t.t_level will be increased inside parallel 5250 } 5251 hot_teams = master->th.th_hot_teams; 5252 if (level < __kmp_hot_teams_max_level) { 5253 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team); 5254 use_hot_team = 1; 5255 } 5256 } 5257 #endif // KMP_NESTED_HOT_TEAMS 5258 5259 /* team is done working */ 5260 TCW_SYNC_PTR(team->t.t_pkfn, 5261 NULL); // Important for Debugging Support Library. 5262 team->t.t_copyin_counter = 0; // init counter for possible reuse 5263 // Do not reset pointer to parent team to NULL for hot teams. 5264 5265 /* if we are non-hot team, release our threads */ 5266 if (!use_hot_team) { 5267 if (__kmp_tasking_mode != tskm_immediate_exec) { 5268 // Wait for threads to reach reapable state 5269 for (f = 1; f < team->t.t_nproc; ++f) { 5270 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 5271 kmp_info_t *th = team->t.t_threads[f]; 5272 volatile kmp_uint32 *state = &th->th.th_reap_state; 5273 while (*state != KMP_SAFE_TO_REAP) { 5274 #if KMP_OS_WINDOWS 5275 // On Windows a thread can be killed at any time, check this 5276 DWORD ecode; 5277 if (!__kmp_is_thread_alive(th, &ecode)) { 5278 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread 5279 break; 5280 } 5281 #endif 5282 // first check if thread is sleeping 5283 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th); 5284 if (fl.is_sleeping()) 5285 fl.resume(__kmp_gtid_from_thread(th)); 5286 KMP_CPU_PAUSE(); 5287 } 5288 } 5289 5290 // Delete task teams 5291 int tt_idx; 5292 for (tt_idx = 0; tt_idx < 2; ++tt_idx) { 5293 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; 5294 if (task_team != NULL) { 5295 for (f = 0; f < team->t.t_nproc; 5296 ++f) { // Have all threads unref task teams 5297 team->t.t_threads[f]->th.th_task_team = NULL; 5298 } 5299 KA_TRACE( 5300 20, 5301 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n", 5302 __kmp_get_gtid(), task_team, team->t.t_id)); 5303 #if KMP_NESTED_HOT_TEAMS 5304 __kmp_free_task_team(master, task_team); 5305 #endif 5306 team->t.t_task_team[tt_idx] = NULL; 5307 } 5308 } 5309 } 5310 5311 // Reset pointer to parent team only for non-hot teams. 5312 team->t.t_parent = NULL; 5313 team->t.t_level = 0; 5314 team->t.t_active_level = 0; 5315 5316 /* free the worker threads */ 5317 for (f = 1; f < team->t.t_nproc; ++f) { 5318 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 5319 __kmp_free_thread(team->t.t_threads[f]); 5320 team->t.t_threads[f] = NULL; 5321 } 5322 5323 /* put the team back in the team pool */ 5324 /* TODO limit size of team pool, call reap_team if pool too large */ 5325 team->t.t_next_pool = (kmp_team_t *)__kmp_team_pool; 5326 __kmp_team_pool = (volatile kmp_team_t *)team; 5327 } 5328 5329 KMP_MB(); 5330 } 5331 5332 /* reap the team. destroy it, reclaim all its resources and free its memory */ 5333 kmp_team_t *__kmp_reap_team(kmp_team_t *team) { 5334 kmp_team_t *next_pool = team->t.t_next_pool; 5335 5336 KMP_DEBUG_ASSERT(team); 5337 KMP_DEBUG_ASSERT(team->t.t_dispatch); 5338 KMP_DEBUG_ASSERT(team->t.t_disp_buffer); 5339 KMP_DEBUG_ASSERT(team->t.t_threads); 5340 KMP_DEBUG_ASSERT(team->t.t_argv); 5341 5342 /* TODO clean the threads that are a part of this? 
*/
5343
5344 /* free stuff */
5345 __kmp_free_team_arrays(team);
5346 if (team->t.t_argv != &team->t.t_inline_argv[0])
5347 __kmp_free((void *)team->t.t_argv);
5348 __kmp_free(team);
5349
5350 KMP_MB();
5351 return next_pool;
5352 }
5353
5354 // Free the thread. Don't reap it, just place it on the pool of available
5355 // threads.
5356 //
5357 // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
5358 // binding for the affinity mechanism to be useful.
5359 //
5360 // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
5361 // However, we want to avoid a potential performance problem by always
5362 // scanning through the list to find the correct point at which to insert
5363 // the thread (potential N**2 behavior). To do this we keep track of the
5364 // last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
5365 // With single-level parallelism, threads will always be added to the tail
5366 // of the list, kept track of by __kmp_thread_pool_insert_pt. With nested
5367 // parallelism, all bets are off and we may need to scan through the entire
5368 // free list.
5369 //
5370 // This change also has a potentially large performance benefit, for some
5371 // applications. Previously, as threads were freed from the hot team, they
5372 // would be placed back on the free list in inverse order. If the hot team
5373 // grew back to its original size, then the freed thread would be placed
5374 // back on the hot team in reverse order. This could cause bad cache
5375 // locality problems on programs where the size of the hot team regularly
5376 // grew and shrunk.
5377 //
5378 // Now, for single-level parallelism, the OMP tid is always == gtid.
5379 void __kmp_free_thread(kmp_info_t *this_th) {
5380 int gtid;
5381 kmp_info_t **scan;
5382
5383 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5384 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5385
5386 KMP_DEBUG_ASSERT(this_th);
5387
5388 // When moving thread to pool, switch thread to wait on own b_go flag, and
5389 // an uninitialized (NULL) team.
5390 int b;
5391 kmp_balign_t *balign = this_th->th.th_bar;
5392 for (b = 0; b < bs_last_barrier; ++b) {
5393 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5394 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5395 balign[b].bb.team = NULL;
5396 balign[b].bb.leaf_kids = 0;
5397 }
5398 this_th->th.th_task_state = 0;
5399
5400 /* put thread back on the free pool */
5401 TCW_PTR(this_th->th.th_team, NULL);
5402 TCW_PTR(this_th->th.th_root, NULL);
5403 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
5404
5405 // If the __kmp_thread_pool_insert_pt is already past the new insert
5406 // point, then we need to re-scan the entire list.
5407 gtid = this_th->th.th_info.ds.ds_gtid;
5408 if (__kmp_thread_pool_insert_pt != NULL) {
5409 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5410 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5411 __kmp_thread_pool_insert_pt = NULL;
5412 }
5413 }
5414
5415 // Scan down the list to find the place to insert the thread.
5416 // scan is the address of a link in the list, possibly the address of
5417 // __kmp_thread_pool itself.
5418 //
5419 // In the absence of nested parallelism, the for loop will have 0 iterations.
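// Illustrative sketch (comment only, not compiled): the generic pattern used
// here is insertion into a sorted singly linked list that starts scanning
// from a cached hint instead of the list head. The names below (node_t,
// insert_sorted, insert_hint) are hypothetical; they simply mirror the
// __kmp_thread_pool / __kmp_thread_pool_insert_pt logic that follows.
//
//   struct node_t { int key; node_t *next; };
//   static node_t *pool = NULL;        // plays the role of __kmp_thread_pool
//   static node_t *insert_hint = NULL; // plays the role of the insert point
//
//   static void insert_sorted(node_t *n) {
//     // If the cached hint is already past the new key, fall back to a full
//     // scan from the head (same as resetting __kmp_thread_pool_insert_pt).
//     if (insert_hint != NULL && insert_hint->key > n->key)
//       insert_hint = NULL;
//     node_t **scan = insert_hint ? &insert_hint->next : &pool;
//     while (*scan != NULL && (*scan)->key < n->key)
//       scan = &(*scan)->next;
//     n->next = *scan; // splice in, keeping the list sorted by key
//     *scan = n;
//     insert_hint = n; // remember the insertion point for the next call
//   }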
5420 if (__kmp_thread_pool_insert_pt != NULL) { 5421 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool); 5422 } else { 5423 scan = (kmp_info_t **)&__kmp_thread_pool; 5424 } 5425 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid); 5426 scan = &((*scan)->th.th_next_pool)) 5427 ; 5428 5429 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt 5430 // to its address. 5431 TCW_PTR(this_th->th.th_next_pool, *scan); 5432 __kmp_thread_pool_insert_pt = *scan = this_th; 5433 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) || 5434 (this_th->th.th_info.ds.ds_gtid < 5435 this_th->th.th_next_pool->th.th_info.ds.ds_gtid)); 5436 TCW_4(this_th->th.th_in_pool, TRUE); 5437 __kmp_thread_pool_nth++; 5438 5439 TCW_4(__kmp_nth, __kmp_nth - 1); 5440 5441 #ifdef KMP_ADJUST_BLOCKTIME 5442 /* Adjust blocktime back to user setting or default if necessary */ 5443 /* Middle initialization might never have occurred */ 5444 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 5445 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 5446 if (__kmp_nth <= __kmp_avail_proc) { 5447 __kmp_zero_bt = FALSE; 5448 } 5449 } 5450 #endif /* KMP_ADJUST_BLOCKTIME */ 5451 5452 KMP_MB(); 5453 } 5454 5455 /* ------------------------------------------------------------------------ */ 5456 5457 void *__kmp_launch_thread(kmp_info_t *this_thr) { 5458 int gtid = this_thr->th.th_info.ds.ds_gtid; 5459 /* void *stack_data;*/ 5460 kmp_team_t *(*volatile pteam); 5461 5462 KMP_MB(); 5463 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid)); 5464 5465 if (__kmp_env_consistency_check) { 5466 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak? 5467 } 5468 5469 #if OMPT_SUPPORT 5470 if (ompt_enabled) { 5471 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5472 this_thr->th.ompt_thread_info.wait_id = 0; 5473 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); 5474 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { 5475 __ompt_thread_begin(ompt_thread_worker, gtid); 5476 } 5477 } 5478 #endif 5479 5480 /* This is the place where threads wait for work */ 5481 while (!TCR_4(__kmp_global.g.g_done)) { 5482 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]); 5483 KMP_MB(); 5484 5485 /* wait for work to do */ 5486 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid)); 5487 5488 #if OMPT_SUPPORT 5489 if (ompt_enabled) { 5490 this_thr->th.ompt_thread_info.state = ompt_state_idle; 5491 } 5492 #endif 5493 5494 /* No tid yet since not part of a team */ 5495 __kmp_fork_barrier(gtid, KMP_GTID_DNE); 5496 5497 #if OMPT_SUPPORT 5498 if (ompt_enabled) { 5499 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5500 } 5501 #endif 5502 5503 pteam = (kmp_team_t * (*))(&this_thr->th.th_team); 5504 5505 /* have we been allocated? 
*/ 5506 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) { 5507 #if OMPT_SUPPORT 5508 ompt_task_info_t *task_info; 5509 ompt_parallel_id_t my_parallel_id; 5510 if (ompt_enabled) { 5511 task_info = __ompt_get_taskinfo(0); 5512 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id; 5513 } 5514 #endif 5515 /* we were just woken up, so run our new task */ 5516 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) { 5517 int rc; 5518 KA_TRACE(20, 5519 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n", 5520 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), 5521 (*pteam)->t.t_pkfn)); 5522 5523 updateHWFPControl(*pteam); 5524 5525 #if OMPT_SUPPORT 5526 if (ompt_enabled) { 5527 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; 5528 // Initialize OMPT task id for implicit task. 5529 int tid = __kmp_tid_from_gtid(gtid); 5530 task_info->task_id = __ompt_task_id_new(tid); 5531 } 5532 #endif 5533 5534 { 5535 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 5536 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 5537 rc = (*pteam)->t.t_invoke(gtid); 5538 } 5539 KMP_ASSERT(rc); 5540 5541 #if OMPT_SUPPORT 5542 if (ompt_enabled) { 5543 /* no frame set while outside task */ 5544 task_info->frame.exit_runtime_frame = NULL; 5545 5546 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5547 } 5548 #endif 5549 KMP_MB(); 5550 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", 5551 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), 5552 (*pteam)->t.t_pkfn)); 5553 } 5554 /* join barrier after parallel region */ 5555 __kmp_join_barrier(gtid); 5556 #if OMPT_SUPPORT && OMPT_TRACE 5557 if (ompt_enabled) { 5558 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 5559 // don't access *pteam here: it may have already been freed 5560 // by the master thread behind the barrier (possible race) 5561 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 5562 my_parallel_id, task_info->task_id); 5563 } 5564 task_info->frame.exit_runtime_frame = NULL; 5565 task_info->task_id = 0; 5566 } 5567 #endif 5568 } 5569 } 5570 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); 5571 5572 #if OMPT_SUPPORT 5573 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) { 5574 __ompt_thread_end(ompt_thread_worker, gtid); 5575 } 5576 #endif 5577 5578 this_thr->th.th_task_team = NULL; 5579 /* run the destructors for the threadprivate data for this thread */ 5580 __kmp_common_destroy_gtid(gtid); 5581 5582 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); 5583 KMP_MB(); 5584 return this_thr; 5585 } 5586 5587 /* ------------------------------------------------------------------------ */ 5588 5589 void __kmp_internal_end_dest(void *specific_gtid) { 5590 #if KMP_COMPILER_ICC 5591 #pragma warning(push) 5592 #pragma warning(disable : 810) // conversion from "void *" to "int" may lose 5593 // significant bits 5594 #endif 5595 // Make sure no significant bits are lost 5596 int gtid = (kmp_intptr_t)specific_gtid - 1; 5597 #if KMP_COMPILER_ICC 5598 #pragma warning(pop) 5599 #endif 5600 5601 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid)); 5602 /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage 5603 * this is because 0 is reserved for the nothing-stored case */ 5604 5605 /* josh: One reason for setting the gtid specific data even when it is being 5606 destroyed by pthread is to allow gtid lookup through thread specific data 5607 (__kmp_gtid_get_specific). 
Some of the code, especially stat code,
5608 that gets executed in the call to __kmp_internal_end_thread, actually
5609 gets the gtid through the thread specific data. Setting it here seems
5610 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
5611 to run smoothly.
5612 todo: get rid of this after we remove the dependence on
5613 __kmp_gtid_get_specific */
5614 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5615 __kmp_gtid_set_specific(gtid);
5616 #ifdef KMP_TDATA_GTID
5617 __kmp_gtid = gtid;
5618 #endif
5619 __kmp_internal_end_thread(gtid);
5620 }
5621
5622 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5623
5624 // 2009-09-08 (lev): It looks like the destructor does not work. In simple test
5625 // cases destructors work perfectly, but in real libomp.so I have no evidence it
5626 // is ever called. However, the -fini linker option in makefile.mk works fine.
5627
5628 __attribute__((destructor)) void __kmp_internal_end_dtor(void) {
5629 __kmp_internal_end_atexit();
5630 }
5631
5632 void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
5633
5634 #endif
5635
5636 /* [Windows] josh: when the atexit handler is called, there may still be more
5637 than one thread alive */
5638 void __kmp_internal_end_atexit(void) {
5639 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5640 /* [Windows]
5641 josh: ideally, we want to completely shut down the library in this atexit
5642 handler, but stat code that depends on thread specific data for gtid fails
5643 because that data becomes unavailable at some point during the shutdown, so
5644 we call __kmp_internal_end_thread instead. We should eventually remove the
5645 dependency on __kmp_get_specific_gtid in the stat code and use
5646 __kmp_internal_end_library to cleanly shut down the library.
5647
5648 // TODO: Can some of this comment about GVS be removed?
5649 I suspect that the offending stat code is executed when the calling thread
5650 tries to clean up a dead root thread's data structures, resulting in GVS
5651 code trying to close the GVS structures for that thread, but since the stat
5652 code uses __kmp_get_specific_gtid to get the gtid with the assumption that
5653 the calling thread is cleaning up itself instead of another thread, it gets
5654 confused. This happens because allowing a thread to unregister and clean up
5655 another thread is a recent modification for addressing an issue.
5656 Based on the current design (20050722), a thread may end up
5657 trying to unregister another thread only if thread death does not trigger
5658 the calling of __kmp_internal_end_thread. For Linux* OS, there is the
5659 thread specific data destructor function to detect thread death. For
5660 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
5661 is nothing. Thus, the workaround is applicable only for the Windows static
5662 stat library. */
5663 __kmp_internal_end_library(-1);
5664 #if KMP_OS_WINDOWS
5665 __kmp_close_console();
5666 #endif
5667 }
5668
5669 static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5670 // It is assumed __kmp_forkjoin_lock is acquired.
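// Descriptive note: for worker (non-root) threads the code below wakes the
// thread from the fork barrier if needed, joins the underlying OS thread via
// __kmp_reap_worker(), and fixes up the thread-pool accounting. It then
// releases the per-thread resources (implicit task, fast memory, suspend
// state, consistency-check stack, th_pri_common data, task-state memo stack,
// bget data, affinity mask, the serial team) and finally frees the thread
// descriptor itself.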
5671 5672 int gtid; 5673 5674 KMP_DEBUG_ASSERT(thread != NULL); 5675 5676 gtid = thread->th.th_info.ds.ds_gtid; 5677 5678 if (!is_root) { 5679 5680 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { 5681 /* Assume the threads are at the fork barrier here */ 5682 KA_TRACE( 5683 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", 5684 gtid)); 5685 /* Need release fence here to prevent seg faults for tree forkjoin barrier 5686 * (GEH) */ 5687 ANNOTATE_HAPPENS_BEFORE(thread); 5688 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread); 5689 __kmp_release_64(&flag); 5690 }; // if 5691 5692 // Terminate OS thread. 5693 __kmp_reap_worker(thread); 5694 5695 // The thread was killed asynchronously. If it was actively 5696 // spinning in the thread pool, decrement the global count. 5697 // 5698 // There is a small timing hole here - if the worker thread was just waking 5699 // up after sleeping in the pool, had reset it's th_active_in_pool flag but 5700 // not decremented the global counter __kmp_thread_pool_active_nth yet, then 5701 // the global counter might not get updated. 5702 // 5703 // Currently, this can only happen as the library is unloaded, 5704 // so there are no harmful side effects. 5705 if (thread->th.th_active_in_pool) { 5706 thread->th.th_active_in_pool = FALSE; 5707 KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth); 5708 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); 5709 } 5710 5711 // Decrement # of [worker] threads in the pool. 5712 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0); 5713 --__kmp_thread_pool_nth; 5714 }; // if 5715 5716 __kmp_free_implicit_task(thread); 5717 5718 // Free the fast memory for tasking 5719 #if USE_FAST_MEMORY 5720 __kmp_free_fast_memory(thread); 5721 #endif /* USE_FAST_MEMORY */ 5722 5723 __kmp_suspend_uninitialize_thread(thread); 5724 5725 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread); 5726 TCW_SYNC_PTR(__kmp_threads[gtid], NULL); 5727 5728 --__kmp_all_nth; 5729 // __kmp_nth was decremented when thread is added to the pool. 
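// Descriptive note: the KMP_ADJUST_BLOCKTIME block that follows mirrors the
// one in __kmp_free_thread(). Once the live thread count drops back to the
// number of available processors, the forced zero blocktime used while
// oversubscribed is lifted again, unless blocktime was set explicitly in the
// environment (__kmp_env_blocktime).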
5730 5731 #ifdef KMP_ADJUST_BLOCKTIME 5732 /* Adjust blocktime back to user setting or default if necessary */ 5733 /* Middle initialization might never have occurred */ 5734 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 5735 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 5736 if (__kmp_nth <= __kmp_avail_proc) { 5737 __kmp_zero_bt = FALSE; 5738 } 5739 } 5740 #endif /* KMP_ADJUST_BLOCKTIME */ 5741 5742 /* free the memory being used */ 5743 if (__kmp_env_consistency_check) { 5744 if (thread->th.th_cons) { 5745 __kmp_free_cons_stack(thread->th.th_cons); 5746 thread->th.th_cons = NULL; 5747 }; // if 5748 } 5749 5750 if (thread->th.th_pri_common != NULL) { 5751 __kmp_free(thread->th.th_pri_common); 5752 thread->th.th_pri_common = NULL; 5753 }; // if 5754 5755 if (thread->th.th_task_state_memo_stack != NULL) { 5756 __kmp_free(thread->th.th_task_state_memo_stack); 5757 thread->th.th_task_state_memo_stack = NULL; 5758 } 5759 5760 #if KMP_USE_BGET 5761 if (thread->th.th_local.bget_data != NULL) { 5762 __kmp_finalize_bget(thread); 5763 }; // if 5764 #endif 5765 5766 #if KMP_AFFINITY_SUPPORTED 5767 if (thread->th.th_affin_mask != NULL) { 5768 KMP_CPU_FREE(thread->th.th_affin_mask); 5769 thread->th.th_affin_mask = NULL; 5770 }; // if 5771 #endif /* KMP_AFFINITY_SUPPORTED */ 5772 5773 __kmp_reap_team(thread->th.th_serial_team); 5774 thread->th.th_serial_team = NULL; 5775 __kmp_free(thread); 5776 5777 KMP_MB(); 5778 5779 } // __kmp_reap_thread 5780 5781 static void __kmp_internal_end(void) { 5782 int i; 5783 5784 /* First, unregister the library */ 5785 __kmp_unregister_library(); 5786 5787 #if KMP_OS_WINDOWS 5788 /* In Win static library, we can't tell when a root actually dies, so we 5789 reclaim the data structures for any root threads that have died but not 5790 unregistered themselves, in order to shut down cleanly. 5791 In Win dynamic library we also can't tell when a thread dies. */ 5792 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of 5793 // dead roots 5794 #endif 5795 5796 for (i = 0; i < __kmp_threads_capacity; i++) 5797 if (__kmp_root[i]) 5798 if (__kmp_root[i]->r.r_active) 5799 break; 5800 KMP_MB(); /* Flush all pending memory write invalidates. */ 5801 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 5802 5803 if (i < __kmp_threads_capacity) { 5804 #if KMP_USE_MONITOR 5805 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? 5806 KMP_MB(); /* Flush all pending memory write invalidates. */ 5807 5808 // Need to check that monitor was initialized before reaping it. If we are 5809 // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then 5810 // __kmp_monitor will appear to contain valid data, but it is only valid in the 5811 // parent process, not the child. 5812 // New behavior (201008): instead of keying off of the flag 5813 // __kmp_init_parallel, the monitor thread creation is keyed off 5814 // of the new flag __kmp_init_monitor. 5815 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); 5816 if (TCR_4(__kmp_init_monitor)) { 5817 __kmp_reap_monitor(&__kmp_monitor); 5818 TCW_4(__kmp_init_monitor, 0); 5819 } 5820 __kmp_release_bootstrap_lock(&__kmp_monitor_lock); 5821 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n")); 5822 #endif // KMP_USE_MONITOR 5823 } else { 5824 /* TODO move this to cleanup code */ 5825 #ifdef KMP_DEBUG 5826 /* make sure that everything has properly ended */ 5827 for (i = 0; i < __kmp_threads_capacity; i++) { 5828 if (__kmp_root[i]) { 5829 // KMP_ASSERT( ! 
KMP_UBER_GTID( i ) ); // AC: 5830 // there can be uber threads alive here 5831 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active? 5832 } 5833 } 5834 #endif 5835 5836 KMP_MB(); 5837 5838 // Reap the worker threads. 5839 // This is valid for now, but be careful if threads are reaped sooner. 5840 while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool. 5841 // Get the next thread from the pool. 5842 kmp_info_t *thread = (kmp_info_t *)__kmp_thread_pool; 5843 __kmp_thread_pool = thread->th.th_next_pool; 5844 // Reap it. 5845 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP); 5846 thread->th.th_next_pool = NULL; 5847 thread->th.th_in_pool = FALSE; 5848 __kmp_reap_thread(thread, 0); 5849 }; // while 5850 __kmp_thread_pool_insert_pt = NULL; 5851 5852 // Reap teams. 5853 while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool. 5854 // Get the next team from the pool. 5855 kmp_team_t *team = (kmp_team_t *)__kmp_team_pool; 5856 __kmp_team_pool = team->t.t_next_pool; 5857 // Reap it. 5858 team->t.t_next_pool = NULL; 5859 __kmp_reap_team(team); 5860 }; // while 5861 5862 __kmp_reap_task_teams(); 5863 5864 for (i = 0; i < __kmp_threads_capacity; ++i) { 5865 // TBD: Add some checking... 5866 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL ); 5867 } 5868 5869 /* Make sure all threadprivate destructors get run by joining with all 5870 worker threads before resetting this flag */ 5871 TCW_SYNC_4(__kmp_init_common, FALSE); 5872 5873 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n")); 5874 KMP_MB(); 5875 5876 #if KMP_USE_MONITOR 5877 // See note above: One of the possible fixes for CQ138434 / CQ140126 5878 // 5879 // FIXME: push both code fragments down and CSE them? 5880 // push them into __kmp_cleanup() ? 5881 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); 5882 if (TCR_4(__kmp_init_monitor)) { 5883 __kmp_reap_monitor(&__kmp_monitor); 5884 TCW_4(__kmp_init_monitor, 0); 5885 } 5886 __kmp_release_bootstrap_lock(&__kmp_monitor_lock); 5887 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n")); 5888 #endif 5889 } /* else !__kmp_global.t_active */ 5890 TCW_4(__kmp_init_gtid, FALSE); 5891 KMP_MB(); /* Flush all pending memory write invalidates. */ 5892 5893 __kmp_cleanup(); 5894 #if OMPT_SUPPORT 5895 ompt_fini(); 5896 #endif 5897 } 5898 5899 void __kmp_internal_end_library(int gtid_req) { 5900 /* if we have already cleaned up, don't try again, it wouldn't be pretty */ 5901 /* this shouldn't be a race condition because __kmp_internal_end() is the 5902 only place to clear __kmp_serial_init */ 5903 /* we'll check this later too, after we get the lock */ 5904 // 2009-09-06: We do not set g_abort without setting g_done. This check looks 5905 // redundaant, because the next check will work in any case. 5906 if (__kmp_global.g.g_abort) { 5907 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n")); 5908 /* TODO abort? */ 5909 return; 5910 } 5911 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 5912 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n")); 5913 return; 5914 } 5915 5916 KMP_MB(); /* Flush all pending memory write invalidates. */ 5917 5918 /* find out who we are and what we should do */ 5919 { 5920 int gtid = (gtid_req >= 0) ? 
gtid_req : __kmp_gtid_get_specific(); 5921 KA_TRACE( 5922 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req)); 5923 if (gtid == KMP_GTID_SHUTDOWN) { 5924 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system " 5925 "already shutdown\n")); 5926 return; 5927 } else if (gtid == KMP_GTID_MONITOR) { 5928 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not " 5929 "registered, or system shutdown\n")); 5930 return; 5931 } else if (gtid == KMP_GTID_DNE) { 5932 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system " 5933 "shutdown\n")); 5934 /* we don't know who we are, but we may still shutdown the library */ 5935 } else if (KMP_UBER_GTID(gtid)) { 5936 /* unregister ourselves as an uber thread. gtid is no longer valid */ 5937 if (__kmp_root[gtid]->r.r_active) { 5938 __kmp_global.g.g_abort = -1; 5939 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 5940 KA_TRACE(10, 5941 ("__kmp_internal_end_library: root still active, abort T#%d\n", 5942 gtid)); 5943 return; 5944 } else { 5945 KA_TRACE( 5946 10, 5947 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid)); 5948 __kmp_unregister_root_current_thread(gtid); 5949 } 5950 } else { 5951 /* worker threads may call this function through the atexit handler, if they 5952 * call exit() */ 5953 /* For now, skip the usual subsequent processing and just dump the debug buffer. 5954 TODO: do a thorough shutdown instead */ 5955 #ifdef DUMP_DEBUG_ON_EXIT 5956 if (__kmp_debug_buf) 5957 __kmp_dump_debug_buffer(); 5958 #endif 5959 return; 5960 } 5961 } 5962 /* synchronize the termination process */ 5963 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 5964 5965 /* have we already finished */ 5966 if (__kmp_global.g.g_abort) { 5967 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n")); 5968 /* TODO abort? */ 5969 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 5970 return; 5971 } 5972 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 5973 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 5974 return; 5975 } 5976 5977 /* We need this lock to enforce mutex between this reading of 5978 __kmp_threads_capacity and the writing by __kmp_register_root. 5979 Alternatively, we can use a counter of roots that is atomically updated by 5980 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and 5981 __kmp_internal_end_*. */ 5982 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 5983 5984 /* now we can safely conduct the actual termination */ 5985 __kmp_internal_end(); 5986 5987 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 5988 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 5989 5990 KA_TRACE(10, ("__kmp_internal_end_library: exit\n")); 5991 5992 #ifdef DUMP_DEBUG_ON_EXIT 5993 if (__kmp_debug_buf) 5994 __kmp_dump_debug_buffer(); 5995 #endif 5996 5997 #if KMP_OS_WINDOWS 5998 __kmp_close_console(); 5999 #endif 6000 6001 __kmp_fini_allocator(); 6002 6003 } // __kmp_internal_end_library 6004 6005 void __kmp_internal_end_thread(int gtid_req) { 6006 int i; 6007 6008 /* if we have already cleaned up, don't try again, it wouldn't be pretty */ 6009 /* this shouldn't be a race condition because __kmp_internal_end() is the 6010 * only place to clear __kmp_serial_init */ 6011 /* we'll check this later too, after we get the lock */ 6012 // 2009-09-06: We do not set g_abort without setting g_done. This check looks 6013 // redundant, because the next check will work in any case. 
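// Same early-out sequence as in __kmp_internal_end_library() above: an abort
// already in progress (g_abort), or a completed shutdown (g_done set, or
// serial initialization never performed), means there is nothing left to do
// on this path.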
6014 if (__kmp_global.g.g_abort) { 6015 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n")); 6016 /* TODO abort? */ 6017 return; 6018 } 6019 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 6020 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n")); 6021 return; 6022 } 6023 6024 KMP_MB(); /* Flush all pending memory write invalidates. */ 6025 6026 /* find out who we are and what we should do */ 6027 { 6028 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific(); 6029 KA_TRACE(10, 6030 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req)); 6031 if (gtid == KMP_GTID_SHUTDOWN) { 6032 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system " 6033 "already shutdown\n")); 6034 return; 6035 } else if (gtid == KMP_GTID_MONITOR) { 6036 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not " 6037 "registered, or system shutdown\n")); 6038 return; 6039 } else if (gtid == KMP_GTID_DNE) { 6040 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system " 6041 "shutdown\n")); 6042 return; 6043 /* we don't know who we are */ 6044 } else if (KMP_UBER_GTID(gtid)) { 6045 /* unregister ourselves as an uber thread. gtid is no longer valid */ 6046 if (__kmp_root[gtid]->r.r_active) { 6047 __kmp_global.g.g_abort = -1; 6048 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 6049 KA_TRACE(10, 6050 ("__kmp_internal_end_thread: root still active, abort T#%d\n", 6051 gtid)); 6052 return; 6053 } else { 6054 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", 6055 gtid)); 6056 __kmp_unregister_root_current_thread(gtid); 6057 } 6058 } else { 6059 /* just a worker thread, let's leave */ 6060 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid)); 6061 6062 if (gtid >= 0) { 6063 __kmp_threads[gtid]->th.th_task_team = NULL; 6064 } 6065 6066 KA_TRACE(10, 6067 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", 6068 gtid)); 6069 return; 6070 } 6071 } 6072 #if defined KMP_DYNAMIC_LIB 6073 // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber 6074 // thread, because we will better shutdown later in the library destructor. 6075 // The reason of this change is performance problem when non-openmp thread in 6076 // a loop forks and joins many openmp threads. We can save a lot of time 6077 // keeping worker threads alive until the program shutdown. 6078 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) 6079 // and Windows(DPD200287443) that occurs when using critical sections from 6080 // foreign threads. 6081 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req)); 6082 return; 6083 #endif 6084 /* synchronize the termination process */ 6085 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 6086 6087 /* have we already finished */ 6088 if (__kmp_global.g.g_abort) { 6089 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n")); 6090 /* TODO abort? */ 6091 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6092 return; 6093 } 6094 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 6095 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6096 return; 6097 } 6098 6099 /* We need this lock to enforce mutex between this reading of 6100 __kmp_threads_capacity and the writing by __kmp_register_root. 6101 Alternatively, we can use a counter of roots that is atomically updated by 6102 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and 6103 __kmp_internal_end_*. */ 6104 6105 /* should we finish the run-time? are all siblings done? 
*/ 6106 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 6107 6108 for (i = 0; i < __kmp_threads_capacity; ++i) { 6109 if (KMP_UBER_GTID(i)) { 6110 KA_TRACE( 6111 10, 6112 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i)); 6113 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 6114 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6115 return; 6116 }; 6117 } 6118 6119 /* now we can safely conduct the actual termination */ 6120 6121 __kmp_internal_end(); 6122 6123 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 6124 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6125 6126 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req)); 6127 6128 #ifdef DUMP_DEBUG_ON_EXIT 6129 if (__kmp_debug_buf) 6130 __kmp_dump_debug_buffer(); 6131 #endif 6132 } // __kmp_internal_end_thread 6133 6134 // ----------------------------------------------------------------------------- 6135 // Library registration stuff. 6136 6137 static long __kmp_registration_flag = 0; 6138 // Random value used to indicate library initialization. 6139 static char *__kmp_registration_str = NULL; 6140 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>. 6141 6142 static inline char *__kmp_reg_status_name() { 6143 /* On RHEL 3u5 if linked statically, getpid() returns different values in 6144 each thread. If registration and unregistration go in different threads 6145 (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env 6146 env var can not be found, because the name will contain different pid. */ 6147 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid()); 6148 } // __kmp_reg_status_get 6149 6150 void __kmp_register_library_startup(void) { 6151 6152 char *name = __kmp_reg_status_name(); // Name of the environment variable. 6153 int done = 0; 6154 union { 6155 double dtime; 6156 long ltime; 6157 } time; 6158 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6159 __kmp_initialize_system_tick(); 6160 #endif 6161 __kmp_read_system_time(&time.dtime); 6162 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL); 6163 __kmp_registration_str = 6164 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag, 6165 __kmp_registration_flag, KMP_LIBRARY_FILE); 6166 6167 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name, 6168 __kmp_registration_str)); 6169 6170 while (!done) { 6171 6172 char *value = NULL; // Actual value of the environment variable. 6173 6174 // Set environment variable, but do not overwrite if it is exist. 6175 __kmp_env_set(name, __kmp_registration_str, 0); 6176 // Check the variable is written. 6177 value = __kmp_env_get(name); 6178 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 6179 6180 done = 1; // Ok, environment variable set successfully, exit the loop. 6181 6182 } else { 6183 6184 // Oops. Write failed. Another copy of OpenMP RTL is in memory. 6185 // Check whether it alive or dead. 6186 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. 
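// The value parsed below was written by whichever copy of the runtime
// registered first, using the same "%p-%lx-%s" layout as above, e.g.
// (hypothetical example; the actual address, flag value and file name vary):
//   __KMP_REGISTERED_LIB_12345=0x7f43a2e01080-cafe1234-libomp.so
// i.e. <address of its __kmp_registration_flag>-<flag value>-<library file>.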
6187 char *tail = value; 6188 char *flag_addr_str = NULL; 6189 char *flag_val_str = NULL; 6190 char const *file_name = NULL; 6191 __kmp_str_split(tail, '-', &flag_addr_str, &tail); 6192 __kmp_str_split(tail, '-', &flag_val_str, &tail); 6193 file_name = tail; 6194 if (tail != NULL) { 6195 long *flag_addr = 0; 6196 long flag_val = 0; 6197 KMP_SSCANF(flag_addr_str, "%p", &flag_addr); 6198 KMP_SSCANF(flag_val_str, "%lx", &flag_val); 6199 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) { 6200 // First, check whether environment-encoded address is mapped into 6201 // addr space. 6202 // If so, dereference it to see if it still has the right value. 6203 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) { 6204 neighbor = 1; 6205 } else { 6206 // If not, then we know the other copy of the library is no longer 6207 // running. 6208 neighbor = 2; 6209 }; // if 6210 }; // if 6211 }; // if 6212 switch (neighbor) { 6213 case 0: // Cannot parse environment variable -- neighbor status unknown. 6214 // Assume it is the incompatible format of future version of the 6215 // library. Assume the other library is alive. 6216 // WARN( ... ); // TODO: Issue a warning. 6217 file_name = "unknown library"; 6218 // Attention! Falling to the next case. That's intentional. 6219 case 1: { // Neighbor is alive. 6220 // Check it is allowed. 6221 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK"); 6222 if (!__kmp_str_match_true(duplicate_ok)) { 6223 // That's not allowed. Issue fatal error. 6224 __kmp_msg(kmp_ms_fatal, 6225 KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name), 6226 KMP_HNT(DuplicateLibrary), __kmp_msg_null); 6227 }; // if 6228 KMP_INTERNAL_FREE(duplicate_ok); 6229 __kmp_duplicate_library_ok = 1; 6230 done = 1; // Exit the loop. 6231 } break; 6232 case 2: { // Neighbor is dead. 6233 // Clear the variable and try to register library again. 6234 __kmp_env_unset(name); 6235 } break; 6236 default: { KMP_DEBUG_ASSERT(0); } break; 6237 }; // switch 6238 6239 }; // if 6240 KMP_INTERNAL_FREE((void *)value); 6241 6242 }; // while 6243 KMP_INTERNAL_FREE((void *)name); 6244 6245 } // func __kmp_register_library_startup 6246 6247 void __kmp_unregister_library(void) { 6248 6249 char *name = __kmp_reg_status_name(); 6250 char *value = __kmp_env_get(name); 6251 6252 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0); 6253 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL); 6254 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 6255 // Ok, this is our variable. Delete it. 6256 __kmp_env_unset(name); 6257 }; // if 6258 6259 KMP_INTERNAL_FREE(__kmp_registration_str); 6260 KMP_INTERNAL_FREE(value); 6261 KMP_INTERNAL_FREE(name); 6262 6263 __kmp_registration_flag = 0; 6264 __kmp_registration_str = NULL; 6265 6266 } // __kmp_unregister_library 6267 6268 // End of Library registration stuff. 
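// Illustrative sketch (not built; guarded by '#if 0'): how an external tool
// could inspect the registration record created above for its own process.
// Only the environment variable name ("__KMP_REGISTERED_LIB_<pid>") and the
// "<flag address>-<flag value>-<library file>" layout come from
// __kmp_reg_status_name() and __kmp_register_library_startup(); the helper
// name and everything else below are hypothetical and not a supported
// interface.
#if 0
#include <cstdio>
#include <cstdlib>
#include <string>
#include <unistd.h> // getpid()

// Returns true if some copy of the runtime has registered in this process.
static bool example_query_registration() {
  const std::string name =
      "__KMP_REGISTERED_LIB_" + std::to_string((int)getpid());
  const char *value = std::getenv(name.c_str());
  if (value == NULL)
    return false; // nothing registered, or the record was already removed
  // Expected layout: "<flag address>-<flag value>-<library file>".
  std::printf("%s=%s\n", name.c_str(), value);
  return true;
}
#endif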
6269 // ----------------------------------------------------------------------------- 6270 6271 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6272 6273 static void __kmp_check_mic_type() { 6274 kmp_cpuid_t cpuid_state = {0}; 6275 kmp_cpuid_t *cs_p = &cpuid_state; 6276 __kmp_x86_cpuid(1, 0, cs_p); 6277 // We don't support mic1 at the moment 6278 if ((cs_p->eax & 0xff0) == 0xB10) { 6279 __kmp_mic_type = mic2; 6280 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) { 6281 __kmp_mic_type = mic3; 6282 } else { 6283 __kmp_mic_type = non_mic; 6284 } 6285 } 6286 6287 #endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */ 6288 6289 static void __kmp_do_serial_initialize(void) { 6290 int i, gtid; 6291 int size; 6292 6293 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n")); 6294 6295 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4); 6296 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4); 6297 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8); 6298 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8); 6299 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *)); 6300 6301 #if OMPT_SUPPORT 6302 ompt_pre_init(); 6303 #endif 6304 6305 __kmp_validate_locks(); 6306 6307 /* Initialize internal memory allocator */ 6308 __kmp_init_allocator(); 6309 6310 /* Register the library startup via an environment variable and check to see 6311 whether another copy of the library is already registered. */ 6312 6313 __kmp_register_library_startup(); 6314 6315 /* TODO reinitialization of library */ 6316 if (TCR_4(__kmp_global.g.g_done)) { 6317 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n")); 6318 } 6319 6320 __kmp_global.g.g_abort = 0; 6321 TCW_SYNC_4(__kmp_global.g.g_done, FALSE); 6322 6323 /* initialize the locks */ 6324 #if KMP_USE_ADAPTIVE_LOCKS 6325 #if KMP_DEBUG_ADAPTIVE_LOCKS 6326 __kmp_init_speculative_stats(); 6327 #endif 6328 #endif 6329 #if KMP_STATS_ENABLED 6330 __kmp_stats_init(); 6331 #endif 6332 __kmp_init_lock(&__kmp_global_lock); 6333 __kmp_init_queuing_lock(&__kmp_dispatch_lock); 6334 __kmp_init_lock(&__kmp_debug_lock); 6335 __kmp_init_atomic_lock(&__kmp_atomic_lock); 6336 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i); 6337 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i); 6338 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i); 6339 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r); 6340 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i); 6341 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r); 6342 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c); 6343 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r); 6344 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r); 6345 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c); 6346 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c); 6347 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c); 6348 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock); 6349 __kmp_init_bootstrap_lock(&__kmp_exit_lock); 6350 #if KMP_USE_MONITOR 6351 __kmp_init_bootstrap_lock(&__kmp_monitor_lock); 6352 #endif 6353 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock); 6354 6355 /* conduct initialization and initial setup of configuration */ 6356 6357 __kmp_runtime_initialize(); 6358 6359 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6360 __kmp_check_mic_type(); 6361 #endif 6362 6363 // Some global variable initialization moved here from kmp_env_initialize() 6364 #ifdef KMP_DEBUG 6365 kmp_diag = 0; 6366 #endif 6367 __kmp_abort_delay = 0; 6368 6369 // From __kmp_init_dflt_team_nth() 6370 /* assume the entire machine will be used */ 6371 __kmp_dflt_team_nth_ub = __kmp_xproc; 6372 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) { 6373 
__kmp_dflt_team_nth_ub = KMP_MIN_NTH; 6374 } 6375 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) { 6376 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; 6377 } 6378 __kmp_max_nth = __kmp_sys_max_nth; 6379 6380 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" 6381 // part 6382 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; 6383 #if KMP_USE_MONITOR 6384 __kmp_monitor_wakeups = 6385 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 6386 __kmp_bt_intervals = 6387 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 6388 #endif 6389 // From "KMP_LIBRARY" part of __kmp_env_initialize() 6390 __kmp_library = library_throughput; 6391 // From KMP_SCHEDULE initialization 6392 __kmp_static = kmp_sch_static_balanced; 6393 // AC: do not use analytical here, because it is non-monotonous 6394 //__kmp_guided = kmp_sch_guided_iterative_chunked; 6395 //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no 6396 // need to repeat assignment 6397 // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch 6398 // bit control and barrier method control parts 6399 #if KMP_FAST_REDUCTION_BARRIER 6400 #define kmp_reduction_barrier_gather_bb ((int)1) 6401 #define kmp_reduction_barrier_release_bb ((int)1) 6402 #define kmp_reduction_barrier_gather_pat bp_hyper_bar 6403 #define kmp_reduction_barrier_release_pat bp_hyper_bar 6404 #endif // KMP_FAST_REDUCTION_BARRIER 6405 for (i = bs_plain_barrier; i < bs_last_barrier; i++) { 6406 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; 6407 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; 6408 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt; 6409 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt; 6410 #if KMP_FAST_REDUCTION_BARRIER 6411 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only ( 6412 // lin_64 ): hyper,1 6413 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb; 6414 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb; 6415 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat; 6416 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat; 6417 } 6418 #endif // KMP_FAST_REDUCTION_BARRIER 6419 } 6420 #if KMP_FAST_REDUCTION_BARRIER 6421 #undef kmp_reduction_barrier_release_pat 6422 #undef kmp_reduction_barrier_gather_pat 6423 #undef kmp_reduction_barrier_release_bb 6424 #undef kmp_reduction_barrier_gather_bb 6425 #endif // KMP_FAST_REDUCTION_BARRIER 6426 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6427 if (__kmp_mic_type == mic2) { // KNC 6428 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC 6429 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather 6430 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] = 6431 1; // forkjoin release 6432 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 6433 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 6434 } 6435 #if KMP_FAST_REDUCTION_BARRIER 6436 if (__kmp_mic_type == mic2) { // KNC 6437 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 6438 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 6439 } 6440 #endif 6441 #endif 6442 6443 // From KMP_CHECKS initialization 6444 #ifdef KMP_DEBUG 6445 __kmp_env_checks = TRUE; /* development versions have the extra checks */ 6446 #else 6447 __kmp_env_checks = FALSE; /* port versions do 
not have the extra checks */ 6448 #endif 6449 6450 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization 6451 __kmp_foreign_tp = TRUE; 6452 6453 __kmp_global.g.g_dynamic = FALSE; 6454 __kmp_global.g.g_dynamic_mode = dynamic_default; 6455 6456 __kmp_env_initialize(NULL); 6457 6458 // Print all messages in message catalog for testing purposes. 6459 #ifdef KMP_DEBUG 6460 char const *val = __kmp_env_get("KMP_DUMP_CATALOG"); 6461 if (__kmp_str_match_true(val)) { 6462 kmp_str_buf_t buffer; 6463 __kmp_str_buf_init(&buffer); 6464 __kmp_i18n_dump_catalog(&buffer); 6465 __kmp_printf("%s", buffer.str); 6466 __kmp_str_buf_free(&buffer); 6467 }; // if 6468 __kmp_env_free(&val); 6469 #endif 6470 6471 __kmp_threads_capacity = 6472 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub); 6473 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part 6474 __kmp_tp_capacity = __kmp_default_tp_capacity( 6475 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified); 6476 6477 // If the library is shut down properly, both pools must be NULL. Just in 6478 // case, set them to NULL -- some memory may leak, but subsequent code will 6479 // work even if pools are not freed. 6480 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL); 6481 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL); 6482 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL); 6483 __kmp_thread_pool = NULL; 6484 __kmp_thread_pool_insert_pt = NULL; 6485 __kmp_team_pool = NULL; 6486 6487 /* Allocate all of the variable sized records */ 6488 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are 6489 * expandable */ 6490 /* Since allocation is cache-aligned, just add extra padding at the end */ 6491 size = 6492 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity + 6493 CACHE_LINE; 6494 __kmp_threads = (kmp_info_t **)__kmp_allocate(size); 6495 __kmp_root = (kmp_root_t **)((char *)__kmp_threads + 6496 sizeof(kmp_info_t *) * __kmp_threads_capacity); 6497 6498 /* init thread counts */ 6499 KMP_DEBUG_ASSERT(__kmp_all_nth == 6500 0); // Asserts fail if the library is reinitializing and 6501 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination. 6502 __kmp_all_nth = 0; 6503 __kmp_nth = 0; 6504 6505 /* setup the uber master thread and hierarchy */ 6506 gtid = __kmp_register_root(TRUE); 6507 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid)); 6508 KMP_ASSERT(KMP_UBER_GTID(gtid)); 6509 KMP_ASSERT(KMP_INITIAL_GTID(gtid)); 6510 6511 KMP_MB(); /* Flush all pending memory write invalidates. */ 6512 6513 __kmp_common_initialize(); 6514 6515 #if KMP_OS_UNIX 6516 /* invoke the child fork handler */ 6517 __kmp_register_atfork(); 6518 #endif 6519 6520 #if !defined KMP_DYNAMIC_LIB 6521 { 6522 /* Invoke the exit handler when the program finishes, only for static 6523 library. For dynamic library, we already have _fini and DllMain. */ 6524 int rc = atexit(__kmp_internal_end_atexit); 6525 if (rc != 0) { 6526 __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc), 6527 __kmp_msg_null); 6528 }; // if 6529 } 6530 #endif 6531 6532 #if KMP_HANDLE_SIGNALS 6533 #if KMP_OS_UNIX 6534 /* NOTE: make sure that this is called before the user installs their own 6535 signal handlers so that the user handlers are called first. this way they 6536 can return false, not call our handler, avoid terminating the library, and 6537 continue execution where they left off. 
*/ 6538 __kmp_install_signals(FALSE); 6539 #endif /* KMP_OS_UNIX */ 6540 #if KMP_OS_WINDOWS 6541 __kmp_install_signals(TRUE); 6542 #endif /* KMP_OS_WINDOWS */ 6543 #endif 6544 6545 /* we have finished the serial initialization */ 6546 __kmp_init_counter++; 6547 6548 __kmp_init_serial = TRUE; 6549 6550 if (__kmp_settings) { 6551 __kmp_env_print(); 6552 } 6553 6554 #if OMP_40_ENABLED 6555 if (__kmp_display_env || __kmp_display_env_verbose) { 6556 __kmp_env_print_2(); 6557 } 6558 #endif // OMP_40_ENABLED 6559 6560 #if OMPT_SUPPORT 6561 ompt_post_init(); 6562 #endif 6563 6564 KMP_MB(); 6565 6566 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n")); 6567 } 6568 6569 void __kmp_serial_initialize(void) { 6570 if (__kmp_init_serial) { 6571 return; 6572 } 6573 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 6574 if (__kmp_init_serial) { 6575 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6576 return; 6577 } 6578 __kmp_do_serial_initialize(); 6579 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6580 } 6581 6582 static void __kmp_do_middle_initialize(void) { 6583 int i, j; 6584 int prev_dflt_team_nth; 6585 6586 if (!__kmp_init_serial) { 6587 __kmp_do_serial_initialize(); 6588 } 6589 6590 KA_TRACE(10, ("__kmp_middle_initialize: enter\n")); 6591 6592 // Save the previous value for the __kmp_dflt_team_nth so that 6593 // we can avoid some reinitialization if it hasn't changed. 6594 prev_dflt_team_nth = __kmp_dflt_team_nth; 6595 6596 #if KMP_AFFINITY_SUPPORTED 6597 // __kmp_affinity_initialize() will try to set __kmp_ncores to the 6598 // number of cores on the machine. 6599 __kmp_affinity_initialize(); 6600 6601 // Run through the __kmp_threads array and set the affinity mask 6602 // for each root thread that is currently registered with the RTL. 6603 for (i = 0; i < __kmp_threads_capacity; i++) { 6604 if (TCR_PTR(__kmp_threads[i]) != NULL) { 6605 __kmp_affinity_set_init_mask(i, TRUE); 6606 } 6607 } 6608 #endif /* KMP_AFFINITY_SUPPORTED */ 6609 6610 KMP_ASSERT(__kmp_xproc > 0); 6611 if (__kmp_avail_proc == 0) { 6612 __kmp_avail_proc = __kmp_xproc; 6613 } 6614 6615 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), 6616 // correct them now 6617 j = 0; 6618 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) { 6619 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = 6620 __kmp_avail_proc; 6621 j++; 6622 } 6623 6624 if (__kmp_dflt_team_nth == 0) { 6625 #ifdef KMP_DFLT_NTH_CORES 6626 // Default #threads = #cores 6627 __kmp_dflt_team_nth = __kmp_ncores; 6628 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6629 "__kmp_ncores (%d)\n", 6630 __kmp_dflt_team_nth)); 6631 #else 6632 // Default #threads = #available OS procs 6633 __kmp_dflt_team_nth = __kmp_avail_proc; 6634 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6635 "__kmp_avail_proc(%d)\n", 6636 __kmp_dflt_team_nth)); 6637 #endif /* KMP_DFLT_NTH_CORES */ 6638 } 6639 6640 if (__kmp_dflt_team_nth < KMP_MIN_NTH) { 6641 __kmp_dflt_team_nth = KMP_MIN_NTH; 6642 } 6643 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) { 6644 __kmp_dflt_team_nth = __kmp_sys_max_nth; 6645 } 6646 6647 // There's no harm in continuing if the following check fails, 6648 // but it indicates an error in the previous logic. 
6649 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub); 6650 6651 if (__kmp_dflt_team_nth != prev_dflt_team_nth) { 6652 // Run through the __kmp_threads array and set the num threads icv for each 6653 // root thread that is currently registered with the RTL (which has not 6654 // already explicitly set its nthreads-var with a call to 6655 // omp_set_num_threads()). 6656 for (i = 0; i < __kmp_threads_capacity; i++) { 6657 kmp_info_t *thread = __kmp_threads[i]; 6658 if (thread == NULL) 6659 continue; 6660 if (thread->th.th_current_task->td_icvs.nproc != 0) 6661 continue; 6662 6663 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth); 6664 } 6665 } 6666 KA_TRACE( 6667 20, 6668 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", 6669 __kmp_dflt_team_nth)); 6670 6671 #ifdef KMP_ADJUST_BLOCKTIME 6672 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */ 6673 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 6674 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 6675 if (__kmp_nth > __kmp_avail_proc) { 6676 __kmp_zero_bt = TRUE; 6677 } 6678 } 6679 #endif /* KMP_ADJUST_BLOCKTIME */ 6680 6681 /* we have finished middle initialization */ 6682 TCW_SYNC_4(__kmp_init_middle, TRUE); 6683 6684 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n")); 6685 } 6686 6687 void __kmp_middle_initialize(void) { 6688 if (__kmp_init_middle) { 6689 return; 6690 } 6691 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 6692 if (__kmp_init_middle) { 6693 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6694 return; 6695 } 6696 __kmp_do_middle_initialize(); 6697 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6698 } 6699 6700 void __kmp_parallel_initialize(void) { 6701 int gtid = __kmp_entry_gtid(); // this might be a new root 6702 6703 /* synchronize parallel initialization (for sibling) */ 6704 if (TCR_4(__kmp_init_parallel)) 6705 return; 6706 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 6707 if (TCR_4(__kmp_init_parallel)) { 6708 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6709 return; 6710 } 6711 6712 /* TODO reinitialization after we have already shut down */ 6713 if (TCR_4(__kmp_global.g.g_done)) { 6714 KA_TRACE( 6715 10, 6716 ("__kmp_parallel_initialize: attempt to init while shutting down\n")); 6717 __kmp_infinite_loop(); 6718 } 6719 6720 /* jc: The lock __kmp_initz_lock is already held, so calling 6721 __kmp_serial_initialize would cause a deadlock. So we call 6722 __kmp_do_serial_initialize directly. */ 6723 if (!__kmp_init_middle) { 6724 __kmp_do_middle_initialize(); 6725 } 6726 6727 /* begin initialization */ 6728 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n")); 6729 KMP_ASSERT(KMP_UBER_GTID(gtid)); 6730 6731 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6732 // Save the FP control regs. 6733 // Worker threads will set theirs to these values at thread startup. 
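// Illustrative sketch of how the snapshot taken just below is consumed (an
// assumption about the worker-side counterpart, not a statement of this
// file's exact flow): a worker starting up would replay the saved state with
// the matching load helpers, e.g.
//
//   __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
//   __kmp_load_mxcsr(&__kmp_init_mxcsr);
//
// so that every thread in the team computes with the same floating-point
// control settings as the thread that performed parallel initialization.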
6734 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); 6735 __kmp_store_mxcsr(&__kmp_init_mxcsr); 6736 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; 6737 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 6738 6739 #if KMP_OS_UNIX 6740 #if KMP_HANDLE_SIGNALS 6741 /* must be after __kmp_serial_initialize */ 6742 __kmp_install_signals(TRUE); 6743 #endif 6744 #endif 6745 6746 __kmp_suspend_initialize(); 6747 6748 #if defined(USE_LOAD_BALANCE) 6749 if (__kmp_global.g.g_dynamic_mode == dynamic_default) { 6750 __kmp_global.g.g_dynamic_mode = dynamic_load_balance; 6751 } 6752 #else 6753 if (__kmp_global.g.g_dynamic_mode == dynamic_default) { 6754 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 6755 } 6756 #endif 6757 6758 if (__kmp_version) { 6759 __kmp_print_version_2(); 6760 } 6761 6762 /* we have finished parallel initialization */ 6763 TCW_SYNC_4(__kmp_init_parallel, TRUE); 6764 6765 KMP_MB(); 6766 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n")); 6767 6768 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6769 } 6770 6771 /* ------------------------------------------------------------------------ */ 6772 6773 void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, 6774 kmp_team_t *team) { 6775 kmp_disp_t *dispatch; 6776 6777 KMP_MB(); 6778 6779 /* none of the threads have encountered any constructs, yet. */ 6780 this_thr->th.th_local.this_construct = 0; 6781 #if KMP_CACHE_MANAGE 6782 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived); 6783 #endif /* KMP_CACHE_MANAGE */ 6784 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); 6785 KMP_DEBUG_ASSERT(dispatch); 6786 KMP_DEBUG_ASSERT(team->t.t_dispatch); 6787 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ 6788 // this_thr->th.th_info.ds.ds_tid ] ); 6789 6790 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ 6791 #if OMP_45_ENABLED 6792 dispatch->th_doacross_buf_idx = 6793 0; /* reset the doacross dispatch buffer counter */ 6794 #endif 6795 if (__kmp_env_consistency_check) 6796 __kmp_push_parallel(gtid, team->t.t_ident); 6797 6798 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 6799 } 6800 6801 void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, 6802 kmp_team_t *team) { 6803 if (__kmp_env_consistency_check) 6804 __kmp_pop_parallel(gtid, team->t.t_ident); 6805 6806 __kmp_finish_implicit_task(this_thr); 6807 } 6808 6809 int __kmp_invoke_task_func(int gtid) { 6810 int rc; 6811 int tid = __kmp_tid_from_gtid(gtid); 6812 kmp_info_t *this_thr = __kmp_threads[gtid]; 6813 kmp_team_t *team = this_thr->th.th_team; 6814 6815 __kmp_run_before_invoked_task(gtid, tid, this_thr, team); 6816 #if USE_ITT_BUILD 6817 if (__itt_stack_caller_create_ptr) { 6818 __kmp_itt_stack_callee_enter( 6819 (__itt_caller) 6820 team->t.t_stack_id); // inform ittnotify about entering user's code 6821 } 6822 #endif /* USE_ITT_BUILD */ 6823 #if INCLUDE_SSC_MARKS 6824 SSC_MARK_INVOKING(); 6825 #endif 6826 6827 #if OMPT_SUPPORT 6828 void *dummy; 6829 void **exit_runtime_p; 6830 ompt_task_id_t my_task_id; 6831 ompt_parallel_id_t my_parallel_id; 6832 6833 if (ompt_enabled) { 6834 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid] 6835 .ompt_task_info.frame.exit_runtime_frame); 6836 } else { 6837 exit_runtime_p = &dummy; 6838 } 6839 6840 #if OMPT_TRACE 6841 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; 6842 my_parallel_id = team->t.ompt_team_info.parallel_id; 6843 if (ompt_enabled && 6844 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 6845 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id, 6846 my_task_id); 6847 } 6848 #endif 6849 #endif 6850 6851 { 6852 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 6853 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 6854 rc = 6855 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid, 6856 tid, (int)team->t.t_argc, (void **)team->t.t_argv 6857 #if OMPT_SUPPORT 6858 , 6859 exit_runtime_p 6860 #endif 6861 ); 6862 #if OMPT_SUPPORT 6863 *exit_runtime_p = NULL; 6864 #endif 6865 } 6866 6867 #if USE_ITT_BUILD 6868 if (__itt_stack_caller_create_ptr) { 6869 __kmp_itt_stack_callee_leave( 6870 (__itt_caller) 6871 team->t.t_stack_id); // inform ittnotify about leaving user's code 6872 } 6873 #endif /* USE_ITT_BUILD */ 6874 __kmp_run_after_invoked_task(gtid, tid, this_thr, team); 6875 6876 return rc; 6877 } 6878 6879 #if OMP_40_ENABLED 6880 void __kmp_teams_master(int gtid) { 6881 // This routine is called by all master threads in teams construct 6882 kmp_info_t *thr = __kmp_threads[gtid]; 6883 kmp_team_t *team = thr->th.th_team; 6884 ident_t *loc = team->t.t_ident; 6885 thr->th.th_set_nproc = thr->th.th_teams_size.nth; 6886 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask); 6887 KMP_DEBUG_ASSERT(thr->th.th_set_nproc); 6888 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid, 6889 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask)); 6890 // Launch league of teams now, but not let workers execute 6891 // (they hang on fork barrier until next parallel) 6892 #if INCLUDE_SSC_MARKS 6893 SSC_MARK_FORKING(); 6894 #endif 6895 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc, 6896 #if OMPT_SUPPORT 6897 (void *)thr->th.th_teams_microtask, // "unwrapped" task 6898 #endif 6899 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task 6900 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL); 6901 #if INCLUDE_SSC_MARKS 6902 SSC_MARK_JOINING(); 6903 #endif 6904 6905 // AC: last parameter "1" eliminates join barrier which won't work because 6906 // worker threads are in a fork barrier waiting for more parallel regions 6907 __kmp_join_call(loc, gtid 6908 
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}

int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
  __kmp_teams_master(gtid);
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
#endif /* OMP_40_ENABLED */

/* This sets the requested number of threads for the next parallel region
   encountered by this team. Since this should be enclosed in the forkjoin
   critical section, it should avoid race conditions with asymmetrical nested
   parallelism. */

void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}

#if OMP_40_ENABLED

/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams == 0)
    num_teams = 1; // default number of teams is 1.
  if (num_teams > __kmp_max_nth) { // if too many teams were requested
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  // Remember the number of threads for inner parallel regions
  if (num_threads == 0) {
    if (!TCR_4(__kmp_init_middle))
      __kmp_middle_initialize(); // get __kmp_avail_proc calculated
    num_threads = __kmp_avail_proc / num_teams;
    if (num_teams * num_threads > __kmp_max_nth) {
      // adjust num_threads w/o warning as it is not user setting
      num_threads = __kmp_max_nth / num_teams;
    }
  } else {
    if (num_teams * num_threads > __kmp_max_nth) {
      int new_threads = __kmp_max_nth / num_teams;
      if (!__kmp_reserve_warn) { // user asked for too many threads
        __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}

// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}

#endif /* OMP_40_ENABLED */
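// Usage sketch for the teams sizing logic above (hedged: this assumes the
// compiler lowers the clauses through the __kmpc_push_num_teams entry point,
// which forwards to __kmp_push_num_teams before the teams fork). A directive
// such as
//
//   #pragma omp teams num_teams(4) thread_limit(8)
//
// would arrive here as the equivalent of
//
//   __kmp_push_num_teams(loc, gtid, /*num_teams=*/4, /*num_threads=*/8);
//
// leaving th_teams_size.nteams == 4 and th_teams_size.nth == 8, subject to
// the __kmp_max_nth clamping performed in that routine. Passing 0 for either
// argument selects the defaults computed there instead.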
/* Launch the worker threads into the microtask. */

void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

#ifdef KMP_DEBUG
  int f;
#endif /* KMP_DEBUG */

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
#endif
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}

void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}

/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the number of worker threads actively spinning in the hot team, if
// we are at the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count master thread
  }

  // Skip the master thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this particular root (if we are at the outer par level), and the currently
  // executing thread (to become the master) are available to add to the new
  // team, but are currently contributing to the system load, and must be
  // accounted for.
  pool_active = TCR_4(__kmp_thread_pool_active_nth);
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead. Once we set
    // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, we shouldn't wind
    // up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting new
  // running procs. The real system load at this instant should be at least as
  // large as the number of active OMP threads that are available to add to
  // the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit.
retval:%d\n", retval)); 7182 return retval; 7183 } // __kmp_load_balance_nproc() 7184 7185 #endif /* USE_LOAD_BALANCE */ 7186 7187 /* ------------------------------------------------------------------------ */ 7188 7189 /* NOTE: this is called with the __kmp_init_lock held */ 7190 void __kmp_cleanup(void) { 7191 int f; 7192 7193 KA_TRACE(10, ("__kmp_cleanup: enter\n")); 7194 7195 if (TCR_4(__kmp_init_parallel)) { 7196 #if KMP_HANDLE_SIGNALS 7197 __kmp_remove_signals(); 7198 #endif 7199 TCW_4(__kmp_init_parallel, FALSE); 7200 } 7201 7202 if (TCR_4(__kmp_init_middle)) { 7203 #if KMP_AFFINITY_SUPPORTED 7204 __kmp_affinity_uninitialize(); 7205 #endif /* KMP_AFFINITY_SUPPORTED */ 7206 __kmp_cleanup_hierarchy(); 7207 TCW_4(__kmp_init_middle, FALSE); 7208 } 7209 7210 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n")); 7211 7212 if (__kmp_init_serial) { 7213 __kmp_runtime_destroy(); 7214 __kmp_init_serial = FALSE; 7215 } 7216 7217 for (f = 0; f < __kmp_threads_capacity; f++) { 7218 if (__kmp_root[f] != NULL) { 7219 __kmp_free(__kmp_root[f]); 7220 __kmp_root[f] = NULL; 7221 } 7222 } 7223 __kmp_free(__kmp_threads); 7224 // __kmp_threads and __kmp_root were allocated at once, as single block, so 7225 // there is no need in freeing __kmp_root. 7226 __kmp_threads = NULL; 7227 __kmp_root = NULL; 7228 __kmp_threads_capacity = 0; 7229 7230 #if KMP_USE_DYNAMIC_LOCK 7231 __kmp_cleanup_indirect_user_locks(); 7232 #else 7233 __kmp_cleanup_user_locks(); 7234 #endif 7235 7236 #if KMP_AFFINITY_SUPPORTED 7237 KMP_INTERNAL_FREE((void *)__kmp_cpuinfo_file); 7238 __kmp_cpuinfo_file = NULL; 7239 #endif /* KMP_AFFINITY_SUPPORTED */ 7240 7241 #if KMP_USE_ADAPTIVE_LOCKS 7242 #if KMP_DEBUG_ADAPTIVE_LOCKS 7243 __kmp_print_speculative_stats(); 7244 #endif 7245 #endif 7246 KMP_INTERNAL_FREE(__kmp_nested_nth.nth); 7247 __kmp_nested_nth.nth = NULL; 7248 __kmp_nested_nth.size = 0; 7249 __kmp_nested_nth.used = 0; 7250 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types); 7251 __kmp_nested_proc_bind.bind_types = NULL; 7252 __kmp_nested_proc_bind.size = 0; 7253 __kmp_nested_proc_bind.used = 0; 7254 7255 __kmp_i18n_catclose(); 7256 7257 #if KMP_STATS_ENABLED 7258 __kmp_stats_fini(); 7259 #endif 7260 7261 KA_TRACE(10, ("__kmp_cleanup: exit\n")); 7262 } 7263 7264 /* ------------------------------------------------------------------------ */ 7265 7266 int __kmp_ignore_mppbeg(void) { 7267 char *env; 7268 7269 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) { 7270 if (__kmp_str_match_false(env)) 7271 return FALSE; 7272 } 7273 // By default __kmpc_begin() is no-op. 7274 return TRUE; 7275 } 7276 7277 int __kmp_ignore_mppend(void) { 7278 char *env; 7279 7280 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) { 7281 if (__kmp_str_match_false(env)) 7282 return FALSE; 7283 } 7284 // By default __kmpc_end() is no-op. 
7285 return TRUE; 7286 } 7287 7288 void __kmp_internal_begin(void) { 7289 int gtid; 7290 kmp_root_t *root; 7291 7292 /* this is a very important step as it will register new sibling threads 7293 and assign these new uber threads a new gtid */ 7294 gtid = __kmp_entry_gtid(); 7295 root = __kmp_threads[gtid]->th.th_root; 7296 KMP_ASSERT(KMP_UBER_GTID(gtid)); 7297 7298 if (root->r.r_begin) 7299 return; 7300 __kmp_acquire_lock(&root->r.r_begin_lock, gtid); 7301 if (root->r.r_begin) { 7302 __kmp_release_lock(&root->r.r_begin_lock, gtid); 7303 return; 7304 } 7305 7306 root->r.r_begin = TRUE; 7307 7308 __kmp_release_lock(&root->r.r_begin_lock, gtid); 7309 } 7310 7311 /* ------------------------------------------------------------------------ */ 7312 7313 void __kmp_user_set_library(enum library_type arg) { 7314 int gtid; 7315 kmp_root_t *root; 7316 kmp_info_t *thread; 7317 7318 /* first, make sure we are initialized so we can get our gtid */ 7319 7320 gtid = __kmp_entry_gtid(); 7321 thread = __kmp_threads[gtid]; 7322 7323 root = thread->th.th_root; 7324 7325 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, 7326 library_serial)); 7327 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level 7328 thread */ 7329 KMP_WARNING(SetLibraryIncorrectCall); 7330 return; 7331 } 7332 7333 switch (arg) { 7334 case library_serial: 7335 thread->th.th_set_nproc = 0; 7336 set__nproc(thread, 1); 7337 break; 7338 case library_turnaround: 7339 thread->th.th_set_nproc = 0; 7340 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth 7341 : __kmp_dflt_team_nth_ub); 7342 break; 7343 case library_throughput: 7344 thread->th.th_set_nproc = 0; 7345 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth 7346 : __kmp_dflt_team_nth_ub); 7347 break; 7348 default: 7349 KMP_FATAL(UnknownLibraryType, arg); 7350 } 7351 7352 __kmp_aux_set_library(arg); 7353 } 7354 7355 void __kmp_aux_set_stacksize(size_t arg) { 7356 if (!__kmp_init_serial) 7357 __kmp_serial_initialize(); 7358 7359 #if KMP_OS_DARWIN 7360 if (arg & (0x1000 - 1)) { 7361 arg &= ~(0x1000 - 1); 7362 if (arg + 0x1000) /* check for overflow if we round up */ 7363 arg += 0x1000; 7364 } 7365 #endif 7366 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 7367 7368 /* only change the default stacksize before the first parallel region */ 7369 if (!TCR_4(__kmp_init_parallel)) { 7370 size_t value = arg; /* argument is in bytes */ 7371 7372 if (value < __kmp_sys_min_stksize) 7373 value = __kmp_sys_min_stksize; 7374 else if (value > KMP_MAX_STKSIZE) 7375 value = KMP_MAX_STKSIZE; 7376 7377 __kmp_stksize = value; 7378 7379 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */ 7380 } 7381 7382 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 7383 } 7384 7385 /* set the behaviour of the runtime library */ 7386 /* TODO this can cause some odd behaviour with sibling parallelism... 
*/ 7387 void __kmp_aux_set_library(enum library_type arg) { 7388 __kmp_library = arg; 7389 7390 switch (__kmp_library) { 7391 case library_serial: { 7392 KMP_INFORM(LibraryIsSerial); 7393 (void)__kmp_change_library(TRUE); 7394 } break; 7395 case library_turnaround: 7396 (void)__kmp_change_library(TRUE); 7397 break; 7398 case library_throughput: 7399 (void)__kmp_change_library(FALSE); 7400 break; 7401 default: 7402 KMP_FATAL(UnknownLibraryType, arg); 7403 } 7404 } 7405 7406 /* ------------------------------------------------------------------------ */ 7407 7408 void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) { 7409 int blocktime = arg; /* argument is in milliseconds */ 7410 #if KMP_USE_MONITOR 7411 int bt_intervals; 7412 #endif 7413 int bt_set; 7414 7415 __kmp_save_internal_controls(thread); 7416 7417 /* Normalize and set blocktime for the teams */ 7418 if (blocktime < KMP_MIN_BLOCKTIME) 7419 blocktime = KMP_MIN_BLOCKTIME; 7420 else if (blocktime > KMP_MAX_BLOCKTIME) 7421 blocktime = KMP_MAX_BLOCKTIME; 7422 7423 set__blocktime_team(thread->th.th_team, tid, blocktime); 7424 set__blocktime_team(thread->th.th_serial_team, 0, blocktime); 7425 7426 #if KMP_USE_MONITOR 7427 /* Calculate and set blocktime intervals for the teams */ 7428 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); 7429 7430 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals); 7431 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals); 7432 #endif 7433 7434 /* Set whether blocktime has been set to "TRUE" */ 7435 bt_set = TRUE; 7436 7437 set__bt_set_team(thread->th.th_team, tid, bt_set); 7438 set__bt_set_team(thread->th.th_serial_team, 0, bt_set); 7439 #if KMP_USE_MONITOR 7440 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " 7441 "bt_intervals=%d, monitor_updates=%d\n", 7442 __kmp_gtid_from_tid(tid, thread->th.th_team), 7443 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, 7444 __kmp_monitor_wakeups)); 7445 #else 7446 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", 7447 __kmp_gtid_from_tid(tid, thread->th.th_team), 7448 thread->th.th_team->t.t_id, tid, blocktime)); 7449 #endif 7450 } 7451 7452 void __kmp_aux_set_defaults(char const *str, int len) { 7453 if (!__kmp_init_serial) { 7454 __kmp_serial_initialize(); 7455 }; 7456 __kmp_env_initialize(str); 7457 7458 if (__kmp_settings 7459 #if OMP_40_ENABLED 7460 || __kmp_display_env || __kmp_display_env_verbose 7461 #endif // OMP_40_ENABLED 7462 ) { 7463 __kmp_env_print(); 7464 } 7465 } // __kmp_aux_set_defaults 7466 7467 /* ------------------------------------------------------------------------ */ 7468 /* internal fast reduction routines */ 7469 7470 PACKED_REDUCTION_METHOD_T 7471 __kmp_determine_reduction_method( 7472 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, 7473 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 7474 kmp_critical_name *lck) { 7475 7476 // Default reduction method: critical construct ( lck != NULL, like in current 7477 // PAROPT ) 7478 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method 7479 // can be selected by RTL 7480 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method 7481 // can be selected by RTL 7482 // Finally, it's up to OpenMP RTL to make a decision on which method to select 7483 // among generated by PAROPT. 
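  // Rough shape of the decision below (a readability summary, not new
  // behavior; the exact cutoffs and cases vary per architecture and OS):
  //   team_size == 1                       -> empty_reduce_block
  //   tree method generated, team "large"  -> tree reduction (with a
  //                                           reduction barrier where that
  //                                           variant is available)
  //   atomic method generated otherwise    -> atomic_reduce_block
  //   fallback                             -> critical_reduce_block
  // A forced method (__kmp_force_reduction_method) overrides the heuristic
  // for team_size > 1, degrading to the critical method if the compiler did
  // not generate the requested variant.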

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic dereference) is
  // slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||       \
    KMP_OS_DARWIN

    int teamsize_cutoff = 4;

#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
       // KMP_OS_DARWIN

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_WINDOWS

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
7545 retval = atomic_reduce_block; 7546 } 7547 } // otherwise: use critical section 7548 7549 #elif KMP_OS_DARWIN 7550 7551 if (atomic_available && (num_vars <= 3)) { 7552 retval = atomic_reduce_block; 7553 } else if (tree_available) { 7554 if ((reduce_size > (9 * sizeof(kmp_real64))) && 7555 (reduce_size < (2000 * sizeof(kmp_real64)))) { 7556 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; 7557 } 7558 } // otherwise: use critical section 7559 7560 #else 7561 #error "Unknown or unsupported OS" 7562 #endif 7563 7564 #else 7565 #error "Unknown or unsupported architecture" 7566 #endif 7567 } 7568 7569 // KMP_FORCE_REDUCTION 7570 7571 // If the team is serialized (team_size == 1), ignore the forced reduction 7572 // method and stay with the unsynchronized method (empty_reduce_block) 7573 if (__kmp_force_reduction_method != reduction_method_not_defined && 7574 team_size != 1) { 7575 7576 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block; 7577 7578 int atomic_available, tree_available; 7579 7580 switch ((forced_retval = __kmp_force_reduction_method)) { 7581 case critical_reduce_block: 7582 KMP_ASSERT(lck); // lck should be != 0 7583 break; 7584 7585 case atomic_reduce_block: 7586 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; 7587 if (!atomic_available) { 7588 KMP_WARNING(RedMethodNotSupported, "atomic"); 7589 forced_retval = critical_reduce_block; 7590 } 7591 break; 7592 7593 case tree_reduce_block: 7594 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 7595 if (!tree_available) { 7596 KMP_WARNING(RedMethodNotSupported, "tree"); 7597 forced_retval = critical_reduce_block; 7598 } else { 7599 #if KMP_FAST_REDUCTION_BARRIER 7600 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; 7601 #endif 7602 } 7603 break; 7604 7605 default: 7606 KMP_ASSERT(0); // "unsupported method specified" 7607 } 7608 7609 retval = forced_retval; 7610 } 7611 7612 KA_TRACE(10, ("reduction method selected=%08x\n", retval)); 7613 7614 #undef FAST_REDUCTION_TREE_METHOD_GENERATED 7615 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED 7616 7617 return (retval); 7618 } 7619 7620 // this function is for testing set/get/determine reduce method 7621 kmp_int32 __kmp_get_reduce_method(void) { 7622 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8); 7623 } 7624
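// Illustrative test-side usage of the hook above (a sketch under the
// assumption that the caller links against this runtime; the declaration is
// written out here only for the example and is expected to be compatible
// with the runtime's own):
//
//   extern "C" int __kmp_get_reduce_method(void); // assumed-compatible decl
//
//   int sum = 0;
//   #pragma omp parallel reduction(+ : sum)
//   { sum += 1; }
//   // After the reduction, the encountering thread still records its packed
//   // method, so the id chosen by __kmp_determine_reduction_method() can be
//   // inspected:
//   printf("reduction method id: %d\n", __kmp_get_reduce_method());
//
// The accessor returns the packed value shifted right by 8, i.e. the method
// part of the encoding without the low-order barrier bits.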