/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_50_ENABLED
                                                        "5.0 (201611)";
#elif OMP_45_ENABLED
                                                        "4.5 (201511)";
#elif OMP_40_ENABLED
                                                        "4.0 (201307)";
#else
                                                        "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_unregister_library(void); // called by __kmp_internal_end()
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
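// Lookup strategy, in decreasing order of speed (selected by __kmp_gtid_mode):
//   mode >= 3: gtid lives in a native thread-local variable (KMP_TDATA_GTID)
//   mode >= 2: gtid lives in OS keyed thread-local storage
//   otherwise: infer the gtid by locating the address of a local variable
//              within a registered thread's recorded stack extent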
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
        non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
        may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }
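  // The stack search can legitimately fail for a root (uber) thread whose
  // recorded stack window has not yet grown to cover the current stack
  // address, so fall back to keyed TLS and then widen the recorded window
  // below.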
  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}

int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_msg(kmp_ms_fatal, KMP_MSG(StackOverlap),
                    KMP_HNT(ChangeStackLimit), __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}

/* ------------------------------------------------------------------------ */

void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}

#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();
          // Round p1 down to the start of its page and p2 down to the start of
          // the page containing its last byte; with page_size a power of two,
          // clearing the low bits does the rounding.
          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              // a cast is not an lvalue, so advance the pointer by assignment
              p1 = (char *)p1 + page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
  va_end(ap); // balance the va_start above
}

void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }; // if

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;

    /* On Windows* OS by default abort() causes a pop-up error box, which stalls
       nightly testing. Unfortunately, we cannot reliably suppress pop-up error
       boxes. _set_abort_behavior() works well, but this function is not
       available in VS7 (this is not a problem for the DLL, but it is a problem
       for the static OpenMP RTL). SetErrorMode (and so, the timelimit utility)
       does not help, at least in some versions of MS C RTL.

       It seems the following sequence is the only way to simulate abort() and
       avoid the pop-up error box. */
    raise(SIGABRT);
    _exit(3); // Just in case, if signal ignored, exit anyway.
  } else {
    abort();
  }; // if

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */
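// Each call below prints one contiguous piece of the kmp_info_t layout, so
// the output (emitted when __kmp_storage_map is enabled) can be inspected for
// gaps or unexpected overlap between neighboring members.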
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}
static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // PROCESS_DETACH is expected to be called by a thread that executes
  // ProcessExit() or FreeLibrary(). OS terminates other threads (except the one
  // calling ProcessExit or FreeLibrary). So, it might be safe to access the
  // __kmp_threads[] without taking the forkjoin_lock. However, in fact, some
  // threads can be still alive here, although being about to be terminated. The
  // threads in the array with ds_thread==0 are most suspicious. Actually, it
  // may not be safe to access the __kmp_threads[].

  // TODO: does it make sense to check __kmp_roots[] ?

  // Let's check that there are no other alive threads registered with the OMP
  // lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive) {
        ++thread_count;
      }
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that I'm alone. Now it might be safe to check and reset locks.
  // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved is used for telling the difference:
      //   lpReserved == NULL when FreeLibrary() was called,
      //   lpReserved != NULL when the process terminates.
      // When FreeLibrary() is called, worker threads remain alive. So they will
      // release the forkjoin lock by themselves. When the process terminates,
      // worker threads disappear, triggering the problem of unreleased forkjoin
      // lock as described below.

      // A worker thread can take the forkjoin lock. The problem comes up if
      // that worker thread becomes dead before it releases the forkjoin lock.
      // The forkjoin lock remains taken, while the thread executing
      // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try
      // to take the forkjoin lock and will always fail, so that the application
      // will never finish [normally]. This scenario is possible if
      // __kmpc_end() has not been executed. It looks like it's not a corner
      // case, but common cases:
      //   - the main function was compiled by an alternative compiler;
      //   - the main function was compiled by icl but without /Qopenmp
      //     (application with plugins);
      //   - application terminates by calling C exit(), Fortran CALL EXIT() or
      //     Fortran STOP;
      //   - an alive foreign thread prevented __kmpc_end from doing cleanup.
      //
      // This is a hack to work around the problem.
      // TODO: !!! figure out something better.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status; // return previous setting of whether
  // KMP_LIBRARY=throughput
}

/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
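// Together, __kmp_parallel_deo/__kmp_parallel_dxo pass an "ordered" baton
// around the team: deo spins until t_ordered.dt.t_value equals the caller's
// tid, and dxo hands the baton to the next thread by storing
// (tid + 1) % t_nproc, so ordered iterations retire in thread order.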
/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
      /* accept blame for "ordered" waiting */
      kmp_info_t *this_thread = __kmp_threads[gtid];
      ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
          this_thread->th.ompt_thread_info.wait_id);
    }
#endif

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                           th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
        th->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
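// The thread-count request from the fork path is funneled through
// __kmp_reserve_threads below, which applies, in order: the dynamic
// adjustment mode (load balance, thread limit, or random), the global thread
// limits, and finally the capacity of the __kmp_threads array.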
/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nthreads is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads
#if OMP_40_ENABLED
                                 ,
                                 int enter_teams
#endif /* OMP_40_ENABLED */
                                 ) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }
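  // In the checks below, the number of slots the new team really consumes is
  // __kmp_nth + new_nthreads minus the threads that are already counted:
  // just the master if the root is active, or the whole hot team being
  // reused otherwise.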
  // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  //
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested "
                "%d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}

/* Allocate threads from the thread pool and assign them to the new team. We
   are assured that there are enough threads available, because we checked on
   that earlier within the forkjoin critical section. */
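// A "hot" team is kept assembled between parallel regions so its workers do
// not have to be re-allocated on every fork; with KMP_NESTED_HOT_TEAMS the
// runtime can cache one such team per nesting level, up to
// __kmp_hot_teams_max_level.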
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
#if OMP_40_ENABLED
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
#endif
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }; // for b
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}
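// On x86/x86_64 the fork path snapshots the master's x87 control word and
// MXCSR into the team (propagateFPControl) so that workers can later load the
// same settings (updateHWFPControl); on other architectures both helpers
// compile away to no-ops.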
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. We try to avoid unnecessary writes to the relevant cache line in the
// team structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team
    // during the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration

/* Run a parallel region that has been serialized, so runs only in a team of
   the single master thread. */
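// Serialized regions nest: t_serialized counts how many serialized parallel
// levels are active in the serial team, and each call below either
// allocates/reuses the serial team (first level) or just bumps that counter.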
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  /* Skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

#if OMP_40_ENABLED
  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used
         TODO increase performance by making this lock more specific */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
      ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

      new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                     ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                     proc_bind,
#endif
                                     &this_thr->th.th_current_task->td_icvs,
                                     0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));

      /* TODO the above breaks the requirement that if we run out of resources,
         then we can still guarantee that serialized teams are ok, since we may
         need to allocate a new one */
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
       implicit task for each serialized task represented by
       team->t.t_serialized? */
    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

#if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
    ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
    __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

    KMP_MB();

  } else {
    /* this serialized team is already being used,
     * that's fine, just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
#if OMP_40_ENABLED
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
#endif

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
}
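// __kmp_fork_call is the common fork path for all front ends (see
// call_context): it decides between a serialized region, a parallel nested
// inside a teams construct, and a true fork, reserves threads, and finally
// launches the microtask on the resulting team.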
/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc,
#if OMPT_SUPPORT
                    void *unwrapped_task,
#endif
                    microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    va_list *ap
#else
                    va_list ap
#endif
                    ) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
#if OMP_40_ENABLED
  int active_level;
  int teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  { // KMP_TIME_BLOCK
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
    KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);

    KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
    if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
      /* Some systems prefer the stack for the root thread(s) to start with */
      /* some gap from the parent stack to prevent false sharing. */
      void *dummy = KMP_ALLOCA(__kmp_stkpadding);
      /* These 2 lines below are so this does not get optimized out */
      if (__kmp_stkpadding > KMP_MAX_STKPADDING)
        __kmp_stkpadding += (short)((kmp_int64)dummy);
    }
    /* initialize if needed */
    KMP_DEBUG_ASSERT(
        __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
    if (!TCR_4(__kmp_init_parallel))
      __kmp_parallel_initialize();

    /* setup current data */
    master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
    // shutdown
    parent_team = master_th->th.th_team;
    master_tid = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root = master_th->th.th_root;
    master_active = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
    ompt_parallel_id_t ompt_parallel_id;
    ompt_task_id_t ompt_task_id;
    ompt_frame_t *ompt_frame;
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;

    if (ompt_enabled) {
      ompt_parallel_id = __ompt_parallel_id_new(gtid);
      ompt_task_id = __ompt_get_task_id_internal(0);
      ompt_frame = __ompt_get_task_frame_internal(0);
    }
#endif

    // Nested level will be an index in the nested nthreads array
    level = parent_team->t.t_level;
    // used to launch non-serial teams even if nested is not allowed
    active_level = parent_team->t.t_active_level;
#if OMP_40_ENABLED
    teams_level =
        master_th->th
            .th_teams_level; // needed to check nesting inside the teams
#endif
#if KMP_NESTED_HOT_TEAMS
    p_hot_teams = &master_th->th.th_hot_teams;
    if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
      *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
          sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
      (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
      (*p_hot_teams)[0].hot_team_nth =
          1; // it is either actual or not needed (when active_level > 0)
    }
#endif

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
      int team_size = master_set_numthreads;

      ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
          ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task,
          OMPT_INVOKER(call_context));
    }
#endif

    master_th->th.th_ident = loc;

#if OMP_40_ENABLED
    if (master_th->th.th_teams_microtask && ap &&
        microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
      // AC: This is start of parallel that is nested inside teams construct.
      // The team is actual (hot), all workers are ready at the fork barrier.
      // No lock needed to initialize the team a bit, then free workers.
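      // Copy the microtask arguments out of the va_list into the (hot) parent
      // team's argv so the already-waiting workers can read them.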
      parent_team->t.t_ident = loc;
      __kmp_alloc_argv_entries(argc, parent_team, TRUE);
      parent_team->t.t_argc = argc;
      argv = (void **)parent_team->t.t_argv;
      for (i = argc - 1; i >= 0; --i)
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
        *argv++ = va_arg(*ap, void *);
#else
        *argv++ = va_arg(ap, void *);
#endif
      // Increment our nested depth level, but do not increase the
      // serialization
      if (parent_team == master_th->th.th_serial_team) {
        // AC: we are in serialized parallel
        __kmpc_serialized_parallel(loc, gtid);
        KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
        // AC: need this so that enquiry functions work
        // correctly, will restore at join time
        parent_team->t.t_serialized--;
#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task,
                                  ompt_parallel_id);
          lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
          exit_runtime_p =
              &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th);

#if OMPT_TRACE
          /* OMPT implicit task begin */
          my_task_id = lw_taskteam.ompt_task_info.task_id;
          my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                my_parallel_id, my_task_id);
          }
#endif

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        *exit_runtime_p = NULL;
        if (ompt_enabled) {
#if OMPT_TRACE
          lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;

          if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
            ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                ompt_parallel_id, ompt_task_id);
          }

          __ompt_lw_taskteam_unlink(master_th);
          // clear the task id only after unlinking the task
          lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
#endif

          if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
            ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context));
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
        return TRUE;
      }

      parent_team->t.t_pkfn = microtask;
#if OMPT_SUPPORT
      parent_team->t.ompt_team_info.microtask = unwrapped_task;
#endif
      parent_team->t.t_invoke = invoker;
      KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
      parent_team->t.t_active_level++;
      parent_team->t.t_level++;

      /* Change number of threads in the team if requested */
      if (master_set_numthreads) { // The parallel has num_threads clause
        if (master_set_numthreads < master_th->th.th_teams_size.nth) {
          // AC: only can reduce number of threads dynamically, can't increase
          kmp_info_t **other_threads = parent_team->t.t_threads;
          parent_team->t.t_nproc = master_set_numthreads;
          for (i = 0; i < master_set_numthreads; ++i) {
            other_threads[i]->th.th_team_nproc = master_set_numthreads;
          }
          // Keep extra threads hot in the team for possible next parallels
        }
        master_th->th.th_set_nproc = 0;
      }

#if USE_DEBUGGER
      if (__kmp_debugging) { // Let debugger override number of threads.
        int nth = __kmp_omp_num_threads(loc);
        if (nth >
            0) { // 0 means debugger does not want to change number of threads.
          master_set_numthreads = nth;
        }; // if
      }; // if
#endif

      KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                    "master_th=%p, gtid=%d\n",
                    root, parent_team, master_th, gtid));
      __kmp_internal_fork(loc, gtid, parent_team);
      KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                    "master_th=%p, gtid=%d\n",
                    root, parent_team, master_th, gtid));

      /* Invoke microtask for MASTER thread */
      KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                    parent_team->t.t_id, parent_team->t.t_pkfn));

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        if (!parent_team->t.t_invoke(gtid)) {
          KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
        }
      }
      KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                    parent_team->t.t_id, parent_team->t.t_pkfn));
      KMP_MB(); /* Flush all pending memory write invalidates. */

      KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

      return TRUE;
    } // Parallel closely nested in teams construct
#endif /* OMP_40_ENABLED */

#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                       parent_team->t.t_task_team[master_th->th.th_task_state]);
    }
#endif

    if (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels) {
      nthreads = 1;
    } else {
#if OMP_40_ENABLED
      int enter_teams = ((ap == NULL && active_level == 0) ||
                         (ap && teams_level > 0 && teams_level == level));
#endif
      nthreads =
          master_set_numthreads
              ? master_set_numthreads
              : get__nproc_2(
                    parent_team,
                    master_tid); // TODO: get nproc directly from current task

      // Check if we need to take forkjoin lock? (no need for serialized
      // parallel out of teams construct). This code moved here from
      // __kmp_reserve_threads() to speedup nested serialized parallels.
      if (nthreads > 1) {
        if ((!get__nested(master_th) && (root->r.r_in_parallel
#if OMP_40_ENABLED
                                         && !enter_teams
#endif /* OMP_40_ENABLED */
                                         )) ||
            (__kmp_library == library_serial)) {
          KC_TRACE(
              10,
              ("__kmp_fork_call: T#%d serializing team; requested %d threads\n",
               gtid, nthreads));
          nthreads = 1;
        }
      }
      if (nthreads > 1) {
        /* determine how many new threads we can use */
        __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

        nthreads = __kmp_reserve_threads(
            root, parent_team, master_tid, nthreads
#if OMP_40_ENABLED
            /* AC: If we execute teams from parallel region (on host), then
               teams should be created but each can only have 1 thread if
               nesting is disabled. If teams called from serial region, then
               teams and their threads should be created regardless of the
               nesting setting. */
If teams is called from a serial region, then 1660 teams and their threads should be created regardless of the 1661 nesting setting. */ 1662 , 1663 enter_teams 1664 #endif /* OMP_40_ENABLED */ 1665 ); 1666 if (nthreads == 1) { 1667 // Free lock for single thread execution here; for multi-thread 1668 // execution it will be freed later after the team of threads is created 1669 // and initialized 1670 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 1671 } 1672 } 1673 } 1674 KMP_DEBUG_ASSERT(nthreads > 0); 1675 1676 // If we temporarily changed the set number of threads, then restore it now 1677 master_th->th.th_set_nproc = 0; 1678 1679 /* create a serialized parallel region? */ 1680 if (nthreads == 1) { 1681 /* josh todo: hypothetical question: what do we do for OS X*? */ 1682 #if KMP_OS_LINUX && \ 1683 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 1684 void *args[argc]; 1685 #else 1686 void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); 1687 #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \ 1688 KMP_ARCH_AARCH64) */ 1689 1690 KA_TRACE(20, 1691 ("__kmp_fork_call: T#%d serializing parallel region\n", gtid)); 1692 1693 __kmpc_serialized_parallel(loc, gtid); 1694 1695 if (call_context == fork_context_intel) { 1696 /* TODO this sucks, use the compiler itself to pass args! :) */ 1697 master_th->th.th_serial_team->t.t_ident = loc; 1698 #if OMP_40_ENABLED 1699 if (!ap) { 1700 // revert change made in __kmpc_serialized_parallel() 1701 master_th->th.th_serial_team->t.t_level--; 1702 // Get args from parent team for teams construct 1703 1704 #if OMPT_SUPPORT 1705 void *dummy; 1706 void **exit_runtime_p; 1707 1708 ompt_lw_taskteam_t lw_taskteam; 1709 1710 if (ompt_enabled) { 1711 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1712 unwrapped_task, ompt_parallel_id); 1713 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1714 exit_runtime_p = 1715 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1716 1717 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1718 1719 #if OMPT_TRACE 1720 my_task_id = lw_taskteam.ompt_task_info.task_id; 1721 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1722 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1723 ompt_parallel_id, my_task_id); 1724 } 1725 #endif 1726 1727 /* OMPT state */ 1728 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1729 } else { 1730 exit_runtime_p = &dummy; 1731 } 1732 #endif 1733 1734 { 1735 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1736 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1737 __kmp_invoke_microtask(microtask, gtid, 0, argc, 1738 parent_team->t.t_argv 1739 #if OMPT_SUPPORT 1740 , 1741 exit_runtime_p 1742 #endif 1743 ); 1744 } 1745 1746 #if OMPT_SUPPORT 1747 *exit_runtime_p = NULL; 1748 if (ompt_enabled) { 1749 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1750 1751 #if OMPT_TRACE 1752 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1753 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1754 ompt_parallel_id, ompt_task_id); 1755 } 1756 #endif 1757 1758 __ompt_lw_taskteam_unlink(master_th); 1759 // reset (clear) the task id only after unlinking the task 1760 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1761 1762 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1763 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1764 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); 1765 } 1766 master_th->th.ompt_thread_info.state =
ompt_state_overhead; 1767 } 1768 #endif 1769 } else if (microtask == (microtask_t)__kmp_teams_master) { 1770 KMP_DEBUG_ASSERT(master_th->th.th_team == 1771 master_th->th.th_serial_team); 1772 team = master_th->th.th_team; 1773 // team->t.t_pkfn = microtask; 1774 team->t.t_invoke = invoker; 1775 __kmp_alloc_argv_entries(argc, team, TRUE); 1776 team->t.t_argc = argc; 1777 argv = (void **)team->t.t_argv; 1778 if (ap) { 1779 for (i = argc - 1; i >= 0; --i) 1780 // TODO: revert workaround for Intel(R) 64 tracker #96 1781 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1782 *argv++ = va_arg(*ap, void *); 1783 #else 1784 *argv++ = va_arg(ap, void *); 1785 #endif 1786 } else { 1787 for (i = 0; i < argc; ++i) 1788 // Get args from parent team for teams construct 1789 argv[i] = parent_team->t.t_argv[i]; 1790 } 1791 // AC: revert change made in __kmpc_serialized_parallel() 1792 // because initial code in teams should have level=0 1793 team->t.t_level--; 1794 // AC: call special invoker for outer "parallel" of teams construct 1795 { 1796 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1797 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1798 invoker(gtid); 1799 } 1800 } else { 1801 #endif /* OMP_40_ENABLED */ 1802 argv = args; 1803 for (i = argc - 1; i >= 0; --i) 1804 // TODO: revert workaround for Intel(R) 64 tracker #96 1805 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1806 *argv++ = va_arg(*ap, void *); 1807 #else 1808 *argv++ = va_arg(ap, void *); 1809 #endif 1810 KMP_MB(); 1811 1812 #if OMPT_SUPPORT 1813 void *dummy; 1814 void **exit_runtime_p; 1815 1816 ompt_lw_taskteam_t lw_taskteam; 1817 1818 if (ompt_enabled) { 1819 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1820 unwrapped_task, ompt_parallel_id); 1821 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1822 exit_runtime_p = 1823 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1824 1825 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1826 1827 #if OMPT_TRACE 1828 /* OMPT implicit task begin */ 1829 my_task_id = lw_taskteam.ompt_task_info.task_id; 1830 my_parallel_id = ompt_parallel_id; 1831 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1832 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1833 my_parallel_id, my_task_id); 1834 } 1835 #endif 1836 1837 /* OMPT state */ 1838 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1839 } else { 1840 exit_runtime_p = &dummy; 1841 } 1842 #endif 1843 1844 { 1845 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1846 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1847 __kmp_invoke_microtask(microtask, gtid, 0, argc, args 1848 #if OMPT_SUPPORT 1849 , 1850 exit_runtime_p 1851 #endif 1852 ); 1853 } 1854 1855 #if OMPT_SUPPORT 1856 *exit_runtime_p = NULL; 1857 if (ompt_enabled) { 1858 #if OMPT_TRACE 1859 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1860 1861 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1862 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1863 my_parallel_id, my_task_id); 1864 } 1865 #endif 1866 1867 __ompt_lw_taskteam_unlink(master_th); 1868 // reset (clear) the task id only after unlinking the task 1869 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1870 1871 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1872 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1873 ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); 1874 } 1875 master_th->th.ompt_thread_info.state = ompt_state_overhead; 1876 } 1877
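/* Sketch of the lightweight task team protocol used on the serialized paths
   above (inferred from the calls themselves, not a normative description):

     ompt_lw_taskteam_t lw;                        // stack-allocated stand-in
     __ompt_lw_taskteam_init(&lw, th, gtid, ...);  // fill in parallel/task ids
     __ompt_lw_taskteam_link(&lw, th);             // push onto the thread
     // ... invoke the microtask ...
     __ompt_lw_taskteam_unlink(th);                // pop it again

   so OMPT tools can observe a parallel region and its implicit task even
   though no real kmp_team_t was forked. */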
#endif 1878 #if OMP_40_ENABLED 1879 } 1880 #endif /* OMP_40_ENABLED */ 1881 } else if (call_context == fork_context_gnu) { 1882 #if OMPT_SUPPORT 1883 ompt_lw_taskteam_t *lwt = 1884 (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); 1885 __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task, 1886 ompt_parallel_id); 1887 1888 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); 1889 lwt->ompt_task_info.frame.exit_runtime_frame = NULL; 1890 __ompt_lw_taskteam_link(lwt, master_th); 1891 #endif 1892 1893 // we were called from GNU native code 1894 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 1895 return FALSE; 1896 } 1897 else { 1898 KMP_ASSERT2(call_context < fork_context_last, 1899 "__kmp_fork_call: unknown fork_context parameter"); 1900 } 1901 1902 KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); 1903 KMP_MB(); 1904 return FALSE; 1905 } 1906 1907 // GEH: only modify the executing flag in the case when not serialized 1908 // serialized case is handled in kmpc_serialized_parallel 1909 KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " 1910 "curtask=%p, curtask_max_aclevel=%d\n", 1911 parent_team->t.t_active_level, master_th, 1912 master_th->th.th_current_task, 1913 master_th->th.th_current_task->td_icvs.max_active_levels)); 1914 // TODO: GEH - cannot do this assertion because root thread not set up as 1915 // executing 1916 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); 1917 master_th->th.th_current_task->td_flags.executing = 0; 1918 1919 #if OMP_40_ENABLED 1920 if (!master_th->th.th_teams_microtask || level > teams_level) 1921 #endif /* OMP_40_ENABLED */ 1922 { 1923 /* Increment our nested depth level */ 1924 KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel); 1925 } 1926 1927 // See if we need to make a copy of the ICVs. 1928 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; 1929 if ((level + 1 < __kmp_nested_nth.used) && 1930 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { 1931 nthreads_icv = __kmp_nested_nth.nth[level + 1]; 1932 } else { 1933 nthreads_icv = 0; // don't update 1934 } 1935 1936 #if OMP_40_ENABLED 1937 // Figure out the proc_bind_policy for the new team. 1938 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 1939 kmp_proc_bind_t proc_bind_icv = 1940 proc_bind_default; // proc_bind_default means don't update 1941 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { 1942 proc_bind = proc_bind_false; 1943 } else { 1944 if (proc_bind == proc_bind_default) { 1945 // No proc_bind clause specified; use current proc-bind-var for this 1946 // parallel region 1947 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 1948 } 1949 /* else: The proc_bind policy was specified explicitly on parallel clause. 1950 This overrides proc-bind-var for this parallel region, but does not 1951 change proc-bind-var. */ 1952 // Figure the value of proc-bind-var for the child threads. 
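/* Illustrative (assuming the usual OMP_PROC_BIND list parsing in the settings
   code): OMP_PROC_BIND=spread,close is stored as
     __kmp_nested_proc_bind.bind_types[] = { proc_bind_spread, proc_bind_close }
   so the bind_types[level + 1] lookup below hands each nesting level the next
   entry in the list. */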
1953 if ((level + 1 < __kmp_nested_proc_bind.used) && 1954 (__kmp_nested_proc_bind.bind_types[level + 1] != 1955 master_th->th.th_current_task->td_icvs.proc_bind)) { 1956 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; 1957 } 1958 } 1959 1960 // Reset for next parallel region 1961 master_th->th.th_set_proc_bind = proc_bind_default; 1962 #endif /* OMP_40_ENABLED */ 1963 1964 if ((nthreads_icv > 0) 1965 #if OMP_40_ENABLED 1966 || (proc_bind_icv != proc_bind_default) 1967 #endif /* OMP_40_ENABLED */ 1968 ) { 1969 kmp_internal_control_t new_icvs; 1970 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); 1971 new_icvs.next = NULL; 1972 if (nthreads_icv > 0) { 1973 new_icvs.nproc = nthreads_icv; 1974 } 1975 1976 #if OMP_40_ENABLED 1977 if (proc_bind_icv != proc_bind_default) { 1978 new_icvs.proc_bind = proc_bind_icv; 1979 } 1980 #endif /* OMP_40_ENABLED */ 1981 1982 /* allocate a new parallel team */ 1983 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 1984 team = __kmp_allocate_team(root, nthreads, nthreads, 1985 #if OMPT_SUPPORT 1986 ompt_parallel_id, 1987 #endif 1988 #if OMP_40_ENABLED 1989 proc_bind, 1990 #endif 1991 &new_icvs, argc USE_NESTED_HOT_ARG(master_th)); 1992 } else { 1993 /* allocate a new parallel team */ 1994 KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); 1995 team = __kmp_allocate_team(root, nthreads, nthreads, 1996 #if OMPT_SUPPORT 1997 ompt_parallel_id, 1998 #endif 1999 #if OMP_40_ENABLED 2000 proc_bind, 2001 #endif 2002 &master_th->th.th_current_task->td_icvs, 2003 argc USE_NESTED_HOT_ARG(master_th)); 2004 } 2005 KF_TRACE( 2006 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); 2007 2008 /* setup the new team */ 2009 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); 2010 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); 2011 KMP_CHECK_UPDATE(team->t.t_ident, loc); 2012 KMP_CHECK_UPDATE(team->t.t_parent, parent_team); 2013 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); 2014 #if OMPT_SUPPORT 2015 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task); 2016 #endif 2017 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe 2018 // TODO: parent_team->t.t_level == INT_MAX ??? 2019 #if OMP_40_ENABLED 2020 if (!master_th->th.th_teams_microtask || level > teams_level) { 2021 #endif /* OMP_40_ENABLED */ 2022 int new_level = parent_team->t.t_level + 1; 2023 KMP_CHECK_UPDATE(team->t.t_level, new_level); 2024 new_level = parent_team->t.t_active_level + 1; 2025 KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 2026 #if OMP_40_ENABLED 2027 } else { 2028 // AC: Do not increase parallel level at start of the teams construct 2029 int new_level = parent_team->t.t_level; 2030 KMP_CHECK_UPDATE(team->t.t_level, new_level); 2031 new_level = parent_team->t.t_active_level; 2032 KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 2033 } 2034 #endif /* OMP_40_ENABLED */ 2035 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); 2036 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || 2037 team->t.t_sched.chunk != new_sched.chunk) 2038 team->t.t_sched = 2039 new_sched; // set master's schedule as new run-time schedule 2040 2041 #if OMP_40_ENABLED 2042 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); 2043 #endif 2044 2045 // Update the floating point rounding in the team if required. 2046 propagateFPControl(team); 2047 2048 if (__kmp_tasking_mode != tskm_immediate_exec) { 2049 // Set master's task team to team's task team. 
Unless this is hot team, it 2050 // should be NULL. 2051 #if 0 2052 // Patch out an assertion that trips while the runtime seems to operate 2053 // correctly. Avoiding the preconditions that cause the assertion to trip 2054 // has been promised as a forthcoming patch. 2055 KMP_DEBUG_ASSERT(master_th->th.th_task_team == 2056 parent_team->t.t_task_team[master_th->th.th_task_state]); 2057 #endif 2058 KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team " 2059 "%p, new task_team %p / team %p\n", 2060 __kmp_gtid_from_thread(master_th), 2061 master_th->th.th_task_team, parent_team, 2062 team->t.t_task_team[master_th->th.th_task_state], team)); 2063 2064 if (active_level || master_th->th.th_task_team) { 2065 // Take a memo of master's task_state 2066 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 2067 if (master_th->th.th_task_state_top >= 2068 master_th->th.th_task_state_stack_sz) { // increase size 2069 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz; 2070 kmp_uint8 *old_stack, *new_stack; 2071 kmp_uint32 i; 2072 new_stack = (kmp_uint8 *)__kmp_allocate(new_size); 2073 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) { 2074 new_stack[i] = master_th->th.th_task_state_memo_stack[i]; 2075 } 2076 for (i = master_th->th.th_task_state_stack_sz; i < new_size; 2077 ++i) { // zero-init rest of stack 2078 new_stack[i] = 0; 2079 } 2080 old_stack = master_th->th.th_task_state_memo_stack; 2081 master_th->th.th_task_state_memo_stack = new_stack; 2082 master_th->th.th_task_state_stack_sz = new_size; 2083 __kmp_free(old_stack); 2084 } 2085 // Store master's task_state on stack 2086 master_th->th 2087 .th_task_state_memo_stack[master_th->th.th_task_state_top] = 2088 master_th->th.th_task_state; 2089 master_th->th.th_task_state_top++; 2090 #if KMP_NESTED_HOT_TEAMS 2091 if (team == 2092 master_th->th.th_hot_teams[active_level] 2093 .hot_team) { // Restore master's nested state if nested hot team 2094 master_th->th.th_task_state = 2095 master_th->th 2096 .th_task_state_memo_stack[master_th->th.th_task_state_top]; 2097 } else { 2098 #endif 2099 master_th->th.th_task_state = 0; 2100 #if KMP_NESTED_HOT_TEAMS 2101 } 2102 #endif 2103 } 2104 #if !KMP_NESTED_HOT_TEAMS 2105 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || 2106 (team == root->r.r_hot_team)); 2107 #endif 2108 } 2109 2110 KA_TRACE( 2111 20, 2112 ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", 2113 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, 2114 team->t.t_nproc)); 2115 KMP_DEBUG_ASSERT(team != root->r.r_hot_team || 2116 (team->t.t_master_tid == 0 && 2117 (team->t.t_parent == root->r.r_root_team || 2118 team->t.t_parent->t.t_serialized))); 2119 KMP_MB(); 2120 2121 /* now, setup the arguments */ 2122 argv = (void **)team->t.t_argv; 2123 #if OMP_40_ENABLED 2124 if (ap) { 2125 #endif /* OMP_40_ENABLED */ 2126 for (i = argc - 1; i >= 0; --i) { 2127 // TODO: revert workaround for Intel(R) 64 tracker #96 2128 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 2129 void *new_argv = va_arg(*ap, void *); 2130 #else 2131 void *new_argv = va_arg(ap, void *); 2132 #endif 2133 KMP_CHECK_UPDATE(*argv, new_argv); 2134 argv++; 2135 } 2136 #if OMP_40_ENABLED 2137 } else { 2138 for (i = 0; i < argc; ++i) { 2139 // Get args from parent team for teams construct 2140 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); 2141 } 2142 } 2143 #endif /* OMP_40_ENABLED */ 2144 2145 /* now actually fork the threads */ 2146 
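/* The KMP_CHECK_UPDATE macros used throughout this setup are approximately
   (see kmp.h for the authoritative definitions):

     #define KMP_CHECK_UPDATE(a, b)      if ((a) != (b)) (a) = (b)
     #define KMP_CHECK_UPDATE_SYNC(a, b) if ((a) != (b)) TCW_SYNC_PTR((a), (b))

   i.e. stores happen only on change, so reusing a hot team with unchanged
   values does not dirty shared cache lines -- the same idea as the r_active
   test just below. */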
KMP_CHECK_UPDATE(team->t.t_master_active, master_active); 2147 if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong 2148 root->r.r_active = TRUE; 2149 2150 __kmp_fork_team_threads(root, team, master_th, gtid); 2151 __kmp_setup_icv_copy(team, nthreads, 2152 &master_th->th.th_current_task->td_icvs, loc); 2153 2154 #if OMPT_SUPPORT 2155 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 2156 #endif 2157 2158 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 2159 2160 #if USE_ITT_BUILD 2161 if (team->t.t_active_level == 1 // only report frames at level 1 2162 #if OMP_40_ENABLED 2163 && !master_th->th.th_teams_microtask // not in teams construct 2164 #endif /* OMP_40_ENABLED */ 2165 ) { 2166 #if USE_ITT_NOTIFY 2167 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && 2168 (__kmp_forkjoin_frames_mode == 3 || 2169 __kmp_forkjoin_frames_mode == 1)) { 2170 kmp_uint64 tmp_time = 0; 2171 if (__itt_get_timestamp_ptr) 2172 tmp_time = __itt_get_timestamp(); 2173 // Internal fork - report frame begin 2174 master_th->th.th_frame_time = tmp_time; 2175 if (__kmp_forkjoin_frames_mode == 3) 2176 team->t.t_region_time = tmp_time; 2177 } else // only one notification scheme (either "submit" or 2178 // "forking/joined", not both) 2179 #endif /* USE_ITT_NOTIFY */ 2180 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) && 2181 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) { 2182 // Mark start of "parallel" region for VTune. 2183 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); 2184 } 2185 } 2186 #endif /* USE_ITT_BUILD */ 2187 2188 /* now go on and do the work */ 2189 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team); 2190 KMP_MB(); 2191 KF_TRACE(10, 2192 ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", 2193 root, team, master_th, gtid)); 2194 2195 #if USE_ITT_BUILD 2196 if (__itt_stack_caller_create_ptr) { 2197 team->t.t_stack_id = 2198 __kmp_itt_stack_caller_create(); // create new stack stitching id 2199 // before entering fork barrier 2200 } 2201 #endif /* USE_ITT_BUILD */ 2202 2203 #if OMP_40_ENABLED 2204 if (ap) // AC: skip __kmp_internal_fork at teams construct, let only master 2205 // threads execute 2206 #endif /* OMP_40_ENABLED */ 2207 { 2208 __kmp_internal_fork(loc, gtid, team); 2209 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, " 2210 "master_th=%p, gtid=%d\n", 2211 root, team, master_th, gtid)); 2212 } 2213 2214 if (call_context == fork_context_gnu) { 2215 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 2216 return TRUE; 2217 } 2218 2219 /* Invoke microtask for MASTER thread */ 2220 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, 2221 team->t.t_id, team->t.t_pkfn)); 2222 } // END of timer KMP_fork_call block 2223 2224 { 2225 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 2226 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 2227 if (!team->t.t_invoke(gtid)) { 2228 KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread"); 2229 } 2230 } 2231 KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, 2232 team->t.t_id, team->t.t_pkfn)); 2233 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 2234 2235 KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); 2236 2237 #if OMPT_SUPPORT 2238 if (ompt_enabled) { 2239 master_th->th.ompt_thread_info.state = ompt_state_overhead; 2240 } 2241 #endif 2242 2243 return TRUE; 2244 } 2245 2246 #if OMPT_SUPPORT 2247 static inline void __kmp_join_restore_state(kmp_info_t *thread, 2248 kmp_team_t *team) { 2249 // restore state outside the region 2250 thread->th.ompt_thread_info.state = 2251 ((team->t.t_serialized) ? ompt_state_work_serial 2252 : ompt_state_work_parallel); 2253 } 2254 2255 static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team, 2256 ompt_parallel_id_t parallel_id, 2257 fork_context_e fork_context) { 2258 ompt_task_info_t *task_info = __ompt_get_taskinfo(0); 2259 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 2260 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 2261 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); 2262 } 2263 2264 task_info->frame.reenter_runtime_frame = NULL; 2265 __kmp_join_restore_state(thread, team); 2266 } 2267 #endif 2268 2269 void __kmp_join_call(ident_t *loc, int gtid 2270 #if OMPT_SUPPORT 2271 , 2272 enum fork_context_e fork_context 2273 #endif 2274 #if OMP_40_ENABLED 2275 , 2276 int exit_teams 2277 #endif /* OMP_40_ENABLED */ 2278 ) { 2279 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call); 2280 kmp_team_t *team; 2281 kmp_team_t *parent_team; 2282 kmp_info_t *master_th; 2283 kmp_root_t *root; 2284 int master_active; 2285 int i; 2286 2287 KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid)); 2288 2289 /* setup current data */ 2290 master_th = __kmp_threads[gtid]; 2291 root = master_th->th.th_root; 2292 team = master_th->th.th_team; 2293 parent_team = team->t.t_parent; 2294 2295 master_th->th.th_ident = loc; 2296 2297 #if OMPT_SUPPORT 2298 if (ompt_enabled) { 2299 master_th->th.ompt_thread_info.state = ompt_state_overhead; 2300 } 2301 #endif 2302 2303 #if KMP_DEBUG 2304 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) { 2305 KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, " 2306 "th_task_team = %p\n", 2307 __kmp_gtid_from_thread(master_th), team, 2308 team->t.t_task_team[master_th->th.th_task_state], 2309 master_th->th.th_task_team)); 2310 KMP_DEBUG_ASSERT(master_th->th.th_task_team == 2311 team->t.t_task_team[master_th->th.th_task_state]); 2312 } 2313 #endif 2314 2315 if (team->t.t_serialized) { 2316 #if OMP_40_ENABLED 2317 if (master_th->th.th_teams_microtask) { 2318 // We are in teams construct 2319 int level = team->t.t_level; 2320 int tlevel = master_th->th.th_teams_level; 2321 if (level == tlevel) { 2322 // AC: we haven't incremented it earlier at start of teams construct, 2323 // so do it here - at the end of teams construct 2324 team->t.t_level++; 2325 } else if (level == tlevel + 1) { 2326 // AC: we are exiting parallel inside teams, need to increment 2327 // serialization in order to restore it in the next call to 2328 // __kmpc_end_serialized_parallel 2329 team->t.t_serialized++; 2330 } 2331 } 2332 #endif /* OMP_40_ENABLED */ 2333 __kmpc_end_serialized_parallel(loc, gtid); 2334 2335 #if OMPT_SUPPORT 2336 if (ompt_enabled) { 2337 __kmp_join_restore_state(master_th, parent_team); 2338 } 2339 #endif 2340 2341 return; 2342 } 2343 2344 master_active = team->t.t_master_active; 2345 2346 #if OMP_40_ENABLED 2347 if (!exit_teams) 2348 #endif /* OMP_40_ENABLED */ 2349 { 2350 // AC: No barrier for internal teams at exit from teams construct. 2351 // But there is barrier for external team (league). 
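/* Illustrative shape (user code) for the two cases handled here:

     #pragma omp teams        // external team (league): joins with a barrier
     #pragma omp parallel     // internal team: no join barrier at teams exit

   As the name suggests, exit_teams is nonzero only when the teams construct
   itself is being left. */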
2352 __kmp_internal_join(loc, gtid, team); 2353 } 2354 #if OMP_40_ENABLED 2355 else { 2356 master_th->th.th_task_state = 2357 0; // AC: no tasking in teams (out of any parallel) 2358 } 2359 #endif /* OMP_40_ENABLED */ 2360 2361 KMP_MB(); 2362 2363 #if OMPT_SUPPORT 2364 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; 2365 #endif 2366 2367 #if USE_ITT_BUILD 2368 if (__itt_stack_caller_create_ptr) { 2369 __kmp_itt_stack_caller_destroy( 2370 (__itt_caller)team->t 2371 .t_stack_id); // destroy the stack stitching id after join barrier 2372 } 2373 2374 // Mark end of "parallel" region for VTune. 2375 if (team->t.t_active_level == 1 2376 #if OMP_40_ENABLED 2377 && !master_th->th.th_teams_microtask /* not in teams construct */ 2378 #endif /* OMP_40_ENABLED */ 2379 ) { 2380 master_th->th.th_ident = loc; 2381 // only one notification scheme (either "submit" or "forking/joined", not 2382 // both) 2383 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && 2384 __kmp_forkjoin_frames_mode == 3) 2385 __kmp_itt_frame_submit(gtid, team->t.t_region_time, 2386 master_th->th.th_frame_time, 0, loc, 2387 master_th->th.th_team_nproc, 1); 2388 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) && 2389 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames) 2390 __kmp_itt_region_joined(gtid); 2391 } // active_level == 1 2392 #endif /* USE_ITT_BUILD */ 2393 2394 #if OMP_40_ENABLED 2395 if (master_th->th.th_teams_microtask && !exit_teams && 2396 team->t.t_pkfn != (microtask_t)__kmp_teams_master && 2397 team->t.t_level == master_th->th.th_teams_level + 1) { 2398 // AC: We need to leave the team structure intact at the end of parallel 2399 // inside the teams construct, so that at the next parallel same (hot) team 2400 // works, only adjust nesting levels 2401 2402 /* Decrement our nested depth level */ 2403 team->t.t_level--; 2404 team->t.t_active_level--; 2405 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel); 2406 2407 /* Restore number of threads in the team if needed */ 2408 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) { 2409 int old_num = master_th->th.th_team_nproc; 2410 int new_num = master_th->th.th_teams_size.nth; 2411 kmp_info_t **other_threads = team->t.t_threads; 2412 team->t.t_nproc = new_num; 2413 for (i = 0; i < old_num; ++i) { 2414 other_threads[i]->th.th_team_nproc = new_num; 2415 } 2416 // Adjust states of non-used threads of the team 2417 for (i = old_num; i < new_num; ++i) { 2418 // Re-initialize thread's barrier data. 
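// (A thread re-activated here must pick up the team's current arrived counts;
// the loop below restores the invariant
//   balign[b].bb.b_arrived == team->t.t_bar[b].b_arrived
// so the thread's next barrier does not wait on stale state.)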
2419 int b; 2420 kmp_balign_t *balign = other_threads[i]->th.th_bar; 2421 for (b = 0; b < bs_last_barrier; ++b) { 2422 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 2423 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 2424 #if USE_DEBUGGER 2425 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 2426 #endif 2427 } 2428 if (__kmp_tasking_mode != tskm_immediate_exec) { 2429 // Synchronize thread's task state 2430 other_threads[i]->th.th_task_state = master_th->th.th_task_state; 2431 } 2432 } 2433 } 2434 2435 #if OMPT_SUPPORT 2436 if (ompt_enabled) { 2437 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); 2438 } 2439 #endif 2440 2441 return; 2442 } 2443 #endif /* OMP_40_ENABLED */ 2444 2445 /* do cleanup and restore the parent team */ 2446 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid; 2447 master_th->th.th_local.this_construct = team->t.t_master_this_cons; 2448 2449 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid]; 2450 2451 /* jc: The following lock has instructions with REL and ACQ semantics, 2452 separating the parallel user code called in this parallel region 2453 from the serial user code called after this function returns. */ 2454 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 2455 2456 #if OMP_40_ENABLED 2457 if (!master_th->th.th_teams_microtask || 2458 team->t.t_level > master_th->th.th_teams_level) 2459 #endif /* OMP_40_ENABLED */ 2460 { 2461 /* Decrement our nested depth level */ 2462 KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel); 2463 } 2464 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0); 2465 2466 #if OMPT_SUPPORT && OMPT_TRACE 2467 if (ompt_enabled) { 2468 ompt_task_info_t *task_info = __ompt_get_taskinfo(0); 2469 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 2470 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 2471 parallel_id, task_info->task_id); 2472 } 2473 task_info->frame.exit_runtime_frame = NULL; 2474 task_info->task_id = 0; 2475 } 2476 #endif 2477 2478 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0, 2479 master_th, team)); 2480 __kmp_pop_current_task_from_thread(master_th); 2481 2482 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 2483 // Restore master thread's partition. 2484 master_th->th.th_first_place = team->t.t_first_place; 2485 master_th->th.th_last_place = team->t.t_last_place; 2486 #endif /* OMP_40_ENABLED */ 2487 2488 updateHWFPControl(team); 2489 2490 if (root->r.r_active != master_active) 2491 root->r.r_active = master_active; 2492 2493 __kmp_free_team(root, team USE_NESTED_HOT_ARG( 2494 master_th)); // this will free worker threads 2495 2496 /* this race was fun to find. make sure the following is in the critical 2497 region otherwise assertions may fail occasionally since the old team may be 2498 reallocated and the hierarchy appears inconsistent. it is actually safe to 2499 run and won't cause any bugs, but will cause those assertion failures. 
it's 2500 only one deref&assign so might as well put this in the critical region */ 2501 master_th->th.th_team = parent_team; 2502 master_th->th.th_team_nproc = parent_team->t.t_nproc; 2503 master_th->th.th_team_master = parent_team->t.t_threads[0]; 2504 master_th->th.th_team_serialized = parent_team->t.t_serialized; 2505 2506 /* restore serialized team, if need be */ 2507 if (parent_team->t.t_serialized && 2508 parent_team != master_th->th.th_serial_team && 2509 parent_team != root->r.r_root_team) { 2510 __kmp_free_team(root, 2511 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL)); 2512 master_th->th.th_serial_team = parent_team; 2513 } 2514 2515 if (__kmp_tasking_mode != tskm_immediate_exec) { 2516 if (master_th->th.th_task_state_top > 2517 0) { // Restore task state from memo stack 2518 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 2519 // Remember master's state if we re-use this nested hot team 2520 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = 2521 master_th->th.th_task_state; 2522 --master_th->th.th_task_state_top; // pop 2523 // Now restore state at this level 2524 master_th->th.th_task_state = 2525 master_th->th 2526 .th_task_state_memo_stack[master_th->th.th_task_state_top]; 2527 } 2528 // Copy the task team from the parent team to the master thread 2529 master_th->th.th_task_team = 2530 parent_team->t.t_task_team[master_th->th.th_task_state]; 2531 KA_TRACE(20, 2532 ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", 2533 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team, 2534 parent_team)); 2535 } 2536 2537 // TODO: GEH - cannot do this assertion because root thread not set up as 2538 // executing 2539 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 ); 2540 master_th->th.th_current_task->td_flags.executing = 1; 2541 2542 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 2543 2544 #if OMPT_SUPPORT 2545 if (ompt_enabled) { 2546 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); 2547 } 2548 #endif 2549 2550 KMP_MB(); 2551 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid)); 2552 } 2553 2554 /* Check whether we should push an internal control record onto the 2555 serial team stack. If so, do it. 
*/ 2556 void __kmp_save_internal_controls(kmp_info_t *thread) { 2557 2558 if (thread->th.th_team != thread->th.th_serial_team) { 2559 return; 2560 } 2561 if (thread->th.th_team->t.t_serialized > 1) { 2562 int push = 0; 2563 2564 if (thread->th.th_team->t.t_control_stack_top == NULL) { 2565 push = 1; 2566 } else { 2567 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level != 2568 thread->th.th_team->t.t_serialized) { 2569 push = 1; 2570 } 2571 } 2572 if (push) { /* push a record on the serial team's stack */ 2573 kmp_internal_control_t *control = 2574 (kmp_internal_control_t *)__kmp_allocate( 2575 sizeof(kmp_internal_control_t)); 2576 2577 copy_icvs(control, &thread->th.th_current_task->td_icvs); 2578 2579 control->serial_nesting_level = thread->th.th_team->t.t_serialized; 2580 2581 control->next = thread->th.th_team->t.t_control_stack_top; 2582 thread->th.th_team->t.t_control_stack_top = control; 2583 } 2584 } 2585 } 2586 2587 /* Changes set_nproc */ 2588 void __kmp_set_num_threads(int new_nth, int gtid) { 2589 kmp_info_t *thread; 2590 kmp_root_t *root; 2591 2592 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth)); 2593 KMP_DEBUG_ASSERT(__kmp_init_serial); 2594 2595 if (new_nth < 1) 2596 new_nth = 1; 2597 else if (new_nth > __kmp_max_nth) 2598 new_nth = __kmp_max_nth; 2599 2600 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); 2601 thread = __kmp_threads[gtid]; 2602 2603 __kmp_save_internal_controls(thread); 2604 2605 set__nproc(thread, new_nth); 2606 2607 // If this omp_set_num_threads() call will cause the hot team size to be 2608 // reduced (in the absence of a num_threads clause), then reduce it now, 2609 // rather than waiting for the next parallel region. 2610 root = thread->th.th_root; 2611 if (__kmp_init_parallel && (!root->r.r_active) && 2612 (root->r.r_hot_team->t.t_nproc > new_nth) 2613 #if KMP_NESTED_HOT_TEAMS 2614 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode 2615 #endif 2616 ) { 2617 kmp_team_t *hot_team = root->r.r_hot_team; 2618 int f; 2619 2620 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 2621 2622 // Release the extra threads we don't need any more. 2623 for (f = new_nth; f < hot_team->t.t_nproc; f++) { 2624 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); 2625 if (__kmp_tasking_mode != tskm_immediate_exec) { 2626 // When decreasing team size, threads no longer in the team should unref 2627 // task team. 2628 hot_team->t.t_threads[f]->th.th_task_team = NULL; 2629 } 2630 __kmp_free_thread(hot_team->t.t_threads[f]); 2631 hot_team->t.t_threads[f] = NULL; 2632 } 2633 hot_team->t.t_nproc = new_nth; 2634 #if KMP_NESTED_HOT_TEAMS 2635 if (thread->th.th_hot_teams) { 2636 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team); 2637 thread->th.th_hot_teams[0].hot_team_nth = new_nth; 2638 } 2639 #endif 2640 2641 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 2642 2643 // Update the t_nproc field in the threads that are still active. 
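/* For example (user code, illustrative): if the previous parallel region ran
   with 8 threads and the program now calls omp_set_num_threads(2), the six
   surplus hot-team threads were released above immediately instead of
   lingering until the next parallel region. */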
2644 for (f = 0; f < new_nth; f++) { 2645 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); 2646 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; 2647 } 2648 // Special flag in case omp_set_num_threads() call 2649 hot_team->t.t_size_changed = -1; 2650 } 2651 } 2652 2653 /* Changes max_active_levels */ 2654 void __kmp_set_max_active_levels(int gtid, int max_active_levels) { 2655 kmp_info_t *thread; 2656 2657 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread " 2658 "%d = (%d)\n", 2659 gtid, max_active_levels)); 2660 KMP_DEBUG_ASSERT(__kmp_init_serial); 2661 2662 // validate max_active_levels 2663 if (max_active_levels < 0) { 2664 KMP_WARNING(ActiveLevelsNegative, max_active_levels); 2665 // We ignore this call if the user has specified a negative value. 2666 // The current setting won't be changed. The last valid setting will be 2667 // used. A warning will be issued (if warnings are allowed as controlled by 2668 // the KMP_WARNINGS env var). 2669 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new " 2670 "max_active_levels for thread %d = (%d)\n", 2671 gtid, max_active_levels)); 2672 return; 2673 } 2674 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) { 2675 // it's OK, the max_active_levels is within the valid range: [ 0; 2676 // KMP_MAX_ACTIVE_LEVELS_LIMIT ] 2677 // We allow a zero value. (implementation defined behavior) 2678 } else { 2679 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels, 2680 KMP_MAX_ACTIVE_LEVELS_LIMIT); 2681 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; 2682 // Current upper limit is MAX_INT. (implementation defined behavior) 2683 // If the input exceeds the upper limit, we correct the input to be the 2684 // upper limit. (implementation defined behavior) 2685 // Actually, the flow should never get here until we use MAX_INT limit. 2686 } 2687 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new " 2688 "max_active_levels for thread %d = (%d)\n", 2689 gtid, max_active_levels)); 2690 2691 thread = __kmp_threads[gtid]; 2692 2693 __kmp_save_internal_controls(thread); 2694 2695 set__max_active_levels(thread, max_active_levels); 2696 } 2697 2698 /* Gets max_active_levels */ 2699 int __kmp_get_max_active_levels(int gtid) { 2700 kmp_info_t *thread; 2701 2702 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid)); 2703 KMP_DEBUG_ASSERT(__kmp_init_serial); 2704 2705 thread = __kmp_threads[gtid]; 2706 KMP_DEBUG_ASSERT(thread->th.th_current_task); 2707 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, " 2708 "curtask_maxaclevel=%d\n", 2709 gtid, thread->th.th_current_task, 2710 thread->th.th_current_task->td_icvs.max_active_levels)); 2711 return thread->th.th_current_task->td_icvs.max_active_levels; 2712 } 2713 2714 /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ 2715 void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) { 2716 kmp_info_t *thread; 2717 // kmp_team_t *team; 2718 2719 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", 2720 gtid, (int)kind, chunk)); 2721 KMP_DEBUG_ASSERT(__kmp_init_serial); 2722 2723 // Check if the kind parameter is valid, correct if needed. 
2724 // Valid parameters should fit in one of two intervals - standard or extended: 2725 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper> 2726 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 2727 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper || 2728 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) { 2729 // TODO: Hint needs attention in case we change the default schedule. 2730 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind), 2731 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"), 2732 __kmp_msg_null); 2733 kind = kmp_sched_default; 2734 chunk = 0; // ignore chunk value in case of bad kind 2735 } 2736 2737 thread = __kmp_threads[gtid]; 2738 2739 __kmp_save_internal_controls(thread); 2740 2741 if (kind < kmp_sched_upper_std) { 2742 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) { 2743 // differ static chunked vs. unchunked: chunk should be invalid to 2744 // indicate unchunked schedule (which is the default) 2745 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; 2746 } else { 2747 thread->th.th_current_task->td_icvs.sched.r_sched_type = 2748 __kmp_sch_map[kind - kmp_sched_lower - 1]; 2749 } 2750 } else { 2751 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - 2752 // kmp_sched_lower - 2 ]; 2753 thread->th.th_current_task->td_icvs.sched.r_sched_type = 2754 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std - 2755 kmp_sched_lower - 2]; 2756 } 2757 if (kind == kmp_sched_auto) { 2758 // ignore parameter chunk for schedule auto 2759 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; 2760 } else { 2761 thread->th.th_current_task->td_icvs.sched.chunk = chunk; 2762 } 2763 } 2764 2765 /* Gets def_sched_var ICV values */ 2766 void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) { 2767 kmp_info_t *thread; 2768 enum sched_type th_type; 2769 2770 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid)); 2771 KMP_DEBUG_ASSERT(__kmp_init_serial); 2772 2773 thread = __kmp_threads[gtid]; 2774 2775 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; 2776 2777 switch (th_type) { 2778 case kmp_sch_static: 2779 case kmp_sch_static_greedy: 2780 case kmp_sch_static_balanced: 2781 *kind = kmp_sched_static; 2782 *chunk = 0; // chunk was not set, try to show this fact via zero value 2783 return; 2784 case kmp_sch_static_chunked: 2785 *kind = kmp_sched_static; 2786 break; 2787 case kmp_sch_dynamic_chunked: 2788 *kind = kmp_sched_dynamic; 2789 break; 2790 case kmp_sch_guided_chunked: 2791 case kmp_sch_guided_iterative_chunked: 2792 case kmp_sch_guided_analytical_chunked: 2793 *kind = kmp_sched_guided; 2794 break; 2795 case kmp_sch_auto: 2796 *kind = kmp_sched_auto; 2797 break; 2798 case kmp_sch_trapezoidal: 2799 *kind = kmp_sched_trapezoidal; 2800 break; 2801 #if KMP_STATIC_STEAL_ENABLED 2802 case kmp_sch_static_steal: 2803 *kind = kmp_sched_static_steal; 2804 break; 2805 #endif 2806 default: 2807 KMP_FATAL(UnknownSchedulingType, th_type); 2808 } 2809 2810 *chunk = thread->th.th_current_task->td_icvs.sched.chunk; 2811 } 2812 2813 int __kmp_get_ancestor_thread_num(int gtid, int level) { 2814 2815 int ii, dd; 2816 kmp_team_t *team; 2817 kmp_info_t *thr; 2818 2819 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level)); 2820 KMP_DEBUG_ASSERT(__kmp_init_serial); 2821 2822 // validate level 2823 if (level == 0) 2824 return 0; 2825 if (level < 0) 2826 return -1; 2827 thr = __kmp_threads[gtid]; 2828 team = thr->th.th_team; 2829 ii = 
team->t.t_level; 2830 if (level > ii) 2831 return -1; 2832 2833 #if OMP_40_ENABLED 2834 if (thr->th.th_teams_microtask) { 2835 // AC: we are in teams region where multiple nested teams have same level 2836 int tlevel = thr->th.th_teams_level; // the level of the teams construct 2837 if (level <= 2838 tlevel) { // otherwise usual algorithm works (will not touch the teams) 2839 KMP_DEBUG_ASSERT(ii >= tlevel); 2840 // AC: As we need to pass by the teams league, we need to artificially 2841 // increase ii 2842 if (ii == tlevel) { 2843 ii += 2; // three teams have same level 2844 } else { 2845 ii++; // two teams have same level 2846 } 2847 } 2848 } 2849 #endif 2850 2851 if (ii == level) 2852 return __kmp_tid_from_gtid(gtid); 2853 2854 dd = team->t.t_serialized; 2855 level++; 2856 while (ii > level) { 2857 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { 2858 } 2859 if ((team->t.t_serialized) && (!dd)) { 2860 team = team->t.t_parent; 2861 continue; 2862 } 2863 if (ii > level) { 2864 team = team->t.t_parent; 2865 dd = team->t.t_serialized; 2866 ii--; 2867 } 2868 } 2869 2870 return (dd > 1) ? (0) : (team->t.t_master_tid); 2871 } 2872 2873 int __kmp_get_team_size(int gtid, int level) { 2874 2875 int ii, dd; 2876 kmp_team_t *team; 2877 kmp_info_t *thr; 2878 2879 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level)); 2880 KMP_DEBUG_ASSERT(__kmp_init_serial); 2881 2882 // validate level 2883 if (level == 0) 2884 return 1; 2885 if (level < 0) 2886 return -1; 2887 thr = __kmp_threads[gtid]; 2888 team = thr->th.th_team; 2889 ii = team->t.t_level; 2890 if (level > ii) 2891 return -1; 2892 2893 #if OMP_40_ENABLED 2894 if (thr->th.th_teams_microtask) { 2895 // AC: we are in teams region where multiple nested teams have same level 2896 int tlevel = thr->th.th_teams_level; // the level of the teams construct 2897 if (level <= 2898 tlevel) { // otherwise usual algorithm works (will not touch the teams) 2899 KMP_DEBUG_ASSERT(ii >= tlevel); 2900 // AC: As we need to pass by the teams league, we need to artificially 2901 // increase ii 2902 if (ii == tlevel) { 2903 ii += 2; // three teams have same level 2904 } else { 2905 ii++; // two teams have same level 2906 } 2907 } 2908 } 2909 #endif 2910 2911 while (ii > level) { 2912 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { 2913 } 2914 if (team->t.t_serialized && (!dd)) { 2915 team = team->t.t_parent; 2916 continue; 2917 } 2918 if (ii > level) { 2919 team = team->t.t_parent; 2920 ii--; 2921 } 2922 } 2923 2924 return team->t.t_nproc; 2925 } 2926 2927 kmp_r_sched_t __kmp_get_schedule_global() { 2928 // This routine created because pairs (__kmp_sched, __kmp_chunk) and 2929 // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults 2930 // independently. So one can get the updated schedule here. 2931 2932 kmp_r_sched_t r_sched; 2933 2934 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, 2935 // __kmp_guided. 
__kmp_sched should keep original value, so that user can set 2936 // KMP_SCHEDULE multiple times, and thus have different run-time schedules in 2937 // different roots (even in OMP 2.5) 2938 if (__kmp_sched == kmp_sch_static) { 2939 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed 2940 // schedule (balanced or greedy) 2941 } else if (__kmp_sched == kmp_sch_guided_chunked) { 2942 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed 2943 // schedule (iterative or analytical) 2944 } else { 2945 r_sched.r_sched_type = 2946 __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other 2947 } 2948 2949 if (__kmp_chunk < KMP_DEFAULT_CHUNK) { // __kmp_chunk may be wrong here (if it 2950 // was never set) 2951 r_sched.chunk = KMP_DEFAULT_CHUNK; 2952 } else { 2953 r_sched.chunk = __kmp_chunk; 2954 } 2955 2956 return r_sched; 2957 } 2958 2959 /* Allocate (realloc == FALSE) or reallocate (realloc == TRUE) 2960 at least argc number of *t_argv entries for the requested team. */ 2961 static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) { 2962 2963 KMP_DEBUG_ASSERT(team); 2964 if (!realloc || argc > team->t.t_max_argc) { 2965 2966 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, " 2967 "current entries=%d\n", 2968 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0)); 2969 /* if heap space was previously allocated for args, free it */ 2970 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0]) 2971 __kmp_free((void *)team->t.t_argv); 2972 2973 if (argc <= KMP_INLINE_ARGV_ENTRIES) { 2974 /* use unused space in the cache line for arguments */ 2975 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; 2976 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d " 2977 "argv entries\n", 2978 team->t.t_id, team->t.t_max_argc)); 2979 team->t.t_argv = &team->t.t_inline_argv[0]; 2980 if (__kmp_storage_map) { 2981 __kmp_print_storage_map_gtid( 2982 -1, &team->t.t_inline_argv[0], 2983 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], 2984 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv", 2985 team->t.t_id); 2986 } 2987 } else { 2988 /* allocate space for arguments in the heap */ 2989 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1)) 2990 ? KMP_MIN_MALLOC_ARGV_ENTRIES 2991 : 2 * argc; 2992 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d " 2993 "argv entries\n", 2994 team->t.t_id, team->t.t_max_argc)); 2995 team->t.t_argv = 2996 (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc); 2997 if (__kmp_storage_map) { 2998 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0], 2999 &team->t.t_argv[team->t.t_max_argc], 3000 sizeof(void *) * team->t.t_max_argc, 3001 "team_%d.t_argv", team->t.t_id); 3002 } 3003 } 3004 } 3005 } 3006 3007 static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) { 3008 int i; 3009 int num_disp_buff = max_nth > 1 ?
__kmp_dispatch_num_buffers : 2; 3010 team->t.t_threads = 3011 (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth); 3012 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate( 3013 sizeof(dispatch_shared_info_t) * num_disp_buff); 3014 team->t.t_dispatch = 3015 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth); 3016 team->t.t_implicit_task_taskdata = 3017 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth); 3018 team->t.t_max_nproc = max_nth; 3019 3020 /* setup dispatch buffers */ 3021 for (i = 0; i < num_disp_buff; ++i) { 3022 team->t.t_disp_buffer[i].buffer_index = i; 3023 #if OMP_45_ENABLED 3024 team->t.t_disp_buffer[i].doacross_buf_idx = i; 3025 #endif 3026 } 3027 } 3028 3029 static void __kmp_free_team_arrays(kmp_team_t *team) { 3030 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */ 3031 int i; 3032 for (i = 0; i < team->t.t_max_nproc; ++i) { 3033 if (team->t.t_dispatch[i].th_disp_buffer != NULL) { 3034 __kmp_free(team->t.t_dispatch[i].th_disp_buffer); 3035 team->t.t_dispatch[i].th_disp_buffer = NULL; 3036 }; // if 3037 }; // for 3038 __kmp_free(team->t.t_threads); 3039 __kmp_free(team->t.t_disp_buffer); 3040 __kmp_free(team->t.t_dispatch); 3041 __kmp_free(team->t.t_implicit_task_taskdata); 3042 team->t.t_threads = NULL; 3043 team->t.t_disp_buffer = NULL; 3044 team->t.t_dispatch = NULL; 3045 team->t.t_implicit_task_taskdata = 0; 3046 } 3047 3048 static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { 3049 kmp_info_t **oldThreads = team->t.t_threads; 3050 3051 __kmp_free(team->t.t_disp_buffer); 3052 __kmp_free(team->t.t_dispatch); 3053 __kmp_free(team->t.t_implicit_task_taskdata); 3054 __kmp_allocate_team_arrays(team, max_nth); 3055 3056 KMP_MEMCPY(team->t.t_threads, oldThreads, 3057 team->t.t_nproc * sizeof(kmp_info_t *)); 3058 3059 __kmp_free(oldThreads); 3060 } 3061 3062 static kmp_internal_control_t __kmp_get_global_icvs(void) { 3063 3064 kmp_r_sched_t r_sched = 3065 __kmp_get_schedule_global(); // get current state of scheduling globals 3066 3067 #if OMP_40_ENABLED 3068 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0); 3069 #endif /* OMP_40_ENABLED */ 3070 3071 kmp_internal_control_t g_icvs = { 3072 0, // int serial_nesting_level; //corresponds to value of th_team_serialized 3073 (kmp_int8)__kmp_dflt_nested, // int nested; //internal control 3074 // for nested parallelism (per thread) 3075 (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic 3076 // adjustment of threads (per thread) 3077 (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for 3078 // whether blocktime is explicitly set 3079 __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime 3080 #if KMP_USE_MONITOR 3081 __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime 3082 // intervals 3083 #endif 3084 __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for 3085 // next parallel region (per thread) 3086 // (use a max ub on value if __kmp_parallel_initialize not called yet) 3087 __kmp_dflt_max_active_levels, // int max_active_levels; //internal control 3088 // for max_active_levels 3089 r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule 3090 // {sched,chunk} pair 3091 #if OMP_40_ENABLED 3092 __kmp_nested_proc_bind.bind_types[0], 3093 __kmp_default_device, 3094 #endif /* OMP_40_ENABLED */ 3095 NULL // struct kmp_internal_control *next; 3096 }; 3097 3098 return g_icvs; 3099 } 3100 3101 static kmp_internal_control_t 
__kmp_get_x_global_icvs(const kmp_team_t *team) { 3102 3103 kmp_internal_control_t gx_icvs; 3104 gx_icvs.serial_nesting_level = 3105 0; // probably =team->t.t_serial like in save_inter_controls 3106 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs); 3107 gx_icvs.next = NULL; 3108 3109 return gx_icvs; 3110 } 3111 3112 static void __kmp_initialize_root(kmp_root_t *root) { 3113 int f; 3114 kmp_team_t *root_team; 3115 kmp_team_t *hot_team; 3116 int hot_team_max_nth; 3117 kmp_r_sched_t r_sched = 3118 __kmp_get_schedule_global(); // get current state of scheduling globals 3119 kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 3120 KMP_DEBUG_ASSERT(root); 3121 KMP_ASSERT(!root->r.r_begin); 3122 3123 /* setup the root state structure */ 3124 __kmp_init_lock(&root->r.r_begin_lock); 3125 root->r.r_begin = FALSE; 3126 root->r.r_active = FALSE; 3127 root->r.r_in_parallel = 0; 3128 root->r.r_blocktime = __kmp_dflt_blocktime; 3129 root->r.r_nested = __kmp_dflt_nested; 3130 3131 /* setup the root team for this task */ 3132 /* allocate the root team structure */ 3133 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n")); 3134 3135 root_team = 3136 __kmp_allocate_team(root, 3137 1, // new_nproc 3138 1, // max_nproc 3139 #if OMPT_SUPPORT 3140 0, // root parallel id 3141 #endif 3142 #if OMP_40_ENABLED 3143 __kmp_nested_proc_bind.bind_types[0], 3144 #endif 3145 &r_icvs, 3146 0 // argc 3147 USE_NESTED_HOT_ARG(NULL) // master thread is unknown 3148 ); 3149 #if USE_DEBUGGER 3150 // Non-NULL value should be assigned to make the debugger display the root 3151 // team. 3152 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0)); 3153 #endif 3154 3155 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team)); 3156 3157 root->r.r_root_team = root_team; 3158 root_team->t.t_control_stack_top = NULL; 3159 3160 /* initialize root team */ 3161 root_team->t.t_threads[0] = NULL; 3162 root_team->t.t_nproc = 1; 3163 root_team->t.t_serialized = 1; 3164 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 3165 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type; 3166 root_team->t.t_sched.chunk = r_sched.chunk; 3167 KA_TRACE( 3168 20, 3169 ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", 3170 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 3171 3172 /* setup the hot team for this task */ 3173 /* allocate the hot team structure */ 3174 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n")); 3175 3176 hot_team = 3177 __kmp_allocate_team(root, 3178 1, // new_nproc 3179 __kmp_dflt_team_nth_ub * 2, // max_nproc 3180 #if OMPT_SUPPORT 3181 0, // root parallel id 3182 #endif 3183 #if OMP_40_ENABLED 3184 __kmp_nested_proc_bind.bind_types[0], 3185 #endif 3186 &r_icvs, 3187 0 // argc 3188 USE_NESTED_HOT_ARG(NULL) // master thread is unknown 3189 ); 3190 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team)); 3191 3192 root->r.r_hot_team = hot_team; 3193 root_team->t.t_control_stack_top = NULL; 3194 3195 /* first-time initialization */ 3196 hot_team->t.t_parent = root_team; 3197 3198 /* initialize hot team */ 3199 hot_team_max_nth = hot_team->t.t_max_nproc; 3200 for (f = 0; f < hot_team_max_nth; ++f) { 3201 hot_team->t.t_threads[f] = NULL; 3202 }; // for 3203 hot_team->t.t_nproc = 1; 3204 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 3205 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type; 3206 hot_team->t.t_sched.chunk = r_sched.chunk; 3207 
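// t_size_changed tracks hot team resizing: it starts at 0 here, and
// __kmp_set_num_threads() above stores the special value -1 when
// omp_set_num_threads() shrinks the hot team outside a parallel region.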
hot_team->t.t_size_changed = 0; 3208 } 3209 3210 #ifdef KMP_DEBUG 3211 3212 typedef struct kmp_team_list_item { 3213 kmp_team_p const *entry; 3214 struct kmp_team_list_item *next; 3215 } kmp_team_list_item_t; 3216 typedef kmp_team_list_item_t *kmp_team_list_t; 3217 3218 static void __kmp_print_structure_team_accum( // Add team to list of teams. 3219 kmp_team_list_t list, // List of teams. 3220 kmp_team_p const *team // Team to add. 3221 ) { 3222 3223 // List must terminate with item where both entry and next are NULL. 3224 // Team is added to the list only once. 3225 // List is sorted in ascending order by team id. 3226 // Team id is *not* a key. 3227 3228 kmp_team_list_t l; 3229 3230 KMP_DEBUG_ASSERT(list != NULL); 3231 if (team == NULL) { 3232 return; 3233 }; // if 3234 3235 __kmp_print_structure_team_accum(list, team->t.t_parent); 3236 __kmp_print_structure_team_accum(list, team->t.t_next_pool); 3237 3238 // Search list for the team. 3239 l = list; 3240 while (l->next != NULL && l->entry != team) { 3241 l = l->next; 3242 }; // while 3243 if (l->next != NULL) { 3244 return; // Team has been added before, exit. 3245 }; // if 3246 3247 // Team is not found. Search list again for insertion point. 3248 l = list; 3249 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) { 3250 l = l->next; 3251 }; // while 3252 3253 // Insert team. 3254 { 3255 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( 3256 sizeof(kmp_team_list_item_t)); 3257 *item = *l; 3258 l->entry = team; 3259 l->next = item; 3260 } 3261 } 3262 3263 static void __kmp_print_structure_team(char const *title, kmp_team_p const *team 3264 3265 ) { 3266 __kmp_printf("%s", title); 3267 if (team != NULL) { 3268 __kmp_printf("%2x %p\n", team->t.t_id, team); 3269 } else { 3270 __kmp_printf(" - (nil)\n"); 3271 }; // if 3272 } 3273 3274 static void __kmp_print_structure_thread(char const *title, 3275 kmp_info_p const *thread) { 3276 __kmp_printf("%s", title); 3277 if (thread != NULL) { 3278 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread); 3279 } else { 3280 __kmp_printf(" - (nil)\n"); 3281 }; // if 3282 } 3283 3284 void __kmp_print_structure(void) { 3285 3286 kmp_team_list_t list; 3287 3288 // Initialize list of teams. 3289 list = 3290 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t)); 3291 list->entry = NULL; 3292 list->next = NULL; 3293 3294 __kmp_printf("\n------------------------------\nGlobal Thread " 3295 "Table\n------------------------------\n"); 3296 { 3297 int gtid; 3298 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 3299 __kmp_printf("%2d", gtid); 3300 if (__kmp_threads != NULL) { 3301 __kmp_printf(" %p", __kmp_threads[gtid]); 3302 }; // if 3303 if (__kmp_root != NULL) { 3304 __kmp_printf(" %p", __kmp_root[gtid]); 3305 }; // if 3306 __kmp_printf("\n"); 3307 }; // for gtid 3308 } 3309 3310 // Print out __kmp_threads array. 
3311 __kmp_printf("\n------------------------------\nThreads\n--------------------" 3312 "----------\n"); 3313 if (__kmp_threads != NULL) { 3314 int gtid; 3315 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 3316 kmp_info_t const *thread = __kmp_threads[gtid]; 3317 if (thread != NULL) { 3318 __kmp_printf("GTID %2d %p:\n", gtid, thread); 3319 __kmp_printf(" Our Root: %p\n", thread->th.th_root); 3320 __kmp_print_structure_team(" Our Team: ", thread->th.th_team); 3321 __kmp_print_structure_team(" Serial Team: ", 3322 thread->th.th_serial_team); 3323 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc); 3324 __kmp_print_structure_thread(" Master: ", 3325 thread->th.th_team_master); 3326 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized); 3327 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc); 3328 #if OMP_40_ENABLED 3329 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind); 3330 #endif 3331 __kmp_print_structure_thread(" Next in pool: ", 3332 thread->th.th_next_pool); 3333 __kmp_printf("\n"); 3334 __kmp_print_structure_team_accum(list, thread->th.th_team); 3335 __kmp_print_structure_team_accum(list, thread->th.th_serial_team); 3336 }; // if 3337 }; // for gtid 3338 } else { 3339 __kmp_printf("Threads array is not allocated.\n"); 3340 }; // if 3341 3342 // Print out __kmp_root array. 3343 __kmp_printf("\n------------------------------\nUbers\n----------------------" 3344 "--------\n"); 3345 if (__kmp_root != NULL) { 3346 int gtid; 3347 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { 3348 kmp_root_t const *root = __kmp_root[gtid]; 3349 if (root != NULL) { 3350 __kmp_printf("GTID %2d %p:\n", gtid, root); 3351 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team); 3352 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team); 3353 __kmp_print_structure_thread(" Uber Thread: ", 3354 root->r.r_uber_thread); 3355 __kmp_printf(" Active?: %2d\n", root->r.r_active); 3356 __kmp_printf(" Nested?: %2d\n", root->r.r_nested); 3357 __kmp_printf(" In Parallel: %2d\n", root->r.r_in_parallel); 3358 __kmp_printf("\n"); 3359 __kmp_print_structure_team_accum(list, root->r.r_root_team); 3360 __kmp_print_structure_team_accum(list, root->r.r_hot_team); 3361 }; // if 3362 }; // for gtid 3363 } else { 3364 __kmp_printf("Ubers array is not allocated.\n"); 3365 }; // if 3366 3367 __kmp_printf("\n------------------------------\nTeams\n----------------------" 3368 "--------\n"); 3369 while (list->next != NULL) { 3370 kmp_team_p const *team = list->entry; 3371 int i; 3372 __kmp_printf("Team %2x %p:\n", team->t.t_id, team); 3373 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent); 3374 __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid); 3375 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc); 3376 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized); 3377 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc); 3378 for (i = 0; i < team->t.t_nproc; ++i) { 3379 __kmp_printf(" Thread %2d: ", i); 3380 __kmp_print_structure_thread("", team->t.t_threads[i]); 3381 }; // for i 3382 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool); 3383 __kmp_printf("\n"); 3384 list = list->next; 3385 }; // while 3386 3387 // Print out __kmp_thread_pool and __kmp_team_pool. 
3388 __kmp_printf("\n------------------------------\nPools\n----------------------" 3389 "--------\n"); 3390 __kmp_print_structure_thread("Thread pool: ", 3391 (kmp_info_t *)__kmp_thread_pool); 3392 __kmp_print_structure_team("Team pool: ", 3393 (kmp_team_t *)__kmp_team_pool); 3394 __kmp_printf("\n"); 3395 3396 // Free team list. 3397 while (list != NULL) { 3398 kmp_team_list_item_t *item = list; 3399 list = list->next; 3400 KMP_INTERNAL_FREE(item); 3401 }; // while 3402 } 3403 3404 #endif 3405 3406 //--------------------------------------------------------------------------- 3407 // Stuff for per-thread fast random number generator 3408 // Table of primes 3409 static const unsigned __kmp_primes[] = { 3410 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877, 3411 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, 3412 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201, 3413 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, 3414 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7, 3415 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, 3416 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45, 3417 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, 3418 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363, 3419 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, 3420 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f}; 3421 3422 //--------------------------------------------------------------------------- 3423 // __kmp_get_random: Get a random number using a linear congruential method. 3424 unsigned short __kmp_get_random(kmp_info_t *thread) { 3425 unsigned x = thread->th.th_x; 3426 unsigned short r = x >> 16; 3427 3428 thread->th.th_x = x * thread->th.th_a + 1; 3429 3430 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n", 3431 thread->th.th_info.ds.ds_tid, r)); 3432 3433 return r; 3434 } 3435 //-------------------------------------------------------- 3436 // __kmp_init_random: Initialize a random number generator 3437 void __kmp_init_random(kmp_info_t *thread) { 3438 unsigned seed = thread->th.th_info.ds.ds_tid; 3439 3440 thread->th.th_a = 3441 __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))]; 3442 thread->th.th_x = (seed + 1) * thread->th.th_a + 1; 3443 KA_TRACE(30, 3444 ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a)); 3445 } 3446 3447 #if KMP_OS_WINDOWS 3448 /* reclaim array entries for root threads that are already dead, returns number 3449 * reclaimed */ 3450 static int __kmp_reclaim_dead_roots(void) { 3451 int i, r = 0; 3452 3453 for (i = 0; i < __kmp_threads_capacity; ++i) { 3454 if (KMP_UBER_GTID(i) && 3455 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && 3456 !__kmp_root[i] 3457 ->r.r_active) { // AC: reclaim only roots died in non-active state 3458 r += __kmp_unregister_root_other_thread(i); 3459 } 3460 } 3461 return r; 3462 } 3463 #endif 3464 3465 /* This function attempts to create free entries in __kmp_threads and 3466 __kmp_root, and returns the number of free entries generated. 3467 3468 For Windows* OS static library, the first mechanism used is to reclaim array 3469 entries for root threads that are already dead. 3470 3471 On all platforms, expansion is attempted on the arrays __kmp_threads_ and 3472 __kmp_root, with appropriate update to __kmp_threads_capacity. 
Array 3473 capacity is increased by doubling with clipping to __kmp_tp_capacity, if 3474 the threadprivate cache array has been created. Synchronization with 3475 __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock. 3476 3477 After any dead root reclamation, if the clipping value allows array expansion 3478 to result in the generation of a total of nWish free slots, the function does 3479 that expansion. If not, but the clipping value allows array expansion to 3480 result in the generation of a total of nNeed free slots, the function does 3481 that expansion. Otherwise, nothing is done beyond the possible initial root 3482 thread reclamation. However, if nNeed is zero, a best-effort attempt is made 3483 to fulfil nWish as far as possible, i.e. the function will attempt to create 3484 as many free slots as possible up to nWish. 3485 3486 If any argument is negative, the behavior is undefined. */ 3487 static int __kmp_expand_threads(int nWish, int nNeed) { 3488 int added = 0; 3489 int old_tp_cached; 3490 int __kmp_actual_max_nth; 3491 3492 if (nNeed > nWish) /* normalize the arguments */ 3493 nWish = nNeed; 3494 #if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB 3495 /* only for Windows static library */ 3496 /* reclaim array entries for root threads that are already dead */ 3497 added = __kmp_reclaim_dead_roots(); 3498 3499 if (nNeed) { 3500 nNeed -= added; 3501 if (nNeed < 0) 3502 nNeed = 0; 3503 } 3504 if (nWish) { 3505 nWish -= added; 3506 if (nWish < 0) 3507 nWish = 0; 3508 } 3509 #endif 3510 if (nWish <= 0) 3511 return added; 3512 3513 while (1) { 3514 int nTarget; 3515 int minimumRequiredCapacity; 3516 int newCapacity; 3517 kmp_info_t **newThreads; 3518 kmp_root_t **newRoot; 3519 3520 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If 3521 // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the 3522 // user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may become 3523 // > __kmp_max_nth in one of two ways: 3524 // 3525 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0] 3526 // may not be reused by another thread, so we may need to increase 3527 // __kmp_threads_capacity to __kmp_max_threads + 1. 3528 // 3529 // 2) New foreign root(s) are encountered. We always register new foreign 3530 // roots. This may cause a smaller # of threads to be allocated at 3531 // subsequent parallel regions, but the worker threads hang around (and 3532 // eventually go to sleep) and need slots in the __kmp_threads[] array. 3533 // 3534 // Anyway, that is the reason for moving the check to see if 3535 // __kmp_max_threads was exceeded into __kmp_reserve_threads() 3536 // instead of having it performed here. -BB 3537 old_tp_cached = __kmp_tp_cached; 3538 __kmp_actual_max_nth = 3539 old_tp_cached ?
__kmp_tp_capacity : __kmp_sys_max_nth; 3540 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity); 3541 3542 /* compute expansion headroom to check if we can expand and whether to aim 3543 for nWish or nNeed */ 3544 nTarget = nWish; 3545 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { 3546 /* can't fulfil nWish, so try nNeed */ 3547 if (nNeed) { 3548 nTarget = nNeed; 3549 if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { 3550 /* possible expansion too small -- give up */ 3551 break; 3552 } 3553 } else { 3554 /* best-effort */ 3555 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity; 3556 if (!nTarget) { 3557 /* can expand at all -- give up */ 3558 break; 3559 } 3560 } 3561 } 3562 minimumRequiredCapacity = __kmp_threads_capacity + nTarget; 3563 3564 newCapacity = __kmp_threads_capacity; 3565 do { 3566 newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1) 3567 ? (newCapacity << 1) 3568 : __kmp_actual_max_nth; 3569 } while (newCapacity < minimumRequiredCapacity); 3570 newThreads = (kmp_info_t **)__kmp_allocate( 3571 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + 3572 CACHE_LINE); 3573 newRoot = (kmp_root_t **)((char *)newThreads + 3574 sizeof(kmp_info_t *) * newCapacity); 3575 KMP_MEMCPY(newThreads, __kmp_threads, 3576 __kmp_threads_capacity * sizeof(kmp_info_t *)); 3577 KMP_MEMCPY(newRoot, __kmp_root, 3578 __kmp_threads_capacity * sizeof(kmp_root_t *)); 3579 memset(newThreads + __kmp_threads_capacity, 0, 3580 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t *)); 3581 memset(newRoot + __kmp_threads_capacity, 0, 3582 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t *)); 3583 3584 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { 3585 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has 3586 allocated a threadprivate cache while we were allocating the expanded 3587 array, and our new capacity is larger than the threadprivate cache 3588 capacity, so we should deallocate the expanded arrays and try again. 3589 This is the first check of a double-check pair. */ 3590 __kmp_free(newThreads); 3591 continue; /* start over and try again */ 3592 } 3593 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); 3594 if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { 3595 /* Same check as above, but this time with the lock so we can be sure if 3596 we can succeed. */ 3597 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); 3598 __kmp_free(newThreads); 3599 continue; /* start over and try again */ 3600 } else { 3601 /* success */ 3602 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be 3603 // investigated. 3604 *(kmp_info_t * *volatile *)&__kmp_threads = newThreads; 3605 *(kmp_root_t * *volatile *)&__kmp_root = newRoot; 3606 added += newCapacity - __kmp_threads_capacity; 3607 *(volatile int *)&__kmp_threads_capacity = newCapacity; 3608 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); 3609 break; /* succeeded, so we can exit the loop */ 3610 } 3611 } 3612 return added; 3613 } 3614 3615 /* Register the current thread as a root thread and obtain our gtid. We must 3616 have the __kmp_initz_lock held at this point. 
Argument TRUE only if we are the 3617 thread that calls from __kmp_do_serial_initialize() */ 3618 int __kmp_register_root(int initial_thread) { 3619 kmp_info_t *root_thread; 3620 kmp_root_t *root; 3621 int gtid; 3622 int capacity; 3623 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 3624 KA_TRACE(20, ("__kmp_register_root: entered\n")); 3625 KMP_MB(); 3626 3627 /* 2007-03-02: 3628 If initial thread did not invoke OpenMP RTL yet, and this thread is not an 3629 initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not 3630 work as expected -- it may return false (that means there is at least one 3631 empty slot in __kmp_threads array), but it is possible the only free slot 3632 is #0, which is reserved for initial thread and so cannot be used for this 3633 one. The following code works around this bug. 3634 3635 However, the right solution seems to be not reserving slot #0 for initial 3636 thread because: 3637 (1) there is no magic in slot #0, 3638 (2) we cannot detect initial thread reliably (the first thread which does 3639 serial initialization may not be a real initial thread). 3640 */ 3641 capacity = __kmp_threads_capacity; 3642 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { 3643 --capacity; 3644 }; // if 3645 3646 /* see if there are too many threads */ 3647 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1, 1)) { 3648 if (__kmp_tp_cached) { 3649 __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread), 3650 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), 3651 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); 3652 } else { 3653 __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread), 3654 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null); 3655 } 3656 }; // if 3657 3658 /* find an available thread slot */ 3659 /* Don't reassign the zero slot since we need that to only be used by initial 3660 thread */ 3661 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL; 3662 gtid++) 3663 ; 3664 KA_TRACE(1, 3665 ("__kmp_register_root: found slot in threads array: T#%d\n", gtid)); 3666 KMP_ASSERT(gtid < __kmp_threads_capacity); 3667 3668 /* update global accounting */ 3669 __kmp_all_nth++; 3670 TCW_4(__kmp_nth, __kmp_nth + 1); 3671 3672 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low 3673 // numbers of procs, and method #2 (keyed API call) for higher numbers. 3674 if (__kmp_adjust_gtid_mode) { 3675 if (__kmp_all_nth >= __kmp_tls_gtid_min) { 3676 if (TCR_4(__kmp_gtid_mode) != 2) { 3677 TCW_4(__kmp_gtid_mode, 2); 3678 } 3679 } else { 3680 if (TCR_4(__kmp_gtid_mode) != 1) { 3681 TCW_4(__kmp_gtid_mode, 1); 3682 } 3683 } 3684 } 3685 3686 #ifdef KMP_ADJUST_BLOCKTIME 3687 /* Adjust blocktime to zero if necessary */ 3688 /* Middle initialization might not have occurred yet */ 3689 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 3690 if (__kmp_nth > __kmp_avail_proc) { 3691 __kmp_zero_bt = TRUE; 3692 } 3693 } 3694 #endif /* KMP_ADJUST_BLOCKTIME */ 3695 3696 /* setup this new hierarchy */ 3697 if (!(root = __kmp_root[gtid])) { 3698 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t)); 3699 KMP_DEBUG_ASSERT(!root->r.r_root_team); 3700 } 3701 3702 #if KMP_STATS_ENABLED 3703 // Initialize stats as soon as possible (right after gtid assignment).
3704 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); 3705 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); 3706 KMP_SET_THREAD_STATE(SERIAL_REGION); 3707 KMP_INIT_PARTITIONED_TIMERS(OMP_serial); 3708 #endif 3709 __kmp_initialize_root(root); 3710 3711 /* setup new root thread structure */ 3712 if (root->r.r_uber_thread) { 3713 root_thread = root->r.r_uber_thread; 3714 } else { 3715 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); 3716 if (__kmp_storage_map) { 3717 __kmp_print_thread_storage_map(root_thread, gtid); 3718 } 3719 root_thread->th.th_info.ds.ds_gtid = gtid; 3720 root_thread->th.th_root = root; 3721 if (__kmp_env_consistency_check) { 3722 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); 3723 } 3724 #if USE_FAST_MEMORY 3725 __kmp_initialize_fast_memory(root_thread); 3726 #endif /* USE_FAST_MEMORY */ 3727 3728 #if KMP_USE_BGET 3729 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL); 3730 __kmp_initialize_bget(root_thread); 3731 #endif 3732 __kmp_init_random(root_thread); // Initialize random number generator 3733 } 3734 3735 /* setup the serial team held in reserve by the root thread */ 3736 if (!root_thread->th.th_serial_team) { 3737 kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 3738 KF_TRACE(10, ("__kmp_register_root: before serial_team\n")); 3739 root_thread->th.th_serial_team = 3740 __kmp_allocate_team(root, 1, 1, 3741 #if OMPT_SUPPORT 3742 0, // root parallel id 3743 #endif 3744 #if OMP_40_ENABLED 3745 proc_bind_default, 3746 #endif 3747 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); 3748 } 3749 KMP_ASSERT(root_thread->th.th_serial_team); 3750 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n", 3751 root_thread->th.th_serial_team)); 3752 3753 /* drop root_thread into place */ 3754 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); 3755 3756 root->r.r_root_team->t.t_threads[0] = root_thread; 3757 root->r.r_hot_team->t.t_threads[0] = root_thread; 3758 root_thread->th.th_serial_team->t.t_threads[0] = root_thread; 3759 // AC: the team created in reserve, not for execution (it is unused for now). 3760 root_thread->th.th_serial_team->t.t_serialized = 0; 3761 root->r.r_uber_thread = root_thread; 3762 3763 /* initialize the thread, get it ready to go */ 3764 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid); 3765 TCW_4(__kmp_init_gtid, TRUE); 3766 3767 /* prepare the master thread for get_gtid() */ 3768 __kmp_gtid_set_specific(gtid); 3769 3770 #if USE_ITT_BUILD 3771 __kmp_itt_thread_name(gtid); 3772 #endif /* USE_ITT_BUILD */ 3773 3774 #ifdef KMP_TDATA_GTID 3775 __kmp_gtid = gtid; 3776 #endif 3777 __kmp_create_worker(gtid, root_thread, __kmp_stksize); 3778 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid); 3779 3780 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 3781 "plain=%u\n", 3782 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team), 3783 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, 3784 KMP_INIT_BARRIER_STATE)); 3785 { // Initialize barrier data. 
3786 int b; 3787 for (b = 0; b < bs_last_barrier; ++b) { 3788 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE; 3789 #if USE_DEBUGGER 3790 root_thread->th.th_bar[b].bb.b_worker_arrived = 0; 3791 #endif 3792 }; // for 3793 } 3794 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == 3795 KMP_INIT_BARRIER_STATE); 3796 3797 #if KMP_AFFINITY_SUPPORTED 3798 #if OMP_40_ENABLED 3799 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; 3800 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; 3801 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; 3802 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; 3803 #endif 3804 3805 if (TCR_4(__kmp_init_middle)) { 3806 __kmp_affinity_set_init_mask(gtid, TRUE); 3807 } 3808 #endif /* KMP_AFFINITY_SUPPORTED */ 3809 3810 __kmp_root_counter++; 3811 3812 KMP_MB(); 3813 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 3814 3815 return gtid; 3816 } 3817 3818 #if KMP_NESTED_HOT_TEAMS 3819 static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level, 3820 const int max_level) { 3821 int i, n, nth; 3822 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; 3823 if (!hot_teams || !hot_teams[level].hot_team) { 3824 return 0; 3825 } 3826 KMP_DEBUG_ASSERT(level < max_level); 3827 kmp_team_t *team = hot_teams[level].hot_team; 3828 nth = hot_teams[level].hot_team_nth; 3829 n = nth - 1; // master is not freed 3830 if (level < max_level - 1) { 3831 for (i = 0; i < nth; ++i) { 3832 kmp_info_t *th = team->t.t_threads[i]; 3833 n += __kmp_free_hot_teams(root, th, level + 1, max_level); 3834 if (i > 0 && th->th.th_hot_teams) { 3835 __kmp_free(th->th.th_hot_teams); 3836 th->th.th_hot_teams = NULL; 3837 } 3838 } 3839 } 3840 __kmp_free_team(root, team, NULL); 3841 return n; 3842 } 3843 #endif 3844 3845 // Resets a root thread and clear its root and hot teams. 3846 // Returns the number of __kmp_threads entries directly and indirectly freed. 3847 static int __kmp_reset_root(int gtid, kmp_root_t *root) { 3848 kmp_team_t *root_team = root->r.r_root_team; 3849 kmp_team_t *hot_team = root->r.r_hot_team; 3850 int n = hot_team->t.t_nproc; 3851 int i; 3852 3853 KMP_DEBUG_ASSERT(!root->r.r_active); 3854 3855 root->r.r_root_team = NULL; 3856 root->r.r_hot_team = NULL; 3857 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team 3858 // before call to __kmp_free_team(). 3859 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL)); 3860 #if KMP_NESTED_HOT_TEAMS 3861 if (__kmp_hot_teams_max_level > 3862 0) { // need to free nested hot teams and their threads if any 3863 for (i = 0; i < hot_team->t.t_nproc; ++i) { 3864 kmp_info_t *th = hot_team->t.t_threads[i]; 3865 if (__kmp_hot_teams_max_level > 1) { 3866 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level); 3867 } 3868 if (th->th.th_hot_teams) { 3869 __kmp_free(th->th.th_hot_teams); 3870 th->th.th_hot_teams = NULL; 3871 } 3872 } 3873 } 3874 #endif 3875 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL)); 3876 3877 // Before we can reap the thread, we need to make certain that all other 3878 // threads in the teams that had this root as ancestor have stopped trying to 3879 // steal tasks. 
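// __kmp_wait_to_unref_task_teams() below waits until all workers have dropped
// their task team references; it is only needed when tasking is enabled.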
if (__kmp_tasking_mode != tskm_immediate_exec) { 3881 __kmp_wait_to_unref_task_teams(); 3882 } 3883 3884 #if KMP_OS_WINDOWS 3885 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */ 3886 KA_TRACE( 3887 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC 3888 "\n", 3889 (LPVOID) & (root->r.r_uber_thread->th), 3890 root->r.r_uber_thread->th.th_info.ds.ds_thread)); 3891 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread); 3892 #endif /* KMP_OS_WINDOWS */ 3893 3894 #if OMPT_SUPPORT 3895 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) { 3896 int gtid = __kmp_get_gtid(); 3897 __ompt_thread_end(ompt_thread_initial, gtid); 3898 } 3899 #endif 3900 3901 TCW_4(__kmp_nth, 3902 __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth. 3903 __kmp_reap_thread(root->r.r_uber_thread, 1); 3904 3905 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it 3906 // instead of freeing it. 3907 root->r.r_uber_thread = NULL; 3908 /* mark root as no longer in use */ 3909 root->r.r_begin = FALSE; 3910 3911 return n; 3912 } 3913 3914 void __kmp_unregister_root_current_thread(int gtid) { 3915 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid)); 3916 /* this lock should be ok, since unregister_root_current_thread is never 3917 called during an abort, only during a normal close. furthermore, if you 3918 have the forkjoin lock, you should never try to get the initz lock */ 3919 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 3920 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 3921 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, " 3922 "exiting T#%d\n", 3923 gtid)); 3924 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 3925 return; 3926 } 3927 kmp_root_t *root = __kmp_root[gtid]; 3928 3929 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); 3930 KMP_ASSERT(KMP_UBER_GTID(gtid)); 3931 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root); 3932 KMP_ASSERT(root->r.r_active == FALSE); 3933 3934 KMP_MB(); 3935 3936 #if OMP_45_ENABLED 3937 kmp_info_t *thread = __kmp_threads[gtid]; 3938 kmp_team_t *team = thread->th.th_team; 3939 kmp_task_team_t *task_team = thread->th.th_task_team; 3940 3941 // we need to wait for the proxy tasks before finishing the thread 3942 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) { 3943 #if OMPT_SUPPORT 3944 // the runtime is shutting down so we won't report any events 3945 thread->th.ompt_thread_info.state = ompt_state_undefined; 3946 #endif 3947 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); 3948 } 3949 #endif 3950 3951 __kmp_reset_root(gtid, root); 3952 3953 /* free up this thread slot */ 3954 __kmp_gtid_set_specific(KMP_GTID_DNE); 3955 #ifdef KMP_TDATA_GTID 3956 __kmp_gtid = KMP_GTID_DNE; 3957 #endif 3958 3959 KMP_MB(); 3960 KC_TRACE(10, 3961 ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid)); 3962 3963 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 3964 } 3965 3966 #if KMP_OS_WINDOWS 3967 /* __kmp_forkjoin_lock must already be held 3968 Unregisters a root thread that is not the current thread. Returns the number 3969 of __kmp_threads entries freed as a result.
*/ 3970 static int __kmp_unregister_root_other_thread(int gtid) { 3971 kmp_root_t *root = __kmp_root[gtid]; 3972 int r; 3973 3974 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid)); 3975 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); 3976 KMP_ASSERT(KMP_UBER_GTID(gtid)); 3977 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root); 3978 KMP_ASSERT(root->r.r_active == FALSE); 3979 3980 r = __kmp_reset_root(gtid, root); 3981 KC_TRACE(10, 3982 ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid)); 3983 return r; 3984 } 3985 #endif 3986 3987 #if KMP_DEBUG 3988 void __kmp_task_info() { 3989 3990 kmp_int32 gtid = __kmp_entry_gtid(); 3991 kmp_int32 tid = __kmp_tid_from_gtid(gtid); 3992 kmp_info_t *this_thr = __kmp_threads[gtid]; 3993 kmp_team_t *steam = this_thr->th.th_serial_team; 3994 kmp_team_t *team = this_thr->th.th_team; 3995 3996 __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p " 3997 "ptask=%p\n", 3998 gtid, tid, this_thr, team, this_thr->th.th_current_task, 3999 team->t.t_implicit_task_taskdata[tid].td_parent); 4000 } 4001 #endif // KMP_DEBUG 4002 4003 /* TODO optimize with one big memclr, take out what isn't needed, split 4004 responsibility to workers as much as possible, and delay initialization of 4005 features as much as possible */ 4006 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, 4007 int tid, int gtid) { 4008 /* this_thr->th.th_info.ds.ds_gtid is setup in 4009 kmp_allocate_thread/create_worker. 4010 this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ 4011 kmp_info_t *master = team->t.t_threads[0]; 4012 KMP_DEBUG_ASSERT(this_thr != NULL); 4013 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team); 4014 KMP_DEBUG_ASSERT(team); 4015 KMP_DEBUG_ASSERT(team->t.t_threads); 4016 KMP_DEBUG_ASSERT(team->t.t_dispatch); 4017 KMP_DEBUG_ASSERT(master); 4018 KMP_DEBUG_ASSERT(master->th.th_root); 4019 4020 KMP_MB(); 4021 4022 TCW_SYNC_PTR(this_thr->th.th_team, team); 4023 4024 this_thr->th.th_info.ds.ds_tid = tid; 4025 this_thr->th.th_set_nproc = 0; 4026 if (__kmp_tasking_mode != tskm_immediate_exec) 4027 // When tasking is possible, threads are not safe to reap until they are 4028 // done tasking; this will be set when tasking code is exited in wait 4029 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; 4030 else // no tasking --> always safe to reap 4031 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; 4032 #if OMP_40_ENABLED 4033 this_thr->th.th_set_proc_bind = proc_bind_default; 4034 #if KMP_AFFINITY_SUPPORTED 4035 this_thr->th.th_new_place = this_thr->th.th_current_place; 4036 #endif 4037 #endif 4038 this_thr->th.th_root = master->th.th_root; 4039 4040 /* setup the thread's cache of the team structure */ 4041 this_thr->th.th_team_nproc = team->t.t_nproc; 4042 this_thr->th.th_team_master = master; 4043 this_thr->th.th_team_serialized = team->t.t_serialized; 4044 TCW_PTR(this_thr->th.th_sleep_loc, NULL); 4045 4046 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata); 4047 4048 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", 4049 tid, gtid, this_thr, this_thr->th.th_current_task)); 4050 4051 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr, 4052 team, tid, TRUE); 4053 4054 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", 4055 tid, gtid, this_thr, this_thr->th.th_current_task)); 4056 // TODO: Initialize ICVs from parent; GEH - isn't that already done in 4057 // __kmp_initialize_team()? 
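// The dynamic dispatch buffers set up below are sized per thread: a single
// buffer for a serialized team (t_max_nproc == 1), __kmp_dispatch_num_buffers
// buffers otherwise.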
4058 4059 /* TODO no worksharing in speculative threads */ 4060 this_thr->th.th_dispatch = &team->t.t_dispatch[tid]; 4061 4062 this_thr->th.th_local.this_construct = 0; 4063 4064 #ifdef BUILD_TV 4065 this_thr->th.th_local.tv_data = 0; 4066 #endif 4067 4068 if (!this_thr->th.th_pri_common) { 4069 this_thr->th.th_pri_common = 4070 (struct common_table *)__kmp_allocate(sizeof(struct common_table)); 4071 if (__kmp_storage_map) { 4072 __kmp_print_storage_map_gtid( 4073 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, 4074 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid); 4075 }; // if 4076 this_thr->th.th_pri_head = NULL; 4077 }; // if 4078 4079 /* Initialize dynamic dispatch */ 4080 { 4081 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; 4082 // Use team max_nproc since this will never change for the team. 4083 size_t disp_size = 4084 sizeof(dispatch_private_info_t) * 4085 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers); 4086 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, 4087 team->t.t_max_nproc)); 4088 KMP_ASSERT(dispatch); 4089 KMP_DEBUG_ASSERT(team->t.t_dispatch); 4090 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]); 4091 4092 dispatch->th_disp_index = 0; 4093 #if OMP_45_ENABLED 4094 dispatch->th_doacross_buf_idx = 0; 4095 #endif 4096 if (!dispatch->th_disp_buffer) { 4097 dispatch->th_disp_buffer = 4098 (dispatch_private_info_t *)__kmp_allocate(disp_size); 4099 4100 if (__kmp_storage_map) { 4101 __kmp_print_storage_map_gtid( 4102 gtid, &dispatch->th_disp_buffer[0], 4103 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1 4104 ? 1 4105 : __kmp_dispatch_num_buffers], 4106 disp_size, "th_%d.th_dispatch.th_disp_buffer " 4107 "(team_%d.t_dispatch[%d].th_disp_buffer)", 4108 gtid, team->t.t_id, gtid); 4109 } 4110 } else { 4111 memset(&dispatch->th_disp_buffer[0], '\0', disp_size); 4112 } 4113 4114 dispatch->th_dispatch_pr_current = 0; 4115 dispatch->th_dispatch_sh_current = 0; 4116 4117 dispatch->th_deo_fcn = 0; /* ORDERED */ 4118 dispatch->th_dxo_fcn = 0; /* END ORDERED */ 4119 } 4120 4121 this_thr->th.th_next_pool = NULL; 4122 4123 if (!this_thr->th.th_task_state_memo_stack) { 4124 size_t i; 4125 this_thr->th.th_task_state_memo_stack = 4126 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8)); 4127 this_thr->th.th_task_state_top = 0; 4128 this_thr->th.th_task_state_stack_sz = 4; 4129 for (i = 0; i < this_thr->th.th_task_state_stack_sz; 4130 ++i) // zero init the stack 4131 this_thr->th.th_task_state_memo_stack[i] = 0; 4132 } 4133 4134 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 4135 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 4136 4137 KMP_MB(); 4138 } 4139 4140 /* allocate a new thread for the requesting team. this is only called from 4141 within a forkjoin critical section. we will first try to get an available 4142 thread from the thread pool. if none is available, we will fork a new one 4143 assuming we are able to create a new one. this should be assured, as the 4144 caller should check on this first. 
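A thread taken from the pool keeps the gtid it already owns; only a freshly
forked worker is assigned a new gtid slot.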
*/ 4145 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, 4146 int new_tid) { 4147 kmp_team_t *serial_team; 4148 kmp_info_t *new_thr; 4149 int new_gtid; 4150 4151 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid())); 4152 KMP_DEBUG_ASSERT(root && team); 4153 #if !KMP_NESTED_HOT_TEAMS 4154 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid())); 4155 #endif 4156 KMP_MB(); 4157 4158 /* first, try to get one from the thread pool */ 4159 if (__kmp_thread_pool) { 4160 4161 new_thr = (kmp_info_t *)__kmp_thread_pool; 4162 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool; 4163 if (new_thr == __kmp_thread_pool_insert_pt) { 4164 __kmp_thread_pool_insert_pt = NULL; 4165 } 4166 TCW_4(new_thr->th.th_in_pool, FALSE); 4167 // Don't touch th_active_in_pool or th_active. 4168 // The worker thread adjusts those flags as it sleeps/awakens. 4169 __kmp_thread_pool_nth--; 4170 4171 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n", 4172 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid)); 4173 KMP_ASSERT(!new_thr->th.th_team); 4174 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity); 4175 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0); 4176 4177 /* setup the thread structure */ 4178 __kmp_initialize_info(new_thr, team, new_tid, 4179 new_thr->th.th_info.ds.ds_gtid); 4180 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team); 4181 4182 TCW_4(__kmp_nth, __kmp_nth + 1); 4183 4184 new_thr->th.th_task_state = 0; 4185 new_thr->th.th_task_state_top = 0; 4186 new_thr->th.th_task_state_stack_sz = 4; 4187 4188 #ifdef KMP_ADJUST_BLOCKTIME 4189 /* Adjust blocktime back to zero if necessary */ 4190 /* Middle initialization might not have occurred yet */ 4191 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 4192 if (__kmp_nth > __kmp_avail_proc) { 4193 __kmp_zero_bt = TRUE; 4194 } 4195 } 4196 #endif /* KMP_ADJUST_BLOCKTIME */ 4197 4198 #if KMP_DEBUG 4199 // If thread entered pool via __kmp_free_thread, wait_flag should != 4200 // KMP_BARRIER_PARENT_FLAG. 4201 int b; 4202 kmp_balign_t *balign = new_thr->th.th_bar; 4203 for (b = 0; b < bs_last_barrier; ++b) 4204 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4205 #endif 4206 4207 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n", 4208 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid)); 4209 4210 KMP_MB(); 4211 return new_thr; 4212 } 4213 4214 /* no, well fork a new one */ 4215 KMP_ASSERT(__kmp_nth == __kmp_all_nth); 4216 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity); 4217 4218 #if KMP_USE_MONITOR 4219 // If this is the first worker thread the RTL is creating, then also 4220 // launch the monitor thread. We try to do this as early as possible. 4221 if (!TCR_4(__kmp_init_monitor)) { 4222 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); 4223 if (!TCR_4(__kmp_init_monitor)) { 4224 KF_TRACE(10, ("before __kmp_create_monitor\n")); 4225 TCW_4(__kmp_init_monitor, 1); 4226 __kmp_create_monitor(&__kmp_monitor); 4227 KF_TRACE(10, ("after __kmp_create_monitor\n")); 4228 #if KMP_OS_WINDOWS 4229 // AC: wait until monitor has started. This is a fix for CQ232808. 4230 // The reason is that if the library is loaded/unloaded in a loop with 4231 // small (parallel) work in between, then there is high probability that 4232 // monitor thread started after the library shutdown. 
At shutdown it is 4233 // too late to cope with the problem, because when the master is in 4234 // DllMain (process detach) the monitor has no chances to start (it is 4235 // blocked), and master has no means to inform the monitor that the 4236 // library has gone, because all the memory which the monitor can access 4237 // is going to be released/reset. 4238 while (TCR_4(__kmp_init_monitor) < 2) { 4239 KMP_YIELD(TRUE); 4240 } 4241 KF_TRACE(10, ("after monitor thread has started\n")); 4242 #endif 4243 } 4244 __kmp_release_bootstrap_lock(&__kmp_monitor_lock); 4245 } 4246 #endif 4247 4248 KMP_MB(); 4249 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) { 4250 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity); 4251 } 4252 4253 /* allocate space for it. */ 4254 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); 4255 4256 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr); 4257 4258 if (__kmp_storage_map) { 4259 __kmp_print_thread_storage_map(new_thr, new_gtid); 4260 } 4261 4262 // add the reserve serialized team, initialized from the team's master thread 4263 { 4264 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team); 4265 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n")); 4266 new_thr->th.th_serial_team = serial_team = 4267 (kmp_team_t *)__kmp_allocate_team(root, 1, 1, 4268 #if OMPT_SUPPORT 4269 0, // root parallel id 4270 #endif 4271 #if OMP_40_ENABLED 4272 proc_bind_default, 4273 #endif 4274 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); 4275 } 4276 KMP_ASSERT(serial_team); 4277 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for 4278 // execution (it is unused for now). 4279 serial_team->t.t_threads[0] = new_thr; 4280 KF_TRACE(10, 4281 ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", 4282 new_thr)); 4283 4284 /* setup the thread structures */ 4285 __kmp_initialize_info(new_thr, team, new_tid, new_gtid); 4286 4287 #if USE_FAST_MEMORY 4288 __kmp_initialize_fast_memory(new_thr); 4289 #endif /* USE_FAST_MEMORY */ 4290 4291 #if KMP_USE_BGET 4292 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL); 4293 __kmp_initialize_bget(new_thr); 4294 #endif 4295 4296 __kmp_init_random(new_thr); // Initialize random number generator 4297 4298 /* Initialize these only once when thread is grabbed for a team allocation */ 4299 KA_TRACE(20, 4300 ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n", 4301 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 4302 4303 int b; 4304 kmp_balign_t *balign = new_thr->th.th_bar; 4305 for (b = 0; b < bs_last_barrier; ++b) { 4306 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE; 4307 balign[b].bb.team = NULL; 4308 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING; 4309 balign[b].bb.use_oncore_barrier = 0; 4310 } 4311 4312 new_thr->th.th_spin_here = FALSE; 4313 new_thr->th.th_next_waiting = 0; 4314 4315 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4316 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED; 4317 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED; 4318 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED; 4319 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED; 4320 #endif 4321 4322 TCW_4(new_thr->th.th_in_pool, FALSE); 4323 new_thr->th.th_active_in_pool = FALSE; 4324 TCW_4(new_thr->th.th_active, TRUE); 4325 4326 /* adjust the global counters */ 4327 __kmp_all_nth++; 4328 __kmp_nth++; 4329 4330 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low 4331 // numbers of procs, and method #2 (keyed API call) for higher numbers. 
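// The switch-over point is __kmp_tls_gtid_min: once the total thread count
// reaches it we move to the keyed TLS lookup (mode 2); below it the stack
// search method (mode 1) is kept.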
4332 if (__kmp_adjust_gtid_mode) { 4333 if (__kmp_all_nth >= __kmp_tls_gtid_min) { 4334 if (TCR_4(__kmp_gtid_mode) != 2) { 4335 TCW_4(__kmp_gtid_mode, 2); 4336 } 4337 } else { 4338 if (TCR_4(__kmp_gtid_mode) != 1) { 4339 TCW_4(__kmp_gtid_mode, 1); 4340 } 4341 } 4342 } 4343 4344 #ifdef KMP_ADJUST_BLOCKTIME 4345 /* Adjust blocktime back to zero if necessary */ 4346 /* Middle initialization might not have occurred yet */ 4347 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 4348 if (__kmp_nth > __kmp_avail_proc) { 4349 __kmp_zero_bt = TRUE; 4350 } 4351 } 4352 #endif /* KMP_ADJUST_BLOCKTIME */ 4353 4354 /* actually fork it and create the new worker thread */ 4355 KF_TRACE( 4356 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr)); 4357 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize); 4358 KF_TRACE(10, 4359 ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr)); 4360 4361 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), 4362 new_gtid)); 4363 KMP_MB(); 4364 return new_thr; 4365 } 4366 4367 /* Reinitialize team for reuse. 4368 The hot team code calls this case at every fork barrier, so EPCC barrier 4369 test are extremely sensitive to changes in it, esp. writes to the team 4370 struct, which cause a cache invalidation in all threads. 4371 IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */ 4372 static void __kmp_reinitialize_team(kmp_team_t *team, 4373 kmp_internal_control_t *new_icvs, 4374 ident_t *loc) { 4375 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n", 4376 team->t.t_threads[0], team)); 4377 KMP_DEBUG_ASSERT(team && new_icvs); 4378 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); 4379 KMP_CHECK_UPDATE(team->t.t_ident, loc); 4380 4381 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); 4382 4383 // Copy ICVs to the master thread's implicit taskdata 4384 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE); 4385 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); 4386 4387 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n", 4388 team->t.t_threads[0], team)); 4389 } 4390 4391 /* Initialize the team data structure. 4392 This assumes the t_threads and t_max_nproc are already set. 4393 Also, we don't touch the arguments */ 4394 static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, 4395 kmp_internal_control_t *new_icvs, 4396 ident_t *loc) { 4397 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team)); 4398 4399 /* verify */ 4400 KMP_DEBUG_ASSERT(team); 4401 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc); 4402 KMP_DEBUG_ASSERT(team->t.t_threads); 4403 KMP_MB(); 4404 4405 team->t.t_master_tid = 0; /* not needed */ 4406 /* team->t.t_master_bar; not needed */ 4407 team->t.t_serialized = new_nproc > 1 ? 
0 : 1; 4408 team->t.t_nproc = new_nproc; 4409 4410 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */ 4411 team->t.t_next_pool = NULL; 4412 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess 4413 * up hot team */ 4414 4415 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */ 4416 team->t.t_invoke = NULL; /* not needed */ 4417 4418 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4419 team->t.t_sched = new_icvs->sched; 4420 4421 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 4422 team->t.t_fp_control_saved = FALSE; /* not needed */ 4423 team->t.t_x87_fpu_control_word = 0; /* not needed */ 4424 team->t.t_mxcsr = 0; /* not needed */ 4425 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 4426 4427 team->t.t_construct = 0; 4428 __kmp_init_lock(&team->t.t_single_lock); 4429 4430 team->t.t_ordered.dt.t_value = 0; 4431 team->t.t_master_active = FALSE; 4432 4433 memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t)); 4434 4435 #ifdef KMP_DEBUG 4436 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */ 4437 #endif 4438 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */ 4439 4440 team->t.t_control_stack_top = NULL; 4441 4442 __kmp_reinitialize_team(team, new_icvs, loc); 4443 4444 KMP_MB(); 4445 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team)); 4446 } 4447 4448 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4449 /* Sets full mask for thread and returns old mask, no changes to structures. */ 4450 static void 4451 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) { 4452 if (KMP_AFFINITY_CAPABLE()) { 4453 int status; 4454 if (old_mask != NULL) { 4455 status = __kmp_get_system_affinity(old_mask, TRUE); 4456 int error = errno; 4457 if (status != 0) { 4458 __kmp_msg(kmp_ms_fatal, KMP_MSG(ChangeThreadAffMaskError), 4459 KMP_ERR(error), __kmp_msg_null); 4460 } 4461 } 4462 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE); 4463 } 4464 } 4465 #endif 4466 4467 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4468 4469 // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 4470 // It calculates the worker + master thread's partition based upon the parent 4471 // thread's partition, and binds each worker to a place in its partition. 4472 // The master thread's partition should already include its current binding. 4473 static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { 4474 // Copy the master thread's place partition to the team struct 4475 kmp_info_t *master_th = team->t.t_threads[0]; 4476 KMP_DEBUG_ASSERT(master_th != NULL); 4477 kmp_proc_bind_t proc_bind = team->t.t_proc_bind; 4478 int first_place = master_th->th.th_first_place; 4479 int last_place = master_th->th.th_last_place; 4480 int masters_place = master_th->th.th_current_place; 4481 team->t.t_first_place = first_place; 4482 team->t.t_last_place = last_place; 4483 4484 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 4485 "bound to place %d partition = [%d,%d]\n", 4486 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]), 4487 team->t.t_id, masters_place, first_place, last_place)); 4488 4489 switch (proc_bind) { 4490 4491 case proc_bind_default: 4492 // serial teams might have the proc_bind policy set to proc_bind_default.
It 4493 // doesn't matter, as we don't rebind master thread for any proc_bind policy 4494 KMP_DEBUG_ASSERT(team->t.t_nproc == 1); 4495 break; 4496 4497 case proc_bind_master: { 4498 int f; 4499 int n_th = team->t.t_nproc; 4500 for (f = 1; f < n_th; f++) { 4501 kmp_info_t *th = team->t.t_threads[f]; 4502 KMP_DEBUG_ASSERT(th != NULL); 4503 th->th.th_first_place = first_place; 4504 th->th.th_last_place = last_place; 4505 th->th.th_new_place = masters_place; 4506 4507 KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d " 4508 "partition = [%d,%d]\n", 4509 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, 4510 f, masters_place, first_place, last_place)); 4511 } 4512 } break; 4513 4514 case proc_bind_close: { 4515 int f; 4516 int n_th = team->t.t_nproc; 4517 int n_places; 4518 if (first_place <= last_place) { 4519 n_places = last_place - first_place + 1; 4520 } else { 4521 n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 4522 } 4523 if (n_th <= n_places) { 4524 int place = masters_place; 4525 for (f = 1; f < n_th; f++) { 4526 kmp_info_t *th = team->t.t_threads[f]; 4527 KMP_DEBUG_ASSERT(th != NULL); 4528 4529 if (place == last_place) { 4530 place = first_place; 4531 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4532 place = 0; 4533 } else { 4534 place++; 4535 } 4536 th->th.th_first_place = first_place; 4537 th->th.th_last_place = last_place; 4538 th->th.th_new_place = place; 4539 4540 KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 4541 "partition = [%d,%d]\n", 4542 __kmp_gtid_from_thread(team->t.t_threads[f]), 4543 team->t.t_id, f, place, first_place, last_place)); 4544 } 4545 } else { 4546 int S, rem, gap, s_count; 4547 S = n_th / n_places; 4548 s_count = 0; 4549 rem = n_th - (S * n_places); 4550 gap = rem > 0 ? 
n_places / rem : n_places; 4551 int place = masters_place; 4552 int gap_ct = gap; 4553 for (f = 0; f < n_th; f++) { 4554 kmp_info_t *th = team->t.t_threads[f]; 4555 KMP_DEBUG_ASSERT(th != NULL); 4556 4557 th->th.th_first_place = first_place; 4558 th->th.th_last_place = last_place; 4559 th->th.th_new_place = place; 4560 s_count++; 4561 4562 if ((s_count == S) && rem && (gap_ct == gap)) { 4563 // do nothing, add an extra thread to place on next iteration 4564 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 4565 // we added an extra thread to this place; move to next place 4566 if (place == last_place) { 4567 place = first_place; 4568 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4569 place = 0; 4570 } else { 4571 place++; 4572 } 4573 s_count = 0; 4574 gap_ct = 1; 4575 rem--; 4576 } else if (s_count == S) { // place full; don't add extra 4577 if (place == last_place) { 4578 place = first_place; 4579 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4580 place = 0; 4581 } else { 4582 place++; 4583 } 4584 gap_ct++; 4585 s_count = 0; 4586 } 4587 4588 KA_TRACE(100, 4589 ("__kmp_partition_places: close: T#%d(%d:%d) place %d " 4590 "partition = [%d,%d]\n", 4591 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f, 4592 th->th.th_new_place, first_place, last_place)); 4593 } 4594 KMP_DEBUG_ASSERT(place == masters_place); 4595 } 4596 } break; 4597 4598 case proc_bind_spread: { 4599 int f; 4600 int n_th = team->t.t_nproc; 4601 int n_places; 4602 int thidx; 4603 if (first_place <= last_place) { 4604 n_places = last_place - first_place + 1; 4605 } else { 4606 n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 4607 } 4608 if (n_th <= n_places) { 4609 int place = masters_place; 4610 int S = n_places / n_th; 4611 int s_count, rem, gap, gap_ct; 4612 rem = n_places - n_th * S; 4613 gap = rem ? n_th / rem : 1; 4614 gap_ct = gap; 4615 thidx = n_th; 4616 if (update_master_only == 1) 4617 thidx = 1; 4618 for (f = 0; f < thidx; f++) { 4619 kmp_info_t *th = team->t.t_threads[f]; 4620 KMP_DEBUG_ASSERT(th != NULL); 4621 4622 th->th.th_first_place = place; 4623 th->th.th_new_place = place; 4624 s_count = 1; 4625 while (s_count < S) { 4626 if (place == last_place) { 4627 place = first_place; 4628 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4629 place = 0; 4630 } else { 4631 place++; 4632 } 4633 s_count++; 4634 } 4635 if (rem && (gap_ct == gap)) { 4636 if (place == last_place) { 4637 place = first_place; 4638 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4639 place = 0; 4640 } else { 4641 place++; 4642 } 4643 rem--; 4644 gap_ct = 0; 4645 } 4646 th->th.th_last_place = place; 4647 gap_ct++; 4648 4649 if (place == last_place) { 4650 place = first_place; 4651 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4652 place = 0; 4653 } else { 4654 place++; 4655 } 4656 4657 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4658 "partition = [%d,%d]\n", 4659 __kmp_gtid_from_thread(team->t.t_threads[f]), 4660 team->t.t_id, f, th->th.th_new_place, 4661 th->th.th_first_place, th->th.th_last_place)); 4662 } 4663 KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 4664 } else { 4665 int S, rem, gap, s_count; 4666 S = n_th / n_places; 4667 s_count = 0; 4668 rem = n_th - (S * n_places); 4669 gap = rem > 0 ? 
n_places / rem : n_places; 4670 int place = masters_place; 4671 int gap_ct = gap; 4672 thidx = n_th; 4673 if (update_master_only == 1) 4674 thidx = 1; 4675 for (f = 0; f < thidx; f++) { 4676 kmp_info_t *th = team->t.t_threads[f]; 4677 KMP_DEBUG_ASSERT(th != NULL); 4678 4679 th->th.th_first_place = place; 4680 th->th.th_last_place = place; 4681 th->th.th_new_place = place; 4682 s_count++; 4683 4684 if ((s_count == S) && rem && (gap_ct == gap)) { 4685 // do nothing, add an extra thread to place on next iteration 4686 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { 4687 // we added an extra thread to this place; move on to next place 4688 if (place == last_place) { 4689 place = first_place; 4690 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4691 place = 0; 4692 } else { 4693 place++; 4694 } 4695 s_count = 0; 4696 gap_ct = 1; 4697 rem--; 4698 } else if (s_count == S) { // place is full; don't add extra thread 4699 if (place == last_place) { 4700 place = first_place; 4701 } else if (place == (int)(__kmp_affinity_num_masks - 1)) { 4702 place = 0; 4703 } else { 4704 place++; 4705 } 4706 gap_ct++; 4707 s_count = 0; 4708 } 4709 4710 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4711 "partition = [%d,%d]\n", 4712 __kmp_gtid_from_thread(team->t.t_threads[f]), 4713 team->t.t_id, f, th->th.th_new_place, 4714 th->th.th_first_place, th->th.th_last_place)); 4715 } 4716 KMP_DEBUG_ASSERT(update_master_only || place == masters_place); 4717 } 4718 } break; 4719 4720 default: 4721 break; 4722 } 4723 4724 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id)); 4725 } 4726 4727 #endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */ 4728 4729 /* allocate a new team data structure to use. take one off of the free pool if 4730 available */ 4731 kmp_team_t * 4732 __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, 4733 #if OMPT_SUPPORT 4734 ompt_parallel_id_t ompt_parallel_id, 4735 #endif 4736 #if OMP_40_ENABLED 4737 kmp_proc_bind_t new_proc_bind, 4738 #endif 4739 kmp_internal_control_t *new_icvs, 4740 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) { 4741 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); 4742 int f; 4743 kmp_team_t *team; 4744 int use_hot_team = !root->r.r_active; 4745 int level = 0; 4746 4747 KA_TRACE(20, ("__kmp_allocate_team: called\n")); 4748 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0); 4749 KMP_DEBUG_ASSERT(max_nproc >= new_nproc); 4750 KMP_MB(); 4751 4752 #if KMP_NESTED_HOT_TEAMS 4753 kmp_hot_team_ptr_t *hot_teams; 4754 if (master) { 4755 team = master->th.th_team; 4756 level = team->t.t_active_level; 4757 if (master->th.th_teams_microtask) { // in teams construct? 
4758 if (master->th.th_teams_size.nteams > 1 && 4759 ( // #teams > 1 4760 team->t.t_pkfn == 4761 (microtask_t)__kmp_teams_master || // inner fork of the teams 4762 master->th.th_teams_level < 4763 team->t.t_level)) { // or nested parallel inside the teams 4764 ++level; // not increment if #teams==1, or for outer fork of the teams; 4765 // increment otherwise 4766 } 4767 } 4768 hot_teams = master->th.th_hot_teams; 4769 if (level < __kmp_hot_teams_max_level && hot_teams && 4770 hot_teams[level] 4771 .hot_team) { // hot team has already been allocated for given level 4772 use_hot_team = 1; 4773 } else { 4774 use_hot_team = 0; 4775 } 4776 } 4777 #endif 4778 // Optimization to use a "hot" team 4779 if (use_hot_team && new_nproc > 1) { 4780 KMP_DEBUG_ASSERT(new_nproc == max_nproc); 4781 #if KMP_NESTED_HOT_TEAMS 4782 team = hot_teams[level].hot_team; 4783 #else 4784 team = root->r.r_hot_team; 4785 #endif 4786 #if KMP_DEBUG 4787 if (__kmp_tasking_mode != tskm_immediate_exec) { 4788 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 4789 "task_team[1] = %p before reinit\n", 4790 team->t.t_task_team[0], team->t.t_task_team[1])); 4791 } 4792 #endif 4793 4794 // Has the number of threads changed? 4795 /* Let's assume the most common case is that the number of threads is 4796 unchanged, and put that case first. */ 4797 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads 4798 KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n")); 4799 // This case can mean that omp_set_num_threads() was called and the hot 4800 // team size 4801 // was already reduced, so we check the special flag 4802 if (team->t.t_size_changed == -1) { 4803 team->t.t_size_changed = 1; 4804 } else { 4805 KMP_CHECK_UPDATE(team->t.t_size_changed, 0); 4806 } 4807 4808 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4809 kmp_r_sched_t new_sched = new_icvs->sched; 4810 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || 4811 team->t.t_sched.chunk != new_sched.chunk) 4812 team->t.t_sched = 4813 new_sched; // set master's schedule as new run-time schedule 4814 4815 __kmp_reinitialize_team(team, new_icvs, 4816 root->r.r_uber_thread->th.th_ident); 4817 4818 KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0, 4819 team->t.t_threads[0], team)); 4820 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 4821 4822 #if OMP_40_ENABLED 4823 #if KMP_AFFINITY_SUPPORTED 4824 if ((team->t.t_size_changed == 0) && 4825 (team->t.t_proc_bind == new_proc_bind)) { 4826 if (new_proc_bind == proc_bind_spread) { 4827 __kmp_partition_places( 4828 team, 1); // add flag to update only master for spread 4829 } 4830 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: " 4831 "proc_bind = %d, partition = [%d,%d]\n", 4832 team->t.t_id, new_proc_bind, team->t.t_first_place, 4833 team->t.t_last_place)); 4834 } else { 4835 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4836 __kmp_partition_places(team); 4837 } 4838 #else 4839 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4840 #endif /* KMP_AFFINITY_SUPPORTED */ 4841 #endif /* OMP_40_ENABLED */ 4842 } else if (team->t.t_nproc > new_nproc) { 4843 KA_TRACE(20, 4844 ("__kmp_allocate_team: decreasing hot team thread count to %d\n", 4845 new_nproc)); 4846 4847 team->t.t_size_changed = 1; 4848 #if KMP_NESTED_HOT_TEAMS 4849 if (__kmp_hot_teams_mode == 0) { 4850 // AC: saved number of threads should correspond to team's value in this 4851 // mode, can be bigger in mode 1, when hot team has threads in 
reserve 4852 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); 4853 hot_teams[level].hot_team_nth = new_nproc; 4854 #endif // KMP_NESTED_HOT_TEAMS 4855 /* release the extra threads we don't need any more */ 4856 for (f = new_nproc; f < team->t.t_nproc; f++) { 4857 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 4858 if (__kmp_tasking_mode != tskm_immediate_exec) { 4859 // When decreasing team size, threads no longer in the team should 4860 // unref task team. 4861 team->t.t_threads[f]->th.th_task_team = NULL; 4862 } 4863 __kmp_free_thread(team->t.t_threads[f]); 4864 team->t.t_threads[f] = NULL; 4865 } 4866 #if KMP_NESTED_HOT_TEAMS 4867 } // (__kmp_hot_teams_mode == 0) 4868 else { 4869 // When keeping extra threads in team, switch threads to wait on own 4870 // b_go flag 4871 for (f = new_nproc; f < team->t.t_nproc; ++f) { 4872 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 4873 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; 4874 for (int b = 0; b < bs_last_barrier; ++b) { 4875 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { 4876 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 4877 } 4878 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); 4879 } 4880 } 4881 } 4882 #endif // KMP_NESTED_HOT_TEAMS 4883 team->t.t_nproc = new_nproc; 4884 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4885 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type || 4886 team->t.t_sched.chunk != new_icvs->sched.chunk) 4887 team->t.t_sched = new_icvs->sched; 4888 __kmp_reinitialize_team(team, new_icvs, 4889 root->r.r_uber_thread->th.th_ident); 4890 4891 /* update the remaining threads */ 4892 for (f = 0; f < new_nproc; ++f) { 4893 team->t.t_threads[f]->th.th_team_nproc = new_nproc; 4894 } 4895 // restore the current task state of the master thread: should be the 4896 // implicit task 4897 KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0, 4898 team->t.t_threads[0], team)); 4899 4900 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); 4901 4902 #ifdef KMP_DEBUG 4903 for (f = 0; f < team->t.t_nproc; f++) { 4904 KMP_DEBUG_ASSERT(team->t.t_threads[f] && 4905 team->t.t_threads[f]->th.th_team_nproc == 4906 team->t.t_nproc); 4907 } 4908 #endif 4909 4910 #if OMP_40_ENABLED 4911 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4912 #if KMP_AFFINITY_SUPPORTED 4913 __kmp_partition_places(team); 4914 #endif 4915 #endif 4916 } else { // team->t.t_nproc < new_nproc 4917 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4918 kmp_affin_mask_t *old_mask; 4919 if (KMP_AFFINITY_CAPABLE()) { 4920 KMP_CPU_ALLOC(old_mask); 4921 } 4922 #endif 4923 4924 KA_TRACE(20, 4925 ("__kmp_allocate_team: increasing hot team thread count to %d\n", 4926 new_nproc)); 4927 4928 team->t.t_size_changed = 1; 4929 4930 #if KMP_NESTED_HOT_TEAMS 4931 int avail_threads = hot_teams[level].hot_team_nth; 4932 if (new_nproc < avail_threads) 4933 avail_threads = new_nproc; 4934 kmp_info_t **other_threads = team->t.t_threads; 4935 for (f = team->t.t_nproc; f < avail_threads; ++f) { 4936 // Adjust barrier data of reserved threads (if any) of the team 4937 // Other data will be set in __kmp_initialize_info() below. 
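// Reserve threads being brought back into the team take the team's current
// b_arrived values so their barrier state matches the threads that stayed.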
4938 int b; 4939 kmp_balign_t *balign = other_threads[f]->th.th_bar; 4940 for (b = 0; b < bs_last_barrier; ++b) { 4941 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 4942 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4943 #if USE_DEBUGGER 4944 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 4945 #endif 4946 } 4947 } 4948 if (hot_teams[level].hot_team_nth >= new_nproc) { 4949 // we have all needed threads in reserve, no need to allocate any 4950 // this only possible in mode 1, cannot have reserved threads in mode 0 4951 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); 4952 team->t.t_nproc = new_nproc; // just get reserved threads involved 4953 } else { 4954 // we may have some threads in reserve, but not enough 4955 team->t.t_nproc = 4956 hot_teams[level] 4957 .hot_team_nth; // get reserved threads involved if any 4958 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size 4959 #endif // KMP_NESTED_HOT_TEAMS 4960 if (team->t.t_max_nproc < new_nproc) { 4961 /* reallocate larger arrays */ 4962 __kmp_reallocate_team_arrays(team, new_nproc); 4963 __kmp_reinitialize_team(team, new_icvs, NULL); 4964 } 4965 4966 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4967 /* Temporarily set full mask for master thread before creation of 4968 workers. The reason is that workers inherit the affinity from master, 4969 so if a lot of workers are created on the single core quickly, they 4970 don't get a chance to set their own affinity for a long time. */ 4971 __kmp_set_thread_affinity_mask_full_tmp(old_mask); 4972 #endif 4973 4974 /* allocate new threads for the hot team */ 4975 for (f = team->t.t_nproc; f < new_nproc; f++) { 4976 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f); 4977 KMP_DEBUG_ASSERT(new_worker); 4978 team->t.t_threads[f] = new_worker; 4979 4980 KA_TRACE(20, 4981 ("__kmp_allocate_team: team %d init T#%d arrived: " 4982 "join=%llu, plain=%llu\n", 4983 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f, 4984 team->t.t_bar[bs_forkjoin_barrier].b_arrived, 4985 team->t.t_bar[bs_plain_barrier].b_arrived)); 4986 4987 { // Initialize barrier data for new threads. 4988 int b; 4989 kmp_balign_t *balign = new_worker->th.th_bar; 4990 for (b = 0; b < bs_last_barrier; ++b) { 4991 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 4992 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != 4993 KMP_BARRIER_PARENT_FLAG); 4994 #if USE_DEBUGGER 4995 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 4996 #endif 4997 } 4998 } 4999 } 5000 5001 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5002 if (KMP_AFFINITY_CAPABLE()) { 5003 /* Restore initial master thread's affinity mask */ 5004 __kmp_set_system_affinity(old_mask, TRUE); 5005 KMP_CPU_FREE(old_mask); 5006 } 5007 #endif 5008 #if KMP_NESTED_HOT_TEAMS 5009 } // end of check of t_nproc vs. new_nproc vs. 
hot_team_nth 5010 #endif // KMP_NESTED_HOT_TEAMS 5011 /* make sure everyone is syncronized */ 5012 int old_nproc = team->t.t_nproc; // save old value and use to update only 5013 // new threads below 5014 __kmp_initialize_team(team, new_nproc, new_icvs, 5015 root->r.r_uber_thread->th.th_ident); 5016 5017 /* reinitialize the threads */ 5018 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); 5019 for (f = 0; f < team->t.t_nproc; ++f) 5020 __kmp_initialize_info(team->t.t_threads[f], team, f, 5021 __kmp_gtid_from_tid(f, team)); 5022 if (level) { // set th_task_state for new threads in nested hot team 5023 // __kmp_initialize_info() no longer zeroes th_task_state, so we should 5024 // only need to set the th_task_state for the new threads. th_task_state 5025 // for master thread will not be accurate until after this in 5026 // __kmp_fork_call(), so we look to the master's memo_stack to get the 5027 // correct value. 5028 for (f = old_nproc; f < team->t.t_nproc; ++f) 5029 team->t.t_threads[f]->th.th_task_state = 5030 team->t.t_threads[0]->th.th_task_state_memo_stack[level]; 5031 } else { // set th_task_state for new threads in non-nested hot team 5032 int old_state = 5033 team->t.t_threads[0]->th.th_task_state; // copy master's state 5034 for (f = old_nproc; f < team->t.t_nproc; ++f) 5035 team->t.t_threads[f]->th.th_task_state = old_state; 5036 } 5037 5038 #ifdef KMP_DEBUG 5039 for (f = 0; f < team->t.t_nproc; ++f) { 5040 KMP_DEBUG_ASSERT(team->t.t_threads[f] && 5041 team->t.t_threads[f]->th.th_team_nproc == 5042 team->t.t_nproc); 5043 } 5044 #endif 5045 5046 #if OMP_40_ENABLED 5047 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 5048 #if KMP_AFFINITY_SUPPORTED 5049 __kmp_partition_places(team); 5050 #endif 5051 #endif 5052 } // Check changes in number of threads 5053 5054 #if OMP_40_ENABLED 5055 kmp_info_t *master = team->t.t_threads[0]; 5056 if (master->th.th_teams_microtask) { 5057 for (f = 1; f < new_nproc; ++f) { 5058 // propagate teams construct specific info to workers 5059 kmp_info_t *thr = team->t.t_threads[f]; 5060 thr->th.th_teams_microtask = master->th.th_teams_microtask; 5061 thr->th.th_teams_level = master->th.th_teams_level; 5062 thr->th.th_teams_size = master->th.th_teams_size; 5063 } 5064 } 5065 #endif /* OMP_40_ENABLED */ 5066 #if KMP_NESTED_HOT_TEAMS 5067 if (level) { 5068 // Sync barrier state for nested hot teams, not needed for outermost hot 5069 // team. 5070 for (f = 1; f < new_nproc; ++f) { 5071 kmp_info_t *thr = team->t.t_threads[f]; 5072 int b; 5073 kmp_balign_t *balign = thr->th.th_bar; 5074 for (b = 0; b < bs_last_barrier; ++b) { 5075 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 5076 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 5077 #if USE_DEBUGGER 5078 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 5079 #endif 5080 } 5081 } 5082 } 5083 #endif // KMP_NESTED_HOT_TEAMS 5084 5085 /* reallocate space for arguments if necessary */ 5086 __kmp_alloc_argv_entries(argc, team, TRUE); 5087 KMP_CHECK_UPDATE(team->t.t_argc, argc); 5088 // The hot team re-uses the previous task team, 5089 // if untouched during the previous release->gather phase. 
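// At this point the hot team has been brought to the requested size by one of the three paths above: same size (ICVs, the run-time schedule and, if needed, the place partition are refreshed), shrinking (surplus workers are freed in hot-teams mode 0, or parked on their own b_go flags in mode 1), or growing (parked reserve workers are re-engaged first, then new workers are allocated and their barrier state is seeded from the team).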
5090 5091 KF_TRACE(10, (" hot_team = %p\n", team)); 5092 5093 #if KMP_DEBUG 5094 if (__kmp_tasking_mode != tskm_immediate_exec) { 5095 KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " 5096 "task_team[1] = %p after reinit\n", 5097 team->t.t_task_team[0], team->t.t_task_team[1])); 5098 } 5099 #endif 5100 5101 #if OMPT_SUPPORT 5102 __ompt_team_assign_id(team, ompt_parallel_id); 5103 #endif 5104 5105 KMP_MB(); 5106 5107 return team; 5108 } 5109 5110 /* next, let's try to take one from the team pool */ 5111 KMP_MB(); 5112 for (team = (kmp_team_t *)__kmp_team_pool; (team);) { 5113 /* TODO: consider resizing undersized teams instead of reaping them, now 5114 that we have a resizing mechanism */ 5115 if (team->t.t_max_nproc >= max_nproc) { 5116 /* take this team from the team pool */ 5117 __kmp_team_pool = team->t.t_next_pool; 5118 5119 /* setup the team for fresh use */ 5120 __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 5121 5122 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and " 5123 "task_team[1] %p to NULL\n", 5124 &team->t.t_task_team[0], &team->t.t_task_team[1])); 5125 team->t.t_task_team[0] = NULL; 5126 team->t.t_task_team[1] = NULL; 5127 5128 /* reallocate space for arguments if necessary */ 5129 __kmp_alloc_argv_entries(argc, team, TRUE); 5130 KMP_CHECK_UPDATE(team->t.t_argc, argc); 5131 5132 KA_TRACE( 5133 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 5134 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 5135 { // Initialize barrier data. 5136 int b; 5137 for (b = 0; b < bs_last_barrier; ++b) { 5138 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 5139 #if USE_DEBUGGER 5140 team->t.t_bar[b].b_master_arrived = 0; 5141 team->t.t_bar[b].b_team_arrived = 0; 5142 #endif 5143 } 5144 } 5145 5146 #if OMP_40_ENABLED 5147 team->t.t_proc_bind = new_proc_bind; 5148 #endif 5149 5150 KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n", 5151 team->t.t_id)); 5152 5153 #if OMPT_SUPPORT 5154 __ompt_team_assign_id(team, ompt_parallel_id); 5155 #endif 5156 5157 KMP_MB(); 5158 5159 return team; 5160 } 5161 5162 /* reap team if it is too small, then loop back and check the next one */ 5163 // not sure if this is wise, but, will be redone during the hot-teams rewrite. 5164 /* TODO: Use technique to find the right size hot-team, don't reap them */ 5165 team = __kmp_reap_team(team); 5166 __kmp_team_pool = team; 5167 } 5168 5169 /* nothing available in the pool, no matter, make a new team! 
*/ 5170 KMP_MB(); 5171 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t)); 5172 5173 /* and set it up */ 5174 team->t.t_max_nproc = max_nproc; 5175 /* NOTE well, for some reason allocating one big buffer and dividing it up 5176 seems to really hurt performance a lot on the P4, so, let's not use this */ 5177 __kmp_allocate_team_arrays(team, max_nproc); 5178 5179 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n")); 5180 __kmp_initialize_team(team, new_nproc, new_icvs, NULL); 5181 5182 KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] " 5183 "%p to NULL\n", 5184 &team->t.t_task_team[0], &team->t.t_task_team[1])); 5185 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes 5186 // memory, no need to duplicate 5187 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes 5188 // memory, no need to duplicate 5189 5190 if (__kmp_storage_map) { 5191 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc); 5192 } 5193 5194 /* allocate space for arguments */ 5195 __kmp_alloc_argv_entries(argc, team, FALSE); 5196 team->t.t_argc = argc; 5197 5198 KA_TRACE(20, 5199 ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 5200 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); 5201 { // Initialize barrier data. 5202 int b; 5203 for (b = 0; b < bs_last_barrier; ++b) { 5204 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; 5205 #if USE_DEBUGGER 5206 team->t.t_bar[b].b_master_arrived = 0; 5207 team->t.t_bar[b].b_team_arrived = 0; 5208 #endif 5209 } 5210 } 5211 5212 #if OMP_40_ENABLED 5213 team->t.t_proc_bind = new_proc_bind; 5214 #endif 5215 5216 #if OMPT_SUPPORT 5217 __ompt_team_assign_id(team, ompt_parallel_id); 5218 team->t.ompt_serialized_team_info = NULL; 5219 #endif 5220 5221 KMP_MB(); 5222 5223 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", 5224 team->t.t_id)); 5225 5226 return team; 5227 } 5228 5229 /* TODO implement hot-teams at all levels */ 5230 /* TODO implement lazy thread release on demand (disband request) */ 5231 5232 /* free the team. return it to the team pool. release all the threads 5233 * associated with it */ 5234 void __kmp_free_team(kmp_root_t *root, 5235 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) { 5236 int f; 5237 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), 5238 team->t.t_id)); 5239 5240 /* verify state */ 5241 KMP_DEBUG_ASSERT(root); 5242 KMP_DEBUG_ASSERT(team); 5243 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc); 5244 KMP_DEBUG_ASSERT(team->t.t_threads); 5245 5246 int use_hot_team = team == root->r.r_hot_team; 5247 #if KMP_NESTED_HOT_TEAMS 5248 int level; 5249 kmp_hot_team_ptr_t *hot_teams; 5250 if (master) { 5251 level = team->t.t_active_level - 1; 5252 if (master->th.th_teams_microtask) { // in teams construct? 
5253 if (master->th.th_teams_size.nteams > 1) { 5254 ++level; // level was not increased in teams construct for 5255 // team_of_masters 5256 } 5257 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && 5258 master->th.th_teams_level == team->t.t_level) { 5259 ++level; // level was not increased in teams construct for 5260 // team_of_workers before the parallel 5261 } // team->t.t_level will be increased inside parallel 5262 } 5263 hot_teams = master->th.th_hot_teams; 5264 if (level < __kmp_hot_teams_max_level) { 5265 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team); 5266 use_hot_team = 1; 5267 } 5268 } 5269 #endif // KMP_NESTED_HOT_TEAMS 5270 5271 /* team is done working */ 5272 TCW_SYNC_PTR(team->t.t_pkfn, 5273 NULL); // Important for Debugging Support Library. 5274 team->t.t_copyin_counter = 0; // init counter for possible reuse 5275 // Do not reset pointer to parent team to NULL for hot teams. 5276 5277 /* if we are non-hot team, release our threads */ 5278 if (!use_hot_team) { 5279 if (__kmp_tasking_mode != tskm_immediate_exec) { 5280 // Wait for threads to reach reapable state 5281 for (f = 1; f < team->t.t_nproc; ++f) { 5282 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 5283 kmp_info_t *th = team->t.t_threads[f]; 5284 volatile kmp_uint32 *state = &th->th.th_reap_state; 5285 while (*state != KMP_SAFE_TO_REAP) { 5286 #if KMP_OS_WINDOWS 5287 // On Windows a thread can be killed at any time, check this 5288 DWORD ecode; 5289 if (!__kmp_is_thread_alive(th, &ecode)) { 5290 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread 5291 break; 5292 } 5293 #endif 5294 // first check if thread is sleeping 5295 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th); 5296 if (fl.is_sleeping()) 5297 fl.resume(__kmp_gtid_from_thread(th)); 5298 KMP_CPU_PAUSE(); 5299 } 5300 } 5301 5302 // Delete task teams 5303 int tt_idx; 5304 for (tt_idx = 0; tt_idx < 2; ++tt_idx) { 5305 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; 5306 if (task_team != NULL) { 5307 for (f = 0; f < team->t.t_nproc; 5308 ++f) { // Have all threads unref task teams 5309 team->t.t_threads[f]->th.th_task_team = NULL; 5310 } 5311 KA_TRACE( 5312 20, 5313 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n", 5314 __kmp_get_gtid(), task_team, team->t.t_id)); 5315 #if KMP_NESTED_HOT_TEAMS 5316 __kmp_free_task_team(master, task_team); 5317 #endif 5318 team->t.t_task_team[tt_idx] = NULL; 5319 } 5320 } 5321 } 5322 5323 // Reset pointer to parent team only for non-hot teams. 5324 team->t.t_parent = NULL; 5325 team->t.t_level = 0; 5326 team->t.t_active_level = 0; 5327 5328 /* free the worker threads */ 5329 for (f = 1; f < team->t.t_nproc; ++f) { 5330 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 5331 __kmp_free_thread(team->t.t_threads[f]); 5332 team->t.t_threads[f] = NULL; 5333 } 5334 5335 /* put the team back in the team pool */ 5336 /* TODO limit size of team pool, call reap_team if pool too large */ 5337 team->t.t_next_pool = (kmp_team_t *)__kmp_team_pool; 5338 __kmp_team_pool = (volatile kmp_team_t *)team; 5339 } 5340 5341 KMP_MB(); 5342 } 5343 5344 /* reap the team. destroy it, reclaim all its resources and free its memory */ 5345 kmp_team_t *__kmp_reap_team(kmp_team_t *team) { 5346 kmp_team_t *next_pool = team->t.t_next_pool; 5347 5348 KMP_DEBUG_ASSERT(team); 5349 KMP_DEBUG_ASSERT(team->t.t_dispatch); 5350 KMP_DEBUG_ASSERT(team->t.t_disp_buffer); 5351 KMP_DEBUG_ASSERT(team->t.t_threads); 5352 KMP_DEBUG_ASSERT(team->t.t_argv); 5353 5354 /* TODO clean the threads that are a part of this? 
*/ 5355 5356 /* free stuff */ 5357 __kmp_free_team_arrays(team); 5358 if (team->t.t_argv != &team->t.t_inline_argv[0]) 5359 __kmp_free((void *)team->t.t_argv); 5360 __kmp_free(team); 5361 5362 KMP_MB(); 5363 return next_pool; 5364 } 5365 5366 // Free the thread. Don't reap it, just place it on the pool of available 5367 // threads. 5368 // 5369 // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid 5370 // binding for the affinity mechanism to be useful. 5371 // 5372 // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid. 5373 // However, we want to avoid a potential performance problem by always 5374 // scanning through the list to find the correct point at which to insert 5375 // the thread (potential N**2 behavior). To do this we keep track of the 5376 // last place a thread struct was inserted (__kmp_thread_pool_insert_pt). 5377 // With single-level parallelism, threads will always be added to the tail 5378 // of the list, kept track of by __kmp_thread_pool_insert_pt. With nested 5379 // parallelism, all bets are off and we may need to scan through the entire 5380 // free list. 5381 // 5382 // This change also has a potentially large performance benefit, for some 5383 // applications. Previously, as threads were freed from the hot team, they 5384 // would be placed back on the free list in inverse order. If the hot team 5385 // grew back to its original size, then the freed threads would be placed 5386 // back on the hot team in reverse order. This could cause bad cache 5387 // locality problems on programs where the size of the hot team regularly 5388 // grew and shrunk. 5389 // 5390 // Now, for single-level parallelism, the OMP tid is always == gtid. 5391 void __kmp_free_thread(kmp_info_t *this_th) { 5392 int gtid; 5393 kmp_info_t **scan; 5394 5395 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n", 5396 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid)); 5397 5398 KMP_DEBUG_ASSERT(this_th); 5399 5400 // When moving a thread to the pool, switch it to wait on its own b_go flag, 5401 // and to an uninitialized (NULL) team. 5402 int b; 5403 kmp_balign_t *balign = this_th->th.th_bar; 5404 for (b = 0; b < bs_last_barrier; ++b) { 5405 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) 5406 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 5407 balign[b].bb.team = NULL; 5408 balign[b].bb.leaf_kids = 0; 5409 } 5410 this_th->th.th_task_state = 0; 5411 5412 /* put thread back on the free pool */ 5413 TCW_PTR(this_th->th.th_team, NULL); 5414 TCW_PTR(this_th->th.th_root, NULL); 5415 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */ 5416 5417 // If the __kmp_thread_pool_insert_pt is already past the new insert 5418 // point, then we need to re-scan the entire list. 5419 gtid = this_th->th.th_info.ds.ds_gtid; 5420 if (__kmp_thread_pool_insert_pt != NULL) { 5421 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL); 5422 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) { 5423 __kmp_thread_pool_insert_pt = NULL; 5424 } 5425 } 5426 5427 // Scan down the list to find the place to insert the thread. 5428 // scan is the address of a link in the list, possibly the address of 5429 // __kmp_thread_pool itself. 5430 // 5431 // In the absence of nested parallelism, the for loop will have 0 iterations.
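// Example (illustrative): if the pool currently holds gtids {2, 5, 9} and
// __kmp_thread_pool_insert_pt refers to the entry for gtid 5, then freeing
// gtid 7 starts the scan at 5's link instead of at the list head, links 7 in
// between 5 and 9, and leaves __kmp_thread_pool_insert_pt pointing at 7.
// Freeing gtid 3 afterwards finds the insert point (gtid 7) past the new
// position, resets it to NULL, and falls back to a full scan from the head.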
5432 if (__kmp_thread_pool_insert_pt != NULL) { 5433 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool); 5434 } else { 5435 scan = (kmp_info_t **)&__kmp_thread_pool; 5436 } 5437 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid); 5438 scan = &((*scan)->th.th_next_pool)) 5439 ; 5440 5441 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt 5442 // to its address. 5443 TCW_PTR(this_th->th.th_next_pool, *scan); 5444 __kmp_thread_pool_insert_pt = *scan = this_th; 5445 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) || 5446 (this_th->th.th_info.ds.ds_gtid < 5447 this_th->th.th_next_pool->th.th_info.ds.ds_gtid)); 5448 TCW_4(this_th->th.th_in_pool, TRUE); 5449 __kmp_thread_pool_nth++; 5450 5451 TCW_4(__kmp_nth, __kmp_nth - 1); 5452 5453 #ifdef KMP_ADJUST_BLOCKTIME 5454 /* Adjust blocktime back to user setting or default if necessary */ 5455 /* Middle initialization might never have occurred */ 5456 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 5457 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 5458 if (__kmp_nth <= __kmp_avail_proc) { 5459 __kmp_zero_bt = FALSE; 5460 } 5461 } 5462 #endif /* KMP_ADJUST_BLOCKTIME */ 5463 5464 KMP_MB(); 5465 } 5466 5467 /* ------------------------------------------------------------------------ */ 5468 5469 void *__kmp_launch_thread(kmp_info_t *this_thr) { 5470 int gtid = this_thr->th.th_info.ds.ds_gtid; 5471 /* void *stack_data;*/ 5472 kmp_team_t *(*volatile pteam); 5473 5474 KMP_MB(); 5475 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid)); 5476 5477 if (__kmp_env_consistency_check) { 5478 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak? 5479 } 5480 5481 #if OMPT_SUPPORT 5482 if (ompt_enabled) { 5483 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5484 this_thr->th.ompt_thread_info.wait_id = 0; 5485 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); 5486 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { 5487 __ompt_thread_begin(ompt_thread_worker, gtid); 5488 } 5489 } 5490 #endif 5491 5492 /* This is the place where threads wait for work */ 5493 while (!TCR_4(__kmp_global.g.g_done)) { 5494 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]); 5495 KMP_MB(); 5496 5497 /* wait for work to do */ 5498 KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid)); 5499 5500 #if OMPT_SUPPORT 5501 if (ompt_enabled) { 5502 this_thr->th.ompt_thread_info.state = ompt_state_idle; 5503 } 5504 #endif 5505 5506 /* No tid yet since not part of a team */ 5507 __kmp_fork_barrier(gtid, KMP_GTID_DNE); 5508 5509 #if OMPT_SUPPORT 5510 if (ompt_enabled) { 5511 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5512 } 5513 #endif 5514 5515 pteam = (kmp_team_t * (*))(&this_thr->th.th_team); 5516 5517 /* have we been allocated? 
*/ 5518 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) { 5519 #if OMPT_SUPPORT 5520 ompt_task_info_t *task_info; 5521 ompt_parallel_id_t my_parallel_id; 5522 if (ompt_enabled) { 5523 task_info = __ompt_get_taskinfo(0); 5524 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id; 5525 } 5526 #endif 5527 /* we were just woken up, so run our new task */ 5528 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) { 5529 int rc; 5530 KA_TRACE(20, 5531 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n", 5532 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), 5533 (*pteam)->t.t_pkfn)); 5534 5535 updateHWFPControl(*pteam); 5536 5537 #if OMPT_SUPPORT 5538 if (ompt_enabled) { 5539 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; 5540 // Initialize OMPT task id for implicit task. 5541 int tid = __kmp_tid_from_gtid(gtid); 5542 task_info->task_id = __ompt_task_id_new(tid); 5543 } 5544 #endif 5545 5546 { 5547 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 5548 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 5549 rc = (*pteam)->t.t_invoke(gtid); 5550 } 5551 KMP_ASSERT(rc); 5552 5553 #if OMPT_SUPPORT 5554 if (ompt_enabled) { 5555 /* no frame set while outside task */ 5556 task_info->frame.exit_runtime_frame = NULL; 5557 5558 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5559 } 5560 #endif 5561 KMP_MB(); 5562 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", 5563 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), 5564 (*pteam)->t.t_pkfn)); 5565 } 5566 /* join barrier after parallel region */ 5567 __kmp_join_barrier(gtid); 5568 #if OMPT_SUPPORT && OMPT_TRACE 5569 if (ompt_enabled) { 5570 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 5571 // don't access *pteam here: it may have already been freed 5572 // by the master thread behind the barrier (possible race) 5573 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 5574 my_parallel_id, task_info->task_id); 5575 } 5576 task_info->frame.exit_runtime_frame = NULL; 5577 task_info->task_id = 0; 5578 } 5579 #endif 5580 } 5581 } 5582 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); 5583 5584 #if OMPT_SUPPORT 5585 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) { 5586 __ompt_thread_end(ompt_thread_worker, gtid); 5587 } 5588 #endif 5589 5590 this_thr->th.th_task_team = NULL; 5591 /* run the destructors for the threadprivate data for this thread */ 5592 __kmp_common_destroy_gtid(gtid); 5593 5594 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); 5595 KMP_MB(); 5596 return this_thr; 5597 } 5598 5599 /* ------------------------------------------------------------------------ */ 5600 5601 void __kmp_internal_end_dest(void *specific_gtid) { 5602 #if KMP_COMPILER_ICC 5603 #pragma warning(push) 5604 #pragma warning(disable : 810) // conversion from "void *" to "int" may lose 5605 // significant bits 5606 #endif 5607 // Make sure no significant bits are lost 5608 int gtid = (kmp_intptr_t)specific_gtid - 1; 5609 #if KMP_COMPILER_ICC 5610 #pragma warning(pop) 5611 #endif 5612 5613 KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid)); 5614 /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage 5615 * this is because 0 is reserved for the nothing-stored case */ 5616 5617 /* josh: One reason for setting the gtid specific data even when it is being 5618 destroyed by pthread is to allow gtid lookup through thread specific data 5619 (__kmp_gtid_get_specific). 
Some of the code, especially stat code, 5620 that gets executed in the call to __kmp_internal_end_thread, actually 5621 gets the gtid through the thread specific data. Setting it here seems 5622 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread 5623 to run smoothly. 5624 todo: get rid of this after we remove the dependence on 5625 __kmp_gtid_get_specific */ 5626 if (gtid >= 0 && KMP_UBER_GTID(gtid)) 5627 __kmp_gtid_set_specific(gtid); 5628 #ifdef KMP_TDATA_GTID 5629 __kmp_gtid = gtid; 5630 #endif 5631 __kmp_internal_end_thread(gtid); 5632 } 5633 5634 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB 5635 5636 // 2009-09-08 (lev): It looks like the destructor does not work. In simple test cases 5637 // destructors work perfectly, but in real libomp.so I have no evidence it is 5638 // ever called. However, the -fini linker option in makefile.mk works fine. 5639 5640 __attribute__((destructor)) void __kmp_internal_end_dtor(void) { 5641 __kmp_internal_end_atexit(); 5642 } 5643 5644 void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); } 5645 5646 #endif 5647 5648 /* [Windows] josh: when the atexit handler is called, there may still be more 5649 than one thread alive */ 5650 void __kmp_internal_end_atexit(void) { 5651 KA_TRACE(30, ("__kmp_internal_end_atexit\n")); 5652 /* [Windows] 5653 josh: ideally, we want to completely shut down the library in this atexit 5654 handler, but stat code that depends on thread specific data for gtid fails 5655 because that data becomes unavailable at some point during the shutdown, so 5656 we call __kmp_internal_end_thread instead. We should eventually remove the 5657 dependency on __kmp_get_specific_gtid in the stat code and use 5658 __kmp_internal_end_library to cleanly shut down the library. 5659 5660 // TODO: Can some of this comment about GVS be removed? 5661 I suspect that the offending stat code is executed when the calling thread 5662 tries to clean up a dead root thread's data structures, resulting in GVS 5663 code trying to close the GVS structures for that thread, but since the stat 5664 code uses __kmp_get_specific_gtid to get the gtid with the assumption that 5665 the calling thread is cleaning up itself instead of another thread, it gets 5666 confused. This happens because allowing a thread to unregister and clean up 5667 another thread is a recent modification for addressing an issue. 5668 Based on the current design (20050722), a thread may end up 5669 trying to unregister another thread only if thread death does not trigger 5670 the calling of __kmp_internal_end_thread. For Linux* OS, there is the 5671 thread specific data destructor function to detect thread death. For 5672 Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there 5673 is nothing. Thus, the workaround is applicable only for the Windows static 5674 stat library. */ 5675 __kmp_internal_end_library(-1); 5676 #if KMP_OS_WINDOWS 5677 __kmp_close_console(); 5678 #endif 5679 } 5680 5681 static void __kmp_reap_thread(kmp_info_t *thread, int is_root) { 5682 // It is assumed __kmp_forkjoin_lock is acquired.
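// Summary: for a worker (!is_root) the thread is first released from the
// fork barrier (when a finite blocktime is in effect), its OS thread is
// joined via __kmp_reap_worker(), and the pool counters are updated. In all
// cases the per-thread resources (implicit task, fast memory, consistency
// stack, th_pri_common, task-state memo stack, affinity mask, serial team,
// and the kmp_info_t itself) are then released below.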
5683 5684 int gtid; 5685 5686 KMP_DEBUG_ASSERT(thread != NULL); 5687 5688 gtid = thread->th.th_info.ds.ds_gtid; 5689 5690 if (!is_root) { 5691 5692 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { 5693 /* Assume the threads are at the fork barrier here */ 5694 KA_TRACE( 5695 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", 5696 gtid)); 5697 /* Need release fence here to prevent seg faults for tree forkjoin barrier 5698 * (GEH) */ 5699 ANNOTATE_HAPPENS_BEFORE(thread); 5700 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread); 5701 __kmp_release_64(&flag); 5702 }; // if 5703 5704 // Terminate OS thread. 5705 __kmp_reap_worker(thread); 5706 5707 // The thread was killed asynchronously. If it was actively 5708 // spinning in the thread pool, decrement the global count. 5709 // 5710 // There is a small timing hole here - if the worker thread was just waking 5711 // up after sleeping in the pool, had reset it's th_active_in_pool flag but 5712 // not decremented the global counter __kmp_thread_pool_active_nth yet, then 5713 // the global counter might not get updated. 5714 // 5715 // Currently, this can only happen as the library is unloaded, 5716 // so there are no harmful side effects. 5717 if (thread->th.th_active_in_pool) { 5718 thread->th.th_active_in_pool = FALSE; 5719 KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth); 5720 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); 5721 } 5722 5723 // Decrement # of [worker] threads in the pool. 5724 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0); 5725 --__kmp_thread_pool_nth; 5726 }; // if 5727 5728 __kmp_free_implicit_task(thread); 5729 5730 // Free the fast memory for tasking 5731 #if USE_FAST_MEMORY 5732 __kmp_free_fast_memory(thread); 5733 #endif /* USE_FAST_MEMORY */ 5734 5735 __kmp_suspend_uninitialize_thread(thread); 5736 5737 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread); 5738 TCW_SYNC_PTR(__kmp_threads[gtid], NULL); 5739 5740 --__kmp_all_nth; 5741 // __kmp_nth was decremented when thread is added to the pool. 
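// Counter bookkeeping: for pooled workers, __kmp_nth was already decremented
// in __kmp_free_thread() when the thread entered the pool, and
// __kmp_thread_pool_nth was decremented just above; __kmp_all_nth (which
// counts every registered thread, pooled or not) drops only here, once the
// thread is actually reaped.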
5742 5743 #ifdef KMP_ADJUST_BLOCKTIME 5744 /* Adjust blocktime back to user setting or default if necessary */ 5745 /* Middle initialization might never have occurred */ 5746 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 5747 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 5748 if (__kmp_nth <= __kmp_avail_proc) { 5749 __kmp_zero_bt = FALSE; 5750 } 5751 } 5752 #endif /* KMP_ADJUST_BLOCKTIME */ 5753 5754 /* free the memory being used */ 5755 if (__kmp_env_consistency_check) { 5756 if (thread->th.th_cons) { 5757 __kmp_free_cons_stack(thread->th.th_cons); 5758 thread->th.th_cons = NULL; 5759 }; // if 5760 } 5761 5762 if (thread->th.th_pri_common != NULL) { 5763 __kmp_free(thread->th.th_pri_common); 5764 thread->th.th_pri_common = NULL; 5765 }; // if 5766 5767 if (thread->th.th_task_state_memo_stack != NULL) { 5768 __kmp_free(thread->th.th_task_state_memo_stack); 5769 thread->th.th_task_state_memo_stack = NULL; 5770 } 5771 5772 #if KMP_USE_BGET 5773 if (thread->th.th_local.bget_data != NULL) { 5774 __kmp_finalize_bget(thread); 5775 }; // if 5776 #endif 5777 5778 #if KMP_AFFINITY_SUPPORTED 5779 if (thread->th.th_affin_mask != NULL) { 5780 KMP_CPU_FREE(thread->th.th_affin_mask); 5781 thread->th.th_affin_mask = NULL; 5782 }; // if 5783 #endif /* KMP_AFFINITY_SUPPORTED */ 5784 5785 __kmp_reap_team(thread->th.th_serial_team); 5786 thread->th.th_serial_team = NULL; 5787 __kmp_free(thread); 5788 5789 KMP_MB(); 5790 5791 } // __kmp_reap_thread 5792 5793 static void __kmp_internal_end(void) { 5794 int i; 5795 5796 /* First, unregister the library */ 5797 __kmp_unregister_library(); 5798 5799 #if KMP_OS_WINDOWS 5800 /* In Win static library, we can't tell when a root actually dies, so we 5801 reclaim the data structures for any root threads that have died but not 5802 unregistered themselves, in order to shut down cleanly. 5803 In Win dynamic library we also can't tell when a thread dies. */ 5804 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of 5805 // dead roots 5806 #endif 5807 5808 for (i = 0; i < __kmp_threads_capacity; i++) 5809 if (__kmp_root[i]) 5810 if (__kmp_root[i]->r.r_active) 5811 break; 5812 KMP_MB(); /* Flush all pending memory write invalidates. */ 5813 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 5814 5815 if (i < __kmp_threads_capacity) { 5816 #if KMP_USE_MONITOR 5817 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? 5818 KMP_MB(); /* Flush all pending memory write invalidates. */ 5819 5820 // Need to check that monitor was initialized before reaping it. If we are 5821 // called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then 5822 // __kmp_monitor will appear to contain valid data, but it is only valid in the 5823 // parent process, not the child. 5824 // New behavior (201008): instead of keying off of the flag 5825 // __kmp_init_parallel, the monitor thread creation is keyed off 5826 // of the new flag __kmp_init_monitor. 5827 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); 5828 if (TCR_4(__kmp_init_monitor)) { 5829 __kmp_reap_monitor(&__kmp_monitor); 5830 TCW_4(__kmp_init_monitor, 0); 5831 } 5832 __kmp_release_bootstrap_lock(&__kmp_monitor_lock); 5833 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n")); 5834 #endif // KMP_USE_MONITOR 5835 } else { 5836 /* TODO move this to cleanup code */ 5837 #ifdef KMP_DEBUG 5838 /* make sure that everything has properly ended */ 5839 for (i = 0; i < __kmp_threads_capacity; i++) { 5840 if (__kmp_root[i]) { 5841 // KMP_ASSERT( ! 
KMP_UBER_GTID( i ) ); // AC: 5842 // there can be uber threads alive here 5843 KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active? 5844 } 5845 } 5846 #endif 5847 5848 KMP_MB(); 5849 5850 // Reap the worker threads. 5851 // This is valid for now, but be careful if threads are reaped sooner. 5852 while (__kmp_thread_pool != NULL) { // Loop through all the threads in the pool. 5853 // Get the next thread from the pool. 5854 kmp_info_t *thread = (kmp_info_t *)__kmp_thread_pool; 5855 __kmp_thread_pool = thread->th.th_next_pool; 5856 // Reap it. 5857 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP); 5858 thread->th.th_next_pool = NULL; 5859 thread->th.th_in_pool = FALSE; 5860 __kmp_reap_thread(thread, 0); 5861 }; // while 5862 __kmp_thread_pool_insert_pt = NULL; 5863 5864 // Reap teams. 5865 while (__kmp_team_pool != NULL) { // Loop through all the teams in the pool. 5866 // Get the next team from the pool. 5867 kmp_team_t *team = (kmp_team_t *)__kmp_team_pool; 5868 __kmp_team_pool = team->t.t_next_pool; 5869 // Reap it. 5870 team->t.t_next_pool = NULL; 5871 __kmp_reap_team(team); 5872 }; // while 5873 5874 __kmp_reap_task_teams(); 5875 5876 for (i = 0; i < __kmp_threads_capacity; ++i) { 5877 // TBD: Add some checking... 5878 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL ); 5879 } 5880 5881 /* Make sure all threadprivate destructors get run by joining with all 5882 worker threads before resetting this flag */ 5883 TCW_SYNC_4(__kmp_init_common, FALSE); 5884 5885 KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n")); 5886 KMP_MB(); 5887 5888 #if KMP_USE_MONITOR 5889 // See note above: One of the possible fixes for CQ138434 / CQ140126 5890 // 5891 // FIXME: push both code fragments down and CSE them? 5892 // push them into __kmp_cleanup() ? 5893 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); 5894 if (TCR_4(__kmp_init_monitor)) { 5895 __kmp_reap_monitor(&__kmp_monitor); 5896 TCW_4(__kmp_init_monitor, 0); 5897 } 5898 __kmp_release_bootstrap_lock(&__kmp_monitor_lock); 5899 KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n")); 5900 #endif 5901 } /* else !__kmp_global.t_active */ 5902 TCW_4(__kmp_init_gtid, FALSE); 5903 KMP_MB(); /* Flush all pending memory write invalidates. */ 5904 5905 __kmp_cleanup(); 5906 #if OMPT_SUPPORT 5907 ompt_fini(); 5908 #endif 5909 } 5910 5911 void __kmp_internal_end_library(int gtid_req) { 5912 /* if we have already cleaned up, don't try again, it wouldn't be pretty */ 5913 /* this shouldn't be a race condition because __kmp_internal_end() is the 5914 only place to clear __kmp_serial_init */ 5915 /* we'll check this later too, after we get the lock */ 5916 // 2009-09-06: We do not set g_abort without setting g_done. This check looks 5917 // redundant, because the next check will work in any case. 5918 if (__kmp_global.g.g_abort) { 5919 KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n")); 5920 /* TODO abort? */ 5921 return; 5922 } 5923 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 5924 KA_TRACE(10, ("__kmp_internal_end_library: already finished\n")); 5925 return; 5926 } 5927 5928 KMP_MB(); /* Flush all pending memory write invalidates. */ 5929 5930 /* find out who we are and what we should do */ 5931 { 5932 int gtid = (gtid_req >= 0) ?
gtid_req : __kmp_gtid_get_specific(); 5933 KA_TRACE( 5934 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req)); 5935 if (gtid == KMP_GTID_SHUTDOWN) { 5936 KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system " 5937 "already shutdown\n")); 5938 return; 5939 } else if (gtid == KMP_GTID_MONITOR) { 5940 KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not " 5941 "registered, or system shutdown\n")); 5942 return; 5943 } else if (gtid == KMP_GTID_DNE) { 5944 KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system " 5945 "shutdown\n")); 5946 /* we don't know who we are, but we may still shutdown the library */ 5947 } else if (KMP_UBER_GTID(gtid)) { 5948 /* unregister ourselves as an uber thread. gtid is no longer valid */ 5949 if (__kmp_root[gtid]->r.r_active) { 5950 __kmp_global.g.g_abort = -1; 5951 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 5952 KA_TRACE(10, 5953 ("__kmp_internal_end_library: root still active, abort T#%d\n", 5954 gtid)); 5955 return; 5956 } else { 5957 KA_TRACE( 5958 10, 5959 ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid)); 5960 __kmp_unregister_root_current_thread(gtid); 5961 } 5962 } else { 5963 /* worker threads may call this function through the atexit handler, if they 5964 * call exit() */ 5965 /* For now, skip the usual subsequent processing and just dump the debug buffer. 5966 TODO: do a thorough shutdown instead */ 5967 #ifdef DUMP_DEBUG_ON_EXIT 5968 if (__kmp_debug_buf) 5969 __kmp_dump_debug_buffer(); 5970 #endif 5971 return; 5972 } 5973 } 5974 /* synchronize the termination process */ 5975 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 5976 5977 /* have we already finished */ 5978 if (__kmp_global.g.g_abort) { 5979 KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n")); 5980 /* TODO abort? */ 5981 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 5982 return; 5983 } 5984 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 5985 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 5986 return; 5987 } 5988 5989 /* We need this lock to enforce mutex between this reading of 5990 __kmp_threads_capacity and the writing by __kmp_register_root. 5991 Alternatively, we can use a counter of roots that is atomically updated by 5992 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and 5993 __kmp_internal_end_*. */ 5994 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 5995 5996 /* now we can safely conduct the actual termination */ 5997 __kmp_internal_end(); 5998 5999 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 6000 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6001 6002 KA_TRACE(10, ("__kmp_internal_end_library: exit\n")); 6003 6004 #ifdef DUMP_DEBUG_ON_EXIT 6005 if (__kmp_debug_buf) 6006 __kmp_dump_debug_buffer(); 6007 #endif 6008 6009 #if KMP_OS_WINDOWS 6010 __kmp_close_console(); 6011 #endif 6012 6013 __kmp_fini_allocator(); 6014 6015 } // __kmp_internal_end_library 6016 6017 void __kmp_internal_end_thread(int gtid_req) { 6018 int i; 6019 6020 /* if we have already cleaned up, don't try again, it wouldn't be pretty */ 6021 /* this shouldn't be a race condition because __kmp_internal_end() is the 6022 * only place to clear __kmp_serial_init */ 6023 /* we'll check this later too, after we get the lock */ 6024 // 2009-09-06: We do not set g_abort without setting g_done. This check looks 6025 // redundant, because the next check will work in any case. 
6026 if (__kmp_global.g.g_abort) { 6027 KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n")); 6028 /* TODO abort? */ 6029 return; 6030 } 6031 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 6032 KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n")); 6033 return; 6034 } 6035 6036 KMP_MB(); /* Flush all pending memory write invalidates. */ 6037 6038 /* find out who we are and what we should do */ 6039 { 6040 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific(); 6041 KA_TRACE(10, 6042 ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req)); 6043 if (gtid == KMP_GTID_SHUTDOWN) { 6044 KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system " 6045 "already shutdown\n")); 6046 return; 6047 } else if (gtid == KMP_GTID_MONITOR) { 6048 KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not " 6049 "registered, or system shutdown\n")); 6050 return; 6051 } else if (gtid == KMP_GTID_DNE) { 6052 KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system " 6053 "shutdown\n")); 6054 return; 6055 /* we don't know who we are */ 6056 } else if (KMP_UBER_GTID(gtid)) { 6057 /* unregister ourselves as an uber thread. gtid is no longer valid */ 6058 if (__kmp_root[gtid]->r.r_active) { 6059 __kmp_global.g.g_abort = -1; 6060 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 6061 KA_TRACE(10, 6062 ("__kmp_internal_end_thread: root still active, abort T#%d\n", 6063 gtid)); 6064 return; 6065 } else { 6066 KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", 6067 gtid)); 6068 __kmp_unregister_root_current_thread(gtid); 6069 } 6070 } else { 6071 /* just a worker thread, let's leave */ 6072 KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid)); 6073 6074 if (gtid >= 0) { 6075 __kmp_threads[gtid]->th.th_task_team = NULL; 6076 } 6077 6078 KA_TRACE(10, 6079 ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", 6080 gtid)); 6081 return; 6082 } 6083 } 6084 #if defined KMP_DYNAMIC_LIB 6085 // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber 6086 // thread, because we will better shutdown later in the library destructor. 6087 // The reason of this change is performance problem when non-openmp thread in 6088 // a loop forks and joins many openmp threads. We can save a lot of time 6089 // keeping worker threads alive until the program shutdown. 6090 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) 6091 // and Windows(DPD200287443) that occurs when using critical sections from 6092 // foreign threads. 6093 KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req)); 6094 return; 6095 #endif 6096 /* synchronize the termination process */ 6097 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 6098 6099 /* have we already finished */ 6100 if (__kmp_global.g.g_abort) { 6101 KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n")); 6102 /* TODO abort? */ 6103 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6104 return; 6105 } 6106 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { 6107 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6108 return; 6109 } 6110 6111 /* We need this lock to enforce mutex between this reading of 6112 __kmp_threads_capacity and the writing by __kmp_register_root. 6113 Alternatively, we can use a counter of roots that is atomically updated by 6114 __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and 6115 __kmp_internal_end_*. */ 6116 6117 /* should we finish the run-time? are all siblings done? 
*/ 6118 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); 6119 6120 for (i = 0; i < __kmp_threads_capacity; ++i) { 6121 if (KMP_UBER_GTID(i)) { 6122 KA_TRACE( 6123 10, 6124 ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i)); 6125 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 6126 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6127 return; 6128 }; 6129 } 6130 6131 /* now we can safely conduct the actual termination */ 6132 6133 __kmp_internal_end(); 6134 6135 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); 6136 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6137 6138 KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req)); 6139 6140 #ifdef DUMP_DEBUG_ON_EXIT 6141 if (__kmp_debug_buf) 6142 __kmp_dump_debug_buffer(); 6143 #endif 6144 } // __kmp_internal_end_thread 6145 6146 // ----------------------------------------------------------------------------- 6147 // Library registration stuff. 6148 6149 static long __kmp_registration_flag = 0; 6150 // Random value used to indicate library initialization. 6151 static char *__kmp_registration_str = NULL; 6152 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>. 6153 6154 static inline char *__kmp_reg_status_name() { 6155 /* On RHEL 3u5 if linked statically, getpid() returns different values in 6156 each thread. If registration and unregistration go in different threads 6157 (omp_misc_other_root_exit.cpp test case), the name of the registered_lib_env 6158 env var cannot be found, because the name will contain a different pid. */ 6159 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid()); 6160 } // __kmp_reg_status_name 6161 6162 void __kmp_register_library_startup(void) { 6163 6164 char *name = __kmp_reg_status_name(); // Name of the environment variable. 6165 int done = 0; 6166 union { 6167 double dtime; 6168 long ltime; 6169 } time; 6170 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6171 __kmp_initialize_system_tick(); 6172 #endif 6173 __kmp_read_system_time(&time.dtime); 6174 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL); 6175 __kmp_registration_str = 6176 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag, 6177 __kmp_registration_flag, KMP_LIBRARY_FILE); 6178 6179 KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name, 6180 __kmp_registration_str)); 6181 6182 while (!done) { 6183 6184 char *value = NULL; // Actual value of the environment variable. 6185 6186 // Set the environment variable, but do not overwrite it if it already exists. 6187 __kmp_env_set(name, __kmp_registration_str, 0); 6188 // Check that the variable was written. 6189 value = __kmp_env_get(name); 6190 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 6191 6192 done = 1; // Ok, environment variable set successfully, exit the loop. 6193 6194 } else { 6195 6196 // Oops. Write failed. Another copy of the OpenMP RTL is in memory. 6197 // Check whether it is alive or dead. 6198 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
6199 char *tail = value; 6200 char *flag_addr_str = NULL; 6201 char *flag_val_str = NULL; 6202 char const *file_name = NULL; 6203 __kmp_str_split(tail, '-', &flag_addr_str, &tail); 6204 __kmp_str_split(tail, '-', &flag_val_str, &tail); 6205 file_name = tail; 6206 if (tail != NULL) { 6207 long *flag_addr = 0; 6208 long flag_val = 0; 6209 KMP_SSCANF(flag_addr_str, "%p", &flag_addr); 6210 KMP_SSCANF(flag_val_str, "%lx", &flag_val); 6211 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) { 6212 // First, check whether the environment-encoded address is mapped into 6213 // the address space. 6214 // If so, dereference it to see if it still has the right value. 6215 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) { 6216 neighbor = 1; 6217 } else { 6218 // If not, then we know the other copy of the library is no longer 6219 // running. 6220 neighbor = 2; 6221 }; // if 6222 }; // if 6223 }; // if 6224 switch (neighbor) { 6225 case 0: // Cannot parse environment variable -- neighbor status unknown. 6226 // Assume it is the incompatible format of a future version of the 6227 // library. Assume the other library is alive. 6228 // WARN( ... ); // TODO: Issue a warning. 6229 file_name = "unknown library"; 6230 // Attention! Falling through to the next case. That's intentional. 6231 case 1: { // Neighbor is alive. 6232 // Check whether it is allowed. 6233 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK"); 6234 if (!__kmp_str_match_true(duplicate_ok)) { 6235 // That's not allowed. Issue a fatal error. 6236 __kmp_msg(kmp_ms_fatal, 6237 KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name), 6238 KMP_HNT(DuplicateLibrary), __kmp_msg_null); 6239 }; // if 6240 KMP_INTERNAL_FREE(duplicate_ok); 6241 __kmp_duplicate_library_ok = 1; 6242 done = 1; // Exit the loop. 6243 } break; 6244 case 2: { // Neighbor is dead. 6245 // Clear the variable and try to register the library again. 6246 __kmp_env_unset(name); 6247 } break; 6248 default: { KMP_DEBUG_ASSERT(0); } break; 6249 }; // switch 6250 6251 }; // if 6252 KMP_INTERNAL_FREE((void *)value); 6253 6254 }; // while 6255 KMP_INTERNAL_FREE((void *)name); 6256 6257 } // func __kmp_register_library_startup 6258 6259 void __kmp_unregister_library(void) { 6260 6261 char *name = __kmp_reg_status_name(); 6262 char *value = __kmp_env_get(name); 6263 6264 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0); 6265 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL); 6266 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { 6267 // Ok, this is our variable. Delete it. 6268 __kmp_env_unset(name); 6269 }; // if 6270 6271 KMP_INTERNAL_FREE(__kmp_registration_str); 6272 KMP_INTERNAL_FREE(value); 6273 KMP_INTERNAL_FREE(name); 6274 6275 __kmp_registration_flag = 0; 6276 __kmp_registration_str = NULL; 6277 6278 } // __kmp_unregister_library 6279 6280 // End of Library registration stuff.
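// The registration handshake implemented above can be condensed as follows.
// This is an illustrative sketch only (the helper name is hypothetical and the
// block is not compiled); the real logic lives in
// __kmp_register_library_startup() / __kmp_unregister_library().
#if 0
// Decide whether the runtime copy that wrote "<flag addr>-<flag val>-<file>"
// into __KMP_REGISTERED_LIB_<pid> is still alive in this process.
static int __kmp_example_neighbor_alive(char *value) {
  char *tail = value;
  char *flag_addr_str = NULL;
  char *flag_val_str = NULL;
  __kmp_str_split(tail, '-', &flag_addr_str, &tail);
  __kmp_str_split(tail, '-', &flag_val_str, &tail);
  if (tail == NULL)
    return -1; // cannot parse -- treat the neighbor as alive
  long *flag_addr = 0;
  long flag_val = 0;
  KMP_SSCANF(flag_addr_str, "%p", &flag_addr);
  KMP_SSCANF(flag_val_str, "%lx", &flag_val);
  if (flag_addr == 0 || flag_val == 0 || strcmp(tail, "") == 0)
    return -1; // cannot parse -- treat the neighbor as alive
  // Alive iff the encoded address is still mapped here and holds the value.
  return __kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val;
}
#endif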
6281 // ----------------------------------------------------------------------------- 6282 6283 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6284 6285 static void __kmp_check_mic_type() { 6286 kmp_cpuid_t cpuid_state = {0}; 6287 kmp_cpuid_t *cs_p = &cpuid_state; 6288 __kmp_x86_cpuid(1, 0, cs_p); 6289 // We don't support mic1 at the moment 6290 if ((cs_p->eax & 0xff0) == 0xB10) { 6291 __kmp_mic_type = mic2; 6292 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) { 6293 __kmp_mic_type = mic3; 6294 } else { 6295 __kmp_mic_type = non_mic; 6296 } 6297 } 6298 6299 #endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */ 6300 6301 static void __kmp_do_serial_initialize(void) { 6302 int i, gtid; 6303 int size; 6304 6305 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n")); 6306 6307 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4); 6308 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4); 6309 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8); 6310 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8); 6311 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *)); 6312 6313 #if OMPT_SUPPORT 6314 ompt_pre_init(); 6315 #endif 6316 6317 __kmp_validate_locks(); 6318 6319 /* Initialize internal memory allocator */ 6320 __kmp_init_allocator(); 6321 6322 /* Register the library startup via an environment variable and check to see 6323 whether another copy of the library is already registered. */ 6324 6325 __kmp_register_library_startup(); 6326 6327 /* TODO reinitialization of library */ 6328 if (TCR_4(__kmp_global.g.g_done)) { 6329 KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n")); 6330 } 6331 6332 __kmp_global.g.g_abort = 0; 6333 TCW_SYNC_4(__kmp_global.g.g_done, FALSE); 6334 6335 /* initialize the locks */ 6336 #if KMP_USE_ADAPTIVE_LOCKS 6337 #if KMP_DEBUG_ADAPTIVE_LOCKS 6338 __kmp_init_speculative_stats(); 6339 #endif 6340 #endif 6341 #if KMP_STATS_ENABLED 6342 __kmp_stats_init(); 6343 #endif 6344 __kmp_init_lock(&__kmp_global_lock); 6345 __kmp_init_queuing_lock(&__kmp_dispatch_lock); 6346 __kmp_init_lock(&__kmp_debug_lock); 6347 __kmp_init_atomic_lock(&__kmp_atomic_lock); 6348 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i); 6349 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i); 6350 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i); 6351 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r); 6352 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i); 6353 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r); 6354 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c); 6355 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r); 6356 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r); 6357 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c); 6358 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c); 6359 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c); 6360 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock); 6361 __kmp_init_bootstrap_lock(&__kmp_exit_lock); 6362 #if KMP_USE_MONITOR 6363 __kmp_init_bootstrap_lock(&__kmp_monitor_lock); 6364 #endif 6365 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock); 6366 6367 /* conduct initialization and initial setup of configuration */ 6368 6369 __kmp_runtime_initialize(); 6370 6371 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6372 __kmp_check_mic_type(); 6373 #endif 6374 6375 // Some global variable initialization moved here from kmp_env_initialize() 6376 #ifdef KMP_DEBUG 6377 kmp_diag = 0; 6378 #endif 6379 __kmp_abort_delay = 0; 6380 6381 // From __kmp_init_dflt_team_nth() 6382 /* assume the entire machine will be used */ 6383 __kmp_dflt_team_nth_ub = __kmp_xproc; 6384 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) { 6385 
__kmp_dflt_team_nth_ub = KMP_MIN_NTH; 6386 } 6387 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) { 6388 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; 6389 } 6390 __kmp_max_nth = __kmp_sys_max_nth; 6391 6392 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" 6393 // part 6394 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; 6395 #if KMP_USE_MONITOR 6396 __kmp_monitor_wakeups = 6397 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 6398 __kmp_bt_intervals = 6399 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); 6400 #endif 6401 // From "KMP_LIBRARY" part of __kmp_env_initialize() 6402 __kmp_library = library_throughput; 6403 // From KMP_SCHEDULE initialization 6404 __kmp_static = kmp_sch_static_balanced; 6405 // AC: do not use analytical here, because it is non-monotonous 6406 //__kmp_guided = kmp_sch_guided_iterative_chunked; 6407 //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no 6408 // need to repeat assignment 6409 // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch 6410 // bit control and barrier method control parts 6411 #if KMP_FAST_REDUCTION_BARRIER 6412 #define kmp_reduction_barrier_gather_bb ((int)1) 6413 #define kmp_reduction_barrier_release_bb ((int)1) 6414 #define kmp_reduction_barrier_gather_pat bp_hyper_bar 6415 #define kmp_reduction_barrier_release_pat bp_hyper_bar 6416 #endif // KMP_FAST_REDUCTION_BARRIER 6417 for (i = bs_plain_barrier; i < bs_last_barrier; i++) { 6418 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; 6419 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; 6420 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt; 6421 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt; 6422 #if KMP_FAST_REDUCTION_BARRIER 6423 if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only ( 6424 // lin_64 ): hyper,1 6425 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb; 6426 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb; 6427 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat; 6428 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat; 6429 } 6430 #endif // KMP_FAST_REDUCTION_BARRIER 6431 } 6432 #if KMP_FAST_REDUCTION_BARRIER 6433 #undef kmp_reduction_barrier_release_pat 6434 #undef kmp_reduction_barrier_gather_pat 6435 #undef kmp_reduction_barrier_release_bb 6436 #undef kmp_reduction_barrier_gather_bb 6437 #endif // KMP_FAST_REDUCTION_BARRIER 6438 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6439 if (__kmp_mic_type == mic2) { // KNC 6440 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC 6441 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather 6442 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] = 6443 1; // forkjoin release 6444 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 6445 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar; 6446 } 6447 #if KMP_FAST_REDUCTION_BARRIER 6448 if (__kmp_mic_type == mic2) { // KNC 6449 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 6450 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar; 6451 } 6452 #endif 6453 #endif 6454 6455 // From KMP_CHECKS initialization 6456 #ifdef KMP_DEBUG 6457 __kmp_env_checks = TRUE; /* development versions have the extra checks */ 6458 #else 6459 __kmp_env_checks = FALSE; /* port versions do 
not have the extra checks */ 6460 #endif 6461 6462 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization 6463 __kmp_foreign_tp = TRUE; 6464 6465 __kmp_global.g.g_dynamic = FALSE; 6466 __kmp_global.g.g_dynamic_mode = dynamic_default; 6467 6468 __kmp_env_initialize(NULL); 6469 6470 // Print all messages in message catalog for testing purposes. 6471 #ifdef KMP_DEBUG 6472 char const *val = __kmp_env_get("KMP_DUMP_CATALOG"); 6473 if (__kmp_str_match_true(val)) { 6474 kmp_str_buf_t buffer; 6475 __kmp_str_buf_init(&buffer); 6476 __kmp_i18n_dump_catalog(&buffer); 6477 __kmp_printf("%s", buffer.str); 6478 __kmp_str_buf_free(&buffer); 6479 }; // if 6480 __kmp_env_free(&val); 6481 #endif 6482 6483 __kmp_threads_capacity = 6484 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub); 6485 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part 6486 __kmp_tp_capacity = __kmp_default_tp_capacity( 6487 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified); 6488 6489 // If the library is shut down properly, both pools must be NULL. Just in 6490 // case, set them to NULL -- some memory may leak, but subsequent code will 6491 // work even if pools are not freed. 6492 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL); 6493 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL); 6494 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL); 6495 __kmp_thread_pool = NULL; 6496 __kmp_thread_pool_insert_pt = NULL; 6497 __kmp_team_pool = NULL; 6498 6499 /* Allocate all of the variable sized records */ 6500 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are 6501 * expandable */ 6502 /* Since allocation is cache-aligned, just add extra padding at the end */ 6503 size = 6504 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity + 6505 CACHE_LINE; 6506 __kmp_threads = (kmp_info_t **)__kmp_allocate(size); 6507 __kmp_root = (kmp_root_t **)((char *)__kmp_threads + 6508 sizeof(kmp_info_t *) * __kmp_threads_capacity); 6509 6510 /* init thread counts */ 6511 KMP_DEBUG_ASSERT(__kmp_all_nth == 6512 0); // Asserts fail if the library is reinitializing and 6513 KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination. 6514 __kmp_all_nth = 0; 6515 __kmp_nth = 0; 6516 6517 /* setup the uber master thread and hierarchy */ 6518 gtid = __kmp_register_root(TRUE); 6519 KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid)); 6520 KMP_ASSERT(KMP_UBER_GTID(gtid)); 6521 KMP_ASSERT(KMP_INITIAL_GTID(gtid)); 6522 6523 KMP_MB(); /* Flush all pending memory write invalidates. */ 6524 6525 __kmp_common_initialize(); 6526 6527 #if KMP_OS_UNIX 6528 /* invoke the child fork handler */ 6529 __kmp_register_atfork(); 6530 #endif 6531 6532 #if !defined KMP_DYNAMIC_LIB 6533 { 6534 /* Invoke the exit handler when the program finishes, only for static 6535 library. For dynamic library, we already have _fini and DllMain. */ 6536 int rc = atexit(__kmp_internal_end_atexit); 6537 if (rc != 0) { 6538 __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc), 6539 __kmp_msg_null); 6540 }; // if 6541 } 6542 #endif 6543 6544 #if KMP_HANDLE_SIGNALS 6545 #if KMP_OS_UNIX 6546 /* NOTE: make sure that this is called before the user installs their own 6547 signal handlers so that the user handlers are called first. this way they 6548 can return false, not call our handler, avoid terminating the library, and 6549 continue execution where they left off. 
*/ 6550 __kmp_install_signals(FALSE); 6551 #endif /* KMP_OS_UNIX */ 6552 #if KMP_OS_WINDOWS 6553 __kmp_install_signals(TRUE); 6554 #endif /* KMP_OS_WINDOWS */ 6555 #endif 6556 6557 /* we have finished the serial initialization */ 6558 __kmp_init_counter++; 6559 6560 __kmp_init_serial = TRUE; 6561 6562 if (__kmp_settings) { 6563 __kmp_env_print(); 6564 } 6565 6566 #if OMP_40_ENABLED 6567 if (__kmp_display_env || __kmp_display_env_verbose) { 6568 __kmp_env_print_2(); 6569 } 6570 #endif // OMP_40_ENABLED 6571 6572 #if OMPT_SUPPORT 6573 ompt_post_init(); 6574 #endif 6575 6576 KMP_MB(); 6577 6578 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n")); 6579 } 6580 6581 void __kmp_serial_initialize(void) { 6582 if (__kmp_init_serial) { 6583 return; 6584 } 6585 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 6586 if (__kmp_init_serial) { 6587 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6588 return; 6589 } 6590 __kmp_do_serial_initialize(); 6591 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6592 } 6593 6594 static void __kmp_do_middle_initialize(void) { 6595 int i, j; 6596 int prev_dflt_team_nth; 6597 6598 if (!__kmp_init_serial) { 6599 __kmp_do_serial_initialize(); 6600 } 6601 6602 KA_TRACE(10, ("__kmp_middle_initialize: enter\n")); 6603 6604 // Save the previous value for the __kmp_dflt_team_nth so that 6605 // we can avoid some reinitialization if it hasn't changed. 6606 prev_dflt_team_nth = __kmp_dflt_team_nth; 6607 6608 #if KMP_AFFINITY_SUPPORTED 6609 // __kmp_affinity_initialize() will try to set __kmp_ncores to the 6610 // number of cores on the machine. 6611 __kmp_affinity_initialize(); 6612 6613 // Run through the __kmp_threads array and set the affinity mask 6614 // for each root thread that is currently registered with the RTL. 6615 for (i = 0; i < __kmp_threads_capacity; i++) { 6616 if (TCR_PTR(__kmp_threads[i]) != NULL) { 6617 __kmp_affinity_set_init_mask(i, TRUE); 6618 } 6619 } 6620 #endif /* KMP_AFFINITY_SUPPORTED */ 6621 6622 KMP_ASSERT(__kmp_xproc > 0); 6623 if (__kmp_avail_proc == 0) { 6624 __kmp_avail_proc = __kmp_xproc; 6625 } 6626 6627 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), 6628 // correct them now 6629 j = 0; 6630 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) { 6631 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = 6632 __kmp_avail_proc; 6633 j++; 6634 } 6635 6636 if (__kmp_dflt_team_nth == 0) { 6637 #ifdef KMP_DFLT_NTH_CORES 6638 // Default #threads = #cores 6639 __kmp_dflt_team_nth = __kmp_ncores; 6640 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6641 "__kmp_ncores (%d)\n", 6642 __kmp_dflt_team_nth)); 6643 #else 6644 // Default #threads = #available OS procs 6645 __kmp_dflt_team_nth = __kmp_avail_proc; 6646 KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6647 "__kmp_avail_proc(%d)\n", 6648 __kmp_dflt_team_nth)); 6649 #endif /* KMP_DFLT_NTH_CORES */ 6650 } 6651 6652 if (__kmp_dflt_team_nth < KMP_MIN_NTH) { 6653 __kmp_dflt_team_nth = KMP_MIN_NTH; 6654 } 6655 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) { 6656 __kmp_dflt_team_nth = __kmp_sys_max_nth; 6657 } 6658 6659 // There's no harm in continuing if the following check fails, 6660 // but it indicates an error in the previous logic. 
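// __kmp_dflt_team_nth was clamped to [KMP_MIN_NTH, __kmp_sys_max_nth] just
// above, and __kmp_dflt_team_nth_ub was clamped to the same range during
// serial initialization, so the assertion below should only fire if one of
// those code paths is broken.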
6661 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub); 6662 6663 if (__kmp_dflt_team_nth != prev_dflt_team_nth) { 6664 // Run through the __kmp_threads array and set the num threads icv for each 6665 // root thread that is currently registered with the RTL (which has not 6666 // already explicitly set its nthreads-var with a call to 6667 // omp_set_num_threads()). 6668 for (i = 0; i < __kmp_threads_capacity; i++) { 6669 kmp_info_t *thread = __kmp_threads[i]; 6670 if (thread == NULL) 6671 continue; 6672 if (thread->th.th_current_task->td_icvs.nproc != 0) 6673 continue; 6674 6675 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth); 6676 } 6677 } 6678 KA_TRACE( 6679 20, 6680 ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", 6681 __kmp_dflt_team_nth)); 6682 6683 #ifdef KMP_ADJUST_BLOCKTIME 6684 /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */ 6685 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { 6686 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); 6687 if (__kmp_nth > __kmp_avail_proc) { 6688 __kmp_zero_bt = TRUE; 6689 } 6690 } 6691 #endif /* KMP_ADJUST_BLOCKTIME */ 6692 6693 /* we have finished middle initialization */ 6694 TCW_SYNC_4(__kmp_init_middle, TRUE); 6695 6696 KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n")); 6697 } 6698 6699 void __kmp_middle_initialize(void) { 6700 if (__kmp_init_middle) { 6701 return; 6702 } 6703 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 6704 if (__kmp_init_middle) { 6705 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6706 return; 6707 } 6708 __kmp_do_middle_initialize(); 6709 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6710 } 6711 6712 void __kmp_parallel_initialize(void) { 6713 int gtid = __kmp_entry_gtid(); // this might be a new root 6714 6715 /* synchronize parallel initialization (for sibling) */ 6716 if (TCR_4(__kmp_init_parallel)) 6717 return; 6718 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 6719 if (TCR_4(__kmp_init_parallel)) { 6720 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6721 return; 6722 } 6723 6724 /* TODO reinitialization after we have already shut down */ 6725 if (TCR_4(__kmp_global.g.g_done)) { 6726 KA_TRACE( 6727 10, 6728 ("__kmp_parallel_initialize: attempt to init while shutting down\n")); 6729 __kmp_infinite_loop(); 6730 } 6731 6732 /* jc: The lock __kmp_initz_lock is already held, so calling 6733 __kmp_serial_initialize would cause a deadlock. So we call 6734 __kmp_do_serial_initialize directly. */ 6735 if (!__kmp_init_middle) { 6736 __kmp_do_middle_initialize(); 6737 } 6738 6739 /* begin initialization */ 6740 KA_TRACE(10, ("__kmp_parallel_initialize: enter\n")); 6741 KMP_ASSERT(KMP_UBER_GTID(gtid)); 6742 6743 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6744 // Save the FP control regs. 6745 // Worker threads will set theirs to these values at thread startup. 
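// Capturing the master's x87 control word and MXCSR here gives every worker
// the same floating-point environment (rounding mode, exception masks), so
// numeric behavior should not depend on which thread executes a given chunk
// of work.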
6746 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); 6747 __kmp_store_mxcsr(&__kmp_init_mxcsr); 6748 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; 6749 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 6750 6751 #if KMP_OS_UNIX 6752 #if KMP_HANDLE_SIGNALS 6753 /* must be after __kmp_serial_initialize */ 6754 __kmp_install_signals(TRUE); 6755 #endif 6756 #endif 6757 6758 __kmp_suspend_initialize(); 6759 6760 #if defined(USE_LOAD_BALANCE) 6761 if (__kmp_global.g.g_dynamic_mode == dynamic_default) { 6762 __kmp_global.g.g_dynamic_mode = dynamic_load_balance; 6763 } 6764 #else 6765 if (__kmp_global.g.g_dynamic_mode == dynamic_default) { 6766 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 6767 } 6768 #endif 6769 6770 if (__kmp_version) { 6771 __kmp_print_version_2(); 6772 } 6773 6774 /* we have finished parallel initialization */ 6775 TCW_SYNC_4(__kmp_init_parallel, TRUE); 6776 6777 KMP_MB(); 6778 KA_TRACE(10, ("__kmp_parallel_initialize: exit\n")); 6779 6780 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 6781 } 6782 6783 /* ------------------------------------------------------------------------ */ 6784 6785 void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, 6786 kmp_team_t *team) { 6787 kmp_disp_t *dispatch; 6788 6789 KMP_MB(); 6790 6791 /* none of the threads have encountered any constructs, yet. */ 6792 this_thr->th.th_local.this_construct = 0; 6793 #if KMP_CACHE_MANAGE 6794 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived); 6795 #endif /* KMP_CACHE_MANAGE */ 6796 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); 6797 KMP_DEBUG_ASSERT(dispatch); 6798 KMP_DEBUG_ASSERT(team->t.t_dispatch); 6799 // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ 6800 // this_thr->th.th_info.ds.ds_tid ] ); 6801 6802 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ 6803 #if OMP_45_ENABLED 6804 dispatch->th_doacross_buf_idx = 6805 0; /* reset the doacross dispatch buffer counter */ 6806 #endif 6807 if (__kmp_env_consistency_check) 6808 __kmp_push_parallel(gtid, team->t.t_ident); 6809 6810 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 6811 } 6812 6813 void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, 6814 kmp_team_t *team) { 6815 if (__kmp_env_consistency_check) 6816 __kmp_pop_parallel(gtid, team->t.t_ident); 6817 6818 __kmp_finish_implicit_task(this_thr); 6819 } 6820 6821 int __kmp_invoke_task_func(int gtid) { 6822 int rc; 6823 int tid = __kmp_tid_from_gtid(gtid); 6824 kmp_info_t *this_thr = __kmp_threads[gtid]; 6825 kmp_team_t *team = this_thr->th.th_team; 6826 6827 __kmp_run_before_invoked_task(gtid, tid, this_thr, team); 6828 #if USE_ITT_BUILD 6829 if (__itt_stack_caller_create_ptr) { 6830 __kmp_itt_stack_callee_enter( 6831 (__itt_caller) 6832 team->t.t_stack_id); // inform ittnotify about entering user's code 6833 } 6834 #endif /* USE_ITT_BUILD */ 6835 #if INCLUDE_SSC_MARKS 6836 SSC_MARK_INVOKING(); 6837 #endif 6838 6839 #if OMPT_SUPPORT 6840 void *dummy; 6841 void **exit_runtime_p; 6842 ompt_task_id_t my_task_id; 6843 ompt_parallel_id_t my_parallel_id; 6844 6845 if (ompt_enabled) { 6846 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid] 6847 .ompt_task_info.frame.exit_runtime_frame); 6848 } else { 6849 exit_runtime_p = &dummy; 6850 } 6851 6852 #if OMPT_TRACE 6853 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; 6854 my_parallel_id = team->t.ompt_team_info.parallel_id; 6855 if (ompt_enabled && 6856 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 6857 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id, 6858 my_task_id); 6859 } 6860 #endif 6861 #endif 6862 6863 { 6864 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 6865 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 6866 rc = 6867 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid, 6868 tid, (int)team->t.t_argc, (void **)team->t.t_argv 6869 #if OMPT_SUPPORT 6870 , 6871 exit_runtime_p 6872 #endif 6873 ); 6874 #if OMPT_SUPPORT 6875 *exit_runtime_p = NULL; 6876 #endif 6877 } 6878 6879 #if USE_ITT_BUILD 6880 if (__itt_stack_caller_create_ptr) { 6881 __kmp_itt_stack_callee_leave( 6882 (__itt_caller) 6883 team->t.t_stack_id); // inform ittnotify about leaving user's code 6884 } 6885 #endif /* USE_ITT_BUILD */ 6886 __kmp_run_after_invoked_task(gtid, tid, this_thr, team); 6887 6888 return rc; 6889 } 6890 6891 #if OMP_40_ENABLED 6892 void __kmp_teams_master(int gtid) { 6893 // This routine is called by all master threads in teams construct 6894 kmp_info_t *thr = __kmp_threads[gtid]; 6895 kmp_team_t *team = thr->th.th_team; 6896 ident_t *loc = team->t.t_ident; 6897 thr->th.th_set_nproc = thr->th.th_teams_size.nth; 6898 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask); 6899 KMP_DEBUG_ASSERT(thr->th.th_set_nproc); 6900 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid, 6901 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask)); 6902 // Launch league of teams now, but not let workers execute 6903 // (they hang on fork barrier until next parallel) 6904 #if INCLUDE_SSC_MARKS 6905 SSC_MARK_FORKING(); 6906 #endif 6907 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc, 6908 #if OMPT_SUPPORT 6909 (void *)thr->th.th_teams_microtask, // "unwrapped" task 6910 #endif 6911 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task 6912 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL); 6913 #if INCLUDE_SSC_MARKS 6914 SSC_MARK_JOINING(); 6915 #endif 6916 6917 // AC: last parameter "1" eliminates join barrier which won't work because 6918 // worker threads are in a fork barrier waiting for more parallel regions 6919 __kmp_join_call(loc, gtid 6920 
#if OMPT_SUPPORT
6921                   ,
6922                   fork_context_intel
6923 #endif
6924                   ,
6925                   1);
6926 }
6927
6928 int __kmp_invoke_teams_master(int gtid) {
6929   kmp_info_t *this_thr = __kmp_threads[gtid];
6930   kmp_team_t *team = this_thr->th.th_team;
6931 #if KMP_DEBUG
6932   if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
6933     KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
6934                      (void *)__kmp_teams_master);
6935 #endif
6936   __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
6937   __kmp_teams_master(gtid);
6938   __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
6939   return 1;
6940 }
6941 #endif /* OMP_40_ENABLED */
6942
6943 /* This sets the requested number of threads for the next parallel region
6944    encountered by this team. Since this should be enclosed in the forkjoin
6945    critical section, it should avoid race conditions with asymmetrical nested
6946    parallelism. */
6947
6948 void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
6949   kmp_info_t *thr = __kmp_threads[gtid];
6950
6951   if (num_threads > 0)
6952     thr->th.th_set_nproc = num_threads;
6953 }
6954
6955 #if OMP_40_ENABLED
6956
6957 /* This sets the requested number of teams for the teams region and/or
6958    the number of threads for the next parallel region encountered. */
6959 void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
6960                           int num_threads) {
6961   kmp_info_t *thr = __kmp_threads[gtid];
6962   KMP_DEBUG_ASSERT(num_teams >= 0);
6963   KMP_DEBUG_ASSERT(num_threads >= 0);
6964
6965   if (num_teams == 0)
6966     num_teams = 1; // default number of teams is 1.
6967   if (num_teams > __kmp_max_nth) { // too many teams requested?
6968     if (!__kmp_reserve_warn) {
6969       __kmp_reserve_warn = 1;
6970       __kmp_msg(kmp_ms_warning,
6971                 KMP_MSG(CantFormThrTeam, num_teams, __kmp_max_nth),
6972                 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
6973     }
6974     num_teams = __kmp_max_nth;
6975   }
6976   // Set number of teams (number of threads in the outer "parallel" of the
6977   // teams)
6978   thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6979
6980   // Remember the number of threads for inner parallel regions
6981   if (num_threads == 0) {
6982     if (!TCR_4(__kmp_init_middle))
6983       __kmp_middle_initialize(); // get __kmp_avail_proc calculated
6984     num_threads = __kmp_avail_proc / num_teams;
6985     if (num_teams * num_threads > __kmp_max_nth) {
6986       // adjust num_threads w/o warning as it is not user setting
6987       num_threads = __kmp_max_nth / num_teams;
6988     }
6989   } else {
6990     if (num_teams * num_threads > __kmp_max_nth) {
6991       int new_threads = __kmp_max_nth / num_teams;
6992       if (!__kmp_reserve_warn) { // user asked for too many threads
6993         __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
6994         __kmp_msg(kmp_ms_warning,
6995                   KMP_MSG(CantFormThrTeam, num_threads, new_threads),
6996                   KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
6997       }
6998       num_threads = new_threads;
6999     }
7000   }
7001   thr->th.th_teams_size.nth = num_threads;
7002 }
7003
7004 // Set the proc_bind var to use in the following parallel region.
7005 void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7006   kmp_info_t *thr = __kmp_threads[gtid];
7007   thr->th.th_set_proc_bind = proc_bind;
7008 }
7009
7010 #endif /* OMP_40_ENABLED */
7011
7012 /* Launch the worker threads into the microtask.
*/ 7013 7014 void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) { 7015 kmp_info_t *this_thr = __kmp_threads[gtid]; 7016 7017 #ifdef KMP_DEBUG 7018 int f; 7019 #endif /* KMP_DEBUG */ 7020 7021 KMP_DEBUG_ASSERT(team); 7022 KMP_DEBUG_ASSERT(this_thr->th.th_team == team); 7023 KMP_ASSERT(KMP_MASTER_GTID(gtid)); 7024 KMP_MB(); /* Flush all pending memory write invalidates. */ 7025 7026 team->t.t_construct = 0; /* no single directives seen yet */ 7027 team->t.t_ordered.dt.t_value = 7028 0; /* thread 0 enters the ordered section first */ 7029 7030 /* Reset the identifiers on the dispatch buffer */ 7031 KMP_DEBUG_ASSERT(team->t.t_disp_buffer); 7032 if (team->t.t_max_nproc > 1) { 7033 int i; 7034 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) { 7035 team->t.t_disp_buffer[i].buffer_index = i; 7036 #if OMP_45_ENABLED 7037 team->t.t_disp_buffer[i].doacross_buf_idx = i; 7038 #endif 7039 } 7040 } else { 7041 team->t.t_disp_buffer[0].buffer_index = 0; 7042 #if OMP_45_ENABLED 7043 team->t.t_disp_buffer[0].doacross_buf_idx = 0; 7044 #endif 7045 } 7046 7047 KMP_MB(); /* Flush all pending memory write invalidates. */ 7048 KMP_ASSERT(this_thr->th.th_team == team); 7049 7050 #ifdef KMP_DEBUG 7051 for (f = 0; f < team->t.t_nproc; f++) { 7052 KMP_DEBUG_ASSERT(team->t.t_threads[f] && 7053 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc); 7054 } 7055 #endif /* KMP_DEBUG */ 7056 7057 /* release the worker threads so they may begin working */ 7058 __kmp_fork_barrier(gtid, 0); 7059 } 7060 7061 void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) { 7062 kmp_info_t *this_thr = __kmp_threads[gtid]; 7063 7064 KMP_DEBUG_ASSERT(team); 7065 KMP_DEBUG_ASSERT(this_thr->th.th_team == team); 7066 KMP_ASSERT(KMP_MASTER_GTID(gtid)); 7067 KMP_MB(); /* Flush all pending memory write invalidates. */ 7068 7069 /* Join barrier after fork */ 7070 7071 #ifdef KMP_DEBUG 7072 if (__kmp_threads[gtid] && 7073 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) { 7074 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, 7075 __kmp_threads[gtid]); 7076 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, " 7077 "team->t.t_nproc=%d\n", 7078 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, 7079 team->t.t_nproc); 7080 __kmp_print_structure(); 7081 } 7082 KMP_DEBUG_ASSERT(__kmp_threads[gtid] && 7083 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc); 7084 #endif /* KMP_DEBUG */ 7085 7086 __kmp_join_barrier(gtid); /* wait for everyone */ 7087 7088 KMP_MB(); /* Flush all pending memory write invalidates. */ 7089 KMP_ASSERT(this_thr->th.th_team == team); 7090 } 7091 7092 /* ------------------------------------------------------------------------ */ 7093 7094 #ifdef USE_LOAD_BALANCE 7095 7096 // Return the worker threads actively spinning in the hot team, if we 7097 // are at the outermost level of parallelism. Otherwise, return 0. 7098 static int __kmp_active_hot_team_nproc(kmp_root_t *root) { 7099 int i; 7100 int retval; 7101 kmp_team_t *hot_team; 7102 7103 if (root->r.r_active) { 7104 return 0; 7105 } 7106 hot_team = root->r.r_hot_team; 7107 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { 7108 return hot_team->t.t_nproc - 1; // Don't count master thread 7109 } 7110 7111 // Skip the master thread - it is accounted for elsewhere. 
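// Count only the workers whose th_active flag is still set; with a finite
// blocktime some of them may already have gone idle, and those no longer
// contribute to the load this function reports.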
7112   retval = 0;
7113   for (i = 1; i < hot_team->t.t_nproc; i++) {
7114     if (hot_team->t.t_threads[i]->th.th_active) {
7115       retval++;
7116     }
7117   }
7118   return retval;
7119 }
7120
7121 // Perform an automatic adjustment to the number of
7122 // threads used by the next parallel region.
7123 static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
7124   int retval;
7125   int pool_active;
7126   int hot_team_active;
7127   int team_curr_active;
7128   int system_active;
7129
7130   KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7131                 set_nproc));
7132   KMP_DEBUG_ASSERT(root);
7133   KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7134                        ->th.th_current_task->td_icvs.dynamic == TRUE);
7135   KMP_DEBUG_ASSERT(set_nproc > 1);
7136
7137   if (set_nproc == 1) {
7138     KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
7139     return 1;
7140   }
7141
7142   // Threads that are active in the thread pool, active in the hot team for this
7143   // particular root (if we are at the outer par level), and the currently
7144   // executing thread (to become the master) are available to add to the new
7145   // team, but are currently contributing to the system load, and must be
7146   // accounted for.
7147   pool_active = TCR_4(__kmp_thread_pool_active_nth);
7148   hot_team_active = __kmp_active_hot_team_nproc(root);
7149   team_curr_active = pool_active + hot_team_active + 1;
7150
7151   // Check the system load.
7152   system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7153   KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
7154                 "hot team active = %d\n",
7155                 system_active, pool_active, hot_team_active));
7156
7157   if (system_active < 0) {
7158     // There was an error reading the necessary info from /proc, so use the
7159     // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode
7160     // = dynamic_thread_limit, we shouldn't wind up getting back here.
7161     __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7162     KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
7163
7164     // Make this call behave like the thread limit algorithm.
7165     retval = __kmp_avail_proc - __kmp_nth +
7166              (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7167     if (retval > set_nproc) {
7168       retval = set_nproc;
7169     }
7170     if (retval < KMP_MIN_NTH) {
7171       retval = KMP_MIN_NTH;
7172     }
7173
7174     KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7175                   retval));
7176     return retval;
7177   }
7178
7179   // There is a slight delay in the load balance algorithm in detecting new
7180   // running procs. The real system load at this instant should be at least as
7181   // large as the # of active omp threads that are available to add to the team.
7182   if (system_active < team_curr_active) {
7183     system_active = team_curr_active;
7184   }
7185   retval = __kmp_avail_proc - system_active + team_curr_active;
7186   if (retval > set_nproc) {
7187     retval = set_nproc;
7188   }
7189   if (retval < KMP_MIN_NTH) {
7190     retval = KMP_MIN_NTH;
7191   }
7192
7193   KB_TRACE(20, ("__kmp_load_balance_nproc: exit.
retval:%d\n", retval)); 7194 return retval; 7195 } // __kmp_load_balance_nproc() 7196 7197 #endif /* USE_LOAD_BALANCE */ 7198 7199 /* ------------------------------------------------------------------------ */ 7200 7201 /* NOTE: this is called with the __kmp_init_lock held */ 7202 void __kmp_cleanup(void) { 7203 int f; 7204 7205 KA_TRACE(10, ("__kmp_cleanup: enter\n")); 7206 7207 if (TCR_4(__kmp_init_parallel)) { 7208 #if KMP_HANDLE_SIGNALS 7209 __kmp_remove_signals(); 7210 #endif 7211 TCW_4(__kmp_init_parallel, FALSE); 7212 } 7213 7214 if (TCR_4(__kmp_init_middle)) { 7215 #if KMP_AFFINITY_SUPPORTED 7216 __kmp_affinity_uninitialize(); 7217 #endif /* KMP_AFFINITY_SUPPORTED */ 7218 __kmp_cleanup_hierarchy(); 7219 TCW_4(__kmp_init_middle, FALSE); 7220 } 7221 7222 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n")); 7223 7224 if (__kmp_init_serial) { 7225 __kmp_runtime_destroy(); 7226 __kmp_init_serial = FALSE; 7227 } 7228 7229 for (f = 0; f < __kmp_threads_capacity; f++) { 7230 if (__kmp_root[f] != NULL) { 7231 __kmp_free(__kmp_root[f]); 7232 __kmp_root[f] = NULL; 7233 } 7234 } 7235 __kmp_free(__kmp_threads); 7236 // __kmp_threads and __kmp_root were allocated at once, as single block, so 7237 // there is no need in freeing __kmp_root. 7238 __kmp_threads = NULL; 7239 __kmp_root = NULL; 7240 __kmp_threads_capacity = 0; 7241 7242 #if KMP_USE_DYNAMIC_LOCK 7243 __kmp_cleanup_indirect_user_locks(); 7244 #else 7245 __kmp_cleanup_user_locks(); 7246 #endif 7247 7248 #if KMP_AFFINITY_SUPPORTED 7249 KMP_INTERNAL_FREE((void *)__kmp_cpuinfo_file); 7250 __kmp_cpuinfo_file = NULL; 7251 #endif /* KMP_AFFINITY_SUPPORTED */ 7252 7253 #if KMP_USE_ADAPTIVE_LOCKS 7254 #if KMP_DEBUG_ADAPTIVE_LOCKS 7255 __kmp_print_speculative_stats(); 7256 #endif 7257 #endif 7258 KMP_INTERNAL_FREE(__kmp_nested_nth.nth); 7259 __kmp_nested_nth.nth = NULL; 7260 __kmp_nested_nth.size = 0; 7261 __kmp_nested_nth.used = 0; 7262 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types); 7263 __kmp_nested_proc_bind.bind_types = NULL; 7264 __kmp_nested_proc_bind.size = 0; 7265 __kmp_nested_proc_bind.used = 0; 7266 7267 __kmp_i18n_catclose(); 7268 7269 #if KMP_STATS_ENABLED 7270 __kmp_stats_fini(); 7271 #endif 7272 7273 KA_TRACE(10, ("__kmp_cleanup: exit\n")); 7274 } 7275 7276 /* ------------------------------------------------------------------------ */ 7277 7278 int __kmp_ignore_mppbeg(void) { 7279 char *env; 7280 7281 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) { 7282 if (__kmp_str_match_false(env)) 7283 return FALSE; 7284 } 7285 // By default __kmpc_begin() is no-op. 7286 return TRUE; 7287 } 7288 7289 int __kmp_ignore_mppend(void) { 7290 char *env; 7291 7292 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) { 7293 if (__kmp_str_match_false(env)) 7294 return FALSE; 7295 } 7296 // By default __kmpc_end() is no-op. 
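// Only a KMP_IGNORE_MPPEND value that __kmp_str_match_false() recognizes as
// false makes __kmpc_end() take effect; any other value, or an unset
// variable, keeps it a no-op.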
7297 return TRUE; 7298 } 7299 7300 void __kmp_internal_begin(void) { 7301 int gtid; 7302 kmp_root_t *root; 7303 7304 /* this is a very important step as it will register new sibling threads 7305 and assign these new uber threads a new gtid */ 7306 gtid = __kmp_entry_gtid(); 7307 root = __kmp_threads[gtid]->th.th_root; 7308 KMP_ASSERT(KMP_UBER_GTID(gtid)); 7309 7310 if (root->r.r_begin) 7311 return; 7312 __kmp_acquire_lock(&root->r.r_begin_lock, gtid); 7313 if (root->r.r_begin) { 7314 __kmp_release_lock(&root->r.r_begin_lock, gtid); 7315 return; 7316 } 7317 7318 root->r.r_begin = TRUE; 7319 7320 __kmp_release_lock(&root->r.r_begin_lock, gtid); 7321 } 7322 7323 /* ------------------------------------------------------------------------ */ 7324 7325 void __kmp_user_set_library(enum library_type arg) { 7326 int gtid; 7327 kmp_root_t *root; 7328 kmp_info_t *thread; 7329 7330 /* first, make sure we are initialized so we can get our gtid */ 7331 7332 gtid = __kmp_entry_gtid(); 7333 thread = __kmp_threads[gtid]; 7334 7335 root = thread->th.th_root; 7336 7337 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, 7338 library_serial)); 7339 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level 7340 thread */ 7341 KMP_WARNING(SetLibraryIncorrectCall); 7342 return; 7343 } 7344 7345 switch (arg) { 7346 case library_serial: 7347 thread->th.th_set_nproc = 0; 7348 set__nproc(thread, 1); 7349 break; 7350 case library_turnaround: 7351 thread->th.th_set_nproc = 0; 7352 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth 7353 : __kmp_dflt_team_nth_ub); 7354 break; 7355 case library_throughput: 7356 thread->th.th_set_nproc = 0; 7357 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth 7358 : __kmp_dflt_team_nth_ub); 7359 break; 7360 default: 7361 KMP_FATAL(UnknownLibraryType, arg); 7362 } 7363 7364 __kmp_aux_set_library(arg); 7365 } 7366 7367 void __kmp_aux_set_stacksize(size_t arg) { 7368 if (!__kmp_init_serial) 7369 __kmp_serial_initialize(); 7370 7371 #if KMP_OS_DARWIN 7372 if (arg & (0x1000 - 1)) { 7373 arg &= ~(0x1000 - 1); 7374 if (arg + 0x1000) /* check for overflow if we round up */ 7375 arg += 0x1000; 7376 } 7377 #endif 7378 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); 7379 7380 /* only change the default stacksize before the first parallel region */ 7381 if (!TCR_4(__kmp_init_parallel)) { 7382 size_t value = arg; /* argument is in bytes */ 7383 7384 if (value < __kmp_sys_min_stksize) 7385 value = __kmp_sys_min_stksize; 7386 else if (value > KMP_MAX_STKSIZE) 7387 value = KMP_MAX_STKSIZE; 7388 7389 __kmp_stksize = value; 7390 7391 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */ 7392 } 7393 7394 __kmp_release_bootstrap_lock(&__kmp_initz_lock); 7395 } 7396 7397 /* set the behaviour of the runtime library */ 7398 /* TODO this can cause some odd behaviour with sibling parallelism... 
*/ 7399 void __kmp_aux_set_library(enum library_type arg) { 7400 __kmp_library = arg; 7401 7402 switch (__kmp_library) { 7403 case library_serial: { 7404 KMP_INFORM(LibraryIsSerial); 7405 (void)__kmp_change_library(TRUE); 7406 } break; 7407 case library_turnaround: 7408 (void)__kmp_change_library(TRUE); 7409 break; 7410 case library_throughput: 7411 (void)__kmp_change_library(FALSE); 7412 break; 7413 default: 7414 KMP_FATAL(UnknownLibraryType, arg); 7415 } 7416 } 7417 7418 /* ------------------------------------------------------------------------ */ 7419 7420 void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) { 7421 int blocktime = arg; /* argument is in milliseconds */ 7422 #if KMP_USE_MONITOR 7423 int bt_intervals; 7424 #endif 7425 int bt_set; 7426 7427 __kmp_save_internal_controls(thread); 7428 7429 /* Normalize and set blocktime for the teams */ 7430 if (blocktime < KMP_MIN_BLOCKTIME) 7431 blocktime = KMP_MIN_BLOCKTIME; 7432 else if (blocktime > KMP_MAX_BLOCKTIME) 7433 blocktime = KMP_MAX_BLOCKTIME; 7434 7435 set__blocktime_team(thread->th.th_team, tid, blocktime); 7436 set__blocktime_team(thread->th.th_serial_team, 0, blocktime); 7437 7438 #if KMP_USE_MONITOR 7439 /* Calculate and set blocktime intervals for the teams */ 7440 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); 7441 7442 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals); 7443 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals); 7444 #endif 7445 7446 /* Set whether blocktime has been set to "TRUE" */ 7447 bt_set = TRUE; 7448 7449 set__bt_set_team(thread->th.th_team, tid, bt_set); 7450 set__bt_set_team(thread->th.th_serial_team, 0, bt_set); 7451 #if KMP_USE_MONITOR 7452 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " 7453 "bt_intervals=%d, monitor_updates=%d\n", 7454 __kmp_gtid_from_tid(tid, thread->th.th_team), 7455 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, 7456 __kmp_monitor_wakeups)); 7457 #else 7458 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", 7459 __kmp_gtid_from_tid(tid, thread->th.th_team), 7460 thread->th.th_team->t.t_id, tid, blocktime)); 7461 #endif 7462 } 7463 7464 void __kmp_aux_set_defaults(char const *str, int len) { 7465 if (!__kmp_init_serial) { 7466 __kmp_serial_initialize(); 7467 }; 7468 __kmp_env_initialize(str); 7469 7470 if (__kmp_settings 7471 #if OMP_40_ENABLED 7472 || __kmp_display_env || __kmp_display_env_verbose 7473 #endif // OMP_40_ENABLED 7474 ) { 7475 __kmp_env_print(); 7476 } 7477 } // __kmp_aux_set_defaults 7478 7479 /* ------------------------------------------------------------------------ */ 7480 /* internal fast reduction routines */ 7481 7482 PACKED_REDUCTION_METHOD_T 7483 __kmp_determine_reduction_method( 7484 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, 7485 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 7486 kmp_critical_name *lck) { 7487 7488 // Default reduction method: critical construct ( lck != NULL, like in current 7489 // PAROPT ) 7490 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method 7491 // can be selected by RTL 7492 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method 7493 // can be selected by RTL 7494 // Finally, it's up to OpenMP RTL to make a decision on which method to select 7495 // among generated by PAROPT. 
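// In short: a team of one thread gets empty_reduce_block; otherwise the
// architecture/OS-specific branches below choose between atomic_reduce_block
// and a tree reduction, with critical_reduce_block as the fallback; a
// KMP_FORCE_REDUCTION setting may override the choice for multi-thread teams.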
7496
7497   PACKED_REDUCTION_METHOD_T retval;
7498
7499   int team_size;
7500
7501   KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
7502   KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )
7503
7504 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
7505   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
7506 #define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
7507
7508   retval = critical_reduce_block;
7509
7510   // Another way of getting the team size (with one dynamic dereference) is slower
7511   team_size = __kmp_get_team_num_threads(global_tid);
7512   if (team_size == 1) {
7513
7514     retval = empty_reduce_block;
7515
7516   } else {
7517
7518     int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7519     int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7520
7521 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
7522
7523 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \
7524     KMP_OS_DARWIN
7525
7526     int teamsize_cutoff = 4;
7527
7528 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7529     if (__kmp_mic_type != non_mic) {
7530       teamsize_cutoff = 8;
7531     }
7532 #endif
7533     if (tree_available) {
7534       if (team_size <= teamsize_cutoff) {
7535         if (atomic_available) {
7536           retval = atomic_reduce_block;
7537         }
7538       } else {
7539         retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7540       }
7541     } else if (atomic_available) {
7542       retval = atomic_reduce_block;
7543     }
7544 #else
7545 #error "Unknown or unsupported OS"
7546 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
7547        // KMP_OS_DARWIN
7548
7549 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7550
7551 #if KMP_OS_LINUX || KMP_OS_WINDOWS
7552
7553     // basic tuning
7554
7555     if (atomic_available) {
7556       if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
7557 retval = atomic_reduce_block; 7558 } 7559 } // otherwise: use critical section 7560 7561 #elif KMP_OS_DARWIN 7562 7563 if (atomic_available && (num_vars <= 3)) { 7564 retval = atomic_reduce_block; 7565 } else if (tree_available) { 7566 if ((reduce_size > (9 * sizeof(kmp_real64))) && 7567 (reduce_size < (2000 * sizeof(kmp_real64)))) { 7568 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; 7569 } 7570 } // otherwise: use critical section 7571 7572 #else 7573 #error "Unknown or unsupported OS" 7574 #endif 7575 7576 #else 7577 #error "Unknown or unsupported architecture" 7578 #endif 7579 } 7580 7581 // KMP_FORCE_REDUCTION 7582 7583 // If the team is serialized (team_size == 1), ignore the forced reduction 7584 // method and stay with the unsynchronized method (empty_reduce_block) 7585 if (__kmp_force_reduction_method != reduction_method_not_defined && 7586 team_size != 1) { 7587 7588 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block; 7589 7590 int atomic_available, tree_available; 7591 7592 switch ((forced_retval = __kmp_force_reduction_method)) { 7593 case critical_reduce_block: 7594 KMP_ASSERT(lck); // lck should be != 0 7595 break; 7596 7597 case atomic_reduce_block: 7598 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; 7599 if (!atomic_available) { 7600 KMP_WARNING(RedMethodNotSupported, "atomic"); 7601 forced_retval = critical_reduce_block; 7602 } 7603 break; 7604 7605 case tree_reduce_block: 7606 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 7607 if (!tree_available) { 7608 KMP_WARNING(RedMethodNotSupported, "tree"); 7609 forced_retval = critical_reduce_block; 7610 } else { 7611 #if KMP_FAST_REDUCTION_BARRIER 7612 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; 7613 #endif 7614 } 7615 break; 7616 7617 default: 7618 KMP_ASSERT(0); // "unsupported method specified" 7619 } 7620 7621 retval = forced_retval; 7622 } 7623 7624 KA_TRACE(10, ("reduction method selected=%08x\n", retval)); 7625 7626 #undef FAST_REDUCTION_TREE_METHOD_GENERATED 7627 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED 7628 7629 return (retval); 7630 } 7631 7632 // this function is for testing set/get/determine reduce method 7633 kmp_int32 __kmp_get_reduce_method(void) { 7634 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8); 7635 } 7636
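/* A minimal user-side sketch of when the entry points in this section come
   into play. The exact __kmpc_* calls emitted depend on the compiler's
   outlining, so this is illustrative only, assuming a program built with an
   OpenMP-enabled compiler and linked against this runtime:

       #include <omp.h>
       #include <stdio.h>

       int main() {
         omp_set_num_threads(4); // first call into the runtime performs
                                 // serial initialization
         int sum = 0;
       #pragma omp parallel reduction(+ : sum)
         {
           // The first parallel region triggers __kmp_parallel_initialize();
           // the reduction clause is eventually resolved through
           // __kmp_determine_reduction_method().
           sum += omp_get_thread_num();
         }
         printf("sum = %d\n", sum); // 0 + 1 + 2 + 3 = 6 when 4 threads run
         return 0;
       }
*/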