1 /* 2 * kmp_runtime.cpp -- KPTS runtime support library 3 */ 4 5 6 //===----------------------------------------------------------------------===// 7 // 8 // The LLVM Compiler Infrastructure 9 // 10 // This file is dual licensed under the MIT and the University of Illinois Open 11 // Source Licenses. See LICENSE.txt for details. 12 // 13 //===----------------------------------------------------------------------===// 14 15 16 #include "kmp.h" 17 #include "kmp_atomic.h" 18 #include "kmp_wrapper_getpid.h" 19 #include "kmp_environment.h" 20 #include "kmp_itt.h" 21 #include "kmp_str.h" 22 #include "kmp_settings.h" 23 #include "kmp_i18n.h" 24 #include "kmp_io.h" 25 #include "kmp_error.h" 26 #include "kmp_stats.h" 27 #include "kmp_wait_release.h" 28 #include "kmp_affinity.h" 29 30 #if OMPT_SUPPORT 31 #include "ompt-specific.h" 32 #endif 33 34 /* these are temporary issues to be dealt with */ 35 #define KMP_USE_PRCTL 0 36 37 #if KMP_OS_WINDOWS 38 #include <process.h> 39 #endif 40 41 #include "tsan_annotations.h" 42 43 #if defined(KMP_GOMP_COMPAT) 44 char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes"; 45 #endif /* defined(KMP_GOMP_COMPAT) */ 46 47 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: " 48 #if OMP_50_ENABLED 49 "5.0 (201611)"; 50 #elif OMP_45_ENABLED 51 "4.5 (201511)"; 52 #elif OMP_40_ENABLED 53 "4.0 (201307)"; 54 #else 55 "3.1 (201107)"; 56 #endif 57 58 #ifdef KMP_DEBUG 59 char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable"; 60 #endif /* KMP_DEBUG */ 61 62 #define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) 63 64 /* ------------------------------------------------------------------------ */ 65 /* ------------------------------------------------------------------------ */ 66 67 kmp_info_t __kmp_monitor; 68 69 /* ------------------------------------------------------------------------ */ 70 /* ------------------------------------------------------------------------ */ 71 72 /* Forward declarations */ 73 74 void __kmp_cleanup( void ); 75 76 static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid ); 77 static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc ); 78 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 79 static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 ); 80 #endif 81 static void __kmp_do_serial_initialize( void ); 82 void __kmp_fork_barrier( int gtid, int tid ); 83 void __kmp_join_barrier( int gtid ); 84 void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc ); 85 86 #ifdef USE_LOAD_BALANCE 87 static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc ); 88 #endif 89 90 static int __kmp_expand_threads(int nWish, int nNeed); 91 #if KMP_OS_WINDOWS 92 static int __kmp_unregister_root_other_thread( int gtid ); 93 #endif 94 static void __kmp_unregister_library( void ); // called by __kmp_internal_end() 95 static void __kmp_reap_thread( kmp_info_t * thread, int is_root ); 96 static kmp_info_t *__kmp_thread_pool_insert_pt = NULL; 97 98 /* ------------------------------------------------------------------------ */ 99 /* ------------------------------------------------------------------------ */ 100 101 /* Calculate the identifier of the current thread */ 102 /* fast (and somewhat portable) way to get unique */ 103 /* identifier of executing thread. 
*/
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */

int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t **other_threads;
    size_t stack_data;
    char *stack_addr;
    size_t stack_size;
    char *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
       parallel region, made it return KMP_GTID_DNE to force serial_initialize by
       caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
       __kmp_init_gtid for this to work. */

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr = (char*) & stack_data;
    other_threads = __kmp_threads;

    /*
        ATT: The code below is a source of potential bugs due to unsynchronized access to
        __kmp_threads array. For example:
        1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
        2. Current thread is suspended by OS.
        3. Another thread unregisters and finishes (debug versions of free() may fill memory
           with something like 0xEF).
        4. Current thread is resumed.
        5. Current thread reads junk from *thr.
        TODO: Fix it.
        --ln
    */

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated */
                /* stack size is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

    /* if we haven't been assigned a gtid, then return the error code */
    if( i<0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( !
TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) { 188 KMP_FATAL( StackOverflow, i ); 189 } 190 191 stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; 192 if( stack_addr > stack_base ) { 193 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); 194 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, 195 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base); 196 } else { 197 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr); 198 } 199 200 /* Reprint stack bounds for ubermaster since they have been refined */ 201 if ( __kmp_storage_map ) { 202 char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; 203 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; 204 __kmp_print_storage_map_gtid( i, stack_beg, stack_end, 205 other_threads[i]->th.th_info.ds.ds_stacksize, 206 "th_%d stack (refinement)", i ); 207 } 208 return i; 209 } 210 211 int 212 __kmp_get_global_thread_id_reg( ) 213 { 214 int gtid; 215 216 if ( !__kmp_init_serial ) { 217 gtid = KMP_GTID_DNE; 218 } else 219 #ifdef KMP_TDATA_GTID 220 if ( TCR_4(__kmp_gtid_mode) >= 3 ) { 221 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" )); 222 gtid = __kmp_gtid; 223 } else 224 #endif 225 if ( TCR_4(__kmp_gtid_mode) >= 2 ) { 226 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" )); 227 gtid = __kmp_gtid_get_specific(); 228 } else { 229 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" )); 230 gtid = __kmp_get_global_thread_id(); 231 } 232 233 /* we must be a new uber master sibling thread */ 234 if( gtid == KMP_GTID_DNE ) { 235 KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. " 236 "Registering a new gtid.\n" )); 237 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 238 if( !__kmp_init_serial ) { 239 __kmp_do_serial_initialize(); 240 gtid = __kmp_gtid_get_specific(); 241 } else { 242 gtid = __kmp_register_root(FALSE); 243 } 244 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 245 /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ 246 } 247 248 KMP_DEBUG_ASSERT( gtid >=0 ); 249 250 return gtid; 251 } 252 253 /* caller must hold forkjoin_lock */ 254 void 255 __kmp_check_stack_overlap( kmp_info_t *th ) 256 { 257 int f; 258 char *stack_beg = NULL; 259 char *stack_end = NULL; 260 int gtid; 261 262 KA_TRACE(10,("__kmp_check_stack_overlap: called\n")); 263 if ( __kmp_storage_map ) { 264 stack_end = (char *) th->th.th_info.ds.ds_stackbase; 265 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 266 267 gtid = __kmp_gtid_from_thread( th ); 268 269 if (gtid == KMP_GTID_MONITOR) { 270 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 271 "th_%s stack (%s)", "mon", 272 ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" ); 273 } else { 274 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, 275 "th_%d stack (%s)", gtid, 276 ( th->th.th_info.ds.ds_stackgrow ) ? 
"initial" : "actual" ); 277 } 278 } 279 280 /* No point in checking ubermaster threads since they use refinement and cannot overlap */ 281 gtid = __kmp_gtid_from_thread( th ); 282 if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) 283 { 284 KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n")); 285 if ( stack_beg == NULL ) { 286 stack_end = (char *) th->th.th_info.ds.ds_stackbase; 287 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 288 } 289 290 for( f=0 ; f < __kmp_threads_capacity ; f++ ) { 291 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); 292 293 if( f_th && f_th != th ) { 294 char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); 295 char *other_stack_beg = other_stack_end - 296 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); 297 if((stack_beg > other_stack_beg && stack_beg < other_stack_end) || 298 (stack_end > other_stack_beg && stack_end < other_stack_end)) { 299 300 /* Print the other stack values before the abort */ 301 if ( __kmp_storage_map ) 302 __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end, 303 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), 304 "th_%d stack (overlapped)", 305 __kmp_gtid_from_thread( f_th ) ); 306 307 __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null ); 308 } 309 } 310 } 311 } 312 KA_TRACE(10,("__kmp_check_stack_overlap: returning\n")); 313 } 314 315 316 /* ------------------------------------------------------------------------ */ 317 318 /* ------------------------------------------------------------------------ */ 319 320 void 321 __kmp_infinite_loop( void ) 322 { 323 static int done = FALSE; 324 325 while (! done) { 326 KMP_YIELD( 1 ); 327 } 328 } 329 330 #define MAX_MESSAGE 512 331 332 void 333 __kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) { 334 char buffer[MAX_MESSAGE]; 335 va_list ap; 336 337 va_start( ap, format); 338 KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format ); 339 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); 340 __kmp_vprintf( kmp_err, buffer, ap ); 341 #if KMP_PRINT_DATA_PLACEMENT 342 int node; 343 if(gtid >= 0) { 344 if(p1 <= p2 && (char*)p2 - (char*)p1 == size) { 345 if( __kmp_storage_map_verbose ) { 346 node = __kmp_get_host_node(p1); 347 if(node < 0) /* doesn't work, so don't try this next time */ 348 __kmp_storage_map_verbose = FALSE; 349 else { 350 char *last; 351 int lastNode; 352 int localProc = __kmp_get_cpu_from_gtid(gtid); 353 354 const int page_size = KMP_GET_PAGE_SIZE(); 355 356 p1 = (void *)( (size_t)p1 & ~((size_t)page_size - 1) ); 357 p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)page_size - 1) ); 358 if(localProc >= 0) 359 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1); 360 else 361 __kmp_printf_no_lock(" GTID %d\n", gtid); 362 # if KMP_USE_PRCTL 363 /* The more elaborate format is disabled for now because of the prctl hanging bug. */ 364 do { 365 last = p1; 366 lastNode = node; 367 /* This loop collates adjacent pages with the same host node. 
*/ 368 do { 369 (char*)p1 += page_size; 370 } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); 371 __kmp_printf_no_lock(" %p-%p memNode %d\n", last, 372 (char*)p1 - 1, lastNode); 373 } while(p1 <= p2); 374 # else 375 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, 376 (char*)p1 + (page_size - 1), __kmp_get_host_node(p1)); 377 if(p1 < p2) { 378 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, 379 (char*)p2 + (page_size - 1), __kmp_get_host_node(p2)); 380 } 381 # endif 382 } 383 } 384 } else 385 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) ); 386 } 387 #endif /* KMP_PRINT_DATA_PLACEMENT */ 388 __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); 389 } 390 391 void 392 __kmp_warn( char const * format, ... ) 393 { 394 char buffer[MAX_MESSAGE]; 395 va_list ap; 396 397 if ( __kmp_generate_warnings == kmp_warnings_off ) { 398 return; 399 } 400 401 va_start( ap, format ); 402 403 KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format ); 404 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); 405 __kmp_vprintf( kmp_err, buffer, ap ); 406 __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); 407 408 va_end( ap ); 409 } 410 411 void 412 __kmp_abort_process() 413 { 414 415 // Later threads may stall here, but that's ok because abort() will kill them. 416 __kmp_acquire_bootstrap_lock( & __kmp_exit_lock ); 417 418 if ( __kmp_debug_buf ) { 419 __kmp_dump_debug_buffer(); 420 }; // if 421 422 if ( KMP_OS_WINDOWS ) { 423 // Let other threads know of abnormal termination and prevent deadlock 424 // if abort happened during library initialization or shutdown 425 __kmp_global.g.g_abort = SIGABRT; 426 427 /* 428 On Windows* OS by default abort() causes pop-up error box, which stalls nightly testing. 429 Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior() 430 works well, but this function is not available in VS7 (this is not problem for DLL, but 431 it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does 432 not help, at least in some versions of MS C RTL. 433 434 It seems following sequence is the only way to simulate abort() and avoid pop-up error 435 box. 436 */ 437 raise( SIGABRT ); 438 _exit( 3 ); // Just in case, if signal ignored, exit anyway. 439 } else { 440 abort(); 441 }; // if 442 443 __kmp_infinite_loop(); 444 __kmp_release_bootstrap_lock( & __kmp_exit_lock ); 445 446 } // __kmp_abort_process 447 448 void 449 __kmp_abort_thread( void ) 450 { 451 // TODO: Eliminate g_abort global variable and this function. 452 // In case of abort just call abort(), it will kill all the threads. 453 __kmp_infinite_loop(); 454 } // __kmp_abort_thread 455 456 /* ------------------------------------------------------------------------ */ 457 458 /* 459 * Print out the storage map for the major kmp_info_t thread data structures 460 * that are allocated together. 
461 */ 462 463 static void 464 __kmp_print_thread_storage_map( kmp_info_t *thr, int gtid ) 465 { 466 __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid ); 467 468 __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t), 469 "th_%d.th_info", gtid ); 470 471 __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t), 472 "th_%d.th_local", gtid ); 473 474 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], 475 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid ); 476 477 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier], 478 &thr->th.th_bar[bs_plain_barrier+1], 479 sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid); 480 481 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier], 482 &thr->th.th_bar[bs_forkjoin_barrier+1], 483 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid); 484 485 #if KMP_FAST_REDUCTION_BARRIER 486 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier], 487 &thr->th.th_bar[bs_reduction_barrier+1], 488 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid); 489 #endif // KMP_FAST_REDUCTION_BARRIER 490 } 491 492 /* 493 * Print out the storage map for the major kmp_team_t team data structures 494 * that are allocated together. 495 */ 496 497 static void 498 __kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr ) 499 { 500 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; 501 __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d", 502 header, team_id ); 503 504 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier], 505 sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id ); 506 507 508 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1], 509 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id ); 510 511 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1], 512 sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id ); 513 514 #if KMP_FAST_REDUCTION_BARRIER 515 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1], 516 sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id ); 517 #endif // KMP_FAST_REDUCTION_BARRIER 518 519 __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], 520 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id ); 521 522 __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], 523 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id ); 524 525 __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff], 526 sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer", 527 header, team_id ); 528 529 530 __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data, 531 sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id ); 532 } 533 534 static void __kmp_init_allocator() {} 535 static void __kmp_fini_allocator() {} 536 537 /* ------------------------------------------------------------------------ */ 538 539 #ifdef KMP_DYNAMIC_LIB 540 # if KMP_OS_WINDOWS 541 542 static void 543 __kmp_reset_lock( kmp_bootstrap_lock_t* lck ) { 544 // TODO: Change to 
__kmp_break_bootstrap_lock(). 545 __kmp_init_bootstrap_lock( lck ); // make the lock released 546 } 547 548 static void 549 __kmp_reset_locks_on_process_detach( int gtid_req ) { 550 int i; 551 int thread_count; 552 553 // PROCESS_DETACH is expected to be called by a thread 554 // that executes ProcessExit() or FreeLibrary(). 555 // OS terminates other threads (except the one calling ProcessExit or FreeLibrary). 556 // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock. 557 // However, in fact, some threads can be still alive here, although being about to be terminated. 558 // The threads in the array with ds_thread==0 are most suspicious. 559 // Actually, it can be not safe to access the __kmp_threads[]. 560 561 // TODO: does it make sense to check __kmp_roots[] ? 562 563 // Let's check that there are no other alive threads registered with the OMP lib. 564 while( 1 ) { 565 thread_count = 0; 566 for( i = 0; i < __kmp_threads_capacity; ++i ) { 567 if( !__kmp_threads ) continue; 568 kmp_info_t* th = __kmp_threads[ i ]; 569 if( th == NULL ) continue; 570 int gtid = th->th.th_info.ds.ds_gtid; 571 if( gtid == gtid_req ) continue; 572 if( gtid < 0 ) continue; 573 DWORD exit_val; 574 int alive = __kmp_is_thread_alive( th, &exit_val ); 575 if( alive ) { 576 ++thread_count; 577 } 578 } 579 if( thread_count == 0 ) break; // success 580 } 581 582 // Assume that I'm alone. 583 584 // Now it might be probably safe to check and reset locks. 585 // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset. 586 __kmp_reset_lock( &__kmp_forkjoin_lock ); 587 #ifdef KMP_DEBUG 588 __kmp_reset_lock( &__kmp_stdio_lock ); 589 #endif // KMP_DEBUG 590 } 591 592 BOOL WINAPI 593 DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) { 594 //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 595 596 switch( fdwReason ) { 597 598 case DLL_PROCESS_ATTACH: 599 KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" )); 600 601 return TRUE; 602 603 case DLL_PROCESS_DETACH: 604 KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n", 605 __kmp_gtid_get_specific() )); 606 607 if( lpReserved != NULL ) 608 { 609 // lpReserved is used for telling the difference: 610 // lpReserved == NULL when FreeLibrary() was called, 611 // lpReserved != NULL when the process terminates. 612 // When FreeLibrary() is called, worker threads remain alive. 613 // So they will release the forkjoin lock by themselves. 614 // When the process terminates, worker threads disappear triggering 615 // the problem of unreleased forkjoin lock as described below. 616 617 // A worker thread can take the forkjoin lock. 618 // The problem comes up if that worker thread becomes dead 619 // before it releases the forkjoin lock. 620 // The forkjoin lock remains taken, while the thread 621 // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below 622 // will try to take the forkjoin lock and will always fail, 623 // so that the application will never finish [normally]. 624 // This scenario is possible if __kmpc_end() has not been executed. 625 // It looks like it's not a corner case, but common cases: 626 // - the main function was compiled by an alternative compiler; 627 // - the main function was compiled by icl but without /Qopenmp (application with plugins); 628 // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP. 629 // - alive foreign thread prevented __kmpc_end from doing cleanup. 630 631 // This is a hack to work around the problem. 632 // TODO: !!! 
to figure out something better. 633 __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() ); 634 } 635 636 __kmp_internal_end_library( __kmp_gtid_get_specific() ); 637 638 return TRUE; 639 640 case DLL_THREAD_ATTACH: 641 KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" )); 642 643 /* if we wanted to register new siblings all the time here call 644 * __kmp_get_gtid(); */ 645 return TRUE; 646 647 case DLL_THREAD_DETACH: 648 KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n", 649 __kmp_gtid_get_specific() )); 650 651 __kmp_internal_end_thread( __kmp_gtid_get_specific() ); 652 return TRUE; 653 } 654 655 return TRUE; 656 } 657 658 # endif /* KMP_OS_WINDOWS */ 659 #endif /* KMP_DYNAMIC_LIB */ 660 661 662 /* ------------------------------------------------------------------------ */ 663 664 /* Change the library type to "status" and return the old type */ 665 /* called from within initialization routines where __kmp_initz_lock is held */ 666 int 667 __kmp_change_library( int status ) 668 { 669 int old_status; 670 671 old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count) 672 673 if (status) { 674 __kmp_yield_init |= 1; // throughput => turnaround (odd init count) 675 } 676 else { 677 __kmp_yield_init &= ~1; // turnaround => throughput (even init count) 678 } 679 680 return old_status; // return previous setting of whether KMP_LIBRARY=throughput 681 } 682 683 /* ------------------------------------------------------------------------ */ 684 /* ------------------------------------------------------------------------ */ 685 686 /* __kmp_parallel_deo -- 687 * Wait until it's our turn. 688 */ 689 void 690 __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) 691 { 692 int gtid = *gtid_ref; 693 #ifdef BUILD_PARALLEL_ORDERED 694 kmp_team_t *team = __kmp_team_from_gtid( gtid ); 695 #endif /* BUILD_PARALLEL_ORDERED */ 696 697 if( __kmp_env_consistency_check ) { 698 if( __kmp_threads[gtid]->th.th_root->r.r_active ) 699 #if KMP_USE_DYNAMIC_LOCK 700 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 ); 701 #else 702 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL ); 703 #endif 704 } 705 #ifdef BUILD_PARALLEL_ORDERED 706 if( !team->t.t_serialized ) { 707 KMP_MB(); 708 KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL); 709 KMP_MB(); 710 } 711 #endif /* BUILD_PARALLEL_ORDERED */ 712 } 713 714 /* __kmp_parallel_dxo -- 715 * Signal the next task. 716 */ 717 718 void 719 __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) 720 { 721 int gtid = *gtid_ref; 722 #ifdef BUILD_PARALLEL_ORDERED 723 int tid = __kmp_tid_from_gtid( gtid ); 724 kmp_team_t *team = __kmp_team_from_gtid( gtid ); 725 #endif /* BUILD_PARALLEL_ORDERED */ 726 727 if( __kmp_env_consistency_check ) { 728 if( __kmp_threads[gtid]->th.th_root->r.r_active ) 729 __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref ); 730 } 731 #ifdef BUILD_PARALLEL_ORDERED 732 if ( ! team->t.t_serialized ) { 733 KMP_MB(); /* Flush all pending memory write invalidates. 
*/

        /* use the tid of the next thread in this team */
        /* TODO replace with general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB();  /* Flush all pending memory write invalidates. */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* The BARRIER for a SINGLE process section is always explicit */

int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the
           single block */
        /* TODO: Should this be acquire or release? */
        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);
        }
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        { // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}


/*
 * Determine if we can go parallel or must use a serialized parallel region, and
 * how many threads we can use.
 * set_nthreads is the number of threads requested for the team.
 * Returns 1 if we should serialize or only use one thread,
 * otherwise the number of threads to use.
 * The forkjoin lock is held by the caller.
837 */ 838 static int 839 __kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team, 840 int master_tid, int set_nthreads 841 #if OMP_40_ENABLED 842 , int enter_teams 843 #endif /* OMP_40_ENABLED */ 844 ) 845 { 846 int capacity; 847 int new_nthreads; 848 KMP_DEBUG_ASSERT( __kmp_init_serial ); 849 KMP_DEBUG_ASSERT( root && parent_team ); 850 851 // 852 // If dyn-var is set, dynamically adjust the number of desired threads, 853 // according to the method specified by dynamic_mode. 854 // 855 new_nthreads = set_nthreads; 856 if ( ! get__dynamic_2( parent_team, master_tid ) ) { 857 ; 858 } 859 #ifdef USE_LOAD_BALANCE 860 else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) { 861 new_nthreads = __kmp_load_balance_nproc( root, set_nthreads ); 862 if ( new_nthreads == 1 ) { 863 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n", 864 master_tid )); 865 return 1; 866 } 867 if ( new_nthreads < set_nthreads ) { 868 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n", 869 master_tid, new_nthreads )); 870 } 871 } 872 #endif /* USE_LOAD_BALANCE */ 873 else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) { 874 new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1 875 : root->r.r_hot_team->t.t_nproc); 876 if ( new_nthreads <= 1 ) { 877 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n", 878 master_tid )); 879 return 1; 880 } 881 if ( new_nthreads < set_nthreads ) { 882 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n", 883 master_tid, new_nthreads )); 884 } 885 else { 886 new_nthreads = set_nthreads; 887 } 888 } 889 else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) { 890 if ( set_nthreads > 2 ) { 891 new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] ); 892 new_nthreads = ( new_nthreads % set_nthreads ) + 1; 893 if ( new_nthreads == 1 ) { 894 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n", 895 master_tid )); 896 return 1; 897 } 898 if ( new_nthreads < set_nthreads ) { 899 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n", 900 master_tid, new_nthreads )); 901 } 902 } 903 } 904 else { 905 KMP_ASSERT( 0 ); 906 } 907 908 // 909 // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT. 910 // 911 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : 912 root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) { 913 int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 : 914 root->r.r_hot_team->t.t_nproc ); 915 if ( tl_nthreads <= 0 ) { 916 tl_nthreads = 1; 917 } 918 919 // 920 // If dyn-var is false, emit a 1-time warning. 921 // 922 if ( ! get__dynamic_2( parent_team, master_tid ) 923 && ( ! __kmp_reserve_warn ) ) { 924 __kmp_reserve_warn = 1; 925 __kmp_msg( 926 kmp_ms_warning, 927 KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ), 928 KMP_HNT( Unset_ALL_THREADS ), 929 __kmp_msg_null 930 ); 931 } 932 if ( tl_nthreads == 1 ) { 933 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n", 934 master_tid )); 935 return 1; 936 } 937 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n", 938 master_tid, tl_nthreads )); 939 new_nthreads = tl_nthreads; 940 } 941 942 // 943 // Check if the threads array is large enough, or needs expanding. 
944 // 945 // See comment in __kmp_register_root() about the adjustment if 946 // __kmp_threads[0] == NULL. 947 // 948 capacity = __kmp_threads_capacity; 949 if ( TCR_PTR(__kmp_threads[0]) == NULL ) { 950 --capacity; 951 } 952 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : 953 root->r.r_hot_team->t.t_nproc ) > capacity ) { 954 // 955 // Expand the threads array. 956 // 957 int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : 958 root->r.r_hot_team->t.t_nproc ) - capacity; 959 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired); 960 if ( slotsAdded < slotsRequired ) { 961 // 962 // The threads array was not expanded enough. 963 // 964 new_nthreads -= ( slotsRequired - slotsAdded ); 965 KMP_ASSERT( new_nthreads >= 1 ); 966 967 // 968 // If dyn-var is false, emit a 1-time warning. 969 // 970 if ( ! get__dynamic_2( parent_team, master_tid ) 971 && ( ! __kmp_reserve_warn ) ) { 972 __kmp_reserve_warn = 1; 973 if ( __kmp_tp_cached ) { 974 __kmp_msg( 975 kmp_ms_warning, 976 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), 977 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ), 978 KMP_HNT( PossibleSystemLimitOnThreads ), 979 __kmp_msg_null 980 ); 981 } 982 else { 983 __kmp_msg( 984 kmp_ms_warning, 985 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), 986 KMP_HNT( SystemLimitOnThreads ), 987 __kmp_msg_null 988 ); 989 } 990 } 991 } 992 } 993 994 if ( new_nthreads == 1 ) { 995 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n", 996 __kmp_get_gtid(), set_nthreads ) ); 997 return 1; 998 } 999 1000 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n", 1001 __kmp_get_gtid(), new_nthreads, set_nthreads )); 1002 return new_nthreads; 1003 } 1004 1005 /* ------------------------------------------------------------------------ */ 1006 /* ------------------------------------------------------------------------ */ 1007 1008 /* allocate threads from the thread pool and assign them to the new team */ 1009 /* we are assured that there are enough threads available, because we 1010 * checked on that earlier within critical section forkjoin */ 1011 1012 static void 1013 __kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team, 1014 kmp_info_t *master_th, int master_gtid ) 1015 { 1016 int i; 1017 int use_hot_team; 1018 1019 KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) ); 1020 KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() ); 1021 KMP_MB(); 1022 1023 /* first, let's setup the master thread */ 1024 master_th->th.th_info.ds.ds_tid = 0; 1025 master_th->th.th_team = team; 1026 master_th->th.th_team_nproc = team->t.t_nproc; 1027 master_th->th.th_team_master = master_th; 1028 master_th->th.th_team_serialized = FALSE; 1029 master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ]; 1030 1031 /* make sure we are not the optimized hot team */ 1032 #if KMP_NESTED_HOT_TEAMS 1033 use_hot_team = 0; 1034 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams; 1035 if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0 1036 int level = team->t.t_active_level - 1; // index in array of hot teams 1037 if( master_th->th.th_teams_microtask ) { // are we inside the teams? 
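        /*
         * Illustrative sketch (reader aid, not part of this runtime): how the
         * hot-team index is derived from the nesting state when the fork
         * happens inside a teams construct; it mirrors the two "++level"
         * adjustments made in this branch. All sketch_* names are hypothetical;
         * the block is kept under #if 0 and is meant to be read or compiled
         * separately, not enabled in place.
         */
#if 0
        /* Returns the index into the hot-teams array, or -1 if the team is
           nested too deeply to be kept hot. The teams construct does not bump
           t_active_level for the team of masters (nor, before the parallel,
           for the team of workers), so those levels are added back here. */
        static int sketch_hot_team_index(int t_active_level, int in_teams,
                                         int nteams, int team_is_masters,
                                         int team_level_matches_teams_level,
                                         int hot_teams_max_level) {
            int level = t_active_level - 1;
            if (in_teams) {
                if (nteams > 1)
                    ++level;    /* team of masters */
                if (!team_is_masters && team_level_matches_teams_level)
                    ++level;    /* team of workers before the parallel */
            }
            return (level < hot_teams_max_level) ? level : -1;
        }
#endif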
1038 if( master_th->th.th_teams_size.nteams > 1 ) { 1039 ++level; // level was not increased in teams construct for team_of_masters 1040 } 1041 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && 1042 master_th->th.th_teams_level == team->t.t_level ) { 1043 ++level; // level was not increased in teams construct for team_of_workers before the parallel 1044 } // team->t.t_level will be increased inside parallel 1045 } 1046 if( level < __kmp_hot_teams_max_level ) { 1047 if( hot_teams[level].hot_team ) { 1048 // hot team has already been allocated for given level 1049 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); 1050 use_hot_team = 1; // the team is ready to use 1051 } else { 1052 use_hot_team = 0; // AC: threads are not allocated yet 1053 hot_teams[level].hot_team = team; // remember new hot team 1054 hot_teams[level].hot_team_nth = team->t.t_nproc; 1055 } 1056 } else { 1057 use_hot_team = 0; 1058 } 1059 } 1060 #else 1061 use_hot_team = team == root->r.r_hot_team; 1062 #endif 1063 if ( !use_hot_team ) { 1064 1065 /* install the master thread */ 1066 team->t.t_threads[ 0 ] = master_th; 1067 __kmp_initialize_info( master_th, team, 0, master_gtid ); 1068 1069 /* now, install the worker threads */ 1070 for ( i=1 ; i < team->t.t_nproc ; i++ ) { 1071 1072 /* fork or reallocate a new thread and install it in team */ 1073 kmp_info_t *thr = __kmp_allocate_thread( root, team, i ); 1074 team->t.t_threads[ i ] = thr; 1075 KMP_DEBUG_ASSERT( thr ); 1076 KMP_DEBUG_ASSERT( thr->th.th_team == team ); 1077 /* align team and thread arrived states */ 1078 KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n", 1079 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0, 1080 __kmp_gtid_from_tid( i, team ), team->t.t_id, i, 1081 team->t.t_bar[ bs_forkjoin_barrier ].b_arrived, 1082 team->t.t_bar[ bs_plain_barrier ].b_arrived ) ); 1083 #if OMP_40_ENABLED 1084 thr->th.th_teams_microtask = master_th->th.th_teams_microtask; 1085 thr->th.th_teams_level = master_th->th.th_teams_level; 1086 thr->th.th_teams_size = master_th->th.th_teams_size; 1087 #endif 1088 { // Initialize threads' barrier data. 1089 int b; 1090 kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar; 1091 for ( b = 0; b < bs_last_barrier; ++ b ) { 1092 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; 1093 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 1094 #if USE_DEBUGGER 1095 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; 1096 #endif 1097 }; // for b 1098 } 1099 } 1100 1101 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 1102 __kmp_partition_places( team ); 1103 #endif 1104 1105 } 1106 1107 KMP_MB(); 1108 } 1109 1110 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1111 // 1112 // Propagate any changes to the floating point control registers out to the team 1113 // We try to avoid unnecessary writes to the relevant cache line in the team structure, 1114 // so we don't make changes unless they are needed. 1115 // 1116 inline static void 1117 propagateFPControl(kmp_team_t * team) 1118 { 1119 if ( __kmp_inherit_fp_control ) { 1120 kmp_int16 x87_fpu_control_word; 1121 kmp_uint32 mxcsr; 1122 1123 // Get master values of FPU control flags (both X87 and vector) 1124 __kmp_store_x87_fpu_control_word( &x87_fpu_control_word ); 1125 __kmp_store_mxcsr( &mxcsr ); 1126 mxcsr &= KMP_X86_MXCSR_MASK; 1127 1128 // There is no point looking at t_fp_control_saved here. 1129 // If it is TRUE, we still have to update the values if they are different from those we now have. 
        // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
        // that the values in the team are the same as those we have.
        // So, this code achieves what we need whether or not t_fp_control_saved is true.
        // By checking whether the value needs updating we avoid unnecessary writes that would put the
        // cache-line into a written state, causing all threads in the team to have to read it again.
        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
        // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
        // So we must ensure it is correct.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    }
    else {
        // Similarly here. Don't write to this cache-line in the team structure unless we have to.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
    }
}

// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed during
        // the parallel region that we are exiting.
        //
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration

/*
 * Run a parallel region that has been serialized, so it runs in a team of just the single master thread.
 */
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    /* Skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    if( !
TCR_4( __kmp_init_parallel ) ) 1197 __kmp_parallel_initialize(); 1198 1199 this_thr = __kmp_threads[ global_tid ]; 1200 serial_team = this_thr->th.th_serial_team; 1201 1202 /* utilize the serialized team held by this thread */ 1203 KMP_DEBUG_ASSERT( serial_team ); 1204 KMP_MB(); 1205 1206 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 1207 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); 1208 KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL ); 1209 KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n", 1210 global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) ); 1211 this_thr->th.th_task_team = NULL; 1212 } 1213 1214 #if OMP_40_ENABLED 1215 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; 1216 if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { 1217 proc_bind = proc_bind_false; 1218 } 1219 else if ( proc_bind == proc_bind_default ) { 1220 // 1221 // No proc_bind clause was specified, so use the current value 1222 // of proc-bind-var for this parallel region. 1223 // 1224 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; 1225 } 1226 // 1227 // Reset for next parallel region 1228 // 1229 this_thr->th.th_set_proc_bind = proc_bind_default; 1230 #endif /* OMP_40_ENABLED */ 1231 1232 if( this_thr->th.th_team != serial_team ) { 1233 // Nested level will be an index in the nested nthreads array 1234 int level = this_thr->th.th_team->t.t_level; 1235 1236 if( serial_team->t.t_serialized ) { 1237 /* this serial team was already used 1238 * TODO increase performance by making this locks more specific */ 1239 kmp_team_t *new_team; 1240 1241 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 1242 1243 #if OMPT_SUPPORT 1244 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); 1245 #endif 1246 1247 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1, 1248 #if OMPT_SUPPORT 1249 ompt_parallel_id, 1250 #endif 1251 #if OMP_40_ENABLED 1252 proc_bind, 1253 #endif 1254 & this_thr->th.th_current_task->td_icvs, 1255 0 USE_NESTED_HOT_ARG(NULL) ); 1256 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 1257 KMP_ASSERT( new_team ); 1258 1259 /* setup new serialized team and install it */ 1260 new_team->t.t_threads[0] = this_thr; 1261 new_team->t.t_parent = this_thr->th.th_team; 1262 serial_team = new_team; 1263 this_thr->th.th_serial_team = serial_team; 1264 1265 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", 1266 global_tid, serial_team ) ); 1267 1268 1269 /* TODO the above breaks the requirement that if we run out of 1270 * resources, then we can still guarantee that serialized teams 1271 * are ok, since we may need to allocate a new one */ 1272 } else { 1273 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", 1274 global_tid, serial_team ) ); 1275 } 1276 1277 /* we have to initialize this serial team */ 1278 KMP_DEBUG_ASSERT( serial_team->t.t_threads ); 1279 KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr ); 1280 KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team ); 1281 serial_team->t.t_ident = loc; 1282 serial_team->t.t_serialized = 1; 1283 serial_team->t.t_nproc = 1; 1284 serial_team->t.t_parent = this_thr->th.th_team; 1285 serial_team->t.t_sched = this_thr->th.th_team->t.t_sched; 1286 this_thr->th.th_team = serial_team; 1287 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; 1288 1289 
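    /*
     * Illustrative sketch (reader aid, not part of this runtime): how a list
     * value of the nested nthreads table (e.g. from OMP_NUM_THREADS=4,2) is
     * applied per nesting level, as done for td_icvs.nproc a few lines below.
     * The sketch_* names are hypothetical; the block is kept under #if 0 and
     * is meant to be read or compiled separately, not enabled in place.
     */
#if 0
    typedef struct sketch_nested_nth {
        int *nth;   /* parsed values, one per nesting level */
        int  used;  /* number of entries actually supplied  */
    } sketch_nested_nth;

    /* Pick the nproc ICV for the region about to start at depth (level + 1);
       fall back to the inherited value when the list is exhausted. */
    static int sketch_pick_nproc(const sketch_nested_nth *nn, int level,
                                 int inherited_nproc) {
        if (nn->used && (level + 1 < nn->used))
            return nn->nth[level + 1];
        return inherited_nproc;
    }
#endif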
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
           each serialized task represented by team->t.t_serialized? */
        copy_icvs(
            & this_thr->th.th_current_task->td_icvs,
            & this_thr->th.th_current_task->td_parent->td_icvs );

        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }

#if OMP_40_ENABLED
        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];
        }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
        serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
#endif
        this_thr->th.th_info.ds.ds_tid = 0;

        /* set thread cache values */
        this_thr->th.th_team_nproc = 1;
        this_thr->th.th_team_master = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl (serial_team);

        /* check if we need to allocate dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

        KMP_MB();

    } else {
        /* this serialized team is already being used,
         * that's fine, just add another nested level */
        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;
        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        /* allocate/push dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        {
            dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
            disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
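            /*
             * Illustrative sketch (reader aid, not part of this runtime): the
             * two statements around this comment are a plain intrusive stack
             * push -- one private dispatch buffer per nested serialized level,
             * popped again at the matching join. The sketch_* names are
             * hypothetical; the block is kept under #if 0 and is meant to be
             * read or compiled separately, not enabled in place.
             */
#if 0
#include <stdlib.h>

            typedef struct sketch_disp {
                struct sketch_disp *next;   /* older (outer) level */
                /* per-level loop scheduling state would live here */
            } sketch_disp;

            static void sketch_push_level(sketch_disp **head) {
                sketch_disp *d = (sketch_disp *)calloc(1, sizeof(*d));
                d->next = *head;    /* keep the outer level reachable */
                *head = d;          /* the inner level becomes current */
            }

            static void sketch_pop_level(sketch_disp **head) {
                sketch_disp *d = *head;
                *head = d->next;    /* restore the outer level */
                free(d);
            }
#endif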
serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; 1370 } 1371 this_thr->th.th_dispatch = serial_team->t.t_dispatch; 1372 1373 KMP_MB(); 1374 } 1375 1376 if ( __kmp_env_consistency_check ) 1377 __kmp_push_parallel( global_tid, NULL ); 1378 1379 } 1380 1381 /* most of the work for a fork */ 1382 /* return true if we really went parallel, false if serialized */ 1383 int 1384 __kmp_fork_call( 1385 ident_t * loc, 1386 int gtid, 1387 enum fork_context_e call_context, // Intel, GNU, ... 1388 kmp_int32 argc, 1389 #if OMPT_SUPPORT 1390 void *unwrapped_task, 1391 #endif 1392 microtask_t microtask, 1393 launch_t invoker, 1394 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 1395 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1396 va_list * ap 1397 #else 1398 va_list ap 1399 #endif 1400 ) 1401 { 1402 void **argv; 1403 int i; 1404 int master_tid; 1405 int master_this_cons; 1406 kmp_team_t *team; 1407 kmp_team_t *parent_team; 1408 kmp_info_t *master_th; 1409 kmp_root_t *root; 1410 int nthreads; 1411 int master_active; 1412 int master_set_numthreads; 1413 int level; 1414 #if OMP_40_ENABLED 1415 int active_level; 1416 int teams_level; 1417 #endif 1418 #if KMP_NESTED_HOT_TEAMS 1419 kmp_hot_team_ptr_t **p_hot_teams; 1420 #endif 1421 { // KMP_TIME_BLOCK 1422 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); 1423 KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); 1424 1425 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid )); 1426 if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) { 1427 /* Some systems prefer the stack for the root thread(s) to start with */ 1428 /* some gap from the parent stack to prevent false sharing. */ 1429 void *dummy = KMP_ALLOCA(__kmp_stkpadding); 1430 /* These 2 lines below are so this does not get optimized out */ 1431 if ( __kmp_stkpadding > KMP_MAX_STKPADDING ) 1432 __kmp_stkpadding += (short)((kmp_int64)dummy); 1433 } 1434 1435 /* initialize if needed */ 1436 KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown 1437 if( ! 
TCR_4(__kmp_init_parallel) ) 1438 __kmp_parallel_initialize(); 1439 1440 /* setup current data */ 1441 master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown 1442 parent_team = master_th->th.th_team; 1443 master_tid = master_th->th.th_info.ds.ds_tid; 1444 master_this_cons = master_th->th.th_local.this_construct; 1445 root = master_th->th.th_root; 1446 master_active = root->r.r_active; 1447 master_set_numthreads = master_th->th.th_set_nproc; 1448 1449 #if OMPT_SUPPORT 1450 ompt_parallel_id_t ompt_parallel_id; 1451 ompt_task_id_t ompt_task_id; 1452 ompt_frame_t *ompt_frame; 1453 ompt_task_id_t my_task_id; 1454 ompt_parallel_id_t my_parallel_id; 1455 1456 if (ompt_enabled) { 1457 ompt_parallel_id = __ompt_parallel_id_new(gtid); 1458 ompt_task_id = __ompt_get_task_id_internal(0); 1459 ompt_frame = __ompt_get_task_frame_internal(0); 1460 } 1461 #endif 1462 1463 // Nested level will be an index in the nested nthreads array 1464 level = parent_team->t.t_level; 1465 active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed 1466 #if OMP_40_ENABLED 1467 teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams 1468 #endif 1469 #if KMP_NESTED_HOT_TEAMS 1470 p_hot_teams = &master_th->th.th_hot_teams; 1471 if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) { 1472 *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate( 1473 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); 1474 (*p_hot_teams)[0].hot_team = root->r.r_hot_team; 1475 (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0) 1476 } 1477 #endif 1478 1479 #if OMPT_SUPPORT 1480 if (ompt_enabled && 1481 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { 1482 int team_size = master_set_numthreads; 1483 1484 ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( 1485 ompt_task_id, ompt_frame, ompt_parallel_id, 1486 team_size, unwrapped_task, OMPT_INVOKER(call_context)); 1487 } 1488 #endif 1489 1490 master_th->th.th_ident = loc; 1491 1492 #if OMP_40_ENABLED 1493 if ( master_th->th.th_teams_microtask && 1494 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) { 1495 // AC: This is start of parallel that is nested inside teams construct. 1496 // The team is actual (hot), all workers are ready at the fork barrier. 1497 // No lock needed to initialize the team a bit, then free workers. 
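        /*
         * Illustrative sketch (reader aid, not part of this runtime): the loop
         * just below copies the caller's variadic arguments into the team's
         * argv array so that every thread can later invoke the microtask with
         * an identical argument list. The helper name sketch_collect_args is
         * hypothetical; the block is kept under #if 0 and is meant to be read
         * or compiled separately, not enabled in place.
         */
#if 0
#include <stdarg.h>

        /* Copy 'argc' pointer-sized arguments from 'ap' into 'argv'.
           The caller remains responsible for calling va_end(). */
        static void sketch_collect_args(void **argv, int argc, va_list ap) {
            int i;
            for (i = 0; i < argc; ++i)
                argv[i] = va_arg(ap, void *);
        }
#endif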
1498 parent_team->t.t_ident = loc; 1499 __kmp_alloc_argv_entries( argc, parent_team, TRUE ); 1500 parent_team->t.t_argc = argc; 1501 argv = (void**)parent_team->t.t_argv; 1502 for( i=argc-1; i >= 0; --i ) 1503 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 1504 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1505 *argv++ = va_arg( *ap, void * ); 1506 #else 1507 *argv++ = va_arg( ap, void * ); 1508 #endif 1509 /* Increment our nested depth levels, but not increase the serialization */ 1510 if ( parent_team == master_th->th.th_serial_team ) { 1511 // AC: we are in serialized parallel 1512 __kmpc_serialized_parallel(loc, gtid); 1513 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 ); 1514 parent_team->t.t_serialized--; // AC: need this in order enquiry functions 1515 // work correctly, will restore at join time 1516 1517 #if OMPT_SUPPORT 1518 void *dummy; 1519 void **exit_runtime_p; 1520 1521 ompt_lw_taskteam_t lw_taskteam; 1522 1523 if (ompt_enabled) { 1524 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1525 unwrapped_task, ompt_parallel_id); 1526 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1527 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1528 1529 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1530 1531 #if OMPT_TRACE 1532 /* OMPT implicit task begin */ 1533 my_task_id = lw_taskteam.ompt_task_info.task_id; 1534 my_parallel_id = parent_team->t.ompt_team_info.parallel_id; 1535 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1536 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1537 my_parallel_id, my_task_id); 1538 } 1539 #endif 1540 1541 /* OMPT state */ 1542 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1543 } else { 1544 exit_runtime_p = &dummy; 1545 } 1546 #endif 1547 1548 { 1549 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1550 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1551 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv 1552 #if OMPT_SUPPORT 1553 , exit_runtime_p 1554 #endif 1555 ); 1556 } 1557 1558 #if OMPT_SUPPORT 1559 *exit_runtime_p = NULL; 1560 if (ompt_enabled) { 1561 #if OMPT_TRACE 1562 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1563 1564 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1565 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1566 ompt_parallel_id, ompt_task_id); 1567 } 1568 1569 __ompt_lw_taskteam_unlink(master_th); 1570 // reset clear the task id only after unlinking the task 1571 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1572 #endif 1573 1574 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1575 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1576 ompt_parallel_id, ompt_task_id, 1577 OMPT_INVOKER(call_context)); 1578 } 1579 master_th->th.ompt_thread_info.state = ompt_state_overhead; 1580 } 1581 #endif 1582 return TRUE; 1583 } 1584 1585 parent_team->t.t_pkfn = microtask; 1586 #if OMPT_SUPPORT 1587 parent_team->t.ompt_team_info.microtask = unwrapped_task; 1588 #endif 1589 parent_team->t.t_invoke = invoker; 1590 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); 1591 parent_team->t.t_active_level ++; 1592 parent_team->t.t_level ++; 1593 1594 /* Change number of threads in the team if requested */ 1595 if ( master_set_numthreads ) { // The parallel has num_threads clause 1596 if ( master_set_numthreads < master_th->th.th_teams_size.nth ) { 1597 // AC: only can reduce the number of threads dynamically, cannot 
increase 1598 kmp_info_t **other_threads = parent_team->t.t_threads; 1599 parent_team->t.t_nproc = master_set_numthreads; 1600 for ( i = 0; i < master_set_numthreads; ++i ) { 1601 other_threads[i]->th.th_team_nproc = master_set_numthreads; 1602 } 1603 // Keep extra threads hot in the team for possible next parallels 1604 } 1605 master_th->th.th_set_nproc = 0; 1606 } 1607 1608 #if USE_DEBUGGER 1609 if ( __kmp_debugging ) { // Let debugger override number of threads. 1610 int nth = __kmp_omp_num_threads( loc ); 1611 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads. 1612 master_set_numthreads = nth; 1613 }; // if 1614 }; // if 1615 #endif 1616 1617 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); 1618 __kmp_internal_fork( loc, gtid, parent_team ); 1619 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); 1620 1621 /* Invoke microtask for MASTER thread */ 1622 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", 1623 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); 1624 1625 { 1626 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1627 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1628 if (! parent_team->t.t_invoke( gtid )) { 1629 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); 1630 } 1631 } 1632 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", 1633 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); 1634 KMP_MB(); /* Flush all pending memory write invalidates. */ 1635 1636 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); 1637 1638 return TRUE; 1639 } // Parallel closely nested in teams construct 1640 #endif /* OMP_40_ENABLED */ 1641 1642 #if KMP_DEBUG 1643 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 1644 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); 1645 } 1646 #endif 1647 1648 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) { 1649 nthreads = 1; 1650 } else { 1651 #if OMP_40_ENABLED 1652 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level)); 1653 #endif 1654 nthreads = master_set_numthreads ? 1655 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task 1656 1657 // Check if we need to take forkjoin lock? (no need for serialized parallel out of teams construct). 1658 // This code moved here from __kmp_reserve_threads() to speedup nested serialized parallels. 1659 if (nthreads > 1) { 1660 if ( ( !get__nested(master_th) && (root->r.r_in_parallel 1661 #if OMP_40_ENABLED 1662 && !enter_teams 1663 #endif /* OMP_40_ENABLED */ 1664 ) ) || ( __kmp_library == library_serial ) ) { 1665 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n", 1666 gtid, nthreads )); 1667 nthreads = 1; 1668 } 1669 } 1670 if ( nthreads > 1 ) { 1671 /* determine how many new threads we can use */ 1672 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 1673 1674 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads 1675 #if OMP_40_ENABLED 1676 /* AC: If we execute teams from parallel region (on host), then teams should be created 1677 but each can only have 1 thread if nesting is disabled. 
If teams called from serial region, 1678 then teams and their threads should be created regardless of the nesting setting. */ 1679 , enter_teams 1680 #endif /* OMP_40_ENABLED */ 1681 ); 1682 if ( nthreads == 1 ) { 1683 // Free lock for single thread execution here; 1684 // for multi-thread execution it will be freed later 1685 // after team of threads created and initialized 1686 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 1687 } 1688 } 1689 } 1690 KMP_DEBUG_ASSERT( nthreads > 0 ); 1691 1692 /* If we temporarily changed the set number of threads then restore it now */ 1693 master_th->th.th_set_nproc = 0; 1694 1695 /* create a serialized parallel region? */ 1696 if ( nthreads == 1 ) { 1697 /* josh todo: hypothetical question: what do we do for OS X*? */ 1698 #if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 1699 void * args[ argc ]; 1700 #else 1701 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) ); 1702 #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */ 1703 1704 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid )); 1705 1706 __kmpc_serialized_parallel(loc, gtid); 1707 1708 if ( call_context == fork_context_intel ) { 1709 /* TODO this sucks, use the compiler itself to pass args! :) */ 1710 master_th->th.th_serial_team->t.t_ident = loc; 1711 #if OMP_40_ENABLED 1712 if ( !ap ) { 1713 // revert change made in __kmpc_serialized_parallel() 1714 master_th->th.th_serial_team->t.t_level--; 1715 // Get args from parent team for teams construct 1716 1717 #if OMPT_SUPPORT 1718 void *dummy; 1719 void **exit_runtime_p; 1720 1721 ompt_lw_taskteam_t lw_taskteam; 1722 1723 if (ompt_enabled) { 1724 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1725 unwrapped_task, ompt_parallel_id); 1726 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1727 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1728 1729 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1730 1731 #if OMPT_TRACE 1732 my_task_id = lw_taskteam.ompt_task_info.task_id; 1733 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1734 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1735 ompt_parallel_id, my_task_id); 1736 } 1737 #endif 1738 1739 /* OMPT state */ 1740 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1741 } else { 1742 exit_runtime_p = &dummy; 1743 } 1744 #endif 1745 1746 { 1747 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1748 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1749 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv 1750 #if OMPT_SUPPORT 1751 , exit_runtime_p 1752 #endif 1753 ); 1754 } 1755 1756 #if OMPT_SUPPORT 1757 *exit_runtime_p = NULL; 1758 if (ompt_enabled) { 1759 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1760 1761 #if OMPT_TRACE 1762 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1763 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1764 ompt_parallel_id, ompt_task_id); 1765 } 1766 #endif 1767 1768 __ompt_lw_taskteam_unlink(master_th); 1769 // reset clear the task id only after unlinking the task 1770 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1771 1772 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1773 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1774 ompt_parallel_id, ompt_task_id, 1775 OMPT_INVOKER(call_context)); 1776 } 1777 master_th->th.ompt_thread_info.state = ompt_state_overhead; 1778 } 
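                /* Descriptive note added for clarity: at this point the serialized invocation has
                   finished, the implicit-task-end and parallel-end OMPT callbacks (if registered)
                   have fired, the lightweight task team has been unlinked, and the master thread
                   is back in the "overhead" state. */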
1779 #endif 1780 } else if ( microtask == (microtask_t)__kmp_teams_master ) { 1781 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team ); 1782 team = master_th->th.th_team; 1783 //team->t.t_pkfn = microtask; 1784 team->t.t_invoke = invoker; 1785 __kmp_alloc_argv_entries( argc, team, TRUE ); 1786 team->t.t_argc = argc; 1787 argv = (void**) team->t.t_argv; 1788 if ( ap ) { 1789 for( i=argc-1; i >= 0; --i ) 1790 // TODO: revert workaround for Intel(R) 64 tracker #96 1791 # if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1792 *argv++ = va_arg( *ap, void * ); 1793 # else 1794 *argv++ = va_arg( ap, void * ); 1795 # endif 1796 } else { 1797 for( i=0; i < argc; ++i ) 1798 // Get args from parent team for teams construct 1799 argv[i] = parent_team->t.t_argv[i]; 1800 } 1801 // AC: revert change made in __kmpc_serialized_parallel() 1802 // because initial code in teams should have level=0 1803 team->t.t_level--; 1804 // AC: call special invoker for outer "parallel" of the teams construct 1805 { 1806 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1807 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1808 invoker(gtid); 1809 } 1810 } else { 1811 #endif /* OMP_40_ENABLED */ 1812 argv = args; 1813 for( i=argc-1; i >= 0; --i ) 1814 // TODO: revert workaround for Intel(R) 64 tracker #96 1815 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1816 *argv++ = va_arg( *ap, void * ); 1817 #else 1818 *argv++ = va_arg( ap, void * ); 1819 #endif 1820 KMP_MB(); 1821 1822 #if OMPT_SUPPORT 1823 void *dummy; 1824 void **exit_runtime_p; 1825 1826 ompt_lw_taskteam_t lw_taskteam; 1827 1828 if (ompt_enabled) { 1829 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1830 unwrapped_task, ompt_parallel_id); 1831 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1832 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1833 1834 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1835 1836 #if OMPT_TRACE 1837 /* OMPT implicit task begin */ 1838 my_task_id = lw_taskteam.ompt_task_info.task_id; 1839 my_parallel_id = ompt_parallel_id; 1840 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1841 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1842 my_parallel_id, my_task_id); 1843 } 1844 #endif 1845 1846 /* OMPT state */ 1847 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1848 } else { 1849 exit_runtime_p = &dummy; 1850 } 1851 #endif 1852 1853 { 1854 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1855 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1856 __kmp_invoke_microtask( microtask, gtid, 0, argc, args 1857 #if OMPT_SUPPORT 1858 , exit_runtime_p 1859 #endif 1860 ); 1861 } 1862 1863 #if OMPT_SUPPORT 1864 *exit_runtime_p = NULL; 1865 if (ompt_enabled) { 1866 #if OMPT_TRACE 1867 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1868 1869 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1870 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1871 my_parallel_id, my_task_id); 1872 } 1873 #endif 1874 1875 __ompt_lw_taskteam_unlink(master_th); 1876 // reset clear the task id only after unlinking the task 1877 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1878 1879 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1880 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1881 ompt_parallel_id, ompt_task_id, 1882 OMPT_INVOKER(call_context)); 1883 } 1884 master_th->th.ompt_thread_info.state = ompt_state_overhead; 1885 } 1886 #endif 1887 #if 
OMP_40_ENABLED 1888 } 1889 #endif /* OMP_40_ENABLED */ 1890 } 1891 else if ( call_context == fork_context_gnu ) { 1892 #if OMPT_SUPPORT 1893 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *) 1894 __kmp_allocate(sizeof(ompt_lw_taskteam_t)); 1895 __ompt_lw_taskteam_init(lwt, master_th, gtid, 1896 unwrapped_task, ompt_parallel_id); 1897 1898 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); 1899 lwt->ompt_task_info.frame.exit_runtime_frame = NULL; 1900 __ompt_lw_taskteam_link(lwt, master_th); 1901 #endif 1902 1903 // we were called from GNU native code 1904 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); 1905 return FALSE; 1906 } 1907 else { 1908 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" ); 1909 } 1910 1911 1912 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); 1913 KMP_MB(); 1914 return FALSE; 1915 } 1916 1917 // GEH: only modify the executing flag in the case when not serialized 1918 // serialized case is handled in kmpc_serialized_parallel 1919 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n", 1920 parent_team->t.t_active_level, master_th, master_th->th.th_current_task, 1921 master_th->th.th_current_task->td_icvs.max_active_levels ) ); 1922 // TODO: GEH - cannot do this assertion because root thread not set up as executing 1923 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); 1924 master_th->th.th_current_task->td_flags.executing = 0; 1925 1926 #if OMP_40_ENABLED 1927 if ( !master_th->th.th_teams_microtask || level > teams_level ) 1928 #endif /* OMP_40_ENABLED */ 1929 { 1930 /* Increment our nested depth level */ 1931 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); 1932 } 1933 1934 // See if we need to make a copy of the ICVs. 1935 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; 1936 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) { 1937 nthreads_icv = __kmp_nested_nth.nth[level+1]; 1938 } 1939 else { 1940 nthreads_icv = 0; // don't update 1941 } 1942 1943 #if OMP_40_ENABLED 1944 // Figure out the proc_bind_policy for the new team. 1945 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 1946 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update 1947 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { 1948 proc_bind = proc_bind_false; 1949 } 1950 else { 1951 if (proc_bind == proc_bind_default) { 1952 // No proc_bind clause specified; use current proc-bind-var for this parallel region 1953 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 1954 } 1955 /* else: The proc_bind policy was specified explicitly on parallel clause. This 1956 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */ 1957 // Figure the value of proc-bind-var for the child threads. 
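        // Illustrative example (an assumption, not taken from the original source): with a nested
        // proc-bind list such as "spread,close", bind_types[0] == proc_bind_spread governs the
        // outermost parallel region, while bind_types[1] == proc_bind_close becomes the
        // proc-bind-var handed to its child threads by the check below.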
1958 if ((level+1 < __kmp_nested_proc_bind.used) 1959 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) { 1960 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1]; 1961 } 1962 } 1963 1964 // Reset for next parallel region 1965 master_th->th.th_set_proc_bind = proc_bind_default; 1966 #endif /* OMP_40_ENABLED */ 1967 1968 if ((nthreads_icv > 0) 1969 #if OMP_40_ENABLED 1970 || (proc_bind_icv != proc_bind_default) 1971 #endif /* OMP_40_ENABLED */ 1972 ) { 1973 kmp_internal_control_t new_icvs; 1974 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); 1975 new_icvs.next = NULL; 1976 if (nthreads_icv > 0) { 1977 new_icvs.nproc = nthreads_icv; 1978 } 1979 1980 #if OMP_40_ENABLED 1981 if (proc_bind_icv != proc_bind_default) { 1982 new_icvs.proc_bind = proc_bind_icv; 1983 } 1984 #endif /* OMP_40_ENABLED */ 1985 1986 /* allocate a new parallel team */ 1987 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); 1988 team = __kmp_allocate_team(root, nthreads, nthreads, 1989 #if OMPT_SUPPORT 1990 ompt_parallel_id, 1991 #endif 1992 #if OMP_40_ENABLED 1993 proc_bind, 1994 #endif 1995 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) ); 1996 } else { 1997 /* allocate a new parallel team */ 1998 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); 1999 team = __kmp_allocate_team(root, nthreads, nthreads, 2000 #if OMPT_SUPPORT 2001 ompt_parallel_id, 2002 #endif 2003 #if OMP_40_ENABLED 2004 proc_bind, 2005 #endif 2006 &master_th->th.th_current_task->td_icvs, argc 2007 USE_NESTED_HOT_ARG(master_th) ); 2008 } 2009 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) ); 2010 2011 /* setup the new team */ 2012 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); 2013 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); 2014 KMP_CHECK_UPDATE(team->t.t_ident, loc); 2015 KMP_CHECK_UPDATE(team->t.t_parent, parent_team); 2016 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); 2017 #if OMPT_SUPPORT 2018 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task); 2019 #endif 2020 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */ 2021 // TODO: parent_team->t.t_level == INT_MAX ??? 2022 #if OMP_40_ENABLED 2023 if ( !master_th->th.th_teams_microtask || level > teams_level ) { 2024 #endif /* OMP_40_ENABLED */ 2025 int new_level = parent_team->t.t_level + 1; 2026 KMP_CHECK_UPDATE(team->t.t_level, new_level); 2027 new_level = parent_team->t.t_active_level + 1; 2028 KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 2029 #if OMP_40_ENABLED 2030 } else { 2031 // AC: Do not increase parallel level at start of the teams construct 2032 int new_level = parent_team->t.t_level; 2033 KMP_CHECK_UPDATE(team->t.t_level, new_level); 2034 new_level = parent_team->t.t_active_level; 2035 KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 2036 } 2037 #endif /* OMP_40_ENABLED */ 2038 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); 2039 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk) 2040 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule 2041 2042 #if OMP_40_ENABLED 2043 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); 2044 #endif 2045 2046 // Update the floating point rounding in the team if required. 2047 propagateFPControl(team); 2048 2049 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 2050 // Set master's task team to team's task team. 
Unless this is hot team, it should be NULL. 2051 #if 0 2052 // Patch out an assertion that trips while the runtime seems to operate correctly. 2053 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch. 2054 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); 2055 #endif 2056 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n", 2057 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, 2058 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) ); 2059 2060 if ( active_level || master_th->th.th_task_team ) { 2061 // Take a memo of master's task_state 2062 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 2063 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size 2064 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz; 2065 kmp_uint8 *old_stack, *new_stack; 2066 kmp_uint32 i; 2067 new_stack = (kmp_uint8 *)__kmp_allocate(new_size); 2068 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) { 2069 new_stack[i] = master_th->th.th_task_state_memo_stack[i]; 2070 } 2071 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack 2072 new_stack[i] = 0; 2073 } 2074 old_stack = master_th->th.th_task_state_memo_stack; 2075 master_th->th.th_task_state_memo_stack = new_stack; 2076 master_th->th.th_task_state_stack_sz = new_size; 2077 __kmp_free(old_stack); 2078 } 2079 // Store master's task_state on stack 2080 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; 2081 master_th->th.th_task_state_top++; 2082 #if KMP_NESTED_HOT_TEAMS 2083 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team 2084 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; 2085 } 2086 else { 2087 #endif 2088 master_th->th.th_task_state = 0; 2089 #if KMP_NESTED_HOT_TEAMS 2090 } 2091 #endif 2092 } 2093 #if !KMP_NESTED_HOT_TEAMS 2094 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team)); 2095 #endif 2096 } 2097 2098 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", 2099 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc )); 2100 KMP_DEBUG_ASSERT( team != root->r.r_hot_team || 2101 ( team->t.t_master_tid == 0 && 2102 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) )); 2103 KMP_MB(); 2104 2105 /* now, setup the arguments */ 2106 argv = (void**)team->t.t_argv; 2107 #if OMP_40_ENABLED 2108 if ( ap ) { 2109 #endif /* OMP_40_ENABLED */ 2110 for ( i=argc-1; i >= 0; --i ) { 2111 // TODO: revert workaround for Intel(R) 64 tracker #96 2112 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 2113 void *new_argv = va_arg(*ap, void *); 2114 #else 2115 void *new_argv = va_arg(ap, void *); 2116 #endif 2117 KMP_CHECK_UPDATE(*argv, new_argv); 2118 argv++; 2119 } 2120 #if OMP_40_ENABLED 2121 } else { 2122 for ( i=0; i < argc; ++i ) { 2123 // Get args from parent team for teams construct 2124 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); 2125 } 2126 } 2127 #endif /* OMP_40_ENABLED */ 2128 2129 /* now actually fork the threads */ 2130 KMP_CHECK_UPDATE(team->t.t_master_active, master_active); 2131 if (!root->r.r_active) // Only do assignment if it prevents cache 
ping-pong 2132 root->r.r_active = TRUE; 2133 2134 __kmp_fork_team_threads( root, team, master_th, gtid ); 2135 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc ); 2136 2137 #if OMPT_SUPPORT 2138 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 2139 #endif 2140 2141 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 2142 2143 #if USE_ITT_BUILD 2144 if ( team->t.t_active_level == 1 // only report frames at level 1 2145 # if OMP_40_ENABLED 2146 && !master_th->th.th_teams_microtask // not in teams construct 2147 # endif /* OMP_40_ENABLED */ 2148 ) { 2149 #if USE_ITT_NOTIFY 2150 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && 2151 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) 2152 { 2153 kmp_uint64 tmp_time = 0; 2154 if ( __itt_get_timestamp_ptr ) 2155 tmp_time = __itt_get_timestamp(); 2156 // Internal fork - report frame begin 2157 master_th->th.th_frame_time = tmp_time; 2158 if ( __kmp_forkjoin_frames_mode == 3 ) 2159 team->t.t_region_time = tmp_time; 2160 } else // only one notification scheme (either "submit" or "forking/joined", not both) 2161 #endif /* USE_ITT_NOTIFY */ 2162 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) && 2163 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode ) 2164 { // Mark start of "parallel" region for VTune. 2165 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); 2166 } 2167 } 2168 #endif /* USE_ITT_BUILD */ 2169 2170 /* now go on and do the work */ 2171 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team ); 2172 KMP_MB(); 2173 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", 2174 root, team, master_th, gtid)); 2175 2176 #if USE_ITT_BUILD 2177 if ( __itt_stack_caller_create_ptr ) { 2178 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier 2179 } 2180 #endif /* USE_ITT_BUILD */ 2181 2182 #if OMP_40_ENABLED 2183 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute 2184 #endif /* OMP_40_ENABLED */ 2185 { 2186 __kmp_internal_fork( loc, gtid, team ); 2187 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", 2188 root, team, master_th, gtid)); 2189 } 2190 2191 if (call_context == fork_context_gnu) { 2192 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); 2193 return TRUE; 2194 } 2195 2196 /* Invoke microtask for MASTER thread */ 2197 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", 2198 gtid, team->t.t_id, team->t.t_pkfn ) ); 2199 } // END of timer KMP_fork_call block 2200 2201 { 2202 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 2203 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 2204 if (! team->t.t_invoke( gtid )) { 2205 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); 2206 } 2207 } 2208 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", 2209 gtid, team->t.t_id, team->t.t_pkfn ) ); 2210 KMP_MB(); /* Flush all pending memory write invalidates. */ 2211 2212 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); 2213 2214 #if OMPT_SUPPORT 2215 if (ompt_enabled) { 2216 master_th->th.ompt_thread_info.state = ompt_state_overhead; 2217 } 2218 #endif 2219 2220 return TRUE; 2221 } 2222 2223 #if OMPT_SUPPORT 2224 static inline void 2225 __kmp_join_restore_state( 2226 kmp_info_t *thread, 2227 kmp_team_t *team) 2228 { 2229 // restore state outside the region 2230 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ? 
2231 ompt_state_work_serial : ompt_state_work_parallel); 2232 } 2233 2234 static inline void 2235 __kmp_join_ompt( 2236 kmp_info_t *thread, 2237 kmp_team_t *team, 2238 ompt_parallel_id_t parallel_id, 2239 fork_context_e fork_context) 2240 { 2241 ompt_task_info_t *task_info = __ompt_get_taskinfo(0); 2242 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 2243 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 2244 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); 2245 } 2246 2247 task_info->frame.reenter_runtime_frame = NULL; 2248 __kmp_join_restore_state(thread,team); 2249 } 2250 #endif 2251 2252 void 2253 __kmp_join_call(ident_t *loc, int gtid 2254 #if OMPT_SUPPORT 2255 , enum fork_context_e fork_context 2256 #endif 2257 #if OMP_40_ENABLED 2258 , int exit_teams 2259 #endif /* OMP_40_ENABLED */ 2260 ) 2261 { 2262 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call); 2263 kmp_team_t *team; 2264 kmp_team_t *parent_team; 2265 kmp_info_t *master_th; 2266 kmp_root_t *root; 2267 int master_active; 2268 int i; 2269 2270 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid )); 2271 2272 /* setup current data */ 2273 master_th = __kmp_threads[ gtid ]; 2274 root = master_th->th.th_root; 2275 team = master_th->th.th_team; 2276 parent_team = team->t.t_parent; 2277 2278 master_th->th.th_ident = loc; 2279 2280 #if OMPT_SUPPORT 2281 if (ompt_enabled) { 2282 master_th->th.ompt_thread_info.state = ompt_state_overhead; 2283 } 2284 #endif 2285 2286 #if KMP_DEBUG 2287 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 2288 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n", 2289 __kmp_gtid_from_thread( master_th ), team, 2290 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) ); 2291 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] ); 2292 } 2293 #endif 2294 2295 if( team->t.t_serialized ) { 2296 #if OMP_40_ENABLED 2297 if ( master_th->th.th_teams_microtask ) { 2298 // We are in teams construct 2299 int level = team->t.t_level; 2300 int tlevel = master_th->th.th_teams_level; 2301 if ( level == tlevel ) { 2302 // AC: we haven't incremented it earlier at start of teams construct, 2303 // so do it here - at the end of teams construct 2304 team->t.t_level++; 2305 } else if ( level == tlevel + 1 ) { 2306 // AC: we are exiting parallel inside teams, need to increment serialization 2307 // in order to restore it in the next call to __kmpc_end_serialized_parallel 2308 team->t.t_serialized++; 2309 } 2310 } 2311 #endif /* OMP_40_ENABLED */ 2312 __kmpc_end_serialized_parallel( loc, gtid ); 2313 2314 #if OMPT_SUPPORT 2315 if (ompt_enabled) { 2316 __kmp_join_restore_state(master_th, parent_team); 2317 } 2318 #endif 2319 2320 return; 2321 } 2322 2323 master_active = team->t.t_master_active; 2324 2325 #if OMP_40_ENABLED 2326 if (!exit_teams) 2327 #endif /* OMP_40_ENABLED */ 2328 { 2329 // AC: No barrier for internal teams at exit from teams construct. 2330 // But there is barrier for external team (league). 
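        // Descriptive note added for clarity: __kmp_internal_join below executes the join barrier,
        // so the master does not proceed to the cleanup and parent-team restoration that follow
        // until every worker of this team has arrived.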
2331 __kmp_internal_join( loc, gtid, team ); 2332 } 2333 #if OMP_40_ENABLED 2334 else { 2335 master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel) 2336 } 2337 #endif /* OMP_40_ENABLED */ 2338 2339 KMP_MB(); 2340 2341 #if OMPT_SUPPORT 2342 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; 2343 #endif 2344 2345 #if USE_ITT_BUILD 2346 if ( __itt_stack_caller_create_ptr ) { 2347 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier 2348 } 2349 2350 // Mark end of "parallel" region for VTune. 2351 if ( team->t.t_active_level == 1 2352 # if OMP_40_ENABLED 2353 && !master_th->th.th_teams_microtask /* not in teams construct */ 2354 # endif /* OMP_40_ENABLED */ 2355 ) { 2356 master_th->th.th_ident = loc; 2357 // only one notification scheme (either "submit" or "forking/joined", not both) 2358 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 ) 2359 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, 2360 0, loc, master_th->th.th_team_nproc, 1 ); 2361 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) && 2362 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames ) 2363 __kmp_itt_region_joined( gtid ); 2364 } // active_level == 1 2365 #endif /* USE_ITT_BUILD */ 2366 2367 #if OMP_40_ENABLED 2368 if ( master_th->th.th_teams_microtask && 2369 !exit_teams && 2370 team->t.t_pkfn != (microtask_t)__kmp_teams_master && 2371 team->t.t_level == master_th->th.th_teams_level + 1 ) { 2372 // AC: We need to leave the team structure intact at the end 2373 // of parallel inside the teams construct, so that at the next 2374 // parallel same (hot) team works, only adjust nesting levels 2375 2376 /* Decrement our nested depth level */ 2377 team->t.t_level --; 2378 team->t.t_active_level --; 2379 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); 2380 2381 /* Restore number of threads in the team if needed */ 2382 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) { 2383 int old_num = master_th->th.th_team_nproc; 2384 int new_num = master_th->th.th_teams_size.nth; 2385 kmp_info_t **other_threads = team->t.t_threads; 2386 team->t.t_nproc = new_num; 2387 for ( i = 0; i < old_num; ++i ) { 2388 other_threads[i]->th.th_team_nproc = new_num; 2389 } 2390 // Adjust states of non-used threads of the team 2391 for ( i = old_num; i < new_num; ++i ) { 2392 // Re-initialize thread's barrier data. 
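                // (Descriptive note added for clarity: threads in [old_num, new_num) sat out the
                //  just-finished parallel region, so their per-thread b_arrived counters are stale;
                //  copying the team's current barrier state keeps them consistent for the next
                //  fork/join cycle.)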
2393 int b; 2394 kmp_balign_t * balign = other_threads[i]->th.th_bar; 2395 for ( b = 0; b < bs_last_barrier; ++ b ) { 2396 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; 2397 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 2398 #if USE_DEBUGGER 2399 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; 2400 #endif 2401 } 2402 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 2403 // Synchronize thread's task state 2404 other_threads[i]->th.th_task_state = master_th->th.th_task_state; 2405 } 2406 } 2407 } 2408 2409 #if OMPT_SUPPORT 2410 if (ompt_enabled) { 2411 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); 2412 } 2413 #endif 2414 2415 return; 2416 } 2417 #endif /* OMP_40_ENABLED */ 2418 2419 /* do cleanup and restore the parent team */ 2420 master_th->th.th_info .ds.ds_tid = team->t.t_master_tid; 2421 master_th->th.th_local.this_construct = team->t.t_master_this_cons; 2422 2423 master_th->th.th_dispatch = 2424 & parent_team->t.t_dispatch[ team->t.t_master_tid ]; 2425 2426 /* jc: The following lock has instructions with REL and ACQ semantics, 2427 separating the parallel user code called in this parallel region 2428 from the serial user code called after this function returns. 2429 */ 2430 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 2431 2432 #if OMP_40_ENABLED 2433 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level ) 2434 #endif /* OMP_40_ENABLED */ 2435 { 2436 /* Decrement our nested depth level */ 2437 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); 2438 } 2439 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 ); 2440 2441 #if OMPT_SUPPORT && OMPT_TRACE 2442 if(ompt_enabled){ 2443 ompt_task_info_t *task_info = __ompt_get_taskinfo(0); 2444 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 2445 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 2446 parallel_id, task_info->task_id); 2447 } 2448 task_info->frame.exit_runtime_frame = NULL; 2449 task_info->task_id = 0; 2450 } 2451 #endif 2452 2453 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 2454 0, master_th, team ) ); 2455 __kmp_pop_current_task_from_thread( master_th ); 2456 2457 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 2458 // 2459 // Restore master thread's partition. 2460 // 2461 master_th->th.th_first_place = team->t.t_first_place; 2462 master_th->th.th_last_place = team->t.t_last_place; 2463 #endif /* OMP_40_ENABLED */ 2464 2465 updateHWFPControl (team); 2466 2467 if ( root->r.r_active != master_active ) 2468 root->r.r_active = master_active; 2469 2470 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads 2471 2472 /* this race was fun to find. make sure the following is in the critical 2473 * region otherwise assertions may fail occasionally since the old team 2474 * may be reallocated and the hierarchy appears inconsistent. it is 2475 * actually safe to run and won't cause any bugs, but will cause those 2476 * assertion failures. 
it's only one deref&assign so might as well put this 2477 * in the critical region */ 2478 master_th->th.th_team = parent_team; 2479 master_th->th.th_team_nproc = parent_team->t.t_nproc; 2480 master_th->th.th_team_master = parent_team->t.t_threads[0]; 2481 master_th->th.th_team_serialized = parent_team->t.t_serialized; 2482 2483 /* restore serialized team, if need be */ 2484 if( parent_team->t.t_serialized && 2485 parent_team != master_th->th.th_serial_team && 2486 parent_team != root->r.r_root_team ) { 2487 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) ); 2488 master_th->th.th_serial_team = parent_team; 2489 } 2490 2491 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 2492 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack 2493 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 2494 // Remember master's state if we re-use this nested hot team 2495 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; 2496 --master_th->th.th_task_state_top; // pop 2497 // Now restore state at this level 2498 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; 2499 } 2500 // Copy the task team from the parent team to the master thread 2501 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state]; 2502 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", 2503 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) ); 2504 } 2505 2506 // TODO: GEH - cannot do this assertion because root thread not set up as executing 2507 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 ); 2508 master_th->th.th_current_task->td_flags.executing = 1; 2509 2510 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 2511 2512 #if OMPT_SUPPORT 2513 if (ompt_enabled) { 2514 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); 2515 } 2516 #endif 2517 2518 KMP_MB(); 2519 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid )); 2520 } 2521 2522 /* ------------------------------------------------------------------------ */ 2523 /* ------------------------------------------------------------------------ */ 2524 2525 /* Check whether we should push an internal control record onto the 2526 serial team stack. If so, do it. 
*/ 2527 void 2528 __kmp_save_internal_controls ( kmp_info_t * thread ) 2529 { 2530 2531 if ( thread->th.th_team != thread->th.th_serial_team ) { 2532 return; 2533 } 2534 if (thread->th.th_team->t.t_serialized > 1) { 2535 int push = 0; 2536 2537 if (thread->th.th_team->t.t_control_stack_top == NULL) { 2538 push = 1; 2539 } else { 2540 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level != 2541 thread->th.th_team->t.t_serialized ) { 2542 push = 1; 2543 } 2544 } 2545 if (push) { /* push a record on the serial team's stack */ 2546 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t)); 2547 2548 copy_icvs( control, & thread->th.th_current_task->td_icvs ); 2549 2550 control->serial_nesting_level = thread->th.th_team->t.t_serialized; 2551 2552 control->next = thread->th.th_team->t.t_control_stack_top; 2553 thread->th.th_team->t.t_control_stack_top = control; 2554 } 2555 } 2556 } 2557 2558 /* Changes set_nproc */ 2559 void 2560 __kmp_set_num_threads( int new_nth, int gtid ) 2561 { 2562 kmp_info_t *thread; 2563 kmp_root_t *root; 2564 2565 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth )); 2566 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2567 2568 if (new_nth < 1) 2569 new_nth = 1; 2570 else if (new_nth > __kmp_max_nth) 2571 new_nth = __kmp_max_nth; 2572 2573 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); 2574 thread = __kmp_threads[gtid]; 2575 2576 __kmp_save_internal_controls( thread ); 2577 2578 set__nproc( thread, new_nth ); 2579 2580 // 2581 // If this omp_set_num_threads() call will cause the hot team size to be 2582 // reduced (in the absence of a num_threads clause), then reduce it now, 2583 // rather than waiting for the next parallel region. 2584 // 2585 root = thread->th.th_root; 2586 if ( __kmp_init_parallel && ( ! root->r.r_active ) 2587 && ( root->r.r_hot_team->t.t_nproc > new_nth ) 2588 #if KMP_NESTED_HOT_TEAMS 2589 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode 2590 #endif 2591 ) { 2592 kmp_team_t *hot_team = root->r.r_hot_team; 2593 int f; 2594 2595 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 2596 2597 // Release the extra threads we don't need any more. 2598 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) { 2599 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); 2600 if ( __kmp_tasking_mode != tskm_immediate_exec) { 2601 // When decreasing team size, threads no longer in the team should unref task team. 2602 hot_team->t.t_threads[f]->th.th_task_team = NULL; 2603 } 2604 __kmp_free_thread( hot_team->t.t_threads[f] ); 2605 hot_team->t.t_threads[f] = NULL; 2606 } 2607 hot_team->t.t_nproc = new_nth; 2608 #if KMP_NESTED_HOT_TEAMS 2609 if( thread->th.th_hot_teams ) { 2610 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team ); 2611 thread->th.th_hot_teams[0].hot_team_nth = new_nth; 2612 } 2613 #endif 2614 2615 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 2616 2617 // 2618 // Update the t_nproc field in the threads that are still active. 
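        // (Descriptive note added for clarity: the surplus hot-team threads were already released
        // above, so only the surviving new_nth threads need their cached th_team_nproc refreshed.
        // Illustrative trigger, not from the original source: omp_set_num_threads(2) called from
        // serial user code after a wider parallel region takes this shrink path.)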
2619 // 2620 for( f=0 ; f < new_nth; f++ ) { 2621 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); 2622 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; 2623 } 2624 // Special flag in case omp_set_num_threads() call 2625 hot_team->t.t_size_changed = -1; 2626 } 2627 } 2628 2629 /* Changes max_active_levels */ 2630 void 2631 __kmp_set_max_active_levels( int gtid, int max_active_levels ) 2632 { 2633 kmp_info_t *thread; 2634 2635 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); 2636 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2637 2638 // validate max_active_levels 2639 if( max_active_levels < 0 ) { 2640 KMP_WARNING( ActiveLevelsNegative, max_active_levels ); 2641 // We ignore this call if the user has specified a negative value. 2642 // The current setting won't be changed. The last valid setting will be used. 2643 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var). 2644 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); 2645 return; 2646 } 2647 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) { 2648 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ] 2649 // We allow a zero value. (implementation defined behavior) 2650 } else { 2651 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT ); 2652 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; 2653 // Current upper limit is MAX_INT. (implementation defined behavior) 2654 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior) 2655 // Actually, the flow should never get here until we use MAX_INT limit. 2656 } 2657 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); 2658 2659 thread = __kmp_threads[ gtid ]; 2660 2661 __kmp_save_internal_controls( thread ); 2662 2663 set__max_active_levels( thread, max_active_levels ); 2664 2665 } 2666 2667 /* Gets max_active_levels */ 2668 int 2669 __kmp_get_max_active_levels( int gtid ) 2670 { 2671 kmp_info_t *thread; 2672 2673 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) ); 2674 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2675 2676 thread = __kmp_threads[ gtid ]; 2677 KMP_DEBUG_ASSERT( thread->th.th_current_task ); 2678 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n", 2679 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) ); 2680 return thread->th.th_current_task->td_icvs.max_active_levels; 2681 } 2682 2683 /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ 2684 void 2685 __kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk ) 2686 { 2687 kmp_info_t *thread; 2688 // kmp_team_t *team; 2689 2690 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk )); 2691 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2692 2693 // Check if the kind parameter is valid, correct if needed. 
2694 // Valid parameters should fit in one of two intervals - standard or extended: 2695 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper> 2696 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 2697 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper || 2698 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) ) 2699 { 2700 // TODO: Hint needs attention in case we change the default schedule. 2701 __kmp_msg( 2702 kmp_ms_warning, 2703 KMP_MSG( ScheduleKindOutOfRange, kind ), 2704 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ), 2705 __kmp_msg_null 2706 ); 2707 kind = kmp_sched_default; 2708 chunk = 0; // ignore chunk value in case of bad kind 2709 } 2710 2711 thread = __kmp_threads[ gtid ]; 2712 2713 __kmp_save_internal_controls( thread ); 2714 2715 if ( kind < kmp_sched_upper_std ) { 2716 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) { 2717 // differ static chunked vs. unchunked: 2718 // chunk should be invalid to indicate unchunked schedule (which is the default) 2719 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; 2720 } else { 2721 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ]; 2722 } 2723 } else { 2724 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; 2725 thread->th.th_current_task->td_icvs.sched.r_sched_type = 2726 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; 2727 } 2728 if ( kind == kmp_sched_auto ) { 2729 // ignore parameter chunk for schedule auto 2730 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; 2731 } else { 2732 thread->th.th_current_task->td_icvs.sched.chunk = chunk; 2733 } 2734 } 2735 2736 /* Gets def_sched_var ICV values */ 2737 void 2738 __kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk ) 2739 { 2740 kmp_info_t *thread; 2741 enum sched_type th_type; 2742 2743 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid )); 2744 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2745 2746 thread = __kmp_threads[ gtid ]; 2747 2748 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; 2749 2750 switch ( th_type ) { 2751 case kmp_sch_static: 2752 case kmp_sch_static_greedy: 2753 case kmp_sch_static_balanced: 2754 *kind = kmp_sched_static; 2755 *chunk = 0; // chunk was not set, try to show this fact via zero value 2756 return; 2757 case kmp_sch_static_chunked: 2758 *kind = kmp_sched_static; 2759 break; 2760 case kmp_sch_dynamic_chunked: 2761 *kind = kmp_sched_dynamic; 2762 break; 2763 case kmp_sch_guided_chunked: 2764 case kmp_sch_guided_iterative_chunked: 2765 case kmp_sch_guided_analytical_chunked: 2766 *kind = kmp_sched_guided; 2767 break; 2768 case kmp_sch_auto: 2769 *kind = kmp_sched_auto; 2770 break; 2771 case kmp_sch_trapezoidal: 2772 *kind = kmp_sched_trapezoidal; 2773 break; 2774 #if KMP_STATIC_STEAL_ENABLED 2775 case kmp_sch_static_steal: 2776 *kind = kmp_sched_static_steal; 2777 break; 2778 #endif 2779 default: 2780 KMP_FATAL( UnknownSchedulingType, th_type ); 2781 } 2782 2783 *chunk = thread->th.th_current_task->td_icvs.sched.chunk; 2784 } 2785 2786 int 2787 __kmp_get_ancestor_thread_num( int gtid, int level ) { 2788 2789 int ii, dd; 2790 kmp_team_t *team; 2791 kmp_info_t *thr; 2792 2793 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level )); 2794 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2795 2796 // validate level 2797 if( level == 0 ) return 0; 2798 if( level < 0 ) return -1; 2799 thr = __kmp_threads[ 
gtid ]; 2800 team = thr->th.th_team; 2801 ii = team->t.t_level; 2802 if( level > ii ) return -1; 2803 2804 #if OMP_40_ENABLED 2805 if( thr->th.th_teams_microtask ) { 2806 // AC: we are in teams region where multiple nested teams have same level 2807 int tlevel = thr->th.th_teams_level; // the level of the teams construct 2808 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) 2809 KMP_DEBUG_ASSERT( ii >= tlevel ); 2810 // AC: As we need to pass by the teams league, we need to artificially increase ii 2811 if ( ii == tlevel ) { 2812 ii += 2; // three teams have same level 2813 } else { 2814 ii ++; // two teams have same level 2815 } 2816 } 2817 } 2818 #endif 2819 2820 if( ii == level ) return __kmp_tid_from_gtid( gtid ); 2821 2822 dd = team->t.t_serialized; 2823 level++; 2824 while( ii > level ) 2825 { 2826 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) 2827 { 2828 } 2829 if( ( team->t.t_serialized ) && ( !dd ) ) { 2830 team = team->t.t_parent; 2831 continue; 2832 } 2833 if( ii > level ) { 2834 team = team->t.t_parent; 2835 dd = team->t.t_serialized; 2836 ii--; 2837 } 2838 } 2839 2840 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid ); 2841 } 2842 2843 int 2844 __kmp_get_team_size( int gtid, int level ) { 2845 2846 int ii, dd; 2847 kmp_team_t *team; 2848 kmp_info_t *thr; 2849 2850 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level )); 2851 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2852 2853 // validate level 2854 if( level == 0 ) return 1; 2855 if( level < 0 ) return -1; 2856 thr = __kmp_threads[ gtid ]; 2857 team = thr->th.th_team; 2858 ii = team->t.t_level; 2859 if( level > ii ) return -1; 2860 2861 #if OMP_40_ENABLED 2862 if( thr->th.th_teams_microtask ) { 2863 // AC: we are in teams region where multiple nested teams have same level 2864 int tlevel = thr->th.th_teams_level; // the level of the teams construct 2865 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) 2866 KMP_DEBUG_ASSERT( ii >= tlevel ); 2867 // AC: As we need to pass by the teams league, we need to artificially increase ii 2868 if ( ii == tlevel ) { 2869 ii += 2; // three teams have same level 2870 } else { 2871 ii ++; // two teams have same level 2872 } 2873 } 2874 } 2875 #endif 2876 2877 while( ii > level ) 2878 { 2879 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) 2880 { 2881 } 2882 if( team->t.t_serialized && ( !dd ) ) { 2883 team = team->t.t_parent; 2884 continue; 2885 } 2886 if( ii > level ) { 2887 team = team->t.t_parent; 2888 ii--; 2889 } 2890 } 2891 2892 return team->t.t_nproc; 2893 } 2894 2895 kmp_r_sched_t 2896 __kmp_get_schedule_global() { 2897 // This routine created because pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided) 2898 // may be changed by kmp_set_defaults independently. So one can get the updated schedule here. 
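    // Illustrative mapping, inferred from the code below (an assumption, not authoritative):
    //   run-time schedule "static"  with no chunk -> r_sched_type = __kmp_static (balanced or greedy), chunk = KMP_DEFAULT_CHUNK
    //   run-time schedule "guided"  with no chunk -> r_sched_type = __kmp_guided (iterative or analytical), chunk = KMP_DEFAULT_CHUNK
    //   run-time schedule "dynamic" with chunk 4  -> r_sched_type = kmp_sch_dynamic_chunked, chunk = 4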
2899 2900 kmp_r_sched_t r_sched; 2901 2902 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided 2903 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times, 2904 // and thus have different run-time schedules in different roots (even in OMP 2.5) 2905 if ( __kmp_sched == kmp_sch_static ) { 2906 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy) 2907 } else if ( __kmp_sched == kmp_sch_guided_chunked ) { 2908 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical) 2909 } else { 2910 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other 2911 } 2912 2913 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set) 2914 r_sched.chunk = KMP_DEFAULT_CHUNK; 2915 } else { 2916 r_sched.chunk = __kmp_chunk; 2917 } 2918 2919 return r_sched; 2920 } 2921 2922 /* ------------------------------------------------------------------------ */ 2923 /* ------------------------------------------------------------------------ */ 2924 2925 2926 /* 2927 * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE) 2928 * at least argc number of *t_argv entries for the requested team. 2929 */ 2930 static void 2931 __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ) 2932 { 2933 2934 KMP_DEBUG_ASSERT( team ); 2935 if( !realloc || argc > team->t.t_max_argc ) { 2936 2937 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n", 2938 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 )); 2939 /* if previously allocated heap space for args, free them */ 2940 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] ) 2941 __kmp_free( (void *) team->t.t_argv ); 2942 2943 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) { 2944 /* use unused space in the cache line for arguments */ 2945 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; 2946 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n", 2947 team->t.t_id, team->t.t_max_argc )); 2948 team->t.t_argv = &team->t.t_inline_argv[0]; 2949 if ( __kmp_storage_map ) { 2950 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0], 2951 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], 2952 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), 2953 "team_%d.t_inline_argv", 2954 team->t.t_id ); 2955 } 2956 } else { 2957 /* allocate space for arguments in the heap */ 2958 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ? 2959 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc; 2960 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n", 2961 team->t.t_id, team->t.t_max_argc )); 2962 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc ); 2963 if ( __kmp_storage_map ) { 2964 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc], 2965 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", 2966 team->t.t_id ); 2967 } 2968 } 2969 } 2970 } 2971 2972 static void 2973 __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) 2974 { 2975 int i; 2976 int num_disp_buff = max_nth > 1 ? 
__kmp_dispatch_num_buffers : 2; 2977 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth ); 2978 team->t.t_disp_buffer = (dispatch_shared_info_t*) 2979 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff ); 2980 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth ); 2981 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth ); 2982 team->t.t_max_nproc = max_nth; 2983 2984 /* setup dispatch buffers */ 2985 for(i = 0 ; i < num_disp_buff; ++i) { 2986 team->t.t_disp_buffer[i].buffer_index = i; 2987 #if OMP_45_ENABLED 2988 team->t.t_disp_buffer[i].doacross_buf_idx = i; 2989 #endif 2990 } 2991 } 2992 2993 static void 2994 __kmp_free_team_arrays(kmp_team_t *team) { 2995 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */ 2996 int i; 2997 for ( i = 0; i < team->t.t_max_nproc; ++ i ) { 2998 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) { 2999 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer ); 3000 team->t.t_dispatch[ i ].th_disp_buffer = NULL; 3001 }; // if 3002 }; // for 3003 __kmp_free(team->t.t_threads); 3004 __kmp_free(team->t.t_disp_buffer); 3005 __kmp_free(team->t.t_dispatch); 3006 __kmp_free(team->t.t_implicit_task_taskdata); 3007 team->t.t_threads = NULL; 3008 team->t.t_disp_buffer = NULL; 3009 team->t.t_dispatch = NULL; 3010 team->t.t_implicit_task_taskdata = 0; 3011 } 3012 3013 static void 3014 __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { 3015 kmp_info_t **oldThreads = team->t.t_threads; 3016 3017 __kmp_free(team->t.t_disp_buffer); 3018 __kmp_free(team->t.t_dispatch); 3019 __kmp_free(team->t.t_implicit_task_taskdata); 3020 __kmp_allocate_team_arrays(team, max_nth); 3021 3022 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*)); 3023 3024 __kmp_free(oldThreads); 3025 } 3026 3027 static kmp_internal_control_t 3028 __kmp_get_global_icvs( void ) { 3029 3030 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals 3031 3032 #if OMP_40_ENABLED 3033 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 ); 3034 #endif /* OMP_40_ENABLED */ 3035 3036 kmp_internal_control_t g_icvs = { 3037 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field 3038 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread) 3039 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread) 3040 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set 3041 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime 3042 #if KMP_USE_MONITOR 3043 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals 3044 #endif 3045 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread) 3046 // (use a max ub on value if __kmp_parallel_initialize not called yet) 3047 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels 3048 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair 3049 #if OMP_40_ENABLED 3050 __kmp_nested_proc_bind.bind_types[0], 3051 __kmp_default_device, 3052 #endif /* OMP_40_ENABLED */ 3053 NULL //struct kmp_internal_control *next; 3054 }; 3055 3056 return g_icvs; 3057 } 3058 3059 static kmp_internal_control_t 3060 __kmp_get_x_global_icvs( const kmp_team_t *team ) 
{ 3061 3062 kmp_internal_control_t gx_icvs; 3063 gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls 3064 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs ); 3065 gx_icvs.next = NULL; 3066 3067 return gx_icvs; 3068 } 3069 3070 static void 3071 __kmp_initialize_root( kmp_root_t *root ) 3072 { 3073 int f; 3074 kmp_team_t *root_team; 3075 kmp_team_t *hot_team; 3076 int hot_team_max_nth; 3077 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals 3078 kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 3079 KMP_DEBUG_ASSERT( root ); 3080 KMP_ASSERT( ! root->r.r_begin ); 3081 3082 /* setup the root state structure */ 3083 __kmp_init_lock( &root->r.r_begin_lock ); 3084 root->r.r_begin = FALSE; 3085 root->r.r_active = FALSE; 3086 root->r.r_in_parallel = 0; 3087 root->r.r_blocktime = __kmp_dflt_blocktime; 3088 root->r.r_nested = __kmp_dflt_nested; 3089 3090 /* setup the root team for this task */ 3091 /* allocate the root team structure */ 3092 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) ); 3093 3094 root_team = 3095 __kmp_allocate_team( 3096 root, 3097 1, // new_nproc 3098 1, // max_nproc 3099 #if OMPT_SUPPORT 3100 0, // root parallel id 3101 #endif 3102 #if OMP_40_ENABLED 3103 __kmp_nested_proc_bind.bind_types[0], 3104 #endif 3105 &r_icvs, 3106 0 // argc 3107 USE_NESTED_HOT_ARG(NULL) // master thread is unknown 3108 ); 3109 #if USE_DEBUGGER 3110 // Non-NULL value should be assigned to make the debugger display the root team. 3111 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 )); 3112 #endif 3113 3114 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) ); 3115 3116 root->r.r_root_team = root_team; 3117 root_team->t.t_control_stack_top = NULL; 3118 3119 /* initialize root team */ 3120 root_team->t.t_threads[0] = NULL; 3121 root_team->t.t_nproc = 1; 3122 root_team->t.t_serialized = 1; 3123 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 3124 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type; 3125 root_team->t.t_sched.chunk = r_sched.chunk; 3126 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", 3127 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); 3128 3129 /* setup the hot team for this task */ 3130 /* allocate the hot team structure */ 3131 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) ); 3132 3133 hot_team = 3134 __kmp_allocate_team( 3135 root, 3136 1, // new_nproc 3137 __kmp_dflt_team_nth_ub * 2, // max_nproc 3138 #if OMPT_SUPPORT 3139 0, // root parallel id 3140 #endif 3141 #if OMP_40_ENABLED 3142 __kmp_nested_proc_bind.bind_types[0], 3143 #endif 3144 &r_icvs, 3145 0 // argc 3146 USE_NESTED_HOT_ARG(NULL) // master thread is unknown 3147 ); 3148 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) ); 3149 3150 root->r.r_hot_team = hot_team; 3151 root_team->t.t_control_stack_top = NULL; 3152 3153 /* first-time initialization */ 3154 hot_team->t.t_parent = root_team; 3155 3156 /* initialize hot team */ 3157 hot_team_max_nth = hot_team->t.t_max_nproc; 3158 for ( f = 0; f < hot_team_max_nth; ++ f ) { 3159 hot_team->t.t_threads[ f ] = NULL; 3160 }; // for 3161 hot_team->t.t_nproc = 1; 3162 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 3163 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type; 3164 hot_team->t.t_sched.chunk = r_sched.chunk; 3165 hot_team->t.t_size_changed = 0; 3166 
} 3167 3168 #ifdef KMP_DEBUG 3169 3170 3171 typedef struct kmp_team_list_item { 3172 kmp_team_p const * entry; 3173 struct kmp_team_list_item * next; 3174 } kmp_team_list_item_t; 3175 typedef kmp_team_list_item_t * kmp_team_list_t; 3176 3177 3178 static void 3179 __kmp_print_structure_team_accum( // Add team to list of teams. 3180 kmp_team_list_t list, // List of teams. 3181 kmp_team_p const * team // Team to add. 3182 ) { 3183 3184 // List must terminate with item where both entry and next are NULL. 3185 // Team is added to the list only once. 3186 // List is sorted in ascending order by team id. 3187 // Team id is *not* a key. 3188 3189 kmp_team_list_t l; 3190 3191 KMP_DEBUG_ASSERT( list != NULL ); 3192 if ( team == NULL ) { 3193 return; 3194 }; // if 3195 3196 __kmp_print_structure_team_accum( list, team->t.t_parent ); 3197 __kmp_print_structure_team_accum( list, team->t.t_next_pool ); 3198 3199 // Search list for the team. 3200 l = list; 3201 while ( l->next != NULL && l->entry != team ) { 3202 l = l->next; 3203 }; // while 3204 if ( l->next != NULL ) { 3205 return; // Team has been added before, exit. 3206 }; // if 3207 3208 // Team is not found. Search list again for insertion point. 3209 l = list; 3210 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) { 3211 l = l->next; 3212 }; // while 3213 3214 // Insert team. 3215 { 3216 kmp_team_list_item_t * item = 3217 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); 3218 * item = * l; 3219 l->entry = team; 3220 l->next = item; 3221 } 3222 3223 } 3224 3225 static void 3226 __kmp_print_structure_team( 3227 char const * title, 3228 kmp_team_p const * team 3229 3230 ) { 3231 __kmp_printf( "%s", title ); 3232 if ( team != NULL ) { 3233 __kmp_printf( "%2x %p\n", team->t.t_id, team ); 3234 } else { 3235 __kmp_printf( " - (nil)\n" ); 3236 }; // if 3237 } 3238 3239 static void 3240 __kmp_print_structure_thread( 3241 char const * title, 3242 kmp_info_p const * thread 3243 3244 ) { 3245 __kmp_printf( "%s", title ); 3246 if ( thread != NULL ) { 3247 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread ); 3248 } else { 3249 __kmp_printf( " - (nil)\n" ); 3250 }; // if 3251 } 3252 3253 void 3254 __kmp_print_structure( 3255 void 3256 ) { 3257 3258 kmp_team_list_t list; 3259 3260 // Initialize list of teams. 3261 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); 3262 list->entry = NULL; 3263 list->next = NULL; 3264 3265 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" ); 3266 { 3267 int gtid; 3268 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { 3269 __kmp_printf( "%2d", gtid ); 3270 if ( __kmp_threads != NULL ) { 3271 __kmp_printf( " %p", __kmp_threads[ gtid ] ); 3272 }; // if 3273 if ( __kmp_root != NULL ) { 3274 __kmp_printf( " %p", __kmp_root[ gtid ] ); 3275 }; // if 3276 __kmp_printf( "\n" ); 3277 }; // for gtid 3278 } 3279 3280 // Print out __kmp_threads array. 
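    // While walking the threads and roots below, every reachable team is also
    // accumulated into 'list' via __kmp_print_structure_team_accum(). For
    // illustration (hypothetical ids): after visiting teams with t_id 2, then
    // 0, then the same team with t_id 2 again, the list is
    // { id 0 } -> { id 2 } -> { NULL, NULL } -- sorted by t_id, teams already
    // in the list are skipped, and the sentinel item allocated above
    // terminates it.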
3281 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" ); 3282 if ( __kmp_threads != NULL ) { 3283 int gtid; 3284 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { 3285 kmp_info_t const * thread = __kmp_threads[ gtid ]; 3286 if ( thread != NULL ) { 3287 __kmp_printf( "GTID %2d %p:\n", gtid, thread ); 3288 __kmp_printf( " Our Root: %p\n", thread->th.th_root ); 3289 __kmp_print_structure_team( " Our Team: ", thread->th.th_team ); 3290 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team ); 3291 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc ); 3292 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master ); 3293 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized ); 3294 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc ); 3295 #if OMP_40_ENABLED 3296 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind ); 3297 #endif 3298 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool ); 3299 __kmp_printf( "\n" ); 3300 __kmp_print_structure_team_accum( list, thread->th.th_team ); 3301 __kmp_print_structure_team_accum( list, thread->th.th_serial_team ); 3302 }; // if 3303 }; // for gtid 3304 } else { 3305 __kmp_printf( "Threads array is not allocated.\n" ); 3306 }; // if 3307 3308 // Print out __kmp_root array. 3309 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" ); 3310 if ( __kmp_root != NULL ) { 3311 int gtid; 3312 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { 3313 kmp_root_t const * root = __kmp_root[ gtid ]; 3314 if ( root != NULL ) { 3315 __kmp_printf( "GTID %2d %p:\n", gtid, root ); 3316 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team ); 3317 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team ); 3318 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread ); 3319 __kmp_printf( " Active?: %2d\n", root->r.r_active ); 3320 __kmp_printf( " Nested?: %2d\n", root->r.r_nested ); 3321 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel ); 3322 __kmp_printf( "\n" ); 3323 __kmp_print_structure_team_accum( list, root->r.r_root_team ); 3324 __kmp_print_structure_team_accum( list, root->r.r_hot_team ); 3325 }; // if 3326 }; // for gtid 3327 } else { 3328 __kmp_printf( "Ubers array is not allocated.\n" ); 3329 }; // if 3330 3331 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" ); 3332 while ( list->next != NULL ) { 3333 kmp_team_p const * team = list->entry; 3334 int i; 3335 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team ); 3336 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent ); 3337 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid ); 3338 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc ); 3339 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized ); 3340 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc ); 3341 for ( i = 0; i < team->t.t_nproc; ++ i ) { 3342 __kmp_printf( " Thread %2d: ", i ); 3343 __kmp_print_structure_thread( "", team->t.t_threads[ i ] ); 3344 }; // for i 3345 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool ); 3346 __kmp_printf( "\n" ); 3347 list = list->next; 3348 }; // while 3349 3350 // Print out __kmp_thread_pool and __kmp_team_pool. 
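    // Note: __kmp_thread_pool and __kmp_team_pool are simple singly linked
    // lists chained through th.th_next_pool / t.t_next_pool; only the head of
    // each pool is printed below.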
3351 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" ); 3352 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool ); 3353 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool ); 3354 __kmp_printf( "\n" ); 3355 3356 // Free team list. 3357 while ( list != NULL ) { 3358 kmp_team_list_item_t * item = list; 3359 list = list->next; 3360 KMP_INTERNAL_FREE( item ); 3361 }; // while 3362 3363 } 3364 3365 #endif 3366 3367 3368 //--------------------------------------------------------------------------- 3369 // Stuff for per-thread fast random number generator 3370 // Table of primes 3371 3372 static const unsigned __kmp_primes[] = { 3373 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 3374 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b, 3375 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, 3376 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 3377 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801, 3378 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, 3379 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 3380 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b, 3381 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, 3382 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 3383 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7, 3384 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, 3385 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 3386 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b, 3387 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, 3388 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f 3389 }; 3390 3391 //--------------------------------------------------------------------------- 3392 // __kmp_get_random: Get a random number using a linear congruential method. 3393 3394 unsigned short 3395 __kmp_get_random( kmp_info_t * thread ) 3396 { 3397 unsigned x = thread->th.th_x; 3398 unsigned short r = x>>16; 3399 3400 thread->th.th_x = x*thread->th.th_a+1; 3401 3402 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n", 3403 thread->th.th_info.ds.ds_tid, r) ); 3404 3405 return r; 3406 } 3407 //-------------------------------------------------------- 3408 // __kmp_init_random: Initialize a random number generator 3409 3410 void 3411 __kmp_init_random( kmp_info_t * thread ) 3412 { 3413 unsigned seed = thread->th.th_info.ds.ds_tid; 3414 3415 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))]; 3416 thread->th.th_x = (seed+1)*thread->th.th_a+1; 3417 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) ); 3418 } 3419 3420 3421 #if KMP_OS_WINDOWS 3422 /* reclaim array entries for root threads that are already dead, returns number reclaimed */ 3423 static int 3424 __kmp_reclaim_dead_roots(void) { 3425 int i, r = 0; 3426 3427 for(i = 0; i < __kmp_threads_capacity; ++i) { 3428 if( KMP_UBER_GTID( i ) && 3429 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && 3430 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state 3431 r += __kmp_unregister_root_other_thread(i); 3432 } 3433 } 3434 return r; 3435 } 3436 #endif 3437 3438 /* 3439 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of 3440 free entries generated. 3441 3442 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are 3443 already dead. 

   On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate
   update to __kmp_threads_capacity. Array capacity is increased by doubling, with clipping to
   __kmp_tp_capacity if a threadprivate cache array has been created.
   Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.

   After any dead root reclamation, if the clipping value allows array expansion to generate a total of
   nWish free slots, the function does that expansion. If not, but the clipping value allows array
   expansion to generate a total of nNeed free slots, the function does that expansion instead.
   Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is
   zero, a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt
   to create as many free slots as possible up to nWish.

   If any argument is negative, the behavior is undefined.
*/
static int
__kmp_expand_threads(int nWish, int nNeed) {
    int added = 0;
    int old_tp_cached;
    int __kmp_actual_max_nth;

    if(nNeed > nWish) /* normalize the arguments */
        nWish = nNeed;
#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
    /* only for Windows static library */
    /* reclaim array entries for root threads that are already dead */
    added = __kmp_reclaim_dead_roots();

    if(nNeed) {
        nNeed -= added;
        if(nNeed < 0)
            nNeed = 0;
    }
    if(nWish) {
        nWish -= added;
        if(nWish < 0)
            nWish = 0;
    }
#endif
    if(nWish <= 0)
        return added;

    while(1) {
        int nTarget;
        int minimumRequiredCapacity;
        int newCapacity;
        kmp_info_t **newThreads;
        kmp_root_t **newRoot;

        //
        // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
        // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
        // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
        // become > __kmp_max_nth in one of two ways:
        //
        // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
        //    may not be reused by another thread, so we may need to increase
        //    __kmp_threads_capacity to __kmp_max_nth + 1.
        //
        // 2) New foreign root(s) are encountered. We always register new
        //    foreign roots. This may cause a smaller # of threads to be
        //    allocated at subsequent parallel regions, but the worker threads
        //    hang around (and eventually go to sleep) and need slots in the
        //    __kmp_threads[] array.
        //
        // Anyway, that is the reason for moving the check to see if
        // __kmp_max_nth was exceeded into __kmp_reserve_threads()
        // instead of having it performed here. -BB
        //
        old_tp_cached = __kmp_tp_cached;
        __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
        KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);

        /* compute expansion headroom to check if we can expand, and whether to aim for nWish or nNeed */
        nTarget = nWish;
        if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
            /* can't fulfil nWish, so try nNeed */
            if(nNeed) {
                nTarget = nNeed;
                if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
                    /* possible expansion too small -- give up */
                    break;
                }
            } else {
                /* best-effort */
                nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
                if(!nTarget) {
                    /* can't expand at all -- give up */
                    break;
                }
            }
        }
        minimumRequiredCapacity = __kmp_threads_capacity + nTarget;

        newCapacity = __kmp_threads_capacity;
        do {
            newCapacity =
                newCapacity <= (__kmp_actual_max_nth >> 1) ?
                    (newCapacity << 1) :
                    __kmp_actual_max_nth;
        } while(newCapacity < minimumRequiredCapacity);
        newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
        newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
        KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
        KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
        memset(newThreads + __kmp_threads_capacity, 0,
               (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
        memset(newRoot + __kmp_threads_capacity, 0,
               (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));

        if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
            /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
               while we were allocating the expanded array, and our new capacity is larger than the threadprivate
               cache capacity, so we should deallocate the expanded arrays and try again. This is the first check
               of a double-check pair.
            */
            __kmp_free(newThreads);
            continue; /* start over and try again */
        }
        __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
        if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
            /* Same check as above, but this time with the lock held, so we can be sure we will succeed. */
            __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
            __kmp_free(newThreads);
            continue; /* start over and try again */
        } else {
            /* success */
            // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated.
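            // Worked example of the sizing above (illustrative numbers only):
            // with __kmp_threads_capacity == 32, nTarget == 40 and
            // __kmp_actual_max_nth == 1024, minimumRequiredCapacity is 72 and
            // the doubling loop yields newCapacity 64, then 128, which is what
            // the arrays grow to (the doubling clips to __kmp_actual_max_nth
            // once another doubling would overshoot it).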
            //
            *(kmp_info_t**volatile*)&__kmp_threads = newThreads;
            *(kmp_root_t**volatile*)&__kmp_root = newRoot;
            added += newCapacity - __kmp_threads_capacity;
            *(volatile int*)&__kmp_threads_capacity = newCapacity;
            __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
            break; /* succeeded, so we can exit the loop */
        }
    }
    return added;
}

/* register the current thread as a root thread and obtain our gtid */
/* we must have the __kmp_initz_lock held at this point */
/* Argument TRUE only if we are the thread that calls this from __kmp_do_serial_initialize() */
int
__kmp_register_root( int initial_thread )
{
    kmp_info_t *root_thread;
    kmp_root_t *root;
    int gtid;
    int capacity;
    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
    KA_TRACE( 20, ("__kmp_register_root: entered\n"));
    KMP_MB();


    /*
        2007-03-02:

        If the initial thread did not invoke the OpenMP RTL yet, and this thread is not an initial one,
        the "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
        return false (meaning there is at least one empty slot in the __kmp_threads array), but it is
        possible that the only free slot is #0, which is reserved for the initial thread and so cannot
        be used for this one. The following code works around this bug.

        However, the right solution seems to be to not reserve slot #0 for the initial thread, because:
        (1) there is no magic in slot #0,
        (2) we cannot detect the initial thread reliably (the first thread which does serial
            initialization may not be the real initial thread).
    */
    capacity = __kmp_threads_capacity;
    if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
        -- capacity;
    }; // if

    /* see if there are too many threads */
    if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
        if ( __kmp_tp_cached ) {
            __kmp_msg(
                kmp_ms_fatal,
                KMP_MSG( CantRegisterNewThread ),
                KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                KMP_HNT( PossibleSystemLimitOnThreads ),
                __kmp_msg_null
            );
        }
        else {
            __kmp_msg(
                kmp_ms_fatal,
                KMP_MSG( CantRegisterNewThread ),
                KMP_HNT( SystemLimitOnThreads ),
                __kmp_msg_null
            );
        }
    }; // if

    /* find an available thread slot */
    /* Don't reassign the zero slot since we need that to only be used by the initial
       thread */
    for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
        ;
    KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
    KMP_ASSERT( gtid < __kmp_threads_capacity );

    /* update global accounting */
    __kmp_all_nth ++;
    TCW_4(__kmp_nth, __kmp_nth + 1);

    //
    // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
    // for low numbers of procs, and method #2 (keyed API call) for higher
    // numbers of procs.
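    // For example (purely illustrative threshold): if __kmp_tls_gtid_min were
    // 20, the first 19 registered threads would leave __kmp_gtid_mode at 1
    // (method #1, sp search); registering the 20th would flip it to 2 so that
    // gtid lookups use method #2 (the keyed API call) from then on.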
3655 // 3656 if ( __kmp_adjust_gtid_mode ) { 3657 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { 3658 if ( TCR_4(__kmp_gtid_mode) != 2) { 3659 TCW_4(__kmp_gtid_mode, 2); 3660 } 3661 } 3662 else { 3663 if (TCR_4(__kmp_gtid_mode) != 1 ) { 3664 TCW_4(__kmp_gtid_mode, 1); 3665 } 3666 } 3667 } 3668 3669 #ifdef KMP_ADJUST_BLOCKTIME 3670 /* Adjust blocktime to zero if necessary */ 3671 /* Middle initialization might not have occurred yet */ 3672 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 3673 if ( __kmp_nth > __kmp_avail_proc ) { 3674 __kmp_zero_bt = TRUE; 3675 } 3676 } 3677 #endif /* KMP_ADJUST_BLOCKTIME */ 3678 3679 /* setup this new hierarchy */ 3680 if( ! ( root = __kmp_root[gtid] )) { 3681 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) ); 3682 KMP_DEBUG_ASSERT( ! root->r.r_root_team ); 3683 } 3684 3685 #if KMP_STATS_ENABLED 3686 // Initialize stats as soon as possible (right after gtid assignment). 3687 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); 3688 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); 3689 KMP_SET_THREAD_STATE(SERIAL_REGION); 3690 KMP_INIT_PARTITIONED_TIMERS(OMP_serial); 3691 #endif 3692 __kmp_initialize_root( root ); 3693 3694 /* setup new root thread structure */ 3695 if( root->r.r_uber_thread ) { 3696 root_thread = root->r.r_uber_thread; 3697 } else { 3698 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); 3699 if ( __kmp_storage_map ) { 3700 __kmp_print_thread_storage_map( root_thread, gtid ); 3701 } 3702 root_thread->th.th_info .ds.ds_gtid = gtid; 3703 root_thread->th.th_root = root; 3704 if( __kmp_env_consistency_check ) { 3705 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid ); 3706 } 3707 #if USE_FAST_MEMORY 3708 __kmp_initialize_fast_memory( root_thread ); 3709 #endif /* USE_FAST_MEMORY */ 3710 3711 #if KMP_USE_BGET 3712 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL ); 3713 __kmp_initialize_bget( root_thread ); 3714 #endif 3715 __kmp_init_random( root_thread ); // Initialize random number generator 3716 } 3717 3718 /* setup the serial team held in reserve by the root thread */ 3719 if( ! root_thread->th.th_serial_team ) { 3720 kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 3721 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) ); 3722 3723 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1, 3724 #if OMPT_SUPPORT 3725 0, // root parallel id 3726 #endif 3727 #if OMP_40_ENABLED 3728 proc_bind_default, 3729 #endif 3730 &r_icvs, 3731 0 USE_NESTED_HOT_ARG(NULL) ); 3732 } 3733 KMP_ASSERT( root_thread->th.th_serial_team ); 3734 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n", 3735 root_thread->th.th_serial_team ) ); 3736 3737 /* drop root_thread into place */ 3738 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); 3739 3740 root->r.r_root_team->t.t_threads[0] = root_thread; 3741 root->r.r_hot_team ->t.t_threads[0] = root_thread; 3742 root_thread->th.th_serial_team->t.t_threads[0] = root_thread; 3743 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). 
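    /* At this point (sketch of the invariants established above):
       __kmp_threads[gtid] == root_thread, both the root team and the hot team
       have root_thread installed as thread 0, and the reserve serial team
       points back at root_thread as its only thread; r_uber_thread is recorded
       next. */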
3744 root->r.r_uber_thread = root_thread; 3745 3746 /* initialize the thread, get it ready to go */ 3747 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid ); 3748 TCW_4(__kmp_init_gtid, TRUE); 3749 3750 /* prepare the master thread for get_gtid() */ 3751 __kmp_gtid_set_specific( gtid ); 3752 3753 #if USE_ITT_BUILD 3754 __kmp_itt_thread_name( gtid ); 3755 #endif /* USE_ITT_BUILD */ 3756 3757 #ifdef KMP_TDATA_GTID 3758 __kmp_gtid = gtid; 3759 #endif 3760 __kmp_create_worker( gtid, root_thread, __kmp_stksize ); 3761 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid ); 3762 3763 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n", 3764 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ), 3765 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, 3766 KMP_INIT_BARRIER_STATE ) ); 3767 { // Initialize barrier data. 3768 int b; 3769 for ( b = 0; b < bs_last_barrier; ++ b ) { 3770 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE; 3771 #if USE_DEBUGGER 3772 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0; 3773 #endif 3774 }; // for 3775 } 3776 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE ); 3777 3778 #if KMP_AFFINITY_SUPPORTED 3779 # if OMP_40_ENABLED 3780 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; 3781 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; 3782 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; 3783 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; 3784 # endif 3785 3786 if ( TCR_4(__kmp_init_middle) ) { 3787 __kmp_affinity_set_init_mask( gtid, TRUE ); 3788 } 3789 #endif /* KMP_AFFINITY_SUPPORTED */ 3790 3791 __kmp_root_counter ++; 3792 3793 KMP_MB(); 3794 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 3795 3796 return gtid; 3797 } 3798 3799 #if KMP_NESTED_HOT_TEAMS 3800 static int 3801 __kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level ) 3802 { 3803 int i, n, nth; 3804 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; 3805 if( !hot_teams || !hot_teams[level].hot_team ) { 3806 return 0; 3807 } 3808 KMP_DEBUG_ASSERT( level < max_level ); 3809 kmp_team_t *team = hot_teams[level].hot_team; 3810 nth = hot_teams[level].hot_team_nth; 3811 n = nth - 1; // master is not freed 3812 if( level < max_level - 1 ) { 3813 for( i = 0; i < nth; ++i ) { 3814 kmp_info_t *th = team->t.t_threads[i]; 3815 n += __kmp_free_hot_teams( root, th, level + 1, max_level ); 3816 if( i > 0 && th->th.th_hot_teams ) { 3817 __kmp_free( th->th.th_hot_teams ); 3818 th->th.th_hot_teams = NULL; 3819 } 3820 } 3821 } 3822 __kmp_free_team( root, team, NULL ); 3823 return n; 3824 } 3825 #endif 3826 3827 /* Resets a root thread and clear its root and hot teams. 3828 Returns the number of __kmp_threads entries directly and indirectly freed. 3829 */ 3830 static int 3831 __kmp_reset_root(int gtid, kmp_root_t *root) 3832 { 3833 kmp_team_t * root_team = root->r.r_root_team; 3834 kmp_team_t * hot_team = root->r.r_hot_team; 3835 int n = hot_team->t.t_nproc; 3836 int i; 3837 3838 KMP_DEBUG_ASSERT( ! root->r.r_active ); 3839 3840 root->r.r_root_team = NULL; 3841 root->r.r_hot_team = NULL; 3842 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call 3843 // to __kmp_free_team(). 
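    // Accounting sketch (illustrative numbers): with __kmp_hot_teams_max_level
    // == 2, a hot team of 4 threads each owning a nested hot team of 3, n
    // starts at 4 and each __kmp_free_hot_teams() call below adds 3 - 1 = 2
    // (the nested master is not counted), so 4 + 4*2 = 12 __kmp_threads
    // entries are reported as freed.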
    __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
#if KMP_NESTED_HOT_TEAMS
    if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any
        for( i = 0; i < hot_team->t.t_nproc; ++i ) {
            kmp_info_t *th = hot_team->t.t_threads[i];
            if( __kmp_hot_teams_max_level > 1 ) {
                n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
            }
            if( th->th.th_hot_teams ) {
                __kmp_free( th->th.th_hot_teams );
                th->th.th_hot_teams = NULL;
            }
        }
    }
#endif
    __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );

    //
    // Before we can reap the thread, we need to make certain that all other
    // threads in the teams that had this root as ancestor have stopped trying to steal tasks.
    //
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        __kmp_wait_to_unref_task_teams();
    }

#if KMP_OS_WINDOWS
    /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
    KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
        (LPVOID)&(root->r.r_uber_thread->th),
        root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
    __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
#endif /* KMP_OS_WINDOWS */

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
        int gtid = __kmp_get_gtid();
        __ompt_thread_end(ompt_thread_initial, gtid);
    }
#endif

    TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
    __kmp_reap_thread( root->r.r_uber_thread, 1 );

    // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it.
    root->r.r_uber_thread = NULL;
    /* mark root as no longer in use */
    root->r.r_begin = FALSE;

    return n;
}

void
__kmp_unregister_root_current_thread( int gtid )
{
    KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
    /* this lock should be ok, since unregister_root_current_thread is never called during
     * an abort, only during a normal close.
furthermore, if you have the 3902 * forkjoin lock, you should never try to get the initz lock */ 3903 3904 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 3905 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 3906 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid )); 3907 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 3908 return; 3909 } 3910 kmp_root_t *root = __kmp_root[gtid]; 3911 3912 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); 3913 KMP_ASSERT( KMP_UBER_GTID( gtid )); 3914 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); 3915 KMP_ASSERT( root->r.r_active == FALSE ); 3916 3917 3918 KMP_MB(); 3919 3920 #if OMP_45_ENABLED 3921 kmp_info_t * thread = __kmp_threads[gtid]; 3922 kmp_team_t * team = thread->th.th_team; 3923 kmp_task_team_t * task_team = thread->th.th_task_team; 3924 3925 // we need to wait for the proxy tasks before finishing the thread 3926 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) { 3927 #if OMPT_SUPPORT 3928 // the runtime is shutting down so we won't report any events 3929 thread->th.ompt_thread_info.state = ompt_state_undefined; 3930 #endif 3931 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); 3932 } 3933 #endif 3934 3935 __kmp_reset_root(gtid, root); 3936 3937 /* free up this thread slot */ 3938 __kmp_gtid_set_specific( KMP_GTID_DNE ); 3939 #ifdef KMP_TDATA_GTID 3940 __kmp_gtid = KMP_GTID_DNE; 3941 #endif 3942 3943 KMP_MB(); 3944 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid )); 3945 3946 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 3947 } 3948 3949 #if KMP_OS_WINDOWS 3950 /* __kmp_forkjoin_lock must be already held 3951 Unregisters a root thread that is not the current thread. Returns the number of 3952 __kmp_threads entries freed as a result. 
3953 */ 3954 static int 3955 __kmp_unregister_root_other_thread( int gtid ) 3956 { 3957 kmp_root_t *root = __kmp_root[gtid]; 3958 int r; 3959 3960 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid )); 3961 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); 3962 KMP_ASSERT( KMP_UBER_GTID( gtid )); 3963 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); 3964 KMP_ASSERT( root->r.r_active == FALSE ); 3965 3966 r = __kmp_reset_root(gtid, root); 3967 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid )); 3968 return r; 3969 } 3970 #endif 3971 3972 #if KMP_DEBUG 3973 void __kmp_task_info() { 3974 3975 kmp_int32 gtid = __kmp_entry_gtid(); 3976 kmp_int32 tid = __kmp_tid_from_gtid( gtid ); 3977 kmp_info_t *this_thr = __kmp_threads[ gtid ]; 3978 kmp_team_t *steam = this_thr->th.th_serial_team; 3979 kmp_team_t *team = this_thr->th.th_team; 3980 3981 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n", 3982 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent ); 3983 } 3984 #endif // KMP_DEBUG 3985 3986 /* TODO optimize with one big memclr, take out what isn't needed, 3987 * split responsibility to workers as much as possible, and delay 3988 * initialization of features as much as possible */ 3989 static void 3990 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid ) 3991 { 3992 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker 3993 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ 3994 kmp_info_t *master = team->t.t_threads[0]; 3995 KMP_DEBUG_ASSERT( this_thr != NULL ); 3996 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team ); 3997 KMP_DEBUG_ASSERT( team ); 3998 KMP_DEBUG_ASSERT( team->t.t_threads ); 3999 KMP_DEBUG_ASSERT( team->t.t_dispatch ); 4000 KMP_DEBUG_ASSERT( master ); 4001 KMP_DEBUG_ASSERT( master->th.th_root ); 4002 4003 KMP_MB(); 4004 4005 TCW_SYNC_PTR(this_thr->th.th_team, team); 4006 4007 this_thr->th.th_info.ds.ds_tid = tid; 4008 this_thr->th.th_set_nproc = 0; 4009 if (__kmp_tasking_mode != tskm_immediate_exec) 4010 // When tasking is possible, threads are not safe to reap until they are 4011 // done tasking; this will be set when tasking code is exited in wait 4012 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; 4013 else // no tasking --> always safe to reap 4014 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; 4015 #if OMP_40_ENABLED 4016 this_thr->th.th_set_proc_bind = proc_bind_default; 4017 # if KMP_AFFINITY_SUPPORTED 4018 this_thr->th.th_new_place = this_thr->th.th_current_place; 4019 # endif 4020 #endif 4021 this_thr->th.th_root = master->th.th_root; 4022 4023 /* setup the thread's cache of the team structure */ 4024 this_thr->th.th_team_nproc = team->t.t_nproc; 4025 this_thr->th.th_team_master = master; 4026 this_thr->th.th_team_serialized = team->t.t_serialized; 4027 TCW_PTR(this_thr->th.th_sleep_loc, NULL); 4028 4029 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata ); 4030 4031 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", 4032 tid, gtid, this_thr, this_thr->th.th_current_task ) ); 4033 4034 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE ); 4035 4036 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", 4037 tid, gtid, this_thr, this_thr->th.th_current_task ) ); 4038 // TODO: Initialize ICVs from parent; GEH - isn't that already done in 
__kmp_initialize_team()? 4039 4040 /* TODO no worksharing in speculative threads */ 4041 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ]; 4042 4043 this_thr->th.th_local.this_construct = 0; 4044 4045 #ifdef BUILD_TV 4046 this_thr->th.th_local.tv_data = 0; 4047 #endif 4048 4049 if ( ! this_thr->th.th_pri_common ) { 4050 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) ); 4051 if ( __kmp_storage_map ) { 4052 __kmp_print_storage_map_gtid( 4053 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, 4054 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid 4055 ); 4056 }; // if 4057 this_thr->th.th_pri_head = NULL; 4058 }; // if 4059 4060 /* Initialize dynamic dispatch */ 4061 { 4062 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; 4063 /* 4064 * Use team max_nproc since this will never change for the team. 4065 */ 4066 size_t disp_size = sizeof( dispatch_private_info_t ) * 4067 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ); 4068 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) ); 4069 KMP_ASSERT( dispatch ); 4070 KMP_DEBUG_ASSERT( team->t.t_dispatch ); 4071 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] ); 4072 4073 dispatch->th_disp_index = 0; 4074 #if OMP_45_ENABLED 4075 dispatch->th_doacross_buf_idx = 0; 4076 #endif 4077 if( ! dispatch->th_disp_buffer ) { 4078 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size ); 4079 4080 if ( __kmp_storage_map ) { 4081 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ], 4082 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ], 4083 disp_size, "th_%d.th_dispatch.th_disp_buffer " 4084 "(team_%d.t_dispatch[%d].th_disp_buffer)", 4085 gtid, team->t.t_id, gtid ); 4086 } 4087 } else { 4088 memset( & dispatch->th_disp_buffer[0], '\0', disp_size ); 4089 } 4090 4091 dispatch->th_dispatch_pr_current = 0; 4092 dispatch->th_dispatch_sh_current = 0; 4093 4094 dispatch->th_deo_fcn = 0; /* ORDERED */ 4095 dispatch->th_dxo_fcn = 0; /* END ORDERED */ 4096 } 4097 4098 this_thr->th.th_next_pool = NULL; 4099 4100 if (!this_thr->th.th_task_state_memo_stack) { 4101 size_t i; 4102 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) ); 4103 this_thr->th.th_task_state_top = 0; 4104 this_thr->th.th_task_state_stack_sz = 4; 4105 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack 4106 this_thr->th.th_task_state_memo_stack[i] = 0; 4107 } 4108 4109 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 4110 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 4111 4112 KMP_MB(); 4113 } 4114 4115 4116 /* allocate a new thread for the requesting team. this is only called from within a 4117 * forkjoin critical section. we will first try to get an available thread from the 4118 * thread pool. if none is available, we will fork a new one assuming we are able 4119 * to create a new one. this should be assured, as the caller should check on this 4120 * first. 
4121 */ 4122 kmp_info_t * 4123 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid ) 4124 { 4125 kmp_team_t *serial_team; 4126 kmp_info_t *new_thr; 4127 int new_gtid; 4128 4129 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() )); 4130 KMP_DEBUG_ASSERT( root && team ); 4131 #if !KMP_NESTED_HOT_TEAMS 4132 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() )); 4133 #endif 4134 KMP_MB(); 4135 4136 /* first, try to get one from the thread pool */ 4137 if ( __kmp_thread_pool ) { 4138 4139 new_thr = (kmp_info_t*)__kmp_thread_pool; 4140 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool; 4141 if ( new_thr == __kmp_thread_pool_insert_pt ) { 4142 __kmp_thread_pool_insert_pt = NULL; 4143 } 4144 TCW_4(new_thr->th.th_in_pool, FALSE); 4145 // 4146 // Don't touch th_active_in_pool or th_active. 4147 // The worker thread adjusts those flags as it sleeps/awakens. 4148 // 4149 __kmp_thread_pool_nth--; 4150 4151 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n", 4152 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid )); 4153 KMP_ASSERT( ! new_thr->th.th_team ); 4154 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity ); 4155 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 ); 4156 4157 /* setup the thread structure */ 4158 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid ); 4159 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team ); 4160 4161 TCW_4(__kmp_nth, __kmp_nth + 1); 4162 4163 new_thr->th.th_task_state = 0; 4164 new_thr->th.th_task_state_top = 0; 4165 new_thr->th.th_task_state_stack_sz = 4; 4166 4167 #ifdef KMP_ADJUST_BLOCKTIME 4168 /* Adjust blocktime back to zero if necessar y */ 4169 /* Middle initialization might not have occurred yet */ 4170 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 4171 if ( __kmp_nth > __kmp_avail_proc ) { 4172 __kmp_zero_bt = TRUE; 4173 } 4174 } 4175 #endif /* KMP_ADJUST_BLOCKTIME */ 4176 4177 #if KMP_DEBUG 4178 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG. 4179 int b; 4180 kmp_balign_t * balign = new_thr->th.th_bar; 4181 for( b = 0; b < bs_last_barrier; ++ b ) 4182 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4183 #endif 4184 4185 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n", 4186 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid )); 4187 4188 KMP_MB(); 4189 return new_thr; 4190 } 4191 4192 4193 /* no, well fork a new one */ 4194 KMP_ASSERT( __kmp_nth == __kmp_all_nth ); 4195 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity ); 4196 4197 #if KMP_USE_MONITOR 4198 // 4199 // If this is the first worker thread the RTL is creating, then also 4200 // launch the monitor thread. We try to do this as early as possible. 4201 // 4202 if ( ! TCR_4( __kmp_init_monitor ) ) { 4203 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); 4204 if ( ! TCR_4( __kmp_init_monitor ) ) { 4205 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) ); 4206 TCW_4( __kmp_init_monitor, 1 ); 4207 __kmp_create_monitor( & __kmp_monitor ); 4208 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) ); 4209 #if KMP_OS_WINDOWS 4210 // AC: wait until monitor has started. This is a fix for CQ232808. 4211 // The reason is that if the library is loaded/unloaded in a loop with small (parallel) 4212 // work in between, then there is high probability that monitor thread started after 4213 // the library shutdown. 
At shutdown it is too late to cope with the problem, because 4214 // when the master is in DllMain (process detach) the monitor has no chances to start 4215 // (it is blocked), and master has no means to inform the monitor that the library has gone, 4216 // because all the memory which the monitor can access is going to be released/reset. 4217 while ( TCR_4(__kmp_init_monitor) < 2 ) { 4218 KMP_YIELD( TRUE ); 4219 } 4220 KF_TRACE( 10, ( "after monitor thread has started\n" ) ); 4221 #endif 4222 } 4223 __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); 4224 } 4225 #endif 4226 4227 KMP_MB(); 4228 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) { 4229 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity ); 4230 } 4231 4232 /* allocate space for it. */ 4233 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); 4234 4235 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr); 4236 4237 if ( __kmp_storage_map ) { 4238 __kmp_print_thread_storage_map( new_thr, new_gtid ); 4239 } 4240 4241 /* add the reserve serialized team, initialized from the team's master thread */ 4242 { 4243 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team ); 4244 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) ); 4245 4246 new_thr->th.th_serial_team = serial_team = 4247 (kmp_team_t*) __kmp_allocate_team( root, 1, 1, 4248 #if OMPT_SUPPORT 4249 0, // root parallel id 4250 #endif 4251 #if OMP_40_ENABLED 4252 proc_bind_default, 4253 #endif 4254 &r_icvs, 4255 0 USE_NESTED_HOT_ARG(NULL) ); 4256 } 4257 KMP_ASSERT ( serial_team ); 4258 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). 4259 serial_team->t.t_threads[0] = new_thr; 4260 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", 4261 new_thr ) ); 4262 4263 /* setup the thread structures */ 4264 __kmp_initialize_info( new_thr, team, new_tid, new_gtid ); 4265 4266 #if USE_FAST_MEMORY 4267 __kmp_initialize_fast_memory( new_thr ); 4268 #endif /* USE_FAST_MEMORY */ 4269 4270 #if KMP_USE_BGET 4271 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL ); 4272 __kmp_initialize_bget( new_thr ); 4273 #endif 4274 4275 __kmp_init_random( new_thr ); // Initialize random number generator 4276 4277 /* Initialize these only once when thread is grabbed for a team allocation */ 4278 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n", 4279 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); 4280 4281 int b; 4282 kmp_balign_t * balign = new_thr->th.th_bar; 4283 for(b=0; b<bs_last_barrier; ++b) { 4284 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE; 4285 balign[b].bb.team = NULL; 4286 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING; 4287 balign[b].bb.use_oncore_barrier = 0; 4288 } 4289 4290 new_thr->th.th_spin_here = FALSE; 4291 new_thr->th.th_next_waiting = 0; 4292 4293 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4294 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED; 4295 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED; 4296 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED; 4297 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED; 4298 #endif 4299 4300 TCW_4(new_thr->th.th_in_pool, FALSE); 4301 new_thr->th.th_active_in_pool = FALSE; 4302 TCW_4(new_thr->th.th_active, TRUE); 4303 4304 /* adjust the global counters */ 4305 __kmp_all_nth ++; 4306 __kmp_nth ++; 4307 4308 // 4309 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) 4310 // for low numbers of procs, and method #2 (keyed API 
call) for higher 4311 // numbers of procs. 4312 // 4313 if ( __kmp_adjust_gtid_mode ) { 4314 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { 4315 if ( TCR_4(__kmp_gtid_mode) != 2) { 4316 TCW_4(__kmp_gtid_mode, 2); 4317 } 4318 } 4319 else { 4320 if (TCR_4(__kmp_gtid_mode) != 1 ) { 4321 TCW_4(__kmp_gtid_mode, 1); 4322 } 4323 } 4324 } 4325 4326 #ifdef KMP_ADJUST_BLOCKTIME 4327 /* Adjust blocktime back to zero if necessary */ 4328 /* Middle initialization might not have occurred yet */ 4329 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 4330 if ( __kmp_nth > __kmp_avail_proc ) { 4331 __kmp_zero_bt = TRUE; 4332 } 4333 } 4334 #endif /* KMP_ADJUST_BLOCKTIME */ 4335 4336 /* actually fork it and create the new worker thread */ 4337 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr )); 4338 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize ); 4339 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr )); 4340 4341 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid )); 4342 KMP_MB(); 4343 return new_thr; 4344 } 4345 4346 /* 4347 * reinitialize team for reuse. 4348 * 4349 * The hot team code calls this case at every fork barrier, so EPCC barrier 4350 * test are extremely sensitive to changes in it, esp. writes to the team 4351 * struct, which cause a cache invalidation in all threads. 4352 * 4353 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! 4354 */ 4355 static void 4356 __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) { 4357 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n", 4358 team->t.t_threads[0], team ) ); 4359 KMP_DEBUG_ASSERT( team && new_icvs); 4360 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc ); 4361 KMP_CHECK_UPDATE(team->t.t_ident, loc); 4362 4363 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); 4364 4365 // Copy ICVs to the master thread's implicit taskdata 4366 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE ); 4367 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); 4368 4369 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n", 4370 team->t.t_threads[0], team ) ); 4371 } 4372 4373 4374 /* initialize the team data structure 4375 * this assumes the t_threads and t_max_nproc are already set 4376 * also, we don't touch the arguments */ 4377 static void 4378 __kmp_initialize_team( 4379 kmp_team_t * team, 4380 int new_nproc, 4381 kmp_internal_control_t * new_icvs, 4382 ident_t * loc 4383 ) { 4384 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) ); 4385 4386 /* verify */ 4387 KMP_DEBUG_ASSERT( team ); 4388 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc ); 4389 KMP_DEBUG_ASSERT( team->t.t_threads ); 4390 KMP_MB(); 4391 4392 team->t.t_master_tid = 0; /* not needed */ 4393 /* team->t.t_master_bar; not needed */ 4394 team->t.t_serialized = new_nproc > 1 ? 
0 : 1; 4395 team->t.t_nproc = new_nproc; 4396 4397 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */ 4398 team->t.t_next_pool = NULL; 4399 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */ 4400 4401 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */ 4402 team->t.t_invoke = NULL; /* not needed */ 4403 4404 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4405 team->t.t_sched = new_icvs->sched; 4406 4407 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 4408 team->t.t_fp_control_saved = FALSE; /* not needed */ 4409 team->t.t_x87_fpu_control_word = 0; /* not needed */ 4410 team->t.t_mxcsr = 0; /* not needed */ 4411 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 4412 4413 team->t.t_construct = 0; 4414 __kmp_init_lock( & team->t.t_single_lock ); 4415 4416 team->t.t_ordered .dt.t_value = 0; 4417 team->t.t_master_active = FALSE; 4418 4419 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t )); 4420 4421 #ifdef KMP_DEBUG 4422 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */ 4423 #endif 4424 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */ 4425 4426 team->t.t_control_stack_top = NULL; 4427 4428 __kmp_reinitialize_team( team, new_icvs, loc ); 4429 4430 KMP_MB(); 4431 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) ); 4432 } 4433 4434 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4435 /* Sets full mask for thread and returns old mask, no changes to structures. */ 4436 static void 4437 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask ) 4438 { 4439 if ( KMP_AFFINITY_CAPABLE() ) { 4440 int status; 4441 if ( old_mask != NULL ) { 4442 status = __kmp_get_system_affinity( old_mask, TRUE ); 4443 int error = errno; 4444 if ( status != 0 ) { 4445 __kmp_msg( 4446 kmp_ms_fatal, 4447 KMP_MSG( ChangeThreadAffMaskError ), 4448 KMP_ERR( error ), 4449 __kmp_msg_null 4450 ); 4451 } 4452 } 4453 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE ); 4454 } 4455 } 4456 #endif 4457 4458 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4459 4460 // 4461 // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 4462 // It calculats the worker + master thread's partition based upon the parent 4463 // thread's partition, and binds each worker to a thread in their partition. 4464 // The master thread's partition should already include its current binding. 4465 // 4466 static void 4467 __kmp_partition_places( kmp_team_t *team, int update_master_only ) 4468 { 4469 // 4470 // Copy the master thread's place partion to the team struct 4471 // 4472 kmp_info_t *master_th = team->t.t_threads[0]; 4473 KMP_DEBUG_ASSERT( master_th != NULL ); 4474 kmp_proc_bind_t proc_bind = team->t.t_proc_bind; 4475 int first_place = master_th->th.th_first_place; 4476 int last_place = master_th->th.th_last_place; 4477 int masters_place = master_th->th.th_current_place; 4478 team->t.t_first_place = first_place; 4479 team->t.t_last_place = last_place; 4480 4481 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n", 4482 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id, 4483 masters_place, first_place, last_place ) ); 4484 4485 switch ( proc_bind ) { 4486 4487 case proc_bind_default: 4488 // 4489 // serial teams might have the proc_bind policy set to 4490 // proc_bind_default. It doesn't matter, as we don't 4491 // rebind the master thread for any proc_bind policy. 
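        // (A worked example for the close/spread cases further below, with
        //  purely illustrative numbers: distributing n_th = 10 threads over
        //  n_places = 4 places gives S = 2, rem = 2, gap = 2, so consecutive
        //  places starting at the master's place receive 3, 2, 3, 2 threads
        //  respectively.)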
4492 // 4493 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 ); 4494 break; 4495 4496 case proc_bind_master: 4497 { 4498 int f; 4499 int n_th = team->t.t_nproc; 4500 for ( f = 1; f < n_th; f++ ) { 4501 kmp_info_t *th = team->t.t_threads[f]; 4502 KMP_DEBUG_ASSERT( th != NULL ); 4503 th->th.th_first_place = first_place; 4504 th->th.th_last_place = last_place; 4505 th->th.th_new_place = masters_place; 4506 4507 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4508 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4509 team->t.t_id, f, masters_place, first_place, last_place ) ); 4510 } 4511 } 4512 break; 4513 4514 case proc_bind_close: 4515 { 4516 int f; 4517 int n_th = team->t.t_nproc; 4518 int n_places; 4519 if ( first_place <= last_place ) { 4520 n_places = last_place - first_place + 1; 4521 } 4522 else { 4523 n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 4524 } 4525 if ( n_th <= n_places ) { 4526 int place = masters_place; 4527 for ( f = 1; f < n_th; f++ ) { 4528 kmp_info_t *th = team->t.t_threads[f]; 4529 KMP_DEBUG_ASSERT( th != NULL ); 4530 4531 if ( place == last_place ) { 4532 place = first_place; 4533 } 4534 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4535 place = 0; 4536 } 4537 else { 4538 place++; 4539 } 4540 th->th.th_first_place = first_place; 4541 th->th.th_last_place = last_place; 4542 th->th.th_new_place = place; 4543 4544 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4545 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4546 team->t.t_id, f, place, first_place, last_place ) ); 4547 } 4548 } 4549 else { 4550 int S, rem, gap, s_count; 4551 S = n_th / n_places; 4552 s_count = 0; 4553 rem = n_th - ( S * n_places ); 4554 gap = rem > 0 ? 
n_places/rem : n_places; 4555 int place = masters_place; 4556 int gap_ct = gap; 4557 for ( f = 0; f < n_th; f++ ) { 4558 kmp_info_t *th = team->t.t_threads[f]; 4559 KMP_DEBUG_ASSERT( th != NULL ); 4560 4561 th->th.th_first_place = first_place; 4562 th->th.th_last_place = last_place; 4563 th->th.th_new_place = place; 4564 s_count++; 4565 4566 if ( (s_count == S) && rem && (gap_ct == gap) ) { 4567 // do nothing, add an extra thread to place on next iteration 4568 } 4569 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { 4570 // we added an extra thread to this place; move to next place 4571 if ( place == last_place ) { 4572 place = first_place; 4573 } 4574 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4575 place = 0; 4576 } 4577 else { 4578 place++; 4579 } 4580 s_count = 0; 4581 gap_ct = 1; 4582 rem--; 4583 } 4584 else if (s_count == S) { // place full; don't add extra 4585 if ( place == last_place ) { 4586 place = first_place; 4587 } 4588 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4589 place = 0; 4590 } 4591 else { 4592 place++; 4593 } 4594 gap_ct++; 4595 s_count = 0; 4596 } 4597 4598 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4599 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4600 team->t.t_id, f, th->th.th_new_place, first_place, 4601 last_place ) ); 4602 } 4603 KMP_DEBUG_ASSERT( place == masters_place ); 4604 } 4605 } 4606 break; 4607 4608 case proc_bind_spread: 4609 { 4610 int f; 4611 int n_th = team->t.t_nproc; 4612 int n_places; 4613 int thidx; 4614 if ( first_place <= last_place ) { 4615 n_places = last_place - first_place + 1; 4616 } 4617 else { 4618 n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 4619 } 4620 if ( n_th <= n_places ) { 4621 int place = masters_place; 4622 int S = n_places/n_th; 4623 int s_count, rem, gap, gap_ct; 4624 rem = n_places - n_th*S; 4625 gap = rem ? n_th/rem : 1; 4626 gap_ct = gap; 4627 thidx = n_th; 4628 if (update_master_only == 1) 4629 thidx = 1; 4630 for ( f = 0; f < thidx; f++ ) { 4631 kmp_info_t *th = team->t.t_threads[f]; 4632 KMP_DEBUG_ASSERT( th != NULL ); 4633 4634 th->th.th_first_place = place; 4635 th->th.th_new_place = place; 4636 s_count = 1; 4637 while (s_count < S) { 4638 if ( place == last_place ) { 4639 place = first_place; 4640 } 4641 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4642 place = 0; 4643 } 4644 else { 4645 place++; 4646 } 4647 s_count++; 4648 } 4649 if (rem && (gap_ct == gap)) { 4650 if ( place == last_place ) { 4651 place = first_place; 4652 } 4653 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4654 place = 0; 4655 } 4656 else { 4657 place++; 4658 } 4659 rem--; 4660 gap_ct = 0; 4661 } 4662 th->th.th_last_place = place; 4663 gap_ct++; 4664 4665 if ( place == last_place ) { 4666 place = first_place; 4667 } 4668 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4669 place = 0; 4670 } 4671 else { 4672 place++; 4673 } 4674 4675 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4676 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4677 team->t.t_id, f, th->th.th_new_place, 4678 th->th.th_first_place, th->th.th_last_place ) ); 4679 } 4680 KMP_DEBUG_ASSERT( update_master_only || place == masters_place ); 4681 } 4682 else { 4683 int S, rem, gap, s_count; 4684 S = n_th / n_places; 4685 s_count = 0; 4686 rem = n_th - ( S * n_places ); 4687 gap = rem > 0 ? 
n_places/rem : n_places; 4688 int place = masters_place; 4689 int gap_ct = gap; 4690 thidx = n_th; 4691 if (update_master_only == 1) 4692 thidx = 1; 4693 for ( f = 0; f < thidx; f++ ) { 4694 kmp_info_t *th = team->t.t_threads[f]; 4695 KMP_DEBUG_ASSERT( th != NULL ); 4696 4697 th->th.th_first_place = place; 4698 th->th.th_last_place = place; 4699 th->th.th_new_place = place; 4700 s_count++; 4701 4702 if ( (s_count == S) && rem && (gap_ct == gap) ) { 4703 // do nothing, add an extra thread to place on next iteration 4704 } 4705 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { 4706 // we added an extra thread to this place; move on to next place 4707 if ( place == last_place ) { 4708 place = first_place; 4709 } 4710 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4711 place = 0; 4712 } 4713 else { 4714 place++; 4715 } 4716 s_count = 0; 4717 gap_ct = 1; 4718 rem--; 4719 } 4720 else if (s_count == S) { // place is full; don't add extra thread 4721 if ( place == last_place ) { 4722 place = first_place; 4723 } 4724 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4725 place = 0; 4726 } 4727 else { 4728 place++; 4729 } 4730 gap_ct++; 4731 s_count = 0; 4732 } 4733 4734 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4735 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4736 team->t.t_id, f, th->th.th_new_place, 4737 th->th.th_first_place, th->th.th_last_place) ); 4738 } 4739 KMP_DEBUG_ASSERT( update_master_only || place == masters_place ); 4740 } 4741 } 4742 break; 4743 4744 default: 4745 break; 4746 } 4747 4748 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) ); 4749 } 4750 4751 #endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */ 4752 4753 /* allocate a new team data structure to use. take one off of the free pool if available */ 4754 kmp_team_t * 4755 __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, 4756 #if OMPT_SUPPORT 4757 ompt_parallel_id_t ompt_parallel_id, 4758 #endif 4759 #if OMP_40_ENABLED 4760 kmp_proc_bind_t new_proc_bind, 4761 #endif 4762 kmp_internal_control_t *new_icvs, 4763 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) ) 4764 { 4765 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); 4766 int f; 4767 kmp_team_t *team; 4768 int use_hot_team = ! root->r.r_active; 4769 int level = 0; 4770 4771 KA_TRACE( 20, ("__kmp_allocate_team: called\n")); 4772 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 ); 4773 KMP_DEBUG_ASSERT( max_nproc >= new_nproc ); 4774 KMP_MB(); 4775 4776 #if KMP_NESTED_HOT_TEAMS 4777 kmp_hot_team_ptr_t *hot_teams; 4778 if( master ) { 4779 team = master->th.th_team; 4780 level = team->t.t_active_level; 4781 if( master->th.th_teams_microtask ) { // in teams construct? 
4782 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1 4783 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams 4784 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams 4785 ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise 4786 } 4787 } 4788 hot_teams = master->th.th_hot_teams; 4789 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team ) 4790 { // hot team has already been allocated for given level 4791 use_hot_team = 1; 4792 } else { 4793 use_hot_team = 0; 4794 } 4795 } 4796 #endif 4797 // Optimization to use a "hot" team 4798 if( use_hot_team && new_nproc > 1 ) { 4799 KMP_DEBUG_ASSERT( new_nproc == max_nproc ); 4800 #if KMP_NESTED_HOT_TEAMS 4801 team = hot_teams[level].hot_team; 4802 #else 4803 team = root->r.r_hot_team; 4804 #endif 4805 #if KMP_DEBUG 4806 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 4807 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n", 4808 team->t.t_task_team[0], team->t.t_task_team[1] )); 4809 } 4810 #endif 4811 4812 // Has the number of threads changed? 4813 /* Let's assume the most common case is that the number of threads is unchanged, and 4814 put that case first. */ 4815 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads 4816 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" )); 4817 // This case can mean that omp_set_num_threads() was called and the hot team size 4818 // was already reduced, so we check the special flag 4819 if ( team->t.t_size_changed == -1 ) { 4820 team->t.t_size_changed = 1; 4821 } else { 4822 KMP_CHECK_UPDATE(team->t.t_size_changed, 0); 4823 } 4824 4825 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4826 kmp_r_sched_t new_sched = new_icvs->sched; 4827 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || 4828 team->t.t_sched.chunk != new_sched.chunk) 4829 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule 4830 4831 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); 4832 4833 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 4834 0, team->t.t_threads[0], team ) ); 4835 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); 4836 4837 #if OMP_40_ENABLED 4838 # if KMP_AFFINITY_SUPPORTED 4839 if ( ( team->t.t_size_changed == 0 ) 4840 && ( team->t.t_proc_bind == new_proc_bind ) ) { 4841 if (new_proc_bind == proc_bind_spread) { 4842 __kmp_partition_places(team, 1); // add flag to update only master for spread 4843 } 4844 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n", 4845 team->t.t_id, new_proc_bind, team->t.t_first_place, 4846 team->t.t_last_place ) ); 4847 } 4848 else { 4849 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4850 __kmp_partition_places( team ); 4851 } 4852 # else 4853 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4854 # endif /* KMP_AFFINITY_SUPPORTED */ 4855 #endif /* OMP_40_ENABLED */ 4856 } 4857 else if( team->t.t_nproc > new_nproc ) { 4858 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc )); 4859 4860 team->t.t_size_changed = 1; 4861 #if KMP_NESTED_HOT_TEAMS 4862 if( __kmp_hot_teams_mode == 0 ) { 4863 // AC: saved number of threads should correspond to team's value in this mode, 4864 // can be bigger in mode 1, when hot team has some threads in reserve 4865 
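// The two nested hot team modes (__kmp_hot_teams_mode) diverge here:
//   0 - threads beyond new_nproc are freed back to the thread pool below;
//   1 - the extra threads stay in the team as a reserve and are switched to
//       wait on their own b_go flag instead of the parent's.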
KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); 4866 hot_teams[level].hot_team_nth = new_nproc; 4867 #endif // KMP_NESTED_HOT_TEAMS 4868 /* release the extra threads we don't need any more */ 4869 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) { 4870 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); 4871 if ( __kmp_tasking_mode != tskm_immediate_exec) { 4872 // When decreasing team size, threads no longer in the team should unref task team. 4873 team->t.t_threads[f]->th.th_task_team = NULL; 4874 } 4875 __kmp_free_thread( team->t.t_threads[ f ] ); 4876 team->t.t_threads[ f ] = NULL; 4877 } 4878 #if KMP_NESTED_HOT_TEAMS 4879 } // (__kmp_hot_teams_mode == 0) 4880 else { 4881 // When keeping extra threads in team, switch threads to wait on own b_go flag 4882 for (f=new_nproc; f<team->t.t_nproc; ++f) { 4883 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 4884 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; 4885 for (int b=0; b<bs_last_barrier; ++b) { 4886 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { 4887 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 4888 } 4889 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); 4890 } 4891 } 4892 } 4893 #endif // KMP_NESTED_HOT_TEAMS 4894 team->t.t_nproc = new_nproc; 4895 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4896 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type || 4897 team->t.t_sched.chunk != new_icvs->sched.chunk) 4898 team->t.t_sched = new_icvs->sched; 4899 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); 4900 4901 /* update the remaining threads */ 4902 for(f = 0; f < new_nproc; ++f) { 4903 team->t.t_threads[f]->th.th_team_nproc = new_nproc; 4904 } 4905 // restore the current task state of the master thread: should be the implicit task 4906 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 4907 0, team->t.t_threads[0], team ) ); 4908 4909 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); 4910 4911 #ifdef KMP_DEBUG 4912 for ( f = 0; f < team->t.t_nproc; f++ ) { 4913 KMP_DEBUG_ASSERT( team->t.t_threads[f] && 4914 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); 4915 } 4916 #endif 4917 4918 #if OMP_40_ENABLED 4919 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4920 # if KMP_AFFINITY_SUPPORTED 4921 __kmp_partition_places( team ); 4922 # endif 4923 #endif 4924 } 4925 else { // team->t.t_nproc < new_nproc 4926 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4927 kmp_affin_mask_t *old_mask; 4928 if ( KMP_AFFINITY_CAPABLE() ) { 4929 KMP_CPU_ALLOC(old_mask); 4930 } 4931 #endif 4932 4933 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc )); 4934 4935 team->t.t_size_changed = 1; 4936 4937 #if KMP_NESTED_HOT_TEAMS 4938 int avail_threads = hot_teams[level].hot_team_nth; 4939 if( new_nproc < avail_threads ) 4940 avail_threads = new_nproc; 4941 kmp_info_t **other_threads = team->t.t_threads; 4942 for ( f = team->t.t_nproc; f < avail_threads; ++f ) { 4943 // Adjust barrier data of reserved threads (if any) of the team 4944 // Other data will be set in __kmp_initialize_info() below. 
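// Bring each reserved thread's b_arrived counters up to the team's current
// values so a thread that sat out earlier barrier phases does not appear out
// of step with the rest of the team once it participates again.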
4945 int b; 4946 kmp_balign_t * balign = other_threads[f]->th.th_bar; 4947 for ( b = 0; b < bs_last_barrier; ++ b ) { 4948 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 4949 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4950 #if USE_DEBUGGER 4951 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 4952 #endif 4953 } 4954 } 4955 if( hot_teams[level].hot_team_nth >= new_nproc ) { 4956 // we have all needed threads in reserve, no need to allocate any 4957 // this only possible in mode 1, cannot have reserved threads in mode 0 4958 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); 4959 team->t.t_nproc = new_nproc; // just get reserved threads involved 4960 } else { 4961 // we may have some threads in reserve, but not enough 4962 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any 4963 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size 4964 #endif // KMP_NESTED_HOT_TEAMS 4965 if(team->t.t_max_nproc < new_nproc) { 4966 /* reallocate larger arrays */ 4967 __kmp_reallocate_team_arrays(team, new_nproc); 4968 __kmp_reinitialize_team( team, new_icvs, NULL ); 4969 } 4970 4971 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4972 /* Temporarily set full mask for master thread before 4973 creation of workers. The reason is that workers inherit 4974 the affinity from master, so if a lot of workers are 4975 created on the single core quickly, they don't get 4976 a chance to set their own affinity for a long time. 4977 */ 4978 __kmp_set_thread_affinity_mask_full_tmp( old_mask ); 4979 #endif 4980 4981 /* allocate new threads for the hot team */ 4982 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) { 4983 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f ); 4984 KMP_DEBUG_ASSERT( new_worker ); 4985 team->t.t_threads[ f ] = new_worker; 4986 4987 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n", 4988 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f, 4989 team->t.t_bar[bs_forkjoin_barrier].b_arrived, 4990 team->t.t_bar[bs_plain_barrier].b_arrived ) ); 4991 4992 { // Initialize barrier data for new threads. 4993 int b; 4994 kmp_balign_t * balign = new_worker->th.th_bar; 4995 for( b = 0; b < bs_last_barrier; ++ b ) { 4996 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; 4997 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4998 #if USE_DEBUGGER 4999 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; 5000 #endif 5001 } 5002 } 5003 } 5004 5005 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5006 if ( KMP_AFFINITY_CAPABLE() ) { 5007 /* Restore initial master thread's affinity mask */ 5008 __kmp_set_system_affinity( old_mask, TRUE ); 5009 KMP_CPU_FREE(old_mask); 5010 } 5011 #endif 5012 #if KMP_NESTED_HOT_TEAMS 5013 } // end of check of t_nproc vs. new_nproc vs. 
hot_team_nth 5014 #endif // KMP_NESTED_HOT_TEAMS 5015 /* make sure everyone is syncronized */ 5016 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below 5017 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident ); 5018 5019 /* reinitialize the threads */ 5020 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); 5021 for (f=0; f < team->t.t_nproc; ++f) 5022 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) ); 5023 if (level) { // set th_task_state for new threads in nested hot team 5024 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the 5025 // th_task_state for the new threads. th_task_state for master thread will not be accurate until 5026 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value. 5027 for (f=old_nproc; f < team->t.t_nproc; ++f) 5028 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level]; 5029 } 5030 else { // set th_task_state for new threads in non-nested hot team 5031 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state 5032 for (f=old_nproc; f < team->t.t_nproc; ++f) 5033 team->t.t_threads[f]->th.th_task_state = old_state; 5034 } 5035 5036 #ifdef KMP_DEBUG 5037 for ( f = 0; f < team->t.t_nproc; ++ f ) { 5038 KMP_DEBUG_ASSERT( team->t.t_threads[f] && 5039 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); 5040 } 5041 #endif 5042 5043 #if OMP_40_ENABLED 5044 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 5045 # if KMP_AFFINITY_SUPPORTED 5046 __kmp_partition_places( team ); 5047 # endif 5048 #endif 5049 } // Check changes in number of threads 5050 5051 #if OMP_40_ENABLED 5052 kmp_info_t *master = team->t.t_threads[0]; 5053 if( master->th.th_teams_microtask ) { 5054 for( f = 1; f < new_nproc; ++f ) { 5055 // propagate teams construct specific info to workers 5056 kmp_info_t *thr = team->t.t_threads[f]; 5057 thr->th.th_teams_microtask = master->th.th_teams_microtask; 5058 thr->th.th_teams_level = master->th.th_teams_level; 5059 thr->th.th_teams_size = master->th.th_teams_size; 5060 } 5061 } 5062 #endif /* OMP_40_ENABLED */ 5063 #if KMP_NESTED_HOT_TEAMS 5064 if( level ) { 5065 // Sync barrier state for nested hot teams, not needed for outermost hot team. 5066 for( f = 1; f < new_nproc; ++f ) { 5067 kmp_info_t *thr = team->t.t_threads[f]; 5068 int b; 5069 kmp_balign_t * balign = thr->th.th_bar; 5070 for( b = 0; b < bs_last_barrier; ++ b ) { 5071 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; 5072 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 5073 #if USE_DEBUGGER 5074 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; 5075 #endif 5076 } 5077 } 5078 } 5079 #endif // KMP_NESTED_HOT_TEAMS 5080 5081 /* reallocate space for arguments if necessary */ 5082 __kmp_alloc_argv_entries( argc, team, TRUE ); 5083 KMP_CHECK_UPDATE(team->t.t_argc, argc); 5084 // 5085 // The hot team re-uses the previous task team, 5086 // if untouched during the previous release->gather phase. 
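// (The two t_task_team[] slots are used on alternating barrier phases; both
// are traced below so the reuse can be checked in debug builds.)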
5087 // 5088 5089 KF_TRACE( 10, ( " hot_team = %p\n", team ) ); 5090 5091 #if KMP_DEBUG 5092 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 5093 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n", 5094 team->t.t_task_team[0], team->t.t_task_team[1] )); 5095 } 5096 #endif 5097 5098 #if OMPT_SUPPORT 5099 __ompt_team_assign_id(team, ompt_parallel_id); 5100 #endif 5101 5102 KMP_MB(); 5103 5104 return team; 5105 } 5106 5107 /* next, let's try to take one from the team pool */ 5108 KMP_MB(); 5109 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; ) 5110 { 5111 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */ 5112 if ( team->t.t_max_nproc >= max_nproc ) { 5113 /* take this team from the team pool */ 5114 __kmp_team_pool = team->t.t_next_pool; 5115 5116 /* setup the team for fresh use */ 5117 __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); 5118 5119 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", 5120 &team->t.t_task_team[0], &team->t.t_task_team[1]) ); 5121 team->t.t_task_team[0] = NULL; 5122 team->t.t_task_team[1] = NULL; 5123 5124 /* reallocate space for arguments if necessary */ 5125 __kmp_alloc_argv_entries( argc, team, TRUE ); 5126 KMP_CHECK_UPDATE(team->t.t_argc, argc); 5127 5128 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 5129 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); 5130 { // Initialize barrier data. 5131 int b; 5132 for ( b = 0; b < bs_last_barrier; ++ b) { 5133 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; 5134 #if USE_DEBUGGER 5135 team->t.t_bar[ b ].b_master_arrived = 0; 5136 team->t.t_bar[ b ].b_team_arrived = 0; 5137 #endif 5138 } 5139 } 5140 5141 #if OMP_40_ENABLED 5142 team->t.t_proc_bind = new_proc_bind; 5143 #endif 5144 5145 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id )); 5146 5147 #if OMPT_SUPPORT 5148 __ompt_team_assign_id(team, ompt_parallel_id); 5149 #endif 5150 5151 KMP_MB(); 5152 5153 return team; 5154 } 5155 5156 /* reap team if it is too small, then loop back and check the next one */ 5157 /* not sure if this is wise, but, will be redone during the hot-teams rewrite. */ 5158 /* TODO: Use technique to find the right size hot-team, don't reap them */ 5159 team = __kmp_reap_team( team ); 5160 __kmp_team_pool = team; 5161 } 5162 5163 /* nothing available in the pool, no matter, make a new team! */ 5164 KMP_MB(); 5165 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) ); 5166 5167 /* and set it up */ 5168 team->t.t_max_nproc = max_nproc; 5169 /* NOTE well, for some reason allocating one big buffer and dividing it 5170 * up seems to really hurt performance a lot on the P4, so, let's not use 5171 * this... 
*/ 5172 __kmp_allocate_team_arrays( team, max_nproc ); 5173 5174 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) ); 5175 __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); 5176 5177 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", 5178 &team->t.t_task_team[0], &team->t.t_task_team[1] ) ); 5179 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate 5180 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate 5181 5182 if ( __kmp_storage_map ) { 5183 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc ); 5184 } 5185 5186 /* allocate space for arguments */ 5187 __kmp_alloc_argv_entries( argc, team, FALSE ); 5188 team->t.t_argc = argc; 5189 5190 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 5191 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); 5192 { // Initialize barrier data. 5193 int b; 5194 for ( b = 0; b < bs_last_barrier; ++ b ) { 5195 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; 5196 #if USE_DEBUGGER 5197 team->t.t_bar[ b ].b_master_arrived = 0; 5198 team->t.t_bar[ b ].b_team_arrived = 0; 5199 #endif 5200 } 5201 } 5202 5203 #if OMP_40_ENABLED 5204 team->t.t_proc_bind = new_proc_bind; 5205 #endif 5206 5207 #if OMPT_SUPPORT 5208 __ompt_team_assign_id(team, ompt_parallel_id); 5209 team->t.ompt_serialized_team_info = NULL; 5210 #endif 5211 5212 KMP_MB(); 5213 5214 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id )); 5215 5216 return team; 5217 } 5218 5219 /* TODO implement hot-teams at all levels */ 5220 /* TODO implement lazy thread release on demand (disband request) */ 5221 5222 /* free the team. return it to the team pool. release all the threads 5223 * associated with it */ 5224 void 5225 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) ) 5226 { 5227 int f; 5228 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id )); 5229 5230 /* verify state */ 5231 KMP_DEBUG_ASSERT( root ); 5232 KMP_DEBUG_ASSERT( team ); 5233 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc ); 5234 KMP_DEBUG_ASSERT( team->t.t_threads ); 5235 5236 int use_hot_team = team == root->r.r_hot_team; 5237 #if KMP_NESTED_HOT_TEAMS 5238 int level; 5239 kmp_hot_team_ptr_t *hot_teams; 5240 if( master ) { 5241 level = team->t.t_active_level - 1; 5242 if( master->th.th_teams_microtask ) { // in teams construct? 5243 if( master->th.th_teams_size.nteams > 1 ) { 5244 ++level; // level was not increased in teams construct for team_of_masters 5245 } 5246 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && 5247 master->th.th_teams_level == team->t.t_level ) { 5248 ++level; // level was not increased in teams construct for team_of_workers before the parallel 5249 } // team->t.t_level will be increased inside parallel 5250 } 5251 hot_teams = master->th.th_hot_teams; 5252 if( level < __kmp_hot_teams_max_level ) { 5253 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team ); 5254 use_hot_team = 1; 5255 } 5256 } 5257 #endif // KMP_NESTED_HOT_TEAMS 5258 5259 /* team is done working */ 5260 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library. 5261 team->t.t_copyin_counter = 0; // init counter for possible reuse 5262 // Do not reset pointer to parent team to NULL for hot teams. 5263 5264 /* if we are non-hot team, release our threads */ 5265 if( ! 
use_hot_team ) { 5266 if (__kmp_tasking_mode != tskm_immediate_exec) { 5267 // Wait for threads to reach reapable state 5268 for (f = 1; f < team->t.t_nproc; ++f) { 5269 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 5270 kmp_info_t *th = team->t.t_threads[f]; 5271 volatile kmp_uint32 *state = &th->th.th_reap_state; 5272 while (*state != KMP_SAFE_TO_REAP) { 5273 #if KMP_OS_WINDOWS 5274 // On Windows a thread can be killed at any time, check this 5275 DWORD ecode; 5276 if (!__kmp_is_thread_alive(th, &ecode)) { 5277 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread 5278 break; 5279 } 5280 #endif 5281 // first check if thread is sleeping 5282 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th); 5283 if (fl.is_sleeping()) 5284 fl.resume(__kmp_gtid_from_thread(th)); 5285 KMP_CPU_PAUSE(); 5286 } 5287 } 5288 5289 // Delete task teams 5290 int tt_idx; 5291 for (tt_idx=0; tt_idx<2; ++tt_idx) { 5292 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; 5293 if ( task_team != NULL ) { 5294 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams 5295 team->t.t_threads[f]->th.th_task_team = NULL; 5296 } 5297 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) ); 5298 #if KMP_NESTED_HOT_TEAMS 5299 __kmp_free_task_team( master, task_team ); 5300 #endif 5301 team->t.t_task_team[tt_idx] = NULL; 5302 } 5303 } 5304 } 5305 5306 // Reset pointer to parent team only for non-hot teams. 5307 team->t.t_parent = NULL; 5308 team->t.t_level = 0; 5309 team->t.t_active_level = 0; 5310 5311 /* free the worker threads */ 5312 for ( f = 1; f < team->t.t_nproc; ++ f ) { 5313 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); 5314 __kmp_free_thread( team->t.t_threads[ f ] ); 5315 team->t.t_threads[ f ] = NULL; 5316 } 5317 5318 /* put the team back in the team pool */ 5319 /* TODO limit size of team pool, call reap_team if pool too large */ 5320 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool; 5321 __kmp_team_pool = (volatile kmp_team_t*) team; 5322 } 5323 5324 KMP_MB(); 5325 } 5326 5327 5328 /* reap the team. destroy it, reclaim all its resources and free its memory */ 5329 kmp_team_t * 5330 __kmp_reap_team( kmp_team_t *team ) 5331 { 5332 kmp_team_t *next_pool = team->t.t_next_pool; 5333 5334 KMP_DEBUG_ASSERT( team ); 5335 KMP_DEBUG_ASSERT( team->t.t_dispatch ); 5336 KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); 5337 KMP_DEBUG_ASSERT( team->t.t_threads ); 5338 KMP_DEBUG_ASSERT( team->t.t_argv ); 5339 5340 /* TODO clean the threads that are a part of this? */ 5341 5342 /* free stuff */ 5343 5344 __kmp_free_team_arrays( team ); 5345 if ( team->t.t_argv != &team->t.t_inline_argv[0] ) 5346 __kmp_free( (void*) team->t.t_argv ); 5347 __kmp_free( team ); 5348 5349 KMP_MB(); 5350 return next_pool; 5351 } 5352 5353 // 5354 // Free the thread. Don't reap it, just place it on the pool of available 5355 // threads. 5356 // 5357 // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid 5358 // binding for the affinity mechanism to be useful. 5359 // 5360 // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid. 5361 // However, we want to avoid a potential performance problem by always 5362 // scanning through the list to find the correct point at which to insert 5363 // the thread (potential N**2 behavior). To do this we keep track of the 5364 // last place a thread struct was inserted (__kmp_thread_pool_insert_pt). 
5365 // With single-level parallelism, threads will always be added to the tail 5366 // of the list, kept track of by __kmp_thread_pool_insert_pt. With nested 5367 // parallelism, all bets are off and we may need to scan through the entire 5368 // free list. 5369 // 5370 // This change also has a potentially large performance benefit, for some 5371 // applications. Previously, as threads were freed from the hot team, they 5372 // would be placed back on the free list in inverse order. If the hot team 5373 // grew back to it's original size, then the freed thread would be placed 5374 // back on the hot team in reverse order. This could cause bad cache 5375 // locality problems on programs where the size of the hot team regularly 5376 // grew and shrunk. 5377 // 5378 // Now, for single-level parallelism, the OMP tid is alway == gtid. 5379 // 5380 void 5381 __kmp_free_thread( kmp_info_t *this_th ) 5382 { 5383 int gtid; 5384 kmp_info_t **scan; 5385 5386 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n", 5387 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid )); 5388 5389 KMP_DEBUG_ASSERT( this_th ); 5390 5391 // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team). 5392 int b; 5393 kmp_balign_t *balign = this_th->th.th_bar; 5394 for (b=0; b<bs_last_barrier; ++b) { 5395 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) 5396 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 5397 balign[b].bb.team = NULL; 5398 balign[b].bb.leaf_kids = 0; 5399 } 5400 this_th->th.th_task_state = 0; 5401 5402 /* put thread back on the free pool */ 5403 TCW_PTR(this_th->th.th_team, NULL); 5404 TCW_PTR(this_th->th.th_root, NULL); 5405 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */ 5406 5407 // 5408 // If the __kmp_thread_pool_insert_pt is already past the new insert 5409 // point, then we need to re-scan the entire list. 5410 // 5411 gtid = this_th->th.th_info.ds.ds_gtid; 5412 if ( __kmp_thread_pool_insert_pt != NULL ) { 5413 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL ); 5414 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) { 5415 __kmp_thread_pool_insert_pt = NULL; 5416 } 5417 } 5418 5419 // 5420 // Scan down the list to find the place to insert the thread. 5421 // scan is the address of a link in the list, possibly the address of 5422 // __kmp_thread_pool itself. 5423 // 5424 // In the absence of nested parallism, the for loop will have 0 iterations. 5425 // 5426 if ( __kmp_thread_pool_insert_pt != NULL ) { 5427 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool ); 5428 } 5429 else { 5430 scan = (kmp_info_t **)&__kmp_thread_pool; 5431 } 5432 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid ); 5433 scan = &( (*scan)->th.th_next_pool ) ); 5434 5435 // 5436 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt 5437 // to its address. 
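// A purely illustrative walk-through with hypothetical gtids: if the pool
// currently holds 2 -> 5 -> 9 and this_th has gtid 7, the scan loop above
// stops with *scan addressing the link that holds 9; the stores below then
// splice 7 in front of 9, giving 2 -> 5 -> 7 -> 9, and record 7 as the new
// insert point for the next call.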
5438 // 5439 TCW_PTR(this_th->th.th_next_pool, *scan); 5440 __kmp_thread_pool_insert_pt = *scan = this_th; 5441 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL ) 5442 || ( this_th->th.th_info.ds.ds_gtid 5443 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) ); 5444 TCW_4(this_th->th.th_in_pool, TRUE); 5445 __kmp_thread_pool_nth++; 5446 5447 TCW_4(__kmp_nth, __kmp_nth - 1); 5448 5449 #ifdef KMP_ADJUST_BLOCKTIME 5450 /* Adjust blocktime back to user setting or default if necessary */ 5451 /* Middle initialization might never have occurred */ 5452 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 5453 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); 5454 if ( __kmp_nth <= __kmp_avail_proc ) { 5455 __kmp_zero_bt = FALSE; 5456 } 5457 } 5458 #endif /* KMP_ADJUST_BLOCKTIME */ 5459 5460 KMP_MB(); 5461 } 5462 5463 5464 /* ------------------------------------------------------------------------ */ 5465 5466 void * 5467 __kmp_launch_thread( kmp_info_t *this_thr ) 5468 { 5469 int gtid = this_thr->th.th_info.ds.ds_gtid; 5470 /* void *stack_data;*/ 5471 kmp_team_t *(*volatile pteam); 5472 5473 KMP_MB(); 5474 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) ); 5475 5476 if( __kmp_env_consistency_check ) { 5477 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak? 5478 } 5479 5480 #if OMPT_SUPPORT 5481 if (ompt_enabled) { 5482 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5483 this_thr->th.ompt_thread_info.wait_id = 0; 5484 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); 5485 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { 5486 __ompt_thread_begin(ompt_thread_worker, gtid); 5487 } 5488 } 5489 #endif 5490 5491 /* This is the place where threads wait for work */ 5492 while( ! TCR_4(__kmp_global.g.g_done) ) { 5493 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] ); 5494 KMP_MB(); 5495 5496 /* wait for work to do */ 5497 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid )); 5498 5499 #if OMPT_SUPPORT 5500 if (ompt_enabled) { 5501 this_thr->th.ompt_thread_info.state = ompt_state_idle; 5502 } 5503 #endif 5504 5505 /* No tid yet since not part of a team */ 5506 __kmp_fork_barrier( gtid, KMP_GTID_DNE ); 5507 5508 #if OMPT_SUPPORT 5509 if (ompt_enabled) { 5510 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5511 } 5512 #endif 5513 5514 pteam = (kmp_team_t *(*))(& this_thr->th.th_team); 5515 5516 /* have we been allocated? */ 5517 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) { 5518 #if OMPT_SUPPORT 5519 ompt_task_info_t *task_info; 5520 ompt_parallel_id_t my_parallel_id; 5521 if (ompt_enabled) { 5522 task_info = __ompt_get_taskinfo(0); 5523 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id; 5524 } 5525 #endif 5526 /* we were just woken up, so run our new task */ 5527 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) { 5528 int rc; 5529 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n", 5530 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); 5531 5532 updateHWFPControl (*pteam); 5533 5534 #if OMPT_SUPPORT 5535 if (ompt_enabled) { 5536 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; 5537 // Initialize OMPT task id for implicit task. 
5538 int tid = __kmp_tid_from_gtid(gtid); 5539 task_info->task_id = __ompt_task_id_new(tid); 5540 } 5541 #endif 5542 5543 { 5544 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 5545 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 5546 rc = (*pteam)->t.t_invoke( gtid ); 5547 } 5548 KMP_ASSERT( rc ); 5549 5550 #if OMPT_SUPPORT 5551 if (ompt_enabled) { 5552 /* no frame set while outside task */ 5553 task_info->frame.exit_runtime_frame = NULL; 5554 5555 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5556 } 5557 #endif 5558 KMP_MB(); 5559 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", 5560 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); 5561 } 5562 /* join barrier after parallel region */ 5563 __kmp_join_barrier( gtid ); 5564 #if OMPT_SUPPORT && OMPT_TRACE 5565 if (ompt_enabled) { 5566 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 5567 // don't access *pteam here: it may have already been freed 5568 // by the master thread behind the barrier (possible race) 5569 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 5570 my_parallel_id, task_info->task_id); 5571 } 5572 task_info->frame.exit_runtime_frame = NULL; 5573 task_info->task_id = 0; 5574 } 5575 #endif 5576 } 5577 } 5578 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); 5579 5580 #if OMPT_SUPPORT 5581 if (ompt_enabled && 5582 ompt_callbacks.ompt_callback(ompt_event_thread_end)) { 5583 __ompt_thread_end(ompt_thread_worker, gtid); 5584 } 5585 #endif 5586 5587 this_thr->th.th_task_team = NULL; 5588 /* run the destructors for the threadprivate data for this thread */ 5589 __kmp_common_destroy_gtid( gtid ); 5590 5591 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) ); 5592 KMP_MB(); 5593 return this_thr; 5594 } 5595 5596 /* ------------------------------------------------------------------------ */ 5597 /* ------------------------------------------------------------------------ */ 5598 5599 void 5600 __kmp_internal_end_dest( void *specific_gtid ) 5601 { 5602 #if KMP_COMPILER_ICC 5603 #pragma warning( push ) 5604 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits 5605 #endif 5606 // Make sure no significant bits are lost 5607 int gtid = (kmp_intptr_t)specific_gtid - 1; 5608 #if KMP_COMPILER_ICC 5609 #pragma warning( pop ) 5610 #endif 5611 5612 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid)); 5613 /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage 5614 * this is because 0 is reserved for the nothing-stored case */ 5615 5616 /* josh: One reason for setting the gtid specific data even when it is being 5617 destroyed by pthread is to allow gtid lookup through thread specific data 5618 (__kmp_gtid_get_specific). Some of the code, especially stat code, 5619 that gets executed in the call to __kmp_internal_end_thread, actually 5620 gets the gtid through the thread specific data. Setting it here seems 5621 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread 5622 to run smoothly. 5623 todo: get rid of this after we remove the dependence on 5624 __kmp_gtid_get_specific 5625 */ 5626 if(gtid >= 0 && KMP_UBER_GTID(gtid)) 5627 __kmp_gtid_set_specific( gtid ); 5628 #ifdef KMP_TDATA_GTID 5629 __kmp_gtid = gtid; 5630 #endif 5631 __kmp_internal_end_thread( gtid ); 5632 } 5633 5634 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB 5635 5636 // 2009-09-08 (lev): It looks the destructor does not work. 
In simple test cases destructors work 5637 // perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker 5638 // option in makefile.mk works fine. 5639 5640 __attribute__(( destructor )) 5641 void 5642 __kmp_internal_end_dtor( void ) 5643 { 5644 __kmp_internal_end_atexit(); 5645 } 5646 5647 void 5648 __kmp_internal_end_fini( void ) 5649 { 5650 __kmp_internal_end_atexit(); 5651 } 5652 5653 #endif 5654 5655 /* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */ 5656 void 5657 __kmp_internal_end_atexit( void ) 5658 { 5659 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) ); 5660 /* [Windows] 5661 josh: ideally, we want to completely shutdown the library in this atexit handler, but 5662 stat code that depends on thread specific data for gtid fails because that data becomes 5663 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread 5664 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the 5665 stat code and use __kmp_internal_end_library to cleanly shutdown the library. 5666 5667 // TODO: Can some of this comment about GVS be removed? 5668 I suspect that the offending stat code is executed when the calling thread tries to 5669 clean up a dead root thread's data structures, resulting in GVS code trying to close 5670 the GVS structures for that thread, but since the stat code uses 5671 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is 5672 cleaning up itself instead of another thread, it gets confused. This happens because 5673 allowing a thread to unregister and cleanup another thread is a recent modification for 5674 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a 5675 thread may end up trying to unregister another thread only if thread death does not 5676 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread 5677 specific data destructor function to detect thread death. For Windows dynamic, there 5678 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the 5679 workaround is applicable only for Windows static stat library. 5680 */ 5681 __kmp_internal_end_library( -1 ); 5682 #if KMP_OS_WINDOWS 5683 __kmp_close_console(); 5684 #endif 5685 } 5686 5687 static void 5688 __kmp_reap_thread( 5689 kmp_info_t * thread, 5690 int is_root 5691 ) { 5692 5693 // It is assumed __kmp_forkjoin_lock is acquired. 5694 5695 int gtid; 5696 5697 KMP_DEBUG_ASSERT( thread != NULL ); 5698 5699 gtid = thread->th.th_info.ds.ds_gtid; 5700 5701 if ( ! is_root ) { 5702 5703 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) { 5704 /* Assume the threads are at the fork barrier here */ 5705 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) ); 5706 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */ 5707 ANNOTATE_HAPPENS_BEFORE(thread); 5708 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread); 5709 __kmp_release_64(&flag); 5710 }; // if 5711 5712 // Terminate OS thread. 5713 __kmp_reap_worker( thread ); 5714 5715 // 5716 // The thread was killed asynchronously. If it was actively 5717 // spinning in the thread pool, decrement the global count. 
5718 // 5719 // There is a small timing hole here - if the worker thread was 5720 // just waking up after sleeping in the pool, had reset it's 5721 // th_active_in_pool flag but not decremented the global counter 5722 // __kmp_thread_pool_active_nth yet, then the global counter 5723 // might not get updated. 5724 // 5725 // Currently, this can only happen as the library is unloaded, 5726 // so there are no harmful side effects. 5727 // 5728 if ( thread->th.th_active_in_pool ) { 5729 thread->th.th_active_in_pool = FALSE; 5730 KMP_TEST_THEN_DEC32( 5731 (kmp_int32 *) &__kmp_thread_pool_active_nth ); 5732 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); 5733 } 5734 5735 // Decrement # of [worker] threads in the pool. 5736 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 ); 5737 --__kmp_thread_pool_nth; 5738 }; // if 5739 5740 __kmp_free_implicit_task(thread); 5741 5742 // Free the fast memory for tasking 5743 #if USE_FAST_MEMORY 5744 __kmp_free_fast_memory( thread ); 5745 #endif /* USE_FAST_MEMORY */ 5746 5747 __kmp_suspend_uninitialize_thread( thread ); 5748 5749 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread ); 5750 TCW_SYNC_PTR(__kmp_threads[gtid], NULL); 5751 5752 -- __kmp_all_nth; 5753 // __kmp_nth was decremented when thread is added to the pool. 5754 5755 #ifdef KMP_ADJUST_BLOCKTIME 5756 /* Adjust blocktime back to user setting or default if necessary */ 5757 /* Middle initialization might never have occurred */ 5758 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 5759 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); 5760 if ( __kmp_nth <= __kmp_avail_proc ) { 5761 __kmp_zero_bt = FALSE; 5762 } 5763 } 5764 #endif /* KMP_ADJUST_BLOCKTIME */ 5765 5766 /* free the memory being used */ 5767 if( __kmp_env_consistency_check ) { 5768 if ( thread->th.th_cons ) { 5769 __kmp_free_cons_stack( thread->th.th_cons ); 5770 thread->th.th_cons = NULL; 5771 }; // if 5772 } 5773 5774 if ( thread->th.th_pri_common != NULL ) { 5775 __kmp_free( thread->th.th_pri_common ); 5776 thread->th.th_pri_common = NULL; 5777 }; // if 5778 5779 if (thread->th.th_task_state_memo_stack != NULL) { 5780 __kmp_free(thread->th.th_task_state_memo_stack); 5781 thread->th.th_task_state_memo_stack = NULL; 5782 } 5783 5784 #if KMP_USE_BGET 5785 if ( thread->th.th_local.bget_data != NULL ) { 5786 __kmp_finalize_bget( thread ); 5787 }; // if 5788 #endif 5789 5790 #if KMP_AFFINITY_SUPPORTED 5791 if ( thread->th.th_affin_mask != NULL ) { 5792 KMP_CPU_FREE( thread->th.th_affin_mask ); 5793 thread->th.th_affin_mask = NULL; 5794 }; // if 5795 #endif /* KMP_AFFINITY_SUPPORTED */ 5796 5797 __kmp_reap_team( thread->th.th_serial_team ); 5798 thread->th.th_serial_team = NULL; 5799 __kmp_free( thread ); 5800 5801 KMP_MB(); 5802 5803 } // __kmp_reap_thread 5804 5805 static void 5806 __kmp_internal_end(void) 5807 { 5808 int i; 5809 5810 /* First, unregister the library */ 5811 __kmp_unregister_library(); 5812 5813 #if KMP_OS_WINDOWS 5814 /* In Win static library, we can't tell when a root actually dies, so we 5815 reclaim the data structures for any root threads that have died but not 5816 unregistered themselves, in order to shut down cleanly. 5817 In Win dynamic library we also can't tell when a thread dies. 5818 */ 5819 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots 5820 #endif 5821 5822 for( i=0 ; i<__kmp_threads_capacity ; i++ ) 5823 if( __kmp_root[i] ) 5824 if( __kmp_root[i]->r.r_active ) 5825 break; 5826 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 5827 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 5828 5829 if ( i < __kmp_threads_capacity ) { 5830 #if KMP_USE_MONITOR 5831 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? 5832 KMP_MB(); /* Flush all pending memory write invalidates. */ 5833 5834 // 5835 // Need to check that monitor was initialized before reaping it. 5836 // If we are called form __kmp_atfork_child (which sets 5837 // __kmp_init_parallel = 0), then __kmp_monitor will appear to 5838 // contain valid data, but it is only valid in the parent process, 5839 // not the child. 5840 // 5841 // New behavior (201008): instead of keying off of the flag 5842 // __kmp_init_parallel, the monitor thread creation is keyed off 5843 // of the new flag __kmp_init_monitor. 5844 // 5845 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); 5846 if ( TCR_4( __kmp_init_monitor ) ) { 5847 __kmp_reap_monitor( & __kmp_monitor ); 5848 TCW_4( __kmp_init_monitor, 0 ); 5849 } 5850 __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); 5851 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); 5852 #endif // KMP_USE_MONITOR 5853 } else { 5854 /* TODO move this to cleanup code */ 5855 #ifdef KMP_DEBUG 5856 /* make sure that everything has properly ended */ 5857 for ( i = 0; i < __kmp_threads_capacity; i++ ) { 5858 if( __kmp_root[i] ) { 5859 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here 5860 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active? 5861 } 5862 } 5863 #endif 5864 5865 KMP_MB(); 5866 5867 // Reap the worker threads. 5868 // This is valid for now, but be careful if threads are reaped sooner. 5869 while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool. 5870 // Get the next thread from the pool. 5871 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool; 5872 __kmp_thread_pool = thread->th.th_next_pool; 5873 // Reap it. 5874 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP); 5875 thread->th.th_next_pool = NULL; 5876 thread->th.th_in_pool = FALSE; 5877 __kmp_reap_thread( thread, 0 ); 5878 }; // while 5879 __kmp_thread_pool_insert_pt = NULL; 5880 5881 // Reap teams. 5882 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool. 5883 // Get the next team from the pool. 5884 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool; 5885 __kmp_team_pool = team->t.t_next_pool; 5886 // Reap it. 5887 team->t.t_next_pool = NULL; 5888 __kmp_reap_team( team ); 5889 }; // while 5890 5891 __kmp_reap_task_teams( ); 5892 5893 for ( i = 0; i < __kmp_threads_capacity; ++ i ) { 5894 // TBD: Add some checking... 5895 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL ); 5896 } 5897 5898 /* Make sure all threadprivate destructors get run by joining with all worker 5899 threads before resetting this flag */ 5900 TCW_SYNC_4(__kmp_init_common, FALSE); 5901 5902 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) ); 5903 KMP_MB(); 5904 5905 #if KMP_USE_MONITOR 5906 // 5907 // See note above: One of the possible fixes for CQ138434 / CQ140126 5908 // 5909 // FIXME: push both code fragments down and CSE them? 5910 // push them into __kmp_cleanup() ? 
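// Second instance of the monitor-reap fragment noted in the FIXME above:
// reap the monitor only if __kmp_init_monitor is set, checked while holding
// __kmp_monitor_lock.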
5911 // 5912 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); 5913 if ( TCR_4( __kmp_init_monitor ) ) { 5914 __kmp_reap_monitor( & __kmp_monitor ); 5915 TCW_4( __kmp_init_monitor, 0 ); 5916 } 5917 __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); 5918 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); 5919 #endif 5920 } /* else !__kmp_global.t_active */ 5921 TCW_4(__kmp_init_gtid, FALSE); 5922 KMP_MB(); /* Flush all pending memory write invalidates. */ 5923 5924 __kmp_cleanup(); 5925 #if OMPT_SUPPORT 5926 ompt_fini(); 5927 #endif 5928 } 5929 5930 void 5931 __kmp_internal_end_library( int gtid_req ) 5932 { 5933 /* if we have already cleaned up, don't try again, it wouldn't be pretty */ 5934 /* this shouldn't be a race condition because __kmp_internal_end() is the 5935 * only place to clear __kmp_serial_init */ 5936 /* we'll check this later too, after we get the lock */ 5937 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundaant, 5938 // because the next check will work in any case. 5939 if( __kmp_global.g.g_abort ) { 5940 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" )); 5941 /* TODO abort? */ 5942 return; 5943 } 5944 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 5945 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" )); 5946 return; 5947 } 5948 5949 5950 KMP_MB(); /* Flush all pending memory write invalidates. */ 5951 5952 /* find out who we are and what we should do */ 5953 { 5954 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); 5955 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req )); 5956 if( gtid == KMP_GTID_SHUTDOWN ) { 5957 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" )); 5958 return; 5959 } else if( gtid == KMP_GTID_MONITOR ) { 5960 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" )); 5961 return; 5962 } else if( gtid == KMP_GTID_DNE ) { 5963 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" )); 5964 /* we don't know who we are, but we may still shutdown the library */ 5965 } else if( KMP_UBER_GTID( gtid )) { 5966 /* unregister ourselves as an uber thread. gtid is no longer valid */ 5967 if( __kmp_root[gtid]->r.r_active ) { 5968 __kmp_global.g.g_abort = -1; 5969 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 5970 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid )); 5971 return; 5972 } else { 5973 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid )); 5974 __kmp_unregister_root_current_thread( gtid ); 5975 } 5976 } else { 5977 /* worker threads may call this function through the atexit handler, if they call exit() */ 5978 /* For now, skip the usual subsequent processing and just dump the debug buffer. 5979 TODO: do a thorough shutdown instead 5980 */ 5981 #ifdef DUMP_DEBUG_ON_EXIT 5982 if ( __kmp_debug_buf ) 5983 __kmp_dump_debug_buffer( ); 5984 #endif 5985 return; 5986 } 5987 } 5988 /* synchronize the termination process */ 5989 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 5990 5991 /* have we already finished */ 5992 if( __kmp_global.g.g_abort ) { 5993 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" )); 5994 /* TODO abort? 
*/ 5995 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 5996 return; 5997 } 5998 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 5999 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6000 return; 6001 } 6002 6003 /* We need this lock to enforce mutex between this reading of 6004 __kmp_threads_capacity and the writing by __kmp_register_root. 6005 Alternatively, we can use a counter of roots that is 6006 atomically updated by __kmp_get_global_thread_id_reg, 6007 __kmp_do_serial_initialize and __kmp_internal_end_*. 6008 */ 6009 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 6010 6011 /* now we can safely conduct the actual termination */ 6012 __kmp_internal_end(); 6013 6014 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 6015 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6016 6017 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) ); 6018 6019 #ifdef DUMP_DEBUG_ON_EXIT 6020 if ( __kmp_debug_buf ) 6021 __kmp_dump_debug_buffer(); 6022 #endif 6023 6024 #if KMP_OS_WINDOWS 6025 __kmp_close_console(); 6026 #endif 6027 6028 __kmp_fini_allocator(); 6029 6030 } // __kmp_internal_end_library 6031 6032 void 6033 __kmp_internal_end_thread( int gtid_req ) 6034 { 6035 int i; 6036 6037 /* if we have already cleaned up, don't try again, it wouldn't be pretty */ 6038 /* this shouldn't be a race condition because __kmp_internal_end() is the 6039 * only place to clear __kmp_serial_init */ 6040 /* we'll check this later too, after we get the lock */ 6041 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant, 6042 // because the next check will work in any case. 6043 if( __kmp_global.g.g_abort ) { 6044 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" )); 6045 /* TODO abort? */ 6046 return; 6047 } 6048 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 6049 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" )); 6050 return; 6051 } 6052 6053 KMP_MB(); /* Flush all pending memory write invalidates. */ 6054 6055 /* find out who we are and what we should do */ 6056 { 6057 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); 6058 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req )); 6059 if( gtid == KMP_GTID_SHUTDOWN ) { 6060 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" )); 6061 return; 6062 } else if( gtid == KMP_GTID_MONITOR ) { 6063 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" )); 6064 return; 6065 } else if( gtid == KMP_GTID_DNE ) { 6066 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" )); 6067 return; 6068 /* we don't know who we are */ 6069 } else if( KMP_UBER_GTID( gtid )) { 6070 /* unregister ourselves as an uber thread. 
gtid is no longer valid */ 6071 if( __kmp_root[gtid]->r.r_active ) { 6072 __kmp_global.g.g_abort = -1; 6073 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 6074 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid )); 6075 return; 6076 } else { 6077 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid )); 6078 __kmp_unregister_root_current_thread( gtid ); 6079 } 6080 } else { 6081 /* just a worker thread, let's leave */ 6082 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid )); 6083 6084 if ( gtid >= 0 ) { 6085 __kmp_threads[gtid]->th.th_task_team = NULL; 6086 } 6087 6088 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid )); 6089 return; 6090 } 6091 } 6092 #if defined KMP_DYNAMIC_LIB 6093 // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber thread, 6094 // because we will better shutdown later in the library destructor. 6095 // The reason of this change is performance problem when non-openmp thread 6096 // in a loop forks and joins many openmp threads. We can save a lot of time 6097 // keeping worker threads alive until the program shutdown. 6098 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and 6099 // Windows(DPD200287443) that occurs when using critical sections from foreign threads. 6100 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) ); 6101 return; 6102 #endif 6103 /* synchronize the termination process */ 6104 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 6105 6106 /* have we already finished */ 6107 if( __kmp_global.g.g_abort ) { 6108 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" )); 6109 /* TODO abort? */ 6110 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6111 return; 6112 } 6113 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 6114 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6115 return; 6116 } 6117 6118 /* We need this lock to enforce mutex between this reading of 6119 __kmp_threads_capacity and the writing by __kmp_register_root. 6120 Alternatively, we can use a counter of roots that is 6121 atomically updated by __kmp_get_global_thread_id_reg, 6122 __kmp_do_serial_initialize and __kmp_internal_end_*. 6123 */ 6124 6125 /* should we finish the run-time? are all siblings done? */ 6126 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 6127 6128 for ( i = 0; i < __kmp_threads_capacity; ++ i ) { 6129 if ( KMP_UBER_GTID( i ) ) { 6130 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i )); 6131 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 6132 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6133 return; 6134 }; 6135 } 6136 6137 /* now we can safely conduct the actual termination */ 6138 6139 __kmp_internal_end(); 6140 6141 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 6142 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6143 6144 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) ); 6145 6146 #ifdef DUMP_DEBUG_ON_EXIT 6147 if ( __kmp_debug_buf ) 6148 __kmp_dump_debug_buffer(); 6149 #endif 6150 } // __kmp_internal_end_thread 6151 6152 // ------------------------------------------------------------------------------------------------- 6153 // Library registration stuff. 6154 6155 static long __kmp_registration_flag = 0; 6156 // Random value used to indicate library initialization. 6157 static char * __kmp_registration_str = NULL; 6158 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>. 
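// The registration value is built in __kmp_register_library_startup() below
// as "%p-%lx-%s": the address of __kmp_registration_flag, the flag's
// randomized value, and KMP_LIBRARY_FILE. A purely illustrative example of
// the resulting environment variable:
//   __KMP_REGISTERED_LIB_12345=0x7ffd1234abcd-cafe01ab-libomp.so
// A second copy of the runtime parses this value to decide whether the copy
// that registered it is still alive.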
6159 6160 6161 static inline 6162 char * 6163 __kmp_reg_status_name() { 6164 /* 6165 On RHEL 3u5 if linked statically, getpid() returns different values in each thread. 6166 If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case), 6167 the name of registered_lib_env env var can not be found, because the name will contain different pid. 6168 */ 6169 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() ); 6170 } // __kmp_reg_status_get 6171 6172 6173 void 6174 __kmp_register_library_startup( 6175 void 6176 ) { 6177 6178 char * name = __kmp_reg_status_name(); // Name of the environment variable. 6179 int done = 0; 6180 union { 6181 double dtime; 6182 long ltime; 6183 } time; 6184 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6185 __kmp_initialize_system_tick(); 6186 #endif 6187 __kmp_read_system_time( & time.dtime ); 6188 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL ); 6189 __kmp_registration_str = 6190 __kmp_str_format( 6191 "%p-%lx-%s", 6192 & __kmp_registration_flag, 6193 __kmp_registration_flag, 6194 KMP_LIBRARY_FILE 6195 ); 6196 6197 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) ); 6198 6199 while ( ! done ) { 6200 6201 char * value = NULL; // Actual value of the environment variable. 6202 6203 // Set environment variable, but do not overwrite if it is exist. 6204 __kmp_env_set( name, __kmp_registration_str, 0 ); 6205 // Check the variable is written. 6206 value = __kmp_env_get( name ); 6207 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { 6208 6209 done = 1; // Ok, environment variable set successfully, exit the loop. 6210 6211 } else { 6212 6213 // Oops. Write failed. Another copy of OpenMP RTL is in memory. 6214 // Check whether it alive or dead. 6215 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. 6216 char * tail = value; 6217 char * flag_addr_str = NULL; 6218 char * flag_val_str = NULL; 6219 char const * file_name = NULL; 6220 __kmp_str_split( tail, '-', & flag_addr_str, & tail ); 6221 __kmp_str_split( tail, '-', & flag_val_str, & tail ); 6222 file_name = tail; 6223 if ( tail != NULL ) { 6224 long * flag_addr = 0; 6225 long flag_val = 0; 6226 KMP_SSCANF( flag_addr_str, "%p", & flag_addr ); 6227 KMP_SSCANF( flag_val_str, "%lx", & flag_val ); 6228 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) { 6229 // First, check whether environment-encoded address is mapped into addr space. 6230 // If so, dereference it to see if it still has the right value. 6231 6232 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) { 6233 neighbor = 1; 6234 } else { 6235 // If not, then we know the other copy of the library is no longer running. 6236 neighbor = 2; 6237 }; // if 6238 }; // if 6239 }; // if 6240 switch ( neighbor ) { 6241 case 0 : // Cannot parse environment variable -- neighbor status unknown. 6242 // Assume it is the incompatible format of future version of the library. 6243 // Assume the other library is alive. 6244 // WARN( ... ); // TODO: Issue a warning. 6245 file_name = "unknown library"; 6246 // Attention! Falling to the next case. That's intentional. 6247 case 1 : { // Neighbor is alive. 6248 // Check it is allowed. 6249 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" ); 6250 if ( ! __kmp_str_match_true( duplicate_ok ) ) { 6251 // That's not allowed. Issue fatal error. 
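// (This fatal path is reached only when KMP_DUPLICATE_LIB_OK is unset or not
// a true value; setting it to a true value lets the duplicate runtime
// continue, and __kmp_duplicate_library_ok is recorded below.)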
6252 __kmp_msg( 6253 kmp_ms_fatal, 6254 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ), 6255 KMP_HNT( DuplicateLibrary ), 6256 __kmp_msg_null 6257 ); 6258 }; // if 6259 KMP_INTERNAL_FREE( duplicate_ok ); 6260 __kmp_duplicate_library_ok = 1; 6261 done = 1; // Exit the loop. 6262 } break; 6263 case 2 : { // Neighbor is dead. 6264 // Clear the variable and try to register library again. 6265 __kmp_env_unset( name ); 6266 } break; 6267 default : { 6268 KMP_DEBUG_ASSERT( 0 ); 6269 } break; 6270 }; // switch 6271 6272 }; // if 6273 KMP_INTERNAL_FREE( (void *) value ); 6274 6275 }; // while 6276 KMP_INTERNAL_FREE( (void *) name ); 6277 6278 } // func __kmp_register_library_startup 6279 6280 6281 void 6282 __kmp_unregister_library( void ) { 6283 6284 char * name = __kmp_reg_status_name(); 6285 char * value = __kmp_env_get( name ); 6286 6287 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 ); 6288 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL ); 6289 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { 6290 // Ok, this is our variable. Delete it. 6291 __kmp_env_unset( name ); 6292 }; // if 6293 6294 KMP_INTERNAL_FREE( __kmp_registration_str ); 6295 KMP_INTERNAL_FREE( value ); 6296 KMP_INTERNAL_FREE( name ); 6297 6298 __kmp_registration_flag = 0; 6299 __kmp_registration_str = NULL; 6300 6301 } // __kmp_unregister_library 6302 6303 6304 // End of Library registration stuff. 6305 // ------------------------------------------------------------------------------------------------- 6306 6307 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6308 6309 static void __kmp_check_mic_type() 6310 { 6311 kmp_cpuid_t cpuid_state = {0}; 6312 kmp_cpuid_t * cs_p = &cpuid_state; 6313 __kmp_x86_cpuid(1, 0, cs_p); 6314 // We don't support mic1 at the moment 6315 if( (cs_p->eax & 0xff0) == 0xB10 ) { 6316 __kmp_mic_type = mic2; 6317 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) { 6318 __kmp_mic_type = mic3; 6319 } else { 6320 __kmp_mic_type = non_mic; 6321 } 6322 } 6323 6324 #endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */ 6325 6326 static void 6327 __kmp_do_serial_initialize( void ) 6328 { 6329 int i, gtid; 6330 int size; 6331 6332 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) ); 6333 6334 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 ); 6335 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 ); 6336 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 ); 6337 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 ); 6338 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) ); 6339 6340 #if OMPT_SUPPORT 6341 ompt_pre_init(); 6342 #endif 6343 6344 __kmp_validate_locks(); 6345 6346 /* Initialize internal memory allocator */ 6347 __kmp_init_allocator(); 6348 6349 /* Register the library startup via an environment variable 6350 and check to see whether another copy of the library is already 6351 registered. 
*/ 6352 6353 __kmp_register_library_startup( ); 6354 6355 /* TODO reinitialization of library */ 6356 if( TCR_4(__kmp_global.g.g_done) ) { 6357 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) ); 6358 } 6359 6360 __kmp_global.g.g_abort = 0; 6361 TCW_SYNC_4(__kmp_global.g.g_done, FALSE); 6362 6363 /* initialize the locks */ 6364 #if KMP_USE_ADAPTIVE_LOCKS 6365 #if KMP_DEBUG_ADAPTIVE_LOCKS 6366 __kmp_init_speculative_stats(); 6367 #endif 6368 #endif 6369 #if KMP_STATS_ENABLED 6370 __kmp_stats_init(); 6371 #endif 6372 __kmp_init_lock( & __kmp_global_lock ); 6373 __kmp_init_queuing_lock( & __kmp_dispatch_lock ); 6374 __kmp_init_lock( & __kmp_debug_lock ); 6375 __kmp_init_atomic_lock( & __kmp_atomic_lock ); 6376 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i ); 6377 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i ); 6378 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i ); 6379 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r ); 6380 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i ); 6381 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r ); 6382 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c ); 6383 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r ); 6384 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r ); 6385 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c ); 6386 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c ); 6387 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c ); 6388 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock ); 6389 __kmp_init_bootstrap_lock( & __kmp_exit_lock ); 6390 #if KMP_USE_MONITOR 6391 __kmp_init_bootstrap_lock( & __kmp_monitor_lock ); 6392 #endif 6393 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock ); 6394 6395 /* conduct initialization and initial setup of configuration */ 6396 6397 __kmp_runtime_initialize(); 6398 6399 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6400 __kmp_check_mic_type(); 6401 #endif 6402 6403 // Some global variable initialization moved here from kmp_env_initialize() 6404 #ifdef KMP_DEBUG 6405 kmp_diag = 0; 6406 #endif 6407 __kmp_abort_delay = 0; 6408 6409 // From __kmp_init_dflt_team_nth() 6410 /* assume the entire machine will be used */ 6411 __kmp_dflt_team_nth_ub = __kmp_xproc; 6412 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) { 6413 __kmp_dflt_team_nth_ub = KMP_MIN_NTH; 6414 } 6415 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) { 6416 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; 6417 } 6418 __kmp_max_nth = __kmp_sys_max_nth; 6419 6420 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part 6421 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; 6422 #if KMP_USE_MONITOR 6423 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); 6424 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); 6425 #endif 6426 // From "KMP_LIBRARY" part of __kmp_env_initialize() 6427 __kmp_library = library_throughput; 6428 // From KMP_SCHEDULE initialization 6429 __kmp_static = kmp_sch_static_balanced; 6430 // AC: do not use analytical here, because it is non-monotonous 6431 //__kmp_guided = kmp_sch_guided_iterative_chunked; 6432 //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeate assignment 6433 // Barrier initialization. 
Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method 6434 // control parts 6435 #if KMP_FAST_REDUCTION_BARRIER 6436 #define kmp_reduction_barrier_gather_bb ((int)1) 6437 #define kmp_reduction_barrier_release_bb ((int)1) 6438 #define kmp_reduction_barrier_gather_pat bp_hyper_bar 6439 #define kmp_reduction_barrier_release_pat bp_hyper_bar 6440 #endif // KMP_FAST_REDUCTION_BARRIER 6441 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) { 6442 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt; 6443 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt; 6444 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt; 6445 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt; 6446 #if KMP_FAST_REDUCTION_BARRIER 6447 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1 6448 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb; 6449 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb; 6450 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat; 6451 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat; 6452 } 6453 #endif // KMP_FAST_REDUCTION_BARRIER 6454 } 6455 #if KMP_FAST_REDUCTION_BARRIER 6456 #undef kmp_reduction_barrier_release_pat 6457 #undef kmp_reduction_barrier_gather_pat 6458 #undef kmp_reduction_barrier_release_bb 6459 #undef kmp_reduction_barrier_gather_bb 6460 #endif // KMP_FAST_REDUCTION_BARRIER 6461 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6462 if (__kmp_mic_type == mic2) { // KNC 6463 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC 6464 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather 6465 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release 6466 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar; 6467 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar; 6468 } 6469 #if KMP_FAST_REDUCTION_BARRIER 6470 if (__kmp_mic_type == mic2) { // KNC 6471 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar; 6472 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar; 6473 } 6474 #endif 6475 #endif 6476 6477 // From KMP_CHECKS initialization 6478 #ifdef KMP_DEBUG 6479 __kmp_env_checks = TRUE; /* development versions have the extra checks */ 6480 #else 6481 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */ 6482 #endif 6483 6484 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization 6485 __kmp_foreign_tp = TRUE; 6486 6487 __kmp_global.g.g_dynamic = FALSE; 6488 __kmp_global.g.g_dynamic_mode = dynamic_default; 6489 6490 __kmp_env_initialize( NULL ); 6491 6492 // Print all messages in message catalog for testing purposes. 
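    // Usage note (illustrative): in a KMP_DEBUG build, running the application with
    //     KMP_DUMP_CATALOG=1
    // makes the block below print every message of the i18n catalog, which helps verify that the
    // catalog is complete and loadable.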
6493 #ifdef KMP_DEBUG 6494 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" ); 6495 if ( __kmp_str_match_true( val ) ) { 6496 kmp_str_buf_t buffer; 6497 __kmp_str_buf_init( & buffer ); 6498 __kmp_i18n_dump_catalog( & buffer ); 6499 __kmp_printf( "%s", buffer.str ); 6500 __kmp_str_buf_free( & buffer ); 6501 }; // if 6502 __kmp_env_free( & val ); 6503 #endif 6504 6505 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub ); 6506 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part 6507 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified); 6508 6509 // If the library is shut down properly, both pools must be NULL. Just in case, set them 6510 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed. 6511 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL ); 6512 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL ); 6513 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL ); 6514 __kmp_thread_pool = NULL; 6515 __kmp_thread_pool_insert_pt = NULL; 6516 __kmp_team_pool = NULL; 6517 6518 /* Allocate all of the variable sized records */ 6519 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */ 6520 /* Since allocation is cache-aligned, just add extra padding at the end */ 6521 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE; 6522 __kmp_threads = (kmp_info_t**) __kmp_allocate( size ); 6523 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity ); 6524 6525 /* init thread counts */ 6526 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and 6527 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination. 6528 __kmp_all_nth = 0; 6529 __kmp_nth = 0; 6530 6531 /* setup the uber master thread and hierarchy */ 6532 gtid = __kmp_register_root( TRUE ); 6533 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid )); 6534 KMP_ASSERT( KMP_UBER_GTID( gtid ) ); 6535 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) ); 6536 6537 KMP_MB(); /* Flush all pending memory write invalidates. */ 6538 6539 __kmp_common_initialize(); 6540 6541 #if KMP_OS_UNIX 6542 /* invoke the child fork handler */ 6543 __kmp_register_atfork(); 6544 #endif 6545 6546 #if ! defined KMP_DYNAMIC_LIB 6547 { 6548 /* Invoke the exit handler when the program finishes, only for static library. 6549 For dynamic library, we already have _fini and DllMain. 6550 */ 6551 int rc = atexit( __kmp_internal_end_atexit ); 6552 if ( rc != 0 ) { 6553 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null ); 6554 }; // if 6555 } 6556 #endif 6557 6558 #if KMP_HANDLE_SIGNALS 6559 #if KMP_OS_UNIX 6560 /* NOTE: make sure that this is called before the user installs 6561 * their own signal handlers so that the user handlers 6562 * are called first. this way they can return false, 6563 * not call our handler, avoid terminating the library, 6564 * and continue execution where they left off. 
*/ 6565 __kmp_install_signals( FALSE ); 6566 #endif /* KMP_OS_UNIX */ 6567 #if KMP_OS_WINDOWS 6568 __kmp_install_signals( TRUE ); 6569 #endif /* KMP_OS_WINDOWS */ 6570 #endif 6571 6572 /* we have finished the serial initialization */ 6573 __kmp_init_counter ++; 6574 6575 __kmp_init_serial = TRUE; 6576 6577 if (__kmp_settings) { 6578 __kmp_env_print(); 6579 } 6580 6581 #if OMP_40_ENABLED 6582 if (__kmp_display_env || __kmp_display_env_verbose) { 6583 __kmp_env_print_2(); 6584 } 6585 #endif // OMP_40_ENABLED 6586 6587 #if OMPT_SUPPORT 6588 ompt_post_init(); 6589 #endif 6590 6591 KMP_MB(); 6592 6593 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) ); 6594 } 6595 6596 void 6597 __kmp_serial_initialize( void ) 6598 { 6599 if ( __kmp_init_serial ) { 6600 return; 6601 } 6602 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 6603 if ( __kmp_init_serial ) { 6604 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6605 return; 6606 } 6607 __kmp_do_serial_initialize(); 6608 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6609 } 6610 6611 static void 6612 __kmp_do_middle_initialize( void ) 6613 { 6614 int i, j; 6615 int prev_dflt_team_nth; 6616 6617 if( !__kmp_init_serial ) { 6618 __kmp_do_serial_initialize(); 6619 } 6620 6621 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) ); 6622 6623 // 6624 // Save the previous value for the __kmp_dflt_team_nth so that 6625 // we can avoid some reinitialization if it hasn't changed. 6626 // 6627 prev_dflt_team_nth = __kmp_dflt_team_nth; 6628 6629 #if KMP_AFFINITY_SUPPORTED 6630 // 6631 // __kmp_affinity_initialize() will try to set __kmp_ncores to the 6632 // number of cores on the machine. 6633 // 6634 __kmp_affinity_initialize(); 6635 6636 // 6637 // Run through the __kmp_threads array and set the affinity mask 6638 // for each root thread that is currently registered with the RTL. 6639 // 6640 for ( i = 0; i < __kmp_threads_capacity; i++ ) { 6641 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) { 6642 __kmp_affinity_set_init_mask( i, TRUE ); 6643 } 6644 } 6645 #endif /* KMP_AFFINITY_SUPPORTED */ 6646 6647 KMP_ASSERT( __kmp_xproc > 0 ); 6648 if ( __kmp_avail_proc == 0 ) { 6649 __kmp_avail_proc = __kmp_xproc; 6650 } 6651 6652 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now 6653 j = 0; 6654 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) { 6655 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc; 6656 j++; 6657 } 6658 6659 if ( __kmp_dflt_team_nth == 0 ) { 6660 #ifdef KMP_DFLT_NTH_CORES 6661 // 6662 // Default #threads = #cores 6663 // 6664 __kmp_dflt_team_nth = __kmp_ncores; 6665 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n", 6666 __kmp_dflt_team_nth ) ); 6667 #else 6668 // 6669 // Default #threads = #available OS procs 6670 // 6671 __kmp_dflt_team_nth = __kmp_avail_proc; 6672 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n", 6673 __kmp_dflt_team_nth ) ); 6674 #endif /* KMP_DFLT_NTH_CORES */ 6675 } 6676 6677 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) { 6678 __kmp_dflt_team_nth = KMP_MIN_NTH; 6679 } 6680 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) { 6681 __kmp_dflt_team_nth = __kmp_sys_max_nth; 6682 } 6683 6684 // 6685 // There's no harm in continuing if the following check fails, 6686 // but it indicates an error in the previous logic. 
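    // (Illustrative note: the loop further below pushes the new __kmp_dflt_team_nth into the
    // nthreads-var of every registered root thread, but deliberately skips roots whose
    // nthreads-var is already non-zero, i.e. roots where the application has explicitly called
    // omp_set_num_threads().)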
6687 // 6688 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub ); 6689 6690 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) { 6691 // 6692 // Run through the __kmp_threads array and set the num threads icv 6693 // for each root thread that is currently registered with the RTL 6694 // (which has not already explicitly set its nthreads-var with a 6695 // call to omp_set_num_threads()). 6696 // 6697 for ( i = 0; i < __kmp_threads_capacity; i++ ) { 6698 kmp_info_t *thread = __kmp_threads[ i ]; 6699 if ( thread == NULL ) continue; 6700 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue; 6701 6702 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth ); 6703 } 6704 } 6705 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", 6706 __kmp_dflt_team_nth) ); 6707 6708 #ifdef KMP_ADJUST_BLOCKTIME 6709 /* Adjust blocktime to zero if necessary */ 6710 /* now that __kmp_avail_proc is set */ 6711 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 6712 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); 6713 if ( __kmp_nth > __kmp_avail_proc ) { 6714 __kmp_zero_bt = TRUE; 6715 } 6716 } 6717 #endif /* KMP_ADJUST_BLOCKTIME */ 6718 6719 /* we have finished middle initialization */ 6720 TCW_SYNC_4(__kmp_init_middle, TRUE); 6721 6722 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) ); 6723 } 6724 6725 void 6726 __kmp_middle_initialize( void ) 6727 { 6728 if ( __kmp_init_middle ) { 6729 return; 6730 } 6731 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 6732 if ( __kmp_init_middle ) { 6733 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6734 return; 6735 } 6736 __kmp_do_middle_initialize(); 6737 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6738 } 6739 6740 void 6741 __kmp_parallel_initialize( void ) 6742 { 6743 int gtid = __kmp_entry_gtid(); // this might be a new root 6744 6745 /* synchronize parallel initialization (for sibling) */ 6746 if( TCR_4(__kmp_init_parallel) ) return; 6747 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 6748 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; } 6749 6750 /* TODO reinitialization after we have already shut down */ 6751 if( TCR_4(__kmp_global.g.g_done) ) { 6752 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) ); 6753 __kmp_infinite_loop(); 6754 } 6755 6756 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize 6757 would cause a deadlock. So we call __kmp_do_serial_initialize directly. 6758 */ 6759 if( !__kmp_init_middle ) { 6760 __kmp_do_middle_initialize(); 6761 } 6762 6763 /* begin initialization */ 6764 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) ); 6765 KMP_ASSERT( KMP_UBER_GTID( gtid ) ); 6766 6767 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6768 // 6769 // Save the FP control regs. 6770 // Worker threads will set theirs to these values at thread startup. 
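    // (Illustrative note: only the initial thread's x87 control word and MXCSR are captured here;
    // masking __kmp_init_mxcsr with KMP_X86_MXCSR_MASK presumably keeps just the control bits
    // (rounding mode, exception masks) so that worker threads start with the same FP environment
    // as the initial thread rather than inheriting transient status flags.)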
6771 // 6772 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); 6773 __kmp_store_mxcsr( &__kmp_init_mxcsr ); 6774 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; 6775 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 6776 6777 #if KMP_OS_UNIX 6778 # if KMP_HANDLE_SIGNALS 6779 /* must be after __kmp_serial_initialize */ 6780 __kmp_install_signals( TRUE ); 6781 # endif 6782 #endif 6783 6784 __kmp_suspend_initialize(); 6785 6786 #if defined(USE_LOAD_BALANCE) 6787 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { 6788 __kmp_global.g.g_dynamic_mode = dynamic_load_balance; 6789 } 6790 #else 6791 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { 6792 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 6793 } 6794 #endif 6795 6796 if ( __kmp_version ) { 6797 __kmp_print_version_2(); 6798 } 6799 6800 /* we have finished parallel initialization */ 6801 TCW_SYNC_4(__kmp_init_parallel, TRUE); 6802 6803 KMP_MB(); 6804 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) ); 6805 6806 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6807 } 6808 6809 6810 /* ------------------------------------------------------------------------ */ 6811 6812 void 6813 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, 6814 kmp_team_t *team ) 6815 { 6816 kmp_disp_t *dispatch; 6817 6818 KMP_MB(); 6819 6820 /* none of the threads have encountered any constructs, yet. */ 6821 this_thr->th.th_local.this_construct = 0; 6822 #if KMP_CACHE_MANAGE 6823 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived ); 6824 #endif /* KMP_CACHE_MANAGE */ 6825 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); 6826 KMP_DEBUG_ASSERT( dispatch ); 6827 KMP_DEBUG_ASSERT( team->t.t_dispatch ); 6828 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] ); 6829 6830 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ 6831 #if OMP_45_ENABLED 6832 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */ 6833 #endif 6834 if( __kmp_env_consistency_check ) 6835 __kmp_push_parallel( gtid, team->t.t_ident ); 6836 6837 KMP_MB(); /* Flush all pending memory write invalidates. */ 6838 } 6839 6840 void 6841 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, 6842 kmp_team_t *team ) 6843 { 6844 if( __kmp_env_consistency_check ) 6845 __kmp_pop_parallel( gtid, team->t.t_ident ); 6846 6847 __kmp_finish_implicit_task(this_thr); 6848 } 6849 6850 int 6851 __kmp_invoke_task_func( int gtid ) 6852 { 6853 int rc; 6854 int tid = __kmp_tid_from_gtid( gtid ); 6855 kmp_info_t *this_thr = __kmp_threads[ gtid ]; 6856 kmp_team_t *team = this_thr->th.th_team; 6857 6858 __kmp_run_before_invoked_task( gtid, tid, this_thr, team ); 6859 #if USE_ITT_BUILD 6860 if ( __itt_stack_caller_create_ptr ) { 6861 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code 6862 } 6863 #endif /* USE_ITT_BUILD */ 6864 #if INCLUDE_SSC_MARKS 6865 SSC_MARK_INVOKING(); 6866 #endif 6867 6868 #if OMPT_SUPPORT 6869 void *dummy; 6870 void **exit_runtime_p; 6871 ompt_task_id_t my_task_id; 6872 ompt_parallel_id_t my_parallel_id; 6873 6874 if (ompt_enabled) { 6875 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]. 
6876 ompt_task_info.frame.exit_runtime_frame); 6877 } else { 6878 exit_runtime_p = &dummy; 6879 } 6880 6881 #if OMPT_TRACE 6882 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; 6883 my_parallel_id = team->t.ompt_team_info.parallel_id; 6884 if (ompt_enabled && 6885 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 6886 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 6887 my_parallel_id, my_task_id); 6888 } 6889 #endif 6890 #endif 6891 6892 { 6893 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 6894 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 6895 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn), 6896 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv 6897 #if OMPT_SUPPORT 6898 , exit_runtime_p 6899 #endif 6900 ); 6901 #if OMPT_SUPPORT 6902 *exit_runtime_p = NULL; 6903 #endif 6904 } 6905 6906 #if USE_ITT_BUILD 6907 if ( __itt_stack_caller_create_ptr ) { 6908 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code 6909 } 6910 #endif /* USE_ITT_BUILD */ 6911 __kmp_run_after_invoked_task( gtid, tid, this_thr, team ); 6912 6913 return rc; 6914 } 6915 6916 #if OMP_40_ENABLED 6917 void 6918 __kmp_teams_master( int gtid ) 6919 { 6920 // This routine is called by all master threads in teams construct 6921 kmp_info_t *thr = __kmp_threads[ gtid ]; 6922 kmp_team_t *team = thr->th.th_team; 6923 ident_t *loc = team->t.t_ident; 6924 thr->th.th_set_nproc = thr->th.th_teams_size.nth; 6925 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask ); 6926 KMP_DEBUG_ASSERT( thr->th.th_set_nproc ); 6927 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", 6928 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) ); 6929 // Launch league of teams now, but not let workers execute 6930 // (they hang on fork barrier until next parallel) 6931 #if INCLUDE_SSC_MARKS 6932 SSC_MARK_FORKING(); 6933 #endif 6934 __kmp_fork_call( loc, gtid, fork_context_intel, 6935 team->t.t_argc, 6936 #if OMPT_SUPPORT 6937 (void *)thr->th.th_teams_microtask, // "unwrapped" task 6938 #endif 6939 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task 6940 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 6941 NULL ); 6942 #if INCLUDE_SSC_MARKS 6943 SSC_MARK_JOINING(); 6944 #endif 6945 6946 // AC: last parameter "1" eliminates join barrier which won't work because 6947 // worker threads are in a fork barrier waiting for more parallel regions 6948 __kmp_join_call( loc, gtid 6949 #if OMPT_SUPPORT 6950 , fork_context_intel 6951 #endif 6952 , 1 ); 6953 } 6954 6955 int 6956 __kmp_invoke_teams_master( int gtid ) 6957 { 6958 kmp_info_t *this_thr = __kmp_threads[ gtid ]; 6959 kmp_team_t *team = this_thr->th.th_team; 6960 #if KMP_DEBUG 6961 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized ) 6962 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master ); 6963 #endif 6964 __kmp_run_before_invoked_task( gtid, 0, this_thr, team ); 6965 __kmp_teams_master( gtid ); 6966 __kmp_run_after_invoked_task( gtid, 0, this_thr, team ); 6967 return 1; 6968 } 6969 #endif /* OMP_40_ENABLED */ 6970 6971 /* this sets the requested number of threads for the next parallel region 6972 * encountered by this team */ 6973 /* since this should be enclosed in the forkjoin critical section it 6974 * should avoid race conditions with assymmetrical nested parallelism */ 6975 6976 void 6977 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads ) 6978 { 6979 kmp_info_t *thr = 
__kmp_threads[gtid]; 6980 6981 if( num_threads > 0 ) 6982 thr->th.th_set_nproc = num_threads; 6983 } 6984 6985 #if OMP_40_ENABLED 6986 6987 /* this sets the requested number of teams for the teams region and/or 6988 * the number of threads for the next parallel region encountered */ 6989 void 6990 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads ) 6991 { 6992 kmp_info_t *thr = __kmp_threads[gtid]; 6993 KMP_DEBUG_ASSERT(num_teams >= 0); 6994 KMP_DEBUG_ASSERT(num_threads >= 0); 6995 6996 if( num_teams == 0 ) 6997 num_teams = 1; // default number of teams is 1. 6998 if( num_teams > __kmp_max_nth ) { // if too many teams requested? 6999 if ( !__kmp_reserve_warn ) { 7000 __kmp_reserve_warn = 1; 7001 __kmp_msg( 7002 kmp_ms_warning, 7003 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ), 7004 KMP_HNT( Unset_ALL_THREADS ), 7005 __kmp_msg_null 7006 ); 7007 } 7008 num_teams = __kmp_max_nth; 7009 } 7010 // Set number of teams (number of threads in the outer "parallel" of the teams) 7011 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; 7012 7013 // Remember the number of threads for inner parallel regions 7014 if( num_threads == 0 ) { 7015 if( !TCR_4(__kmp_init_middle) ) 7016 __kmp_middle_initialize(); // get __kmp_avail_proc calculated 7017 num_threads = __kmp_avail_proc / num_teams; 7018 if( num_teams * num_threads > __kmp_max_nth ) { 7019 // adjust num_threads w/o warning as it is not user setting 7020 num_threads = __kmp_max_nth / num_teams; 7021 } 7022 } else { 7023 if( num_teams * num_threads > __kmp_max_nth ) { 7024 int new_threads = __kmp_max_nth / num_teams; 7025 if ( !__kmp_reserve_warn ) { // user asked for too many threads 7026 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT 7027 __kmp_msg( 7028 kmp_ms_warning, 7029 KMP_MSG( CantFormThrTeam, num_threads, new_threads ), 7030 KMP_HNT( Unset_ALL_THREADS ), 7031 __kmp_msg_null 7032 ); 7033 } 7034 num_threads = new_threads; 7035 } 7036 } 7037 thr->th.th_teams_size.nth = num_threads; 7038 } 7039 7040 7041 // 7042 // Set the proc_bind var to use in the following parallel region. 7043 // 7044 void 7045 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind ) 7046 { 7047 kmp_info_t *thr = __kmp_threads[gtid]; 7048 thr->th.th_set_proc_bind = proc_bind; 7049 } 7050 7051 #endif /* OMP_40_ENABLED */ 7052 7053 /* Launch the worker threads into the microtask. */ 7054 7055 void 7056 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team ) 7057 { 7058 kmp_info_t *this_thr = __kmp_threads[gtid]; 7059 7060 #ifdef KMP_DEBUG 7061 int f; 7062 #endif /* KMP_DEBUG */ 7063 7064 KMP_DEBUG_ASSERT( team ); 7065 KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); 7066 KMP_ASSERT( KMP_MASTER_GTID(gtid) ); 7067 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 7068 7069 team->t.t_construct = 0; /* no single directives seen yet */ 7070 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */ 7071 7072 /* Reset the identifiers on the dispatch buffer */ 7073 KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); 7074 if ( team->t.t_max_nproc > 1 ) { 7075 int i; 7076 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) { 7077 team->t.t_disp_buffer[ i ].buffer_index = i; 7078 #if OMP_45_ENABLED 7079 team->t.t_disp_buffer[i].doacross_buf_idx = i; 7080 #endif 7081 } 7082 } else { 7083 team->t.t_disp_buffer[ 0 ].buffer_index = 0; 7084 #if OMP_45_ENABLED 7085 team->t.t_disp_buffer[0].doacross_buf_idx = 0; 7086 #endif 7087 } 7088 7089 KMP_MB(); /* Flush all pending memory write invalidates. */ 7090 KMP_ASSERT( this_thr->th.th_team == team ); 7091 7092 #ifdef KMP_DEBUG 7093 for( f=0 ; f<team->t.t_nproc ; f++ ) { 7094 KMP_DEBUG_ASSERT( team->t.t_threads[f] && 7095 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); 7096 } 7097 #endif /* KMP_DEBUG */ 7098 7099 /* release the worker threads so they may begin working */ 7100 __kmp_fork_barrier( gtid, 0 ); 7101 } 7102 7103 7104 void 7105 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team ) 7106 { 7107 kmp_info_t *this_thr = __kmp_threads[gtid]; 7108 7109 KMP_DEBUG_ASSERT( team ); 7110 KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); 7111 KMP_ASSERT( KMP_MASTER_GTID(gtid) ); 7112 KMP_MB(); /* Flush all pending memory write invalidates. */ 7113 7114 /* Join barrier after fork */ 7115 7116 #ifdef KMP_DEBUG 7117 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) { 7118 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]); 7119 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n", 7120 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc); 7121 __kmp_print_structure(); 7122 } 7123 KMP_DEBUG_ASSERT( __kmp_threads[gtid] && 7124 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc ); 7125 #endif /* KMP_DEBUG */ 7126 7127 __kmp_join_barrier( gtid ); /* wait for everyone */ 7128 7129 KMP_MB(); /* Flush all pending memory write invalidates. */ 7130 KMP_ASSERT( this_thr->th.th_team == team ); 7131 } 7132 7133 7134 /* ------------------------------------------------------------------------ */ 7135 /* ------------------------------------------------------------------------ */ 7136 7137 #ifdef USE_LOAD_BALANCE 7138 7139 // 7140 // Return the worker threads actively spinning in the hot team, if we 7141 // are at the outermost level of parallelism. Otherwise, return 0. 7142 // 7143 static int 7144 __kmp_active_hot_team_nproc( kmp_root_t *root ) 7145 { 7146 int i; 7147 int retval; 7148 kmp_team_t *hot_team; 7149 7150 if ( root->r.r_active ) { 7151 return 0; 7152 } 7153 hot_team = root->r.r_hot_team; 7154 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { 7155 return hot_team->t.t_nproc - 1; // Don't count master thread 7156 } 7157 7158 // 7159 // Skip the master thread - it is accounted for elsewhere. 7160 // 7161 retval = 0; 7162 for ( i = 1; i < hot_team->t.t_nproc; i++ ) { 7163 if ( hot_team->t.t_threads[i]->th.th_active ) { 7164 retval++; 7165 } 7166 } 7167 return retval; 7168 } 7169 7170 // 7171 // Perform an automatic adjustment to the number of 7172 // threads used by the next parallel region. 
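// (Worked example with assumed numbers, restating the calculation done below: suppose
//  __kmp_avail_proc == 8, one thread is active in the pool and two workers are active in the hot
//  team, so team_curr_active == 1 + 2 + 1 == 4.  If __kmp_get_load_balance() reports
//  system_active == 6, the proposal is 8 - 6 + 4 == 6 threads, which is then capped at the
//  requested set_nproc and floored at KMP_MIN_NTH.)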
7173 // 7174 static int 7175 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc ) 7176 { 7177 int retval; 7178 int pool_active; 7179 int hot_team_active; 7180 int team_curr_active; 7181 int system_active; 7182 7183 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", 7184 root, set_nproc ) ); 7185 KMP_DEBUG_ASSERT( root ); 7186 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE ); 7187 KMP_DEBUG_ASSERT( set_nproc > 1 ); 7188 7189 if ( set_nproc == 1) { 7190 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) ); 7191 return 1; 7192 } 7193 7194 // 7195 // Threads that are active in the thread pool, active in the hot team 7196 // for this particular root (if we are at the outer par level), and 7197 // the currently executing thread (to become the master) are available 7198 // to add to the new team, but are currently contributing to the system 7199 // load, and must be accounted for. 7200 // 7201 pool_active = TCR_4(__kmp_thread_pool_active_nth); 7202 hot_team_active = __kmp_active_hot_team_nproc( root ); 7203 team_curr_active = pool_active + hot_team_active + 1; 7204 7205 // 7206 // Check the system load. 7207 // 7208 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active ); 7209 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n", 7210 system_active, pool_active, hot_team_active ) ); 7211 7212 if ( system_active < 0 ) { 7213 // 7214 // There was an error reading the necessary info from /proc, 7215 // so use the thread limit algorithm instead. Once we set 7216 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, 7217 // we shouldn't wind up getting back here. 7218 // 7219 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 7220 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" ); 7221 7222 // 7223 // Make this call behave like the thread limit algorithm. 7224 // 7225 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1 7226 : root->r.r_hot_team->t.t_nproc); 7227 if ( retval > set_nproc ) { 7228 retval = set_nproc; 7229 } 7230 if ( retval < KMP_MIN_NTH ) { 7231 retval = KMP_MIN_NTH; 7232 } 7233 7234 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) ); 7235 return retval; 7236 } 7237 7238 // 7239 // There is a slight delay in the load balance algorithm in detecting 7240 // new running procs. The real system load at this instant should be 7241 // at least as large as the #active omp thread that are available to 7242 // add to the team. 7243 // 7244 if ( system_active < team_curr_active ) { 7245 system_active = team_curr_active; 7246 } 7247 retval = __kmp_avail_proc - system_active + team_curr_active; 7248 if ( retval > set_nproc ) { 7249 retval = set_nproc; 7250 } 7251 if ( retval < KMP_MIN_NTH ) { 7252 retval = KMP_MIN_NTH; 7253 } 7254 7255 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. 
retval:%d\n", retval ) ); 7256 return retval; 7257 } // __kmp_load_balance_nproc() 7258 7259 #endif /* USE_LOAD_BALANCE */ 7260 7261 /* ------------------------------------------------------------------------ */ 7262 /* ------------------------------------------------------------------------ */ 7263 7264 /* NOTE: this is called with the __kmp_init_lock held */ 7265 void 7266 __kmp_cleanup( void ) 7267 { 7268 int f; 7269 7270 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) ); 7271 7272 if (TCR_4(__kmp_init_parallel)) { 7273 #if KMP_HANDLE_SIGNALS 7274 __kmp_remove_signals(); 7275 #endif 7276 TCW_4(__kmp_init_parallel, FALSE); 7277 } 7278 7279 if (TCR_4(__kmp_init_middle)) { 7280 #if KMP_AFFINITY_SUPPORTED 7281 __kmp_affinity_uninitialize(); 7282 #endif /* KMP_AFFINITY_SUPPORTED */ 7283 __kmp_cleanup_hierarchy(); 7284 TCW_4(__kmp_init_middle, FALSE); 7285 } 7286 7287 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) ); 7288 7289 if (__kmp_init_serial) { 7290 __kmp_runtime_destroy(); 7291 __kmp_init_serial = FALSE; 7292 } 7293 7294 for ( f = 0; f < __kmp_threads_capacity; f++ ) { 7295 if ( __kmp_root[ f ] != NULL ) { 7296 __kmp_free( __kmp_root[ f ] ); 7297 __kmp_root[ f ] = NULL; 7298 } 7299 } 7300 __kmp_free( __kmp_threads ); 7301 // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in 7302 // freeing __kmp_root. 7303 __kmp_threads = NULL; 7304 __kmp_root = NULL; 7305 __kmp_threads_capacity = 0; 7306 7307 #if KMP_USE_DYNAMIC_LOCK 7308 __kmp_cleanup_indirect_user_locks(); 7309 #else 7310 __kmp_cleanup_user_locks(); 7311 #endif 7312 7313 #if KMP_AFFINITY_SUPPORTED 7314 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file ); 7315 __kmp_cpuinfo_file = NULL; 7316 #endif /* KMP_AFFINITY_SUPPORTED */ 7317 7318 #if KMP_USE_ADAPTIVE_LOCKS 7319 #if KMP_DEBUG_ADAPTIVE_LOCKS 7320 __kmp_print_speculative_stats(); 7321 #endif 7322 #endif 7323 KMP_INTERNAL_FREE( __kmp_nested_nth.nth ); 7324 __kmp_nested_nth.nth = NULL; 7325 __kmp_nested_nth.size = 0; 7326 __kmp_nested_nth.used = 0; 7327 KMP_INTERNAL_FREE( __kmp_nested_proc_bind.bind_types ); 7328 __kmp_nested_proc_bind.bind_types = NULL; 7329 __kmp_nested_proc_bind.size = 0; 7330 __kmp_nested_proc_bind.used = 0; 7331 7332 __kmp_i18n_catclose(); 7333 7334 #if KMP_STATS_ENABLED 7335 __kmp_stats_fini(); 7336 #endif 7337 7338 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) ); 7339 } 7340 7341 /* ------------------------------------------------------------------------ */ 7342 /* ------------------------------------------------------------------------ */ 7343 7344 int 7345 __kmp_ignore_mppbeg( void ) 7346 { 7347 char *env; 7348 7349 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) { 7350 if (__kmp_str_match_false( env )) 7351 return FALSE; 7352 } 7353 // By default __kmpc_begin() is no-op. 7354 return TRUE; 7355 } 7356 7357 int 7358 __kmp_ignore_mppend( void ) 7359 { 7360 char *env; 7361 7362 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) { 7363 if (__kmp_str_match_false( env )) 7364 return FALSE; 7365 } 7366 // By default __kmpc_end() is no-op. 
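    // (Illustrative note: setting KMP_IGNORE_MPPEND to a false value, e.g. KMP_IGNORE_MPPEND=0,
    // makes this function return FALSE so that __kmpc_end() actually performs shutdown instead of
    // being ignored; KMP_IGNORE_MPPBEG above works the same way for __kmpc_begin().)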
7367 return TRUE; 7368 } 7369 7370 void 7371 __kmp_internal_begin( void ) 7372 { 7373 int gtid; 7374 kmp_root_t *root; 7375 7376 /* this is a very important step as it will register new sibling threads 7377 * and assign these new uber threads a new gtid */ 7378 gtid = __kmp_entry_gtid(); 7379 root = __kmp_threads[ gtid ]->th.th_root; 7380 KMP_ASSERT( KMP_UBER_GTID( gtid )); 7381 7382 if( root->r.r_begin ) return; 7383 __kmp_acquire_lock( &root->r.r_begin_lock, gtid ); 7384 if( root->r.r_begin ) { 7385 __kmp_release_lock( & root->r.r_begin_lock, gtid ); 7386 return; 7387 } 7388 7389 root->r.r_begin = TRUE; 7390 7391 __kmp_release_lock( & root->r.r_begin_lock, gtid ); 7392 } 7393 7394 7395 /* ------------------------------------------------------------------------ */ 7396 /* ------------------------------------------------------------------------ */ 7397 7398 void 7399 __kmp_user_set_library (enum library_type arg) 7400 { 7401 int gtid; 7402 kmp_root_t *root; 7403 kmp_info_t *thread; 7404 7405 /* first, make sure we are initialized so we can get our gtid */ 7406 7407 gtid = __kmp_entry_gtid(); 7408 thread = __kmp_threads[ gtid ]; 7409 7410 root = thread->th.th_root; 7411 7412 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial )); 7413 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */ 7414 KMP_WARNING( SetLibraryIncorrectCall ); 7415 return; 7416 } 7417 7418 switch ( arg ) { 7419 case library_serial : 7420 thread->th.th_set_nproc = 0; 7421 set__nproc( thread, 1 ); 7422 break; 7423 case library_turnaround : 7424 thread->th.th_set_nproc = 0; 7425 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); 7426 break; 7427 case library_throughput : 7428 thread->th.th_set_nproc = 0; 7429 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); 7430 break; 7431 default: 7432 KMP_FATAL( UnknownLibraryType, arg ); 7433 } 7434 7435 __kmp_aux_set_library ( arg ); 7436 } 7437 7438 void 7439 __kmp_aux_set_stacksize( size_t arg ) 7440 { 7441 if (! __kmp_init_serial) 7442 __kmp_serial_initialize(); 7443 7444 #if KMP_OS_DARWIN 7445 if (arg & (0x1000 - 1)) { 7446 arg &= ~(0x1000 - 1); 7447 if(arg + 0x1000) /* check for overflow if we round up */ 7448 arg += 0x1000; 7449 } 7450 #endif 7451 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 7452 7453 /* only change the default stacksize before the first parallel region */ 7454 if (! TCR_4(__kmp_init_parallel)) { 7455 size_t value = arg; /* argument is in bytes */ 7456 7457 if (value < __kmp_sys_min_stksize ) 7458 value = __kmp_sys_min_stksize ; 7459 else if (value > KMP_MAX_STKSIZE) 7460 value = KMP_MAX_STKSIZE; 7461 7462 __kmp_stksize = value; 7463 7464 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */ 7465 } 7466 7467 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 7468 } 7469 7470 /* set the behaviour of the runtime library */ 7471 /* TODO this can cause some odd behaviour with sibling parallelism... 
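   (For reference, restating the switch below: library_serial and library_turnaround both call
   __kmp_change_library( TRUE ), while library_throughput calls __kmp_change_library( FALSE );
   the meaning of that flag is defined elsewhere in the runtime.)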
*/ 7472 void 7473 __kmp_aux_set_library (enum library_type arg) 7474 { 7475 __kmp_library = arg; 7476 7477 switch ( __kmp_library ) { 7478 case library_serial : 7479 { 7480 KMP_INFORM( LibraryIsSerial ); 7481 (void) __kmp_change_library( TRUE ); 7482 } 7483 break; 7484 case library_turnaround : 7485 (void) __kmp_change_library( TRUE ); 7486 break; 7487 case library_throughput : 7488 (void) __kmp_change_library( FALSE ); 7489 break; 7490 default: 7491 KMP_FATAL( UnknownLibraryType, arg ); 7492 } 7493 } 7494 7495 /* ------------------------------------------------------------------------ */ 7496 /* ------------------------------------------------------------------------ */ 7497 7498 void 7499 __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid) 7500 { 7501 int blocktime = arg; /* argument is in milliseconds */ 7502 #if KMP_USE_MONITOR 7503 int bt_intervals; 7504 #endif 7505 int bt_set; 7506 7507 __kmp_save_internal_controls( thread ); 7508 7509 /* Normalize and set blocktime for the teams */ 7510 if (blocktime < KMP_MIN_BLOCKTIME) 7511 blocktime = KMP_MIN_BLOCKTIME; 7512 else if (blocktime > KMP_MAX_BLOCKTIME) 7513 blocktime = KMP_MAX_BLOCKTIME; 7514 7515 set__blocktime_team( thread->th.th_team, tid, blocktime ); 7516 set__blocktime_team( thread->th.th_serial_team, 0, blocktime ); 7517 7518 #if KMP_USE_MONITOR 7519 /* Calculate and set blocktime intervals for the teams */ 7520 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); 7521 7522 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals ); 7523 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals ); 7524 #endif 7525 7526 /* Set whether blocktime has been set to "TRUE" */ 7527 bt_set = TRUE; 7528 7529 set__bt_set_team( thread->th.th_team, tid, bt_set ); 7530 set__bt_set_team( thread->th.th_serial_team, 0, bt_set ); 7531 #if KMP_USE_MONITOR 7532 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " 7533 "bt_intervals=%d, monitor_updates=%d\n", 7534 __kmp_gtid_from_tid(tid, thread->th.th_team), 7535 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, 7536 __kmp_monitor_wakeups)); 7537 #else 7538 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", 7539 __kmp_gtid_from_tid(tid, thread->th.th_team), 7540 thread->th.th_team->t.t_id, tid, blocktime)); 7541 #endif 7542 } 7543 7544 void 7545 __kmp_aux_set_defaults( 7546 char const * str, 7547 int len 7548 ) { 7549 if ( ! 
__kmp_init_serial ) {
        __kmp_serial_initialize();
    };
    __kmp_env_initialize( str );

    if (__kmp_settings
#if OMP_40_ENABLED
        || __kmp_display_env || __kmp_display_env_verbose
#endif // OMP_40_ENABLED
        ) {
        __kmp_env_print();
    }
} // __kmp_aux_set_defaults

/* ------------------------------------------------------------------------ */

/*
 * internal fast reduction routines
 */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
        kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
        kmp_critical_name *lck )
{

    // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
    // If ( reduce_data != NULL && reduce_func != NULL ): the tree-reduction method can be selected by RTL
    // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
    // Finally, it's up to the OpenMP RTL to decide which of the methods generated by PAROPT to select.

    PACKED_REDUCTION_METHOD_T retval;

    int team_size;

    KMP_DEBUG_ASSERT( loc );    // it would be nice to test ( loc != 0 )
    KMP_DEBUG_ASSERT( lck );    // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
#define FAST_REDUCTION_TREE_METHOD_GENERATED   ( ( reduce_data ) && ( reduce_func ) )

    retval = critical_reduce_block;

    team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic dereference ) is slower

    if( team_size == 1 ) {

        retval = empty_reduce_block;

    } else {

        int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
        int tree_available   = FAST_REDUCTION_TREE_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN

        int teamsize_cutoff = 4;

#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
        if( __kmp_mic_type != non_mic ) {
            teamsize_cutoff = 8;
        }
#endif
        if( tree_available ) {
            if( team_size <= teamsize_cutoff ) {
                if ( atomic_available ) {
                    retval = atomic_reduce_block;
                }
            } else {
                retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
            }
        } else if ( atomic_available ) {
            retval = atomic_reduce_block;
        }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_WINDOWS

        // basic tuning

        if( atomic_available ) {
            if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
7637 retval = atomic_reduce_block; 7638 } 7639 } // otherwise: use critical section 7640 7641 #elif KMP_OS_DARWIN 7642 7643 if( atomic_available && ( num_vars <= 3 ) ) { 7644 retval = atomic_reduce_block; 7645 } else if( tree_available ) { 7646 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) { 7647 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; 7648 } 7649 } // otherwise: use critical section 7650 7651 #else 7652 #error "Unknown or unsupported OS" 7653 #endif 7654 7655 #else 7656 #error "Unknown or unsupported architecture" 7657 #endif 7658 7659 } 7660 7661 // KMP_FORCE_REDUCTION 7662 7663 // If the team is serialized (team_size == 1), ignore the forced reduction 7664 // method and stay with the unsynchronized method (empty_reduce_block) 7665 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) { 7666 7667 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block; 7668 7669 int atomic_available, tree_available; 7670 7671 switch( ( forced_retval = __kmp_force_reduction_method ) ) 7672 { 7673 case critical_reduce_block: 7674 KMP_ASSERT( lck ); // lck should be != 0 7675 break; 7676 7677 case atomic_reduce_block: 7678 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; 7679 if( ! atomic_available ) { 7680 KMP_WARNING(RedMethodNotSupported, "atomic"); 7681 forced_retval = critical_reduce_block; 7682 } 7683 break; 7684 7685 case tree_reduce_block: 7686 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 7687 if( ! tree_available ) { 7688 KMP_WARNING(RedMethodNotSupported, "tree"); 7689 forced_retval = critical_reduce_block; 7690 } else { 7691 #if KMP_FAST_REDUCTION_BARRIER 7692 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; 7693 #endif 7694 } 7695 break; 7696 7697 default: 7698 KMP_ASSERT( 0 ); // "unsupported method specified" 7699 } 7700 7701 retval = forced_retval; 7702 } 7703 7704 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) ); 7705 7706 #undef FAST_REDUCTION_TREE_METHOD_GENERATED 7707 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED 7708 7709 return ( retval ); 7710 } 7711 7712 // this function is for testing set/get/determine reduce method 7713 kmp_int32 7714 __kmp_get_reduce_method( void ) { 7715 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 ); 7716 } 7717 7718 /* ------------------------------------------------------------------------ */ 7719
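/*
 * Illustrative summary (not part of the original sources) of how __kmp_determine_reduction_method()
 * above typically resolves on a 64-bit Linux target, assuming the compiler generated both the
 * atomic and the tree reduction variants:
 *
 *     team_size == 1                          -> empty_reduce_block (no synchronization needed)
 *     team_size <= cutoff (4, or 8 on MIC)    -> atomic_reduce_block
 *     larger teams                            -> TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER
 *
 * KMP_FORCE_REDUCTION can override this choice (except for serialized teams), and
 * __kmp_get_reduce_method() simply exposes the packed value shifted right by 8 bits for testing.
 */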