/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_itt.h"
#include "kmp_str.h"
#include "kmp_settings.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_affinity.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
#if OMP_45_ENABLED
    "4.5 (201511)";
#elif OMP_40_ENABLED
    "4.0 (201307)";
#else
    "3.1 (201107)";
#endif

#ifdef KMP_DEBUG
char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

kmp_info_t __kmp_monitor;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Forward declarations */

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
#endif

static int __kmp_expand_threads(int nWish, int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread( int gtid );
#endif
static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique */
/* identifier of executing thread. */
/* returns KMP_GTID_DNE if we haven't been assigned a gtid */

int
__kmp_get_global_thread_id( )
{
    int i;
    kmp_info_t   **other_threads;
    size_t         stack_data;
    char          *stack_addr;
    size_t         stack_size;
    char          *stack_base;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
             parallel region, made it return KMP_GTID_DNE to force serial_initialize by
             caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
             __kmp_init_gtid for this to work. */

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
        return __kmp_gtid;
    }
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();
    }
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    /*
        ATT: The code below is a source of potential bugs due to unsynchronized access to
        __kmp_threads array. For example:
        1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
        2. Current thread is suspended by OS.
        3. Another thread unregisters and finishes (debug versions of free() may fill memory
           with something like 0xEF).
        4. Current thread is resumed.
        5. Current thread reads junk from *thr.
        TODO: Fix it.
        --ln
    */

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
        if( !thr ) continue;

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        /* stack grows down -- search through all of the active threads */

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                /* The only way we can be closer than the allocated */
                /* stack size is if we are running on this thread. */
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
                return i;
            }
        }
    }

    /* get specific to try and determine our gtid */
    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */

    /* if we haven't been assigned a gtid, then return code */
    if( i<0 ) return i;

    /* dynamically updated stack window for uber threads to avoid get_specific call */
    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );
    }

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
          other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
    } else {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
    }

    /* Reprint stack bounds for ubermaster since they have been refined */
    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
    return i;
}

int
__kmp_get_global_thread_id_reg( )
{
    int gtid;

    if ( !__kmp_init_serial ) {
        gtid = KMP_GTID_DNE;
    } else
#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
        gtid = __kmp_gtid;
    } else
#endif
    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();
    } else {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();
    }

    /* we must be a new uber master sibling thread */
    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();
        } else {
            gtid = __kmp_register_root(FALSE);
        }
        __kmp_release_bootstrap_lock( &__kmp_initz_lock );
        /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
    }

    KMP_DEBUG_ASSERT( gtid >= 0 );

    return gtid;
}
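
/*
 * Usage sketch (illustrative only, not part of the runtime): code paths that
 * may run before the library is fully initialized typically obtain a gtid via
 * the registering variant above, for example
 *
 *     int gtid = __kmp_get_global_thread_id_reg(); // registers a new root if needed
 *     kmp_info_t *thr = __kmp_threads[ gtid ];
 *
 * while hot paths that are known to run after initialization read the gtid
 * directly through TLS (__kmp_gtid_get_specific() or the __kmp_gtid TDATA
 * variable) and fall back to the stack-address search only as a last resort.
 */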
"initial" : "actual" ); 275 } 276 } 277 278 /* No point in checking ubermaster threads since they use refinement and cannot overlap */ 279 gtid = __kmp_gtid_from_thread( th ); 280 if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) 281 { 282 KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n")); 283 if ( stack_beg == NULL ) { 284 stack_end = (char *) th->th.th_info.ds.ds_stackbase; 285 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; 286 } 287 288 for( f=0 ; f < __kmp_threads_capacity ; f++ ) { 289 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); 290 291 if( f_th && f_th != th ) { 292 char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); 293 char *other_stack_beg = other_stack_end - 294 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); 295 if((stack_beg > other_stack_beg && stack_beg < other_stack_end) || 296 (stack_end > other_stack_beg && stack_end < other_stack_end)) { 297 298 /* Print the other stack values before the abort */ 299 if ( __kmp_storage_map ) 300 __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end, 301 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), 302 "th_%d stack (overlapped)", 303 __kmp_gtid_from_thread( f_th ) ); 304 305 __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null ); 306 } 307 } 308 } 309 } 310 KA_TRACE(10,("__kmp_check_stack_overlap: returning\n")); 311 } 312 313 314 /* ------------------------------------------------------------------------ */ 315 316 /* ------------------------------------------------------------------------ */ 317 318 void 319 __kmp_infinite_loop( void ) 320 { 321 static int done = FALSE; 322 323 while (! done) { 324 KMP_YIELD( 1 ); 325 } 326 } 327 328 #define MAX_MESSAGE 512 329 330 void 331 __kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) { 332 char buffer[MAX_MESSAGE]; 333 va_list ap; 334 335 va_start( ap, format); 336 KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format ); 337 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); 338 __kmp_vprintf( kmp_err, buffer, ap ); 339 #if KMP_PRINT_DATA_PLACEMENT 340 int node; 341 if(gtid >= 0) { 342 if(p1 <= p2 && (char*)p2 - (char*)p1 == size) { 343 if( __kmp_storage_map_verbose ) { 344 node = __kmp_get_host_node(p1); 345 if(node < 0) /* doesn't work, so don't try this next time */ 346 __kmp_storage_map_verbose = FALSE; 347 else { 348 char *last; 349 int lastNode; 350 int localProc = __kmp_get_cpu_from_gtid(gtid); 351 352 const int page_size = KMP_GET_PAGE_SIZE(); 353 354 p1 = (void *)( (size_t)p1 & ~((size_t)page_size - 1) ); 355 p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)page_size - 1) ); 356 if(localProc >= 0) 357 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1); 358 else 359 __kmp_printf_no_lock(" GTID %d\n", gtid); 360 # if KMP_USE_PRCTL 361 /* The more elaborate format is disabled for now because of the prctl hanging bug. */ 362 do { 363 last = p1; 364 lastNode = node; 365 /* This loop collates adjacent pages with the same host node. 
                        do {
                            (char*)p1 += page_size;
                        } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                        __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
                                             (char*)p1 - 1, lastNode);
                    } while(p1 <= p2);
# else
                    __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                                         (char*)p1 + (page_size - 1), __kmp_get_host_node(p1));
                    if(p1 < p2) {
                        __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                                             (char*)p2 + (page_size - 1), __kmp_get_host_node(p2));
                    }
# endif
                }
            }
        } else
            __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
    }
#endif /* KMP_PRINT_DATA_PLACEMENT */
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
}

void
__kmp_warn( char const * format, ... )
{
    char buffer[MAX_MESSAGE];
    va_list ap;

    if ( __kmp_generate_warnings == kmp_warnings_off ) {
        return;
    }

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );

    va_end( ap );
}

void
__kmp_abort_process()
{

    // Later threads may stall here, but that's ok because abort() will kill them.
    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();
    }; // if

    if ( KMP_OS_WINDOWS ) {
        // Let other threads know of abnormal termination and prevent deadlock
        // if abort happened during library initialization or shutdown
        __kmp_global.g.g_abort = SIGABRT;

        /*
            On Windows* OS, by default abort() causes a pop-up error box, which stalls nightly testing.
            Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
            works well, but this function is not available in VS7 (this is not a problem for the DLL, but
            it is a problem for the static OpenMP RTL). SetErrorMode (and so, the timelimit utility) does
            not help, at least in some versions of MS C RTL.

            It seems the following sequence is the only way to simulate abort() and avoid the pop-up error
            box.
        */
        raise( SIGABRT );
        _exit( 3 ); // Just in case, if signal ignored, exit anyway.
    } else {
        abort();
    }; // if

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

} // __kmp_abort_process

void
__kmp_abort_thread( void )
{
    // TODO: Eliminate g_abort global variable and this function.
    // In case of abort just call abort(), it will kill all the threads.
    __kmp_infinite_loop();
} // __kmp_abort_thread

/* ------------------------------------------------------------------------ */

/*
 * Print out the storage map for the major kmp_info_t thread data structures
 * that are allocated together.
 */

static void
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/*
 * Print out the storage map for the major kmp_team_t team data structures
 * that are allocated together.
 */

static void
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );


    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DYNAMIC_LIB
# if KMP_OS_WINDOWS

static void
__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    // TODO: Change to __kmp_break_bootstrap_lock().
    __kmp_init_bootstrap_lock( lck ); // make the lock released
}

static void
__kmp_reset_locks_on_process_detach( int gtid_req ) {
    int i;
    int thread_count;

    // PROCESS_DETACH is expected to be called by a thread
    // that executes ProcessExit() or FreeLibrary().
    // OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
    // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
    // However, in fact, some threads can still be alive here, although they are about to be terminated.
    // The threads in the array with ds_thread==0 are most suspicious.
    // Actually, it may not be safe to access the __kmp_threads[].

    // TODO: does it make sense to check __kmp_roots[] ?

    // Let's check that there are no other alive threads registered with the OMP lib.
    while( 1 ) {
        thread_count = 0;
        for( i = 0; i < __kmp_threads_capacity; ++i ) {
            if( !__kmp_threads ) continue;
            kmp_info_t* th = __kmp_threads[ i ];
            if( th == NULL ) continue;
            int gtid = th->th.th_info.ds.ds_gtid;
            if( gtid == gtid_req ) continue;
            if( gtid < 0 ) continue;
            DWORD exit_val;
            int alive = __kmp_is_thread_alive( th, &exit_val );
            if( alive ) {
                ++thread_count;
            }
        }
        if( thread_count == 0 ) break; // success
    }

    // Assume that I'm alone.

    // Now it is probably safe to check and reset locks.
    // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
    __kmp_reset_lock( &__kmp_forkjoin_lock );
#ifdef KMP_DEBUG
    __kmp_reset_lock( &__kmp_stdio_lock );
#endif // KMP_DEBUG
}

BOOL WINAPI
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
    //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));

            return TRUE;

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
            {
                // lpReserved is used for telling the difference:
                //   lpReserved == NULL when FreeLibrary() was called,
                //   lpReserved != NULL when the process terminates.
                // When FreeLibrary() is called, worker threads remain alive.
                // So they will release the forkjoin lock by themselves.
                // When the process terminates, worker threads disappear, triggering
                // the problem of an unreleased forkjoin lock as described below.

                // A worker thread can take the forkjoin lock.
                // The problem comes up if that worker thread becomes dead
                // before it releases the forkjoin lock.
                // The forkjoin lock remains taken, while the thread
                // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
                // will try to take the forkjoin lock and will always fail,
                // so that the application will never finish [normally].
                // This scenario is possible if __kmpc_end() has not been executed.
                // These are not corner cases, but rather common cases:
                //   - the main function was compiled by an alternative compiler;
                //   - the main function was compiled by icl but without /Qopenmp (application with plugins);
                //   - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
                //   - alive foreign thread prevented __kmpc_end from doing cleanup.

                // This is a hack to work around the problem.
                // TODO: !!! figure out something better.
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
            }

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

            return TRUE;

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));

            /* if we wanted to register new siblings all the time here call
             * __kmp_get_gtid(); */
            return TRUE;

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
                        __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
            return TRUE;
    }

    return TRUE;
}

# endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */


/* ------------------------------------------------------------------------ */

/* Change the library type to "status" and return the old type */
/* called from within initialization routines where __kmp_initz_lock is held */
int
__kmp_change_library( int status )
{
    int old_status;

    old_status = __kmp_yield_init & 1;  // check whether KMP_LIBRARY=throughput (even init count)

    if (status) {
        __kmp_yield_init |= 1;  // throughput => turnaround (odd init count)
    }
    else {
        __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
    }

    return old_status;  // return previous setting of whether KMP_LIBRARY=throughput
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* __kmp_parallel_deo --
 * Wait until it's our turn.
 */
void
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif
    }
#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_MB();
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
        KMP_MB();
    }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo --
 * Signal the next task.
 */

void
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif /* BUILD_PARALLEL_ORDERED */

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
    }
#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        KMP_MB();       /* Flush all pending memory write invalidates. */

        /* use the tid of the next thread in this team */
        /* TODO: replace this with a general release procedure */
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            /* accept blame for "ordered" waiting */
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
        }
#endif

        KMP_MB();       /* Flush all pending memory write invalidates. */
    }
#endif /* BUILD_PARALLEL_ORDERED */
}
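
/*
 * Ordering sketch (illustrative only): __kmp_parallel_deo and __kmp_parallel_dxo
 * implement a simple ticket hand-off on team->t.t_ordered.dt.t_value, so the
 * dispatcher brackets an ordered chunk roughly like
 *
 *     __kmp_parallel_deo( &gtid, &cid, loc );  // wait until t_value == my tid
 *     ...                                      // body of the ordered chunk
 *     __kmp_parallel_dxo( &gtid, &cid, loc );  // publish t_value = (tid + 1) % nproc
 *
 * The actual call sites are in the dispatcher code elsewhere in the runtime.
 */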

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* The BARRIER for a SINGLE process section is always explicit */

int
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{
    int status;
    kmp_info_t *th;
    kmp_team_t *team;

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;
    status = 0;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {
        status = 1;
    } else {
        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;
        /* try to set team count to thread count--success means thread got the
           single block
        */
        /* TODO: Should this be acquire or release? */
        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);
        }
#if USE_ITT_BUILD
        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
             th->th.th_teams_microtask == NULL &&
#endif
             team->t.t_active_level == 1 )
        {   // Only report metadata by master of active team at level 1
            __kmp_itt_metadata_single( id_ref );
        }
#endif /* USE_ITT_BUILD */
    }

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );
        } else {
            __kmp_check_workshare( gtid, ct_psingle, id_ref );
        }
    }
#if USE_ITT_BUILD
    if ( status ) {
        __kmp_itt_single_start( gtid );
    }
#endif /* USE_ITT_BUILD */
    return status;
}

void
__kmp_exit_single( int gtid )
{
#if USE_ITT_BUILD
    __kmp_itt_single_end( gtid );
#endif /* USE_ITT_BUILD */
    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
}

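
/*
 * Usage sketch (illustrative only, assuming the compiler lowers a single
 * construct through the __kmpc_single / __kmpc_end_single entry points that
 * wrap the two routines above):
 *
 *     if ( __kmp_enter_single( gtid, loc, TRUE ) ) {
 *         // body of the single region, executed by exactly one thread
 *         __kmp_exit_single( gtid );
 *     }
 *     // the barrier implied at the end of the construct is emitted separately
 */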

/*
 * determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller.
 */
static int
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
   int master_tid, int set_nthreads
#if OMP_40_ENABLED
  , int enter_teams
#endif /* OMP_40_ENABLED */
)
{
    int capacity;
    int new_nthreads;
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    //
    // If dyn-var is set, dynamically adjust the number of desired threads,
    // according to the method specified by dynamic_mode.
    //
    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
        ;
    }
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
    }
#endif /* USE_LOAD_BALANCE */
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
          : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
              master_tid, new_nthreads ));
        }
        else {
            new_nthreads = set_nthreads;
        }
    }
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
                  master_tid ));
                return 1;
            }
            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                  master_tid, new_nthreads ));
            }
        }
    }
    else {
        KMP_ASSERT( 0 );
    }

    //
    // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
    //
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {
            tl_nthreads = 1;
        }

        //
        // If dyn-var is false, emit a 1-time warning.
        //
        if ( ! get__dynamic_2( parent_team, master_tid )
          && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
            __kmp_msg(
                kmp_ms_warning,
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),
                __kmp_msg_null
            );
        }
        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
              master_tid ));
            return 1;
        }
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
          master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;
    }

    //
    // Check if the threads array is large enough, or needs expanding.
    //
    // See comment in __kmp_register_root() about the adjustment if
    // __kmp_threads[0] == NULL.
    //
    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
        --capacity;
    }
    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
      root->r.r_hot_team->t.t_nproc ) > capacity ) {
        //
        // Expand the threads array.
        //
        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
          root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {
            //
            // The threads array was not expanded enough.
            //
            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            //
            // If dyn-var is false, emit a 1-time warning.
            //
            if ( ! get__dynamic_2( parent_team, master_tid )
              && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
                else {
                    __kmp_msg(
                        kmp_ms_warning,
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),
                        __kmp_msg_null
                    );
                }
            }
        }
    }

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );
        return 1;
    }

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* allocate threads from the thread pool and assign them to the new team */
/* we are assured that there are enough threads available, because we
 * checked on that earlier within critical section forkjoin */

static void
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{
    int         i;
    int         use_hot_team;

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
    KMP_MB();

    /* first, let's setup the master thread */
    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

    /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
    use_hot_team = 0;
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0
        int level = team->t.t_active_level - 1; // index in array of hot teams
        if( master_th->th.th_teams_microtask ) {    // are we inside the teams?
            if( master_th->th.th_teams_size.nteams > 1 ) {
                ++level; // level was not increased in teams construct for team_of_masters
            }
            if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
                master_th->th.th_teams_level == team->t.t_level ) {
                ++level; // level was not increased in teams construct for team_of_workers before the parallel
            }            // team->t.t_level will be increased inside parallel
        }
        if( level < __kmp_hot_teams_max_level ) {
            if( hot_teams[level].hot_team ) {
                // hot team has already been allocated for given level
                KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
                use_hot_team = 1; // the team is ready to use
            } else {
                use_hot_team = 0; // AC: threads are not allocated yet
                hot_teams[level].hot_team = team; // remember new hot team
                hot_teams[level].hot_team_nth = team->t.t_nproc;
            }
        } else {
            use_hot_team = 0;
        }
    }
#else
    use_hot_team = team == root->r.r_hot_team;
#endif
    if ( !use_hot_team ) {

        /* install the master thread */
        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        /* now, install the worker threads */
        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            /* fork or reallocate a new thread and install it in team */
            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );
            /* align team and thread arrived states */
            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
#if OMP_40_ENABLED
            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;
#endif
            { // Initialize threads' barrier data.
                int b;
                kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived        = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
#endif
                }; // for b
            }
        }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
        __kmp_partition_places( team );
#endif

    }

    KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
//
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team structure,
// so we don't make changes unless they are needed.
//
inline static void
propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;

        // Get master values of FPU control flags (both X87 and vector)
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        // There is no point looking at t_fp_control_saved here.
        // If it is TRUE, we still have to update the values if they are different from those we now have.
        // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure
        // that the values in the team are the same as those we have.
        // So, this code achieves what we need whether or not t_fp_control_saved is true.
        // By checking whether the value needs updating we avoid unnecessary writes that would put the
        // cache-line into a written state, causing all threads in the team to have to read it again.
        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
        // Although we don't use this value, other code in the runtime wants to know whether it should restore them.
        // So we must ensure it is correct.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    }
    else {
        // Similarly here. Don't write to this cache-line in the team structure unless we have to.
        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
    }
}

// Do the opposite, setting the hardware registers to the updated values from the team.
inline static void
updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
        //
        // Only reset the fp control regs if they have been changed in
        // the parallel region that we are exiting.
        //
        kmp_int16 x87_fpu_control_word;
        kmp_uint32 mxcsr;
        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
        }

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );
        }
    }
}
#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x)  ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
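
/*
 * Pairing sketch (illustrative only; the real call sites are elsewhere in the
 * runtime): the two helpers above are meant to be used as a pair around a
 * parallel region on x86, e.g.
 *
 *     propagateFPControl( team );  // master: publish x87/MXCSR state to the team
 *     ...                          // team executes the microtask
 *     updateHWFPControl( team );   // thread: re-load the published state if it changed
 *
 * On non-x86 builds both names expand to no-ops, so callers need no guards.
 */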

static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration

/*
 * Run a parallel region that has been serialized, so it runs only in a team of the single master thread.
 */
void
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    /* Skip all this code for autopar serialized loops since it results in
       unacceptable overhead */
    if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
        return;

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr     = __kmp_threads[ global_tid ];
    serial_team  = this_thr->th.th_serial_team;

    /* utilize the serialized team held by this thread */
    KMP_DEBUG_ASSERT( serial_team );
    KMP_MB();

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;
    }

#if OMP_40_ENABLED
    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        //
        // No proc_bind clause was specified, so use the current value
        // of proc-bind-var for this parallel region.
        //
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    //
    // Reset for next parallel region
    //
    this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

    if( this_thr->th.th_team != serial_team ) {
        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            /* this serial team was already used
             * TODO increase performance by making these locks more specific */
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

#if OMPT_SUPPORT
            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
#endif

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                           ompt_parallel_id,
#endif
#if OMP_40_ENABLED
                                           proc_bind,
#endif
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            /* setup new serialized team and install it */
            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );


            /* TODO the above breaks the requirement that if we run out of
             * resources, then we can still guarantee that serialized teams
             * are ok, since we may need to allocate a new one */
        } else {
            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );
        }

        /* we have to initialize this serial team */
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident         = loc;
        serial_team->t.t_serialized    = 1;
        serial_team->t.t_nproc         = 1;
        serial_team->t.t_parent        = this_thr->th.th_team;
        serial_team->t.t_sched         = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team           = serial_team;
        serial_team->t.t_master_tid    = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
           each serialized task represented by team->t.t_serialized? */
        copy_icvs(
            & this_thr->th.th_current_task->td_icvs,
            & this_thr->th.th_current_task->td_parent->td_icvs );

        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }

#if OMP_40_ENABLED
        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];
        }
#endif /* OMP_40_ENABLED */

#if USE_DEBUGGER
        serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger.
#endif
        this_thr->th.th_info.ds.ds_tid = 0;

        /* set thread cache values */
        this_thr->th.th_team_nproc      = 1;
        this_thr->th.th_team_master     = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level        = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl (serial_team);

        /* check if we need to allocate dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

#if OMPT_SUPPORT
        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);
#endif

        KMP_MB();

    } else {
        /* this serialized team is already being used,
         * that's fine, just add another nested level */
        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        // Nested level will be an index in the nested nthreads array
        int level = this_thr->th.th_team->t.t_level;
        // Thread value exists in the nested nthreads array for the next nested level
        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
        }
        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        /* allocate/push dispatch buffers stack */
        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        {
            dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );
            disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
            serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
        }
        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        KMP_MB();
    }

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

}

/* most of the work for a fork */
/* return true if we really went parallel, false if serialized */
int
__kmp_fork_call(
    ident_t   * loc,
    int         gtid,
    enum fork_context_e  call_context, // Intel, GNU, ...
    kmp_int32   argc,
#if OMPT_SUPPORT
    void       *unwrapped_task,
#endif
    microtask_t microtask,
    launch_t    invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
    va_list   * ap
#else
    va_list     ap
#endif
    )
{
    void          **argv;
    int             i;
    int             master_tid;
    int             master_this_cons;
    kmp_team_t     *team;
    kmp_team_t     *parent_team;
    kmp_info_t     *master_th;
    kmp_root_t     *root;
    int             nthreads;
    int             master_active;
    int             master_set_numthreads;
    int             level;
#if OMP_40_ENABLED
    int             active_level;
    int             teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t **p_hot_teams;
#endif
    { // KMP_TIME_BLOCK
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
    KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);

    KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
    if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {
        /* Some systems prefer the stack for the root thread(s) to start with */
        /* some gap from the parent stack to prevent false sharing. */
        void *dummy = KMP_ALLOCA(__kmp_stkpadding);
        /* These 2 lines below are so this does not get optimized out */
        if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
            __kmp_stkpadding += (short)((kmp_int64)dummy);
    }

    /* initialize if needed */
    KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown
    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    /* setup current data */
    master_th     = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown
    parent_team   = master_th->th.th_team;
    master_tid    = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root          = master_th->th.th_root;
    master_active = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
    ompt_parallel_id_t ompt_parallel_id;
    ompt_task_id_t ompt_task_id;
    ompt_frame_t *ompt_frame;
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;

    if (ompt_enabled) {
        ompt_parallel_id = __ompt_parallel_id_new(gtid);
        ompt_task_id = __ompt_get_task_id_internal(0);
        ompt_frame = __ompt_get_task_frame_internal(0);
    }
#endif

    // Nested level will be an index in the nested nthreads array
    level         = parent_team->t.t_level;
    active_level  = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed
#if OMP_40_ENABLED
    teams_level   = master_th->th.th_teams_level; // needed to check nesting inside the teams
#endif
#if KMP_NESTED_HOT_TEAMS
    p_hot_teams   = &master_th->th.th_hot_teams;
    if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
        *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
                            sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
        (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
        (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0)
    }
#endif

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
        int team_size = master_set_numthreads;

        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
            ompt_task_id, ompt_frame, ompt_parallel_id,
            team_size, unwrapped_task, OMPT_INVOKER(call_context));
    }
#endif

    master_th->th.th_ident = loc;

#if OMP_40_ENABLED
    if ( master_th->th.th_teams_microtask &&
         ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
        // AC: This is start of parallel that is nested inside teams construct.
        // The team is actual (hot), all workers are ready at the fork barrier.
        // No lock needed to initialize the team a bit, then free workers.
        parent_team->t.t_ident = loc;
        __kmp_alloc_argv_entries( argc, parent_team, TRUE );
        parent_team->t.t_argc  = argc;
        argv = (void**)parent_team->t.t_argv;
        for( i=argc-1; i >= 0; --i )
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
#else
            *argv++ = va_arg( ap, void * );
#endif
        /* Increment our nested depth levels, but not increase the serialization */
        if ( parent_team == master_th->th.th_serial_team ) {
            // AC: we are in serialized parallel
            __kmpc_serialized_parallel(loc, gtid);
            KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
            parent_team->t.t_serialized--; // AC: need this in order for enquiry functions to
                                           // work correctly, will restore at join time

#if OMPT_SUPPORT
            void *dummy;
            void **exit_runtime_p;

            ompt_lw_taskteam_t lw_taskteam;

            if (ompt_enabled) {
                __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                    unwrapped_task, ompt_parallel_id);
                lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                __ompt_lw_taskteam_link(&lw_taskteam, master_th);

#if OMPT_TRACE
                /* OMPT implicit task begin */
                my_task_id = lw_taskteam.ompt_task_info.task_id;
                my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                        my_parallel_id, my_task_id);
                }
#endif

                /* OMPT state */
                master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
            } else {
                exit_runtime_p = &dummy;
            }
#endif

            {
                KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
                KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
                __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                                        , exit_runtime_p
#endif
                                        );
            }

#if OMPT_SUPPORT
            *exit_runtime_p = NULL;
            if (ompt_enabled) {
#if OMPT_TRACE
                lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;

                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                        ompt_parallel_id, ompt_task_id);
                }

                __ompt_lw_taskteam_unlink(master_th);
                // clear the task id only after unlinking the task
                lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;
#endif

                if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                        ompt_parallel_id, ompt_task_id,
                        OMPT_INVOKER(call_context));
                }
                master_th->th.ompt_thread_info.state = ompt_state_overhead;
            }
#endif
            return TRUE;
        }

        parent_team->t.t_pkfn  = microtask;
#if OMPT_SUPPORT
        parent_team->t.ompt_team_info.microtask = unwrapped_task;
#endif
        parent_team->t.t_invoke = invoker;
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
        parent_team->t.t_active_level ++;
        parent_team->t.t_level ++;

        /* Change number of threads in the team if requested */
        if ( master_set_numthreads ) {   // The parallel has num_threads clause
            if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {
                // AC: we can only reduce the number of threads dynamically, we cannot increase it
increase 1596 kmp_info_t **other_threads = parent_team->t.t_threads; 1597 parent_team->t.t_nproc = master_set_numthreads; 1598 for ( i = 0; i < master_set_numthreads; ++i ) { 1599 other_threads[i]->th.th_team_nproc = master_set_numthreads; 1600 } 1601 // Keep extra threads hot in the team for possible next parallels 1602 } 1603 master_th->th.th_set_nproc = 0; 1604 } 1605 1606 #if USE_DEBUGGER 1607 if ( __kmp_debugging ) { // Let debugger override number of threads. 1608 int nth = __kmp_omp_num_threads( loc ); 1609 if ( nth > 0 ) { // 0 means debugger does not want to change number of threads. 1610 master_set_numthreads = nth; 1611 }; // if 1612 }; // if 1613 #endif 1614 1615 KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); 1616 __kmp_internal_fork( loc, gtid, parent_team ); 1617 KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); 1618 1619 /* Invoke microtask for MASTER thread */ 1620 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", 1621 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); 1622 1623 { 1624 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1625 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1626 if (! parent_team->t.t_invoke( gtid )) { 1627 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); 1628 } 1629 } 1630 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", 1631 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); 1632 KMP_MB(); /* Flush all pending memory write invalidates. */ 1633 1634 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); 1635 1636 return TRUE; 1637 } // Parallel closely nested in teams construct 1638 #endif /* OMP_40_ENABLED */ 1639 1640 #if KMP_DEBUG 1641 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 1642 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); 1643 } 1644 #endif 1645 1646 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) { 1647 nthreads = 1; 1648 } else { 1649 #if OMP_40_ENABLED 1650 int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level)); 1651 #endif 1652 nthreads = master_set_numthreads ? 1653 master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task 1654 1655 // Check if we need to take forkjoin lock? (no need for serialized parallel out of teams construct). 1656 // This code moved here from __kmp_reserve_threads() to speedup nested serialized parallels. 1657 if (nthreads > 1) { 1658 if ( ( !get__nested(master_th) && (root->r.r_in_parallel 1659 #if OMP_40_ENABLED 1660 && !enter_teams 1661 #endif /* OMP_40_ENABLED */ 1662 ) ) || ( __kmp_library == library_serial ) ) { 1663 KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n", 1664 gtid, nthreads )); 1665 nthreads = 1; 1666 } 1667 } 1668 if ( nthreads > 1 ) { 1669 /* determine how many new threads we can use */ 1670 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 1671 1672 nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads 1673 #if OMP_40_ENABLED 1674 /* AC: If we execute teams from parallel region (on host), then teams should be created 1675 but each can only have 1 thread if nesting is disabled. 
If teams called from serial region, 1676 then teams and their threads should be created regardless of the nesting setting. */ 1677 , enter_teams 1678 #endif /* OMP_40_ENABLED */ 1679 ); 1680 if ( nthreads == 1 ) { 1681 // Free lock for single thread execution here; 1682 // for multi-thread execution it will be freed later 1683 // after team of threads created and initialized 1684 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 1685 } 1686 } 1687 } 1688 KMP_DEBUG_ASSERT( nthreads > 0 ); 1689 1690 /* If we temporarily changed the set number of threads then restore it now */ 1691 master_th->th.th_set_nproc = 0; 1692 1693 /* create a serialized parallel region? */ 1694 if ( nthreads == 1 ) { 1695 /* josh todo: hypothetical question: what do we do for OS X*? */ 1696 #if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 1697 void * args[ argc ]; 1698 #else 1699 void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) ); 1700 #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */ 1701 1702 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid )); 1703 1704 __kmpc_serialized_parallel(loc, gtid); 1705 1706 if ( call_context == fork_context_intel ) { 1707 /* TODO this sucks, use the compiler itself to pass args! :) */ 1708 master_th->th.th_serial_team->t.t_ident = loc; 1709 #if OMP_40_ENABLED 1710 if ( !ap ) { 1711 // revert change made in __kmpc_serialized_parallel() 1712 master_th->th.th_serial_team->t.t_level--; 1713 // Get args from parent team for teams construct 1714 1715 #if OMPT_SUPPORT 1716 void *dummy; 1717 void **exit_runtime_p; 1718 1719 ompt_lw_taskteam_t lw_taskteam; 1720 1721 if (ompt_enabled) { 1722 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1723 unwrapped_task, ompt_parallel_id); 1724 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1725 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1726 1727 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1728 1729 #if OMPT_TRACE 1730 my_task_id = lw_taskteam.ompt_task_info.task_id; 1731 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1732 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1733 ompt_parallel_id, my_task_id); 1734 } 1735 #endif 1736 1737 /* OMPT state */ 1738 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1739 } else { 1740 exit_runtime_p = &dummy; 1741 } 1742 #endif 1743 1744 { 1745 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1746 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1747 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv 1748 #if OMPT_SUPPORT 1749 , exit_runtime_p 1750 #endif 1751 ); 1752 } 1753 1754 #if OMPT_SUPPORT 1755 *exit_runtime_p = NULL; 1756 if (ompt_enabled) { 1757 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1758 1759 #if OMPT_TRACE 1760 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1761 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1762 ompt_parallel_id, ompt_task_id); 1763 } 1764 #endif 1765 1766 __ompt_lw_taskteam_unlink(master_th); 1767 // reset clear the task id only after unlinking the task 1768 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1769 1770 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1771 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1772 ompt_parallel_id, ompt_task_id, 1773 OMPT_INVOKER(call_context)); 1774 } 1775 master_th->th.ompt_thread_info.state = ompt_state_overhead; 1776 } 
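            // Note on the lightweight task team used in this serialized path: no real
            // kmp_team_t is allocated here, so OMPT bookkeeping lives in the on-stack
            // ompt_lw_taskteam_t linked onto the master thread for the duration of the
            // region; that is what lets a tool still observe the implicit-task and
            // parallel begin/end events above. (Descriptive note inferred from the
            // init/link/unlink calls in this block.)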
1777 #endif 1778 } else if ( microtask == (microtask_t)__kmp_teams_master ) { 1779 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team ); 1780 team = master_th->th.th_team; 1781 //team->t.t_pkfn = microtask; 1782 team->t.t_invoke = invoker; 1783 __kmp_alloc_argv_entries( argc, team, TRUE ); 1784 team->t.t_argc = argc; 1785 argv = (void**) team->t.t_argv; 1786 if ( ap ) { 1787 for( i=argc-1; i >= 0; --i ) 1788 // TODO: revert workaround for Intel(R) 64 tracker #96 1789 # if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1790 *argv++ = va_arg( *ap, void * ); 1791 # else 1792 *argv++ = va_arg( ap, void * ); 1793 # endif 1794 } else { 1795 for( i=0; i < argc; ++i ) 1796 // Get args from parent team for teams construct 1797 argv[i] = parent_team->t.t_argv[i]; 1798 } 1799 // AC: revert change made in __kmpc_serialized_parallel() 1800 // because initial code in teams should have level=0 1801 team->t.t_level--; 1802 // AC: call special invoker for outer "parallel" of the teams construct 1803 { 1804 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1805 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1806 invoker(gtid); 1807 } 1808 } else { 1809 #endif /* OMP_40_ENABLED */ 1810 argv = args; 1811 for( i=argc-1; i >= 0; --i ) 1812 // TODO: revert workaround for Intel(R) 64 tracker #96 1813 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 1814 *argv++ = va_arg( *ap, void * ); 1815 #else 1816 *argv++ = va_arg( ap, void * ); 1817 #endif 1818 KMP_MB(); 1819 1820 #if OMPT_SUPPORT 1821 void *dummy; 1822 void **exit_runtime_p; 1823 1824 ompt_lw_taskteam_t lw_taskteam; 1825 1826 if (ompt_enabled) { 1827 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, 1828 unwrapped_task, ompt_parallel_id); 1829 lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); 1830 exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); 1831 1832 __ompt_lw_taskteam_link(&lw_taskteam, master_th); 1833 1834 #if OMPT_TRACE 1835 /* OMPT implicit task begin */ 1836 my_task_id = lw_taskteam.ompt_task_info.task_id; 1837 my_parallel_id = ompt_parallel_id; 1838 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 1839 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 1840 my_parallel_id, my_task_id); 1841 } 1842 #endif 1843 1844 /* OMPT state */ 1845 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 1846 } else { 1847 exit_runtime_p = &dummy; 1848 } 1849 #endif 1850 1851 { 1852 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 1853 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 1854 __kmp_invoke_microtask( microtask, gtid, 0, argc, args 1855 #if OMPT_SUPPORT 1856 , exit_runtime_p 1857 #endif 1858 ); 1859 } 1860 1861 #if OMPT_SUPPORT 1862 *exit_runtime_p = NULL; 1863 if (ompt_enabled) { 1864 #if OMPT_TRACE 1865 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; 1866 1867 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 1868 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 1869 my_parallel_id, my_task_id); 1870 } 1871 #endif 1872 1873 __ompt_lw_taskteam_unlink(master_th); 1874 // reset clear the task id only after unlinking the task 1875 lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; 1876 1877 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 1878 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 1879 ompt_parallel_id, ompt_task_id, 1880 OMPT_INVOKER(call_context)); 1881 } 1882 master_th->th.ompt_thread_info.state = ompt_state_overhead; 1883 } 1884 #endif 1885 #if 
OMP_40_ENABLED 1886 } 1887 #endif /* OMP_40_ENABLED */ 1888 } 1889 else if ( call_context == fork_context_gnu ) { 1890 #if OMPT_SUPPORT 1891 ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *) 1892 __kmp_allocate(sizeof(ompt_lw_taskteam_t)); 1893 __ompt_lw_taskteam_init(lwt, master_th, gtid, 1894 unwrapped_task, ompt_parallel_id); 1895 1896 lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); 1897 lwt->ompt_task_info.frame.exit_runtime_frame = NULL; 1898 __ompt_lw_taskteam_link(lwt, master_th); 1899 #endif 1900 1901 // we were called from GNU native code 1902 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); 1903 return FALSE; 1904 } 1905 else { 1906 KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" ); 1907 } 1908 1909 1910 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); 1911 KMP_MB(); 1912 return FALSE; 1913 } 1914 1915 // GEH: only modify the executing flag in the case when not serialized 1916 // serialized case is handled in kmpc_serialized_parallel 1917 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n", 1918 parent_team->t.t_active_level, master_th, master_th->th.th_current_task, 1919 master_th->th.th_current_task->td_icvs.max_active_levels ) ); 1920 // TODO: GEH - cannot do this assertion because root thread not set up as executing 1921 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); 1922 master_th->th.th_current_task->td_flags.executing = 0; 1923 1924 #if OMP_40_ENABLED 1925 if ( !master_th->th.th_teams_microtask || level > teams_level ) 1926 #endif /* OMP_40_ENABLED */ 1927 { 1928 /* Increment our nested depth level */ 1929 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); 1930 } 1931 1932 // See if we need to make a copy of the ICVs. 1933 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; 1934 if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) { 1935 nthreads_icv = __kmp_nested_nth.nth[level+1]; 1936 } 1937 else { 1938 nthreads_icv = 0; // don't update 1939 } 1940 1941 #if OMP_40_ENABLED 1942 // Figure out the proc_bind_policy for the new team. 1943 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; 1944 kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update 1945 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { 1946 proc_bind = proc_bind_false; 1947 } 1948 else { 1949 if (proc_bind == proc_bind_default) { 1950 // No proc_bind clause specified; use current proc-bind-var for this parallel region 1951 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; 1952 } 1953 /* else: The proc_bind policy was specified explicitly on parallel clause. This 1954 overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */ 1955 // Figure the value of proc-bind-var for the child threads. 
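        // For example (illustrative): with OMP_PROC_BIND=spread,close the settings
        // parser fills __kmp_nested_proc_bind.bind_types[] with {spread, close}, so
        // for a level-0 parallel region the check below hands proc_bind_icv = close
        // to the ICVs of the child threads while the current region keeps its own
        // proc_bind policy.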
1956 if ((level+1 < __kmp_nested_proc_bind.used) 1957 && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) { 1958 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1]; 1959 } 1960 } 1961 1962 // Reset for next parallel region 1963 master_th->th.th_set_proc_bind = proc_bind_default; 1964 #endif /* OMP_40_ENABLED */ 1965 1966 if ((nthreads_icv > 0) 1967 #if OMP_40_ENABLED 1968 || (proc_bind_icv != proc_bind_default) 1969 #endif /* OMP_40_ENABLED */ 1970 ) { 1971 kmp_internal_control_t new_icvs; 1972 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); 1973 new_icvs.next = NULL; 1974 if (nthreads_icv > 0) { 1975 new_icvs.nproc = nthreads_icv; 1976 } 1977 1978 #if OMP_40_ENABLED 1979 if (proc_bind_icv != proc_bind_default) { 1980 new_icvs.proc_bind = proc_bind_icv; 1981 } 1982 #endif /* OMP_40_ENABLED */ 1983 1984 /* allocate a new parallel team */ 1985 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); 1986 team = __kmp_allocate_team(root, nthreads, nthreads, 1987 #if OMPT_SUPPORT 1988 ompt_parallel_id, 1989 #endif 1990 #if OMP_40_ENABLED 1991 proc_bind, 1992 #endif 1993 &new_icvs, argc USE_NESTED_HOT_ARG(master_th) ); 1994 } else { 1995 /* allocate a new parallel team */ 1996 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); 1997 team = __kmp_allocate_team(root, nthreads, nthreads, 1998 #if OMPT_SUPPORT 1999 ompt_parallel_id, 2000 #endif 2001 #if OMP_40_ENABLED 2002 proc_bind, 2003 #endif 2004 &master_th->th.th_current_task->td_icvs, argc 2005 USE_NESTED_HOT_ARG(master_th) ); 2006 } 2007 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) ); 2008 2009 /* setup the new team */ 2010 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); 2011 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); 2012 KMP_CHECK_UPDATE(team->t.t_ident, loc); 2013 KMP_CHECK_UPDATE(team->t.t_parent, parent_team); 2014 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); 2015 #if OMPT_SUPPORT 2016 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task); 2017 #endif 2018 KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */ 2019 // TODO: parent_team->t.t_level == INT_MAX ??? 2020 #if OMP_40_ENABLED 2021 if ( !master_th->th.th_teams_microtask || level > teams_level ) { 2022 #endif /* OMP_40_ENABLED */ 2023 int new_level = parent_team->t.t_level + 1; 2024 KMP_CHECK_UPDATE(team->t.t_level, new_level); 2025 new_level = parent_team->t.t_active_level + 1; 2026 KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 2027 #if OMP_40_ENABLED 2028 } else { 2029 // AC: Do not increase parallel level at start of the teams construct 2030 int new_level = parent_team->t.t_level; 2031 KMP_CHECK_UPDATE(team->t.t_level, new_level); 2032 new_level = parent_team->t.t_active_level; 2033 KMP_CHECK_UPDATE(team->t.t_active_level, new_level); 2034 } 2035 #endif /* OMP_40_ENABLED */ 2036 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); 2037 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk) 2038 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule 2039 2040 #if OMP_40_ENABLED 2041 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); 2042 #endif 2043 2044 // Update the floating point rounding in the team if required. 2045 propagateFPControl(team); 2046 2047 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 2048 // Set master's task team to team's task team. 
Unless this is hot team, it should be NULL. 2049 #if 0 2050 // Patch out an assertion that trips while the runtime seems to operate correctly. 2051 // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch. 2052 KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); 2053 #endif 2054 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n", 2055 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, 2056 parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) ); 2057 2058 if ( active_level || master_th->th.th_task_team ) { 2059 // Take a memo of master's task_state 2060 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 2061 if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size 2062 kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz; 2063 kmp_uint8 *old_stack, *new_stack; 2064 kmp_uint32 i; 2065 new_stack = (kmp_uint8 *)__kmp_allocate(new_size); 2066 for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) { 2067 new_stack[i] = master_th->th.th_task_state_memo_stack[i]; 2068 } 2069 for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack 2070 new_stack[i] = 0; 2071 } 2072 old_stack = master_th->th.th_task_state_memo_stack; 2073 master_th->th.th_task_state_memo_stack = new_stack; 2074 master_th->th.th_task_state_stack_sz = new_size; 2075 __kmp_free(old_stack); 2076 } 2077 // Store master's task_state on stack 2078 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; 2079 master_th->th.th_task_state_top++; 2080 #if KMP_NESTED_HOT_TEAMS 2081 if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team 2082 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; 2083 } 2084 else { 2085 #endif 2086 master_th->th.th_task_state = 0; 2087 #if KMP_NESTED_HOT_TEAMS 2088 } 2089 #endif 2090 } 2091 #if !KMP_NESTED_HOT_TEAMS 2092 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team)); 2093 #endif 2094 } 2095 2096 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", 2097 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc )); 2098 KMP_DEBUG_ASSERT( team != root->r.r_hot_team || 2099 ( team->t.t_master_tid == 0 && 2100 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) )); 2101 KMP_MB(); 2102 2103 /* now, setup the arguments */ 2104 argv = (void**)team->t.t_argv; 2105 #if OMP_40_ENABLED 2106 if ( ap ) { 2107 #endif /* OMP_40_ENABLED */ 2108 for ( i=argc-1; i >= 0; --i ) { 2109 // TODO: revert workaround for Intel(R) 64 tracker #96 2110 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 2111 void *new_argv = va_arg(*ap, void *); 2112 #else 2113 void *new_argv = va_arg(ap, void *); 2114 #endif 2115 KMP_CHECK_UPDATE(*argv, new_argv); 2116 argv++; 2117 } 2118 #if OMP_40_ENABLED 2119 } else { 2120 for ( i=0; i < argc; ++i ) { 2121 // Get args from parent team for teams construct 2122 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); 2123 } 2124 } 2125 #endif /* OMP_40_ENABLED */ 2126 2127 /* now actually fork the threads */ 2128 KMP_CHECK_UPDATE(team->t.t_master_active, master_active); 2129 if (!root->r.r_active) // Only do assignment if it prevents cache 
ping-pong 2130 root->r.r_active = TRUE; 2131 2132 __kmp_fork_team_threads( root, team, master_th, gtid ); 2133 __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc ); 2134 2135 #if OMPT_SUPPORT 2136 master_th->th.ompt_thread_info.state = ompt_state_work_parallel; 2137 #endif 2138 2139 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 2140 2141 #if USE_ITT_BUILD 2142 if ( team->t.t_active_level == 1 // only report frames at level 1 2143 # if OMP_40_ENABLED 2144 && !master_th->th.th_teams_microtask // not in teams construct 2145 # endif /* OMP_40_ENABLED */ 2146 ) { 2147 #if USE_ITT_NOTIFY 2148 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && 2149 ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) 2150 { 2151 kmp_uint64 tmp_time = 0; 2152 if ( __itt_get_timestamp_ptr ) 2153 tmp_time = __itt_get_timestamp(); 2154 // Internal fork - report frame begin 2155 master_th->th.th_frame_time = tmp_time; 2156 if ( __kmp_forkjoin_frames_mode == 3 ) 2157 team->t.t_region_time = tmp_time; 2158 } else // only one notification scheme (either "submit" or "forking/joined", not both) 2159 #endif /* USE_ITT_NOTIFY */ 2160 if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) && 2161 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode ) 2162 { // Mark start of "parallel" region for VTune. 2163 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); 2164 } 2165 } 2166 #endif /* USE_ITT_BUILD */ 2167 2168 /* now go on and do the work */ 2169 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team ); 2170 KMP_MB(); 2171 KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", 2172 root, team, master_th, gtid)); 2173 2174 #if USE_ITT_BUILD 2175 if ( __itt_stack_caller_create_ptr ) { 2176 team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier 2177 } 2178 #endif /* USE_ITT_BUILD */ 2179 2180 #if OMP_40_ENABLED 2181 if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute 2182 #endif /* OMP_40_ENABLED */ 2183 { 2184 __kmp_internal_fork( loc, gtid, team ); 2185 KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", 2186 root, team, master_th, gtid)); 2187 } 2188 2189 if (call_context == fork_context_gnu) { 2190 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); 2191 return TRUE; 2192 } 2193 2194 /* Invoke microtask for MASTER thread */ 2195 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", 2196 gtid, team->t.t_id, team->t.t_pkfn ) ); 2197 } // END of timer KMP_fork_call block 2198 2199 { 2200 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 2201 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 2202 if (! team->t.t_invoke( gtid )) { 2203 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); 2204 } 2205 } 2206 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", 2207 gtid, team->t.t_id, team->t.t_pkfn ) ); 2208 KMP_MB(); /* Flush all pending memory write invalidates. */ 2209 2210 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); 2211 2212 #if OMPT_SUPPORT 2213 if (ompt_enabled) { 2214 master_th->th.ompt_thread_info.state = ompt_state_overhead; 2215 } 2216 #endif 2217 2218 return TRUE; 2219 } 2220 2221 #if OMPT_SUPPORT 2222 static inline void 2223 __kmp_join_restore_state( 2224 kmp_info_t *thread, 2225 kmp_team_t *team) 2226 { 2227 // restore state outside the region 2228 thread->th.ompt_thread_info.state = ((team->t.t_serialized) ? 
2229 ompt_state_work_serial : ompt_state_work_parallel); 2230 } 2231 2232 static inline void 2233 __kmp_join_ompt( 2234 kmp_info_t *thread, 2235 kmp_team_t *team, 2236 ompt_parallel_id_t parallel_id, 2237 fork_context_e fork_context) 2238 { 2239 ompt_task_info_t *task_info = __ompt_get_taskinfo(0); 2240 if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { 2241 ompt_callbacks.ompt_callback(ompt_event_parallel_end)( 2242 parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); 2243 } 2244 2245 task_info->frame.reenter_runtime_frame = NULL; 2246 __kmp_join_restore_state(thread,team); 2247 } 2248 #endif 2249 2250 void 2251 __kmp_join_call(ident_t *loc, int gtid 2252 #if OMPT_SUPPORT 2253 , enum fork_context_e fork_context 2254 #endif 2255 #if OMP_40_ENABLED 2256 , int exit_teams 2257 #endif /* OMP_40_ENABLED */ 2258 ) 2259 { 2260 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call); 2261 kmp_team_t *team; 2262 kmp_team_t *parent_team; 2263 kmp_info_t *master_th; 2264 kmp_root_t *root; 2265 int master_active; 2266 int i; 2267 2268 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid )); 2269 2270 /* setup current data */ 2271 master_th = __kmp_threads[ gtid ]; 2272 root = master_th->th.th_root; 2273 team = master_th->th.th_team; 2274 parent_team = team->t.t_parent; 2275 2276 master_th->th.th_ident = loc; 2277 2278 #if OMPT_SUPPORT 2279 if (ompt_enabled) { 2280 master_th->th.ompt_thread_info.state = ompt_state_overhead; 2281 } 2282 #endif 2283 2284 #if KMP_DEBUG 2285 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 2286 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n", 2287 __kmp_gtid_from_thread( master_th ), team, 2288 team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) ); 2289 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] ); 2290 } 2291 #endif 2292 2293 if( team->t.t_serialized ) { 2294 #if OMP_40_ENABLED 2295 if ( master_th->th.th_teams_microtask ) { 2296 // We are in teams construct 2297 int level = team->t.t_level; 2298 int tlevel = master_th->th.th_teams_level; 2299 if ( level == tlevel ) { 2300 // AC: we haven't incremented it earlier at start of teams construct, 2301 // so do it here - at the end of teams construct 2302 team->t.t_level++; 2303 } else if ( level == tlevel + 1 ) { 2304 // AC: we are exiting parallel inside teams, need to increment serialization 2305 // in order to restore it in the next call to __kmpc_end_serialized_parallel 2306 team->t.t_serialized++; 2307 } 2308 } 2309 #endif /* OMP_40_ENABLED */ 2310 __kmpc_end_serialized_parallel( loc, gtid ); 2311 2312 #if OMPT_SUPPORT 2313 if (ompt_enabled) { 2314 __kmp_join_restore_state(master_th, parent_team); 2315 } 2316 #endif 2317 2318 return; 2319 } 2320 2321 master_active = team->t.t_master_active; 2322 2323 #if OMP_40_ENABLED 2324 if (!exit_teams) 2325 #endif /* OMP_40_ENABLED */ 2326 { 2327 // AC: No barrier for internal teams at exit from teams construct. 2328 // But there is barrier for external team (league). 
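        // __kmp_internal_join below runs the join barrier for this team: the master
        // waits for all workers to arrive before the team is torn down or reused.
        // When exit_teams is set (end of a teams construct) this call is skipped and
        // only the master's task state is reset in the else branch.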
2329 __kmp_internal_join( loc, gtid, team ); 2330 } 2331 #if OMP_40_ENABLED 2332 else { 2333 master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel) 2334 } 2335 #endif /* OMP_40_ENABLED */ 2336 2337 KMP_MB(); 2338 2339 #if OMPT_SUPPORT 2340 ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; 2341 #endif 2342 2343 #if USE_ITT_BUILD 2344 if ( __itt_stack_caller_create_ptr ) { 2345 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier 2346 } 2347 2348 // Mark end of "parallel" region for VTune. 2349 if ( team->t.t_active_level == 1 2350 # if OMP_40_ENABLED 2351 && !master_th->th.th_teams_microtask /* not in teams construct */ 2352 # endif /* OMP_40_ENABLED */ 2353 ) { 2354 master_th->th.th_ident = loc; 2355 // only one notification scheme (either "submit" or "forking/joined", not both) 2356 if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 ) 2357 __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, 2358 0, loc, master_th->th.th_team_nproc, 1 ); 2359 else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) && 2360 ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames ) 2361 __kmp_itt_region_joined( gtid ); 2362 } // active_level == 1 2363 #endif /* USE_ITT_BUILD */ 2364 2365 #if OMP_40_ENABLED 2366 if ( master_th->th.th_teams_microtask && 2367 !exit_teams && 2368 team->t.t_pkfn != (microtask_t)__kmp_teams_master && 2369 team->t.t_level == master_th->th.th_teams_level + 1 ) { 2370 // AC: We need to leave the team structure intact at the end 2371 // of parallel inside the teams construct, so that at the next 2372 // parallel same (hot) team works, only adjust nesting levels 2373 2374 /* Decrement our nested depth level */ 2375 team->t.t_level --; 2376 team->t.t_active_level --; 2377 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); 2378 2379 /* Restore number of threads in the team if needed */ 2380 if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) { 2381 int old_num = master_th->th.th_team_nproc; 2382 int new_num = master_th->th.th_teams_size.nth; 2383 kmp_info_t **other_threads = team->t.t_threads; 2384 team->t.t_nproc = new_num; 2385 for ( i = 0; i < old_num; ++i ) { 2386 other_threads[i]->th.th_team_nproc = new_num; 2387 } 2388 // Adjust states of non-used threads of the team 2389 for ( i = old_num; i < new_num; ++i ) { 2390 // Re-initialize thread's barrier data. 
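                // Threads that sat out the just-finished inner parallel did not
                // participate in its barriers, so their per-barrier b_arrived counters
                // are resynchronized to the team's current values below; otherwise the
                // next fork/join barrier could hang or trip the wait_flag assertion.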
2391 int b; 2392 kmp_balign_t * balign = other_threads[i]->th.th_bar; 2393 for ( b = 0; b < bs_last_barrier; ++ b ) { 2394 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; 2395 KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 2396 #if USE_DEBUGGER 2397 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; 2398 #endif 2399 } 2400 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 2401 // Synchronize thread's task state 2402 other_threads[i]->th.th_task_state = master_th->th.th_task_state; 2403 } 2404 } 2405 } 2406 2407 #if OMPT_SUPPORT 2408 if (ompt_enabled) { 2409 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); 2410 } 2411 #endif 2412 2413 return; 2414 } 2415 #endif /* OMP_40_ENABLED */ 2416 2417 /* do cleanup and restore the parent team */ 2418 master_th->th.th_info .ds.ds_tid = team->t.t_master_tid; 2419 master_th->th.th_local.this_construct = team->t.t_master_this_cons; 2420 2421 master_th->th.th_dispatch = 2422 & parent_team->t.t_dispatch[ team->t.t_master_tid ]; 2423 2424 /* jc: The following lock has instructions with REL and ACQ semantics, 2425 separating the parallel user code called in this parallel region 2426 from the serial user code called after this function returns. 2427 */ 2428 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 2429 2430 #if OMP_40_ENABLED 2431 if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level ) 2432 #endif /* OMP_40_ENABLED */ 2433 { 2434 /* Decrement our nested depth level */ 2435 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); 2436 } 2437 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 ); 2438 2439 #if OMPT_SUPPORT && OMPT_TRACE 2440 if(ompt_enabled){ 2441 ompt_task_info_t *task_info = __ompt_get_taskinfo(0); 2442 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 2443 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 2444 parallel_id, task_info->task_id); 2445 } 2446 task_info->frame.exit_runtime_frame = NULL; 2447 task_info->task_id = 0; 2448 } 2449 #endif 2450 2451 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 2452 0, master_th, team ) ); 2453 __kmp_pop_current_task_from_thread( master_th ); 2454 2455 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 2456 // 2457 // Restore master thread's partition. 2458 // 2459 master_th->th.th_first_place = team->t.t_first_place; 2460 master_th->th.th_last_place = team->t.t_last_place; 2461 #endif /* OMP_40_ENABLED */ 2462 2463 updateHWFPControl (team); 2464 2465 if ( root->r.r_active != master_active ) 2466 root->r.r_active = master_active; 2467 2468 __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads 2469 2470 /* this race was fun to find. make sure the following is in the critical 2471 * region otherwise assertions may fail occasionally since the old team 2472 * may be reallocated and the hierarchy appears inconsistent. it is 2473 * actually safe to run and won't cause any bugs, but will cause those 2474 * assertion failures. 
it's only one deref&assign so might as well put this 2475 * in the critical region */ 2476 master_th->th.th_team = parent_team; 2477 master_th->th.th_team_nproc = parent_team->t.t_nproc; 2478 master_th->th.th_team_master = parent_team->t.t_threads[0]; 2479 master_th->th.th_team_serialized = parent_team->t.t_serialized; 2480 2481 /* restore serialized team, if need be */ 2482 if( parent_team->t.t_serialized && 2483 parent_team != master_th->th.th_serial_team && 2484 parent_team != root->r.r_root_team ) { 2485 __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) ); 2486 master_th->th.th_serial_team = parent_team; 2487 } 2488 2489 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 2490 if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack 2491 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); 2492 // Remember master's state if we re-use this nested hot team 2493 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; 2494 --master_th->th.th_task_state_top; // pop 2495 // Now restore state at this level 2496 master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; 2497 } 2498 // Copy the task team from the parent team to the master thread 2499 master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state]; 2500 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", 2501 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) ); 2502 } 2503 2504 // TODO: GEH - cannot do this assertion because root thread not set up as executing 2505 // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 ); 2506 master_th->th.th_current_task->td_flags.executing = 1; 2507 2508 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 2509 2510 #if OMPT_SUPPORT 2511 if (ompt_enabled) { 2512 __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); 2513 } 2514 #endif 2515 2516 KMP_MB(); 2517 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid )); 2518 } 2519 2520 /* ------------------------------------------------------------------------ */ 2521 /* ------------------------------------------------------------------------ */ 2522 2523 /* Check whether we should push an internal control record onto the 2524 serial team stack. If so, do it. 
*/ 2525 void 2526 __kmp_save_internal_controls ( kmp_info_t * thread ) 2527 { 2528 2529 if ( thread->th.th_team != thread->th.th_serial_team ) { 2530 return; 2531 } 2532 if (thread->th.th_team->t.t_serialized > 1) { 2533 int push = 0; 2534 2535 if (thread->th.th_team->t.t_control_stack_top == NULL) { 2536 push = 1; 2537 } else { 2538 if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level != 2539 thread->th.th_team->t.t_serialized ) { 2540 push = 1; 2541 } 2542 } 2543 if (push) { /* push a record on the serial team's stack */ 2544 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t)); 2545 2546 copy_icvs( control, & thread->th.th_current_task->td_icvs ); 2547 2548 control->serial_nesting_level = thread->th.th_team->t.t_serialized; 2549 2550 control->next = thread->th.th_team->t.t_control_stack_top; 2551 thread->th.th_team->t.t_control_stack_top = control; 2552 } 2553 } 2554 } 2555 2556 /* Changes set_nproc */ 2557 void 2558 __kmp_set_num_threads( int new_nth, int gtid ) 2559 { 2560 kmp_info_t *thread; 2561 kmp_root_t *root; 2562 2563 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth )); 2564 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2565 2566 if (new_nth < 1) 2567 new_nth = 1; 2568 else if (new_nth > __kmp_max_nth) 2569 new_nth = __kmp_max_nth; 2570 2571 KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); 2572 thread = __kmp_threads[gtid]; 2573 2574 __kmp_save_internal_controls( thread ); 2575 2576 set__nproc( thread, new_nth ); 2577 2578 // 2579 // If this omp_set_num_threads() call will cause the hot team size to be 2580 // reduced (in the absence of a num_threads clause), then reduce it now, 2581 // rather than waiting for the next parallel region. 2582 // 2583 root = thread->th.th_root; 2584 if ( __kmp_init_parallel && ( ! root->r.r_active ) 2585 && ( root->r.r_hot_team->t.t_nproc > new_nth ) 2586 #if KMP_NESTED_HOT_TEAMS 2587 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode 2588 #endif 2589 ) { 2590 kmp_team_t *hot_team = root->r.r_hot_team; 2591 int f; 2592 2593 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 2594 2595 // Release the extra threads we don't need any more. 2596 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) { 2597 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); 2598 if ( __kmp_tasking_mode != tskm_immediate_exec) { 2599 // When decreasing team size, threads no longer in the team should unref task team. 2600 hot_team->t.t_threads[f]->th.th_task_team = NULL; 2601 } 2602 __kmp_free_thread( hot_team->t.t_threads[f] ); 2603 hot_team->t.t_threads[f] = NULL; 2604 } 2605 hot_team->t.t_nproc = new_nth; 2606 #if KMP_NESTED_HOT_TEAMS 2607 if( thread->th.th_hot_teams ) { 2608 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team ); 2609 thread->th.th_hot_teams[0].hot_team_nth = new_nth; 2610 } 2611 #endif 2612 2613 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 2614 2615 // 2616 // Update the t_nproc field in the threads that are still active. 
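// For illustration: if the hot team currently has 8 workers and the user calls
// omp_set_num_threads(4) from serial code, threads 4..7 were released in the loop
// above and the remaining four get th_team_nproc refreshed here, so the next
// parallel region starts with a consistent, smaller hot team.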
2617 // 2618 for( f=0 ; f < new_nth; f++ ) { 2619 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); 2620 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; 2621 } 2622 // Special flag in case omp_set_num_threads() call 2623 hot_team->t.t_size_changed = -1; 2624 } 2625 } 2626 2627 /* Changes max_active_levels */ 2628 void 2629 __kmp_set_max_active_levels( int gtid, int max_active_levels ) 2630 { 2631 kmp_info_t *thread; 2632 2633 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); 2634 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2635 2636 // validate max_active_levels 2637 if( max_active_levels < 0 ) { 2638 KMP_WARNING( ActiveLevelsNegative, max_active_levels ); 2639 // We ignore this call if the user has specified a negative value. 2640 // The current setting won't be changed. The last valid setting will be used. 2641 // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var). 2642 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); 2643 return; 2644 } 2645 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) { 2646 // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ] 2647 // We allow a zero value. (implementation defined behavior) 2648 } else { 2649 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT ); 2650 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; 2651 // Current upper limit is MAX_INT. (implementation defined behavior) 2652 // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior) 2653 // Actually, the flow should never get here until we use MAX_INT limit. 2654 } 2655 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); 2656 2657 thread = __kmp_threads[ gtid ]; 2658 2659 __kmp_save_internal_controls( thread ); 2660 2661 set__max_active_levels( thread, max_active_levels ); 2662 2663 } 2664 2665 /* Gets max_active_levels */ 2666 int 2667 __kmp_get_max_active_levels( int gtid ) 2668 { 2669 kmp_info_t *thread; 2670 2671 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) ); 2672 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2673 2674 thread = __kmp_threads[ gtid ]; 2675 KMP_DEBUG_ASSERT( thread->th.th_current_task ); 2676 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n", 2677 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) ); 2678 return thread->th.th_current_task->td_icvs.max_active_levels; 2679 } 2680 2681 /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ 2682 void 2683 __kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk ) 2684 { 2685 kmp_info_t *thread; 2686 // kmp_team_t *team; 2687 2688 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk )); 2689 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2690 2691 // Check if the kind parameter is valid, correct if needed. 
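    // (Illustrative: a call such as omp_set_schedule(omp_sched_guided, 7) is
    //  expected to arrive here as kind == kmp_sched_guided with chunk == 7,
    //  which falls in the "standard" interval described next.)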
2692 // Valid parameters should fit in one of two intervals - standard or extended: 2693 // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper> 2694 // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 2695 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper || 2696 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) ) 2697 { 2698 // TODO: Hint needs attention in case we change the default schedule. 2699 __kmp_msg( 2700 kmp_ms_warning, 2701 KMP_MSG( ScheduleKindOutOfRange, kind ), 2702 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ), 2703 __kmp_msg_null 2704 ); 2705 kind = kmp_sched_default; 2706 chunk = 0; // ignore chunk value in case of bad kind 2707 } 2708 2709 thread = __kmp_threads[ gtid ]; 2710 2711 __kmp_save_internal_controls( thread ); 2712 2713 if ( kind < kmp_sched_upper_std ) { 2714 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) { 2715 // differ static chunked vs. unchunked: 2716 // chunk should be invalid to indicate unchunked schedule (which is the default) 2717 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; 2718 } else { 2719 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ]; 2720 } 2721 } else { 2722 // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; 2723 thread->th.th_current_task->td_icvs.sched.r_sched_type = 2724 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; 2725 } 2726 if ( kind == kmp_sched_auto ) { 2727 // ignore parameter chunk for schedule auto 2728 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; 2729 } else { 2730 thread->th.th_current_task->td_icvs.sched.chunk = chunk; 2731 } 2732 } 2733 2734 /* Gets def_sched_var ICV values */ 2735 void 2736 __kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk ) 2737 { 2738 kmp_info_t *thread; 2739 enum sched_type th_type; 2740 2741 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid )); 2742 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2743 2744 thread = __kmp_threads[ gtid ]; 2745 2746 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; 2747 2748 switch ( th_type ) { 2749 case kmp_sch_static: 2750 case kmp_sch_static_greedy: 2751 case kmp_sch_static_balanced: 2752 *kind = kmp_sched_static; 2753 *chunk = 0; // chunk was not set, try to show this fact via zero value 2754 return; 2755 case kmp_sch_static_chunked: 2756 *kind = kmp_sched_static; 2757 break; 2758 case kmp_sch_dynamic_chunked: 2759 *kind = kmp_sched_dynamic; 2760 break; 2761 case kmp_sch_guided_chunked: 2762 case kmp_sch_guided_iterative_chunked: 2763 case kmp_sch_guided_analytical_chunked: 2764 *kind = kmp_sched_guided; 2765 break; 2766 case kmp_sch_auto: 2767 *kind = kmp_sched_auto; 2768 break; 2769 case kmp_sch_trapezoidal: 2770 *kind = kmp_sched_trapezoidal; 2771 break; 2772 #if KMP_STATIC_STEAL_ENABLED 2773 case kmp_sch_static_steal: 2774 *kind = kmp_sched_static_steal; 2775 break; 2776 #endif 2777 default: 2778 KMP_FATAL( UnknownSchedulingType, th_type ); 2779 } 2780 2781 *chunk = thread->th.th_current_task->td_icvs.sched.chunk; 2782 } 2783 2784 int 2785 __kmp_get_ancestor_thread_num( int gtid, int level ) { 2786 2787 int ii, dd; 2788 kmp_team_t *team; 2789 kmp_info_t *thr; 2790 2791 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level )); 2792 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2793 2794 // validate level 2795 if( level == 0 ) return 0; 2796 if( level < 0 ) return -1; 2797 thr = __kmp_threads[ 
gtid ]; 2798 team = thr->th.th_team; 2799 ii = team->t.t_level; 2800 if( level > ii ) return -1; 2801 2802 #if OMP_40_ENABLED 2803 if( thr->th.th_teams_microtask ) { 2804 // AC: we are in teams region where multiple nested teams have same level 2805 int tlevel = thr->th.th_teams_level; // the level of the teams construct 2806 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) 2807 KMP_DEBUG_ASSERT( ii >= tlevel ); 2808 // AC: As we need to pass by the teams league, we need to artificially increase ii 2809 if ( ii == tlevel ) { 2810 ii += 2; // three teams have same level 2811 } else { 2812 ii ++; // two teams have same level 2813 } 2814 } 2815 } 2816 #endif 2817 2818 if( ii == level ) return __kmp_tid_from_gtid( gtid ); 2819 2820 dd = team->t.t_serialized; 2821 level++; 2822 while( ii > level ) 2823 { 2824 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) 2825 { 2826 } 2827 if( ( team->t.t_serialized ) && ( !dd ) ) { 2828 team = team->t.t_parent; 2829 continue; 2830 } 2831 if( ii > level ) { 2832 team = team->t.t_parent; 2833 dd = team->t.t_serialized; 2834 ii--; 2835 } 2836 } 2837 2838 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid ); 2839 } 2840 2841 int 2842 __kmp_get_team_size( int gtid, int level ) { 2843 2844 int ii, dd; 2845 kmp_team_t *team; 2846 kmp_info_t *thr; 2847 2848 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level )); 2849 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2850 2851 // validate level 2852 if( level == 0 ) return 1; 2853 if( level < 0 ) return -1; 2854 thr = __kmp_threads[ gtid ]; 2855 team = thr->th.th_team; 2856 ii = team->t.t_level; 2857 if( level > ii ) return -1; 2858 2859 #if OMP_40_ENABLED 2860 if( thr->th.th_teams_microtask ) { 2861 // AC: we are in teams region where multiple nested teams have same level 2862 int tlevel = thr->th.th_teams_level; // the level of the teams construct 2863 if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) 2864 KMP_DEBUG_ASSERT( ii >= tlevel ); 2865 // AC: As we need to pass by the teams league, we need to artificially increase ii 2866 if ( ii == tlevel ) { 2867 ii += 2; // three teams have same level 2868 } else { 2869 ii ++; // two teams have same level 2870 } 2871 } 2872 } 2873 #endif 2874 2875 while( ii > level ) 2876 { 2877 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) 2878 { 2879 } 2880 if( team->t.t_serialized && ( !dd ) ) { 2881 team = team->t.t_parent; 2882 continue; 2883 } 2884 if( ii > level ) { 2885 team = team->t.t_parent; 2886 ii--; 2887 } 2888 } 2889 2890 return team->t.t_nproc; 2891 } 2892 2893 kmp_r_sched_t 2894 __kmp_get_schedule_global() { 2895 // This routine created because pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided) 2896 // may be changed by kmp_set_defaults independently. So one can get the updated schedule here. 
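    // Example (illustrative): with OMP_SCHEDULE="guided,10" the parser sets
    // __kmp_sched to the guided kind and __kmp_chunk to 10, so the pair returned
    // below is the detailed guided variant held in __kmp_guided plus chunk 10.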
2897 2898 kmp_r_sched_t r_sched; 2899 2900 // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided 2901 // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times, 2902 // and thus have different run-time schedules in different roots (even in OMP 2.5) 2903 if ( __kmp_sched == kmp_sch_static ) { 2904 r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy) 2905 } else if ( __kmp_sched == kmp_sch_guided_chunked ) { 2906 r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical) 2907 } else { 2908 r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other 2909 } 2910 2911 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set) 2912 r_sched.chunk = KMP_DEFAULT_CHUNK; 2913 } else { 2914 r_sched.chunk = __kmp_chunk; 2915 } 2916 2917 return r_sched; 2918 } 2919 2920 /* ------------------------------------------------------------------------ */ 2921 /* ------------------------------------------------------------------------ */ 2922 2923 2924 /* 2925 * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE) 2926 * at least argc number of *t_argv entries for the requested team. 2927 */ 2928 static void 2929 __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ) 2930 { 2931 2932 KMP_DEBUG_ASSERT( team ); 2933 if( !realloc || argc > team->t.t_max_argc ) { 2934 2935 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n", 2936 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 )); 2937 /* if previously allocated heap space for args, free them */ 2938 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] ) 2939 __kmp_free( (void *) team->t.t_argv ); 2940 2941 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) { 2942 /* use unused space in the cache line for arguments */ 2943 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; 2944 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n", 2945 team->t.t_id, team->t.t_max_argc )); 2946 team->t.t_argv = &team->t.t_inline_argv[0]; 2947 if ( __kmp_storage_map ) { 2948 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0], 2949 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], 2950 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), 2951 "team_%d.t_inline_argv", 2952 team->t.t_id ); 2953 } 2954 } else { 2955 /* allocate space for arguments in the heap */ 2956 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ? 2957 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc; 2958 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n", 2959 team->t.t_id, team->t.t_max_argc )); 2960 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc ); 2961 if ( __kmp_storage_map ) { 2962 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc], 2963 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", 2964 team->t.t_id ); 2965 } 2966 } 2967 } 2968 } 2969 2970 static void 2971 __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) 2972 { 2973 int i; 2974 int num_disp_buff = max_nth > 1 ? 
__kmp_dispatch_num_buffers : 2; 2975 team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth ); 2976 team->t.t_disp_buffer = (dispatch_shared_info_t*) 2977 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff ); 2978 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth ); 2979 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth ); 2980 team->t.t_max_nproc = max_nth; 2981 2982 /* setup dispatch buffers */ 2983 for(i = 0 ; i < num_disp_buff; ++i) { 2984 team->t.t_disp_buffer[i].buffer_index = i; 2985 #if OMP_45_ENABLED 2986 team->t.t_disp_buffer[i].doacross_buf_idx = i; 2987 #endif 2988 } 2989 } 2990 2991 static void 2992 __kmp_free_team_arrays(kmp_team_t *team) { 2993 /* Note: this does not free the threads in t_threads (__kmp_free_threads) */ 2994 int i; 2995 for ( i = 0; i < team->t.t_max_nproc; ++ i ) { 2996 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) { 2997 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer ); 2998 team->t.t_dispatch[ i ].th_disp_buffer = NULL; 2999 }; // if 3000 }; // for 3001 __kmp_free(team->t.t_threads); 3002 __kmp_free(team->t.t_disp_buffer); 3003 __kmp_free(team->t.t_dispatch); 3004 __kmp_free(team->t.t_implicit_task_taskdata); 3005 team->t.t_threads = NULL; 3006 team->t.t_disp_buffer = NULL; 3007 team->t.t_dispatch = NULL; 3008 team->t.t_implicit_task_taskdata = 0; 3009 } 3010 3011 static void 3012 __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { 3013 kmp_info_t **oldThreads = team->t.t_threads; 3014 3015 __kmp_free(team->t.t_disp_buffer); 3016 __kmp_free(team->t.t_dispatch); 3017 __kmp_free(team->t.t_implicit_task_taskdata); 3018 __kmp_allocate_team_arrays(team, max_nth); 3019 3020 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*)); 3021 3022 __kmp_free(oldThreads); 3023 } 3024 3025 static kmp_internal_control_t 3026 __kmp_get_global_icvs( void ) { 3027 3028 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals 3029 3030 #if OMP_40_ENABLED 3031 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 ); 3032 #endif /* OMP_40_ENABLED */ 3033 3034 kmp_internal_control_t g_icvs = { 3035 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field 3036 (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread) 3037 (kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread) 3038 (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set 3039 __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime 3040 #if KMP_USE_MONITOR 3041 __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals 3042 #endif 3043 __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread) 3044 // (use a max ub on value if __kmp_parallel_initialize not called yet) 3045 __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels 3046 r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair 3047 #if OMP_40_ENABLED 3048 __kmp_nested_proc_bind.bind_types[0], 3049 __kmp_default_device, 3050 #endif /* OMP_40_ENABLED */ 3051 NULL //struct kmp_internal_control *next; 3052 }; 3053 3054 return g_icvs; 3055 } 3056 3057 static kmp_internal_control_t 3058 __kmp_get_x_global_icvs( const kmp_team_t *team ) 
{ 3059 3060 kmp_internal_control_t gx_icvs; 3061 gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls 3062 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs ); 3063 gx_icvs.next = NULL; 3064 3065 return gx_icvs; 3066 } 3067 3068 static void 3069 __kmp_initialize_root( kmp_root_t *root ) 3070 { 3071 int f; 3072 kmp_team_t *root_team; 3073 kmp_team_t *hot_team; 3074 int hot_team_max_nth; 3075 kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals 3076 kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 3077 KMP_DEBUG_ASSERT( root ); 3078 KMP_ASSERT( ! root->r.r_begin ); 3079 3080 /* setup the root state structure */ 3081 __kmp_init_lock( &root->r.r_begin_lock ); 3082 root->r.r_begin = FALSE; 3083 root->r.r_active = FALSE; 3084 root->r.r_in_parallel = 0; 3085 root->r.r_blocktime = __kmp_dflt_blocktime; 3086 root->r.r_nested = __kmp_dflt_nested; 3087 3088 /* setup the root team for this task */ 3089 /* allocate the root team structure */ 3090 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) ); 3091 3092 root_team = 3093 __kmp_allocate_team( 3094 root, 3095 1, // new_nproc 3096 1, // max_nproc 3097 #if OMPT_SUPPORT 3098 0, // root parallel id 3099 #endif 3100 #if OMP_40_ENABLED 3101 __kmp_nested_proc_bind.bind_types[0], 3102 #endif 3103 &r_icvs, 3104 0 // argc 3105 USE_NESTED_HOT_ARG(NULL) // master thread is unknown 3106 ); 3107 #if USE_DEBUGGER 3108 // Non-NULL value should be assigned to make the debugger display the root team. 3109 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 )); 3110 #endif 3111 3112 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) ); 3113 3114 root->r.r_root_team = root_team; 3115 root_team->t.t_control_stack_top = NULL; 3116 3117 /* initialize root team */ 3118 root_team->t.t_threads[0] = NULL; 3119 root_team->t.t_nproc = 1; 3120 root_team->t.t_serialized = 1; 3121 // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 3122 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type; 3123 root_team->t.t_sched.chunk = r_sched.chunk; 3124 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", 3125 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); 3126 3127 /* setup the hot team for this task */ 3128 /* allocate the hot team structure */ 3129 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) ); 3130 3131 hot_team = 3132 __kmp_allocate_team( 3133 root, 3134 1, // new_nproc 3135 __kmp_dflt_team_nth_ub * 2, // max_nproc 3136 #if OMPT_SUPPORT 3137 0, // root parallel id 3138 #endif 3139 #if OMP_40_ENABLED 3140 __kmp_nested_proc_bind.bind_types[0], 3141 #endif 3142 &r_icvs, 3143 0 // argc 3144 USE_NESTED_HOT_ARG(NULL) // master thread is unknown 3145 ); 3146 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) ); 3147 3148 root->r.r_hot_team = hot_team; 3149 root_team->t.t_control_stack_top = NULL; 3150 3151 /* first-time initialization */ 3152 hot_team->t.t_parent = root_team; 3153 3154 /* initialize hot team */ 3155 hot_team_max_nth = hot_team->t.t_max_nproc; 3156 for ( f = 0; f < hot_team_max_nth; ++ f ) { 3157 hot_team->t.t_threads[ f ] = NULL; 3158 }; // for 3159 hot_team->t.t_nproc = 1; 3160 // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; 3161 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type; 3162 hot_team->t.t_sched.chunk = r_sched.chunk; 3163 hot_team->t.t_size_changed = 0; 3164 
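    // Summary of the two teams set up above: r_root_team is the serialized,
    // single-thread team the initial thread occupies outside any parallel region,
    // while r_hot_team is the reusable ("hot") team that will host the outermost
    // parallel region and is kept alive between regions to avoid re-creating
    // workers.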
} 3165 3166 #ifdef KMP_DEBUG 3167 3168 3169 typedef struct kmp_team_list_item { 3170 kmp_team_p const * entry; 3171 struct kmp_team_list_item * next; 3172 } kmp_team_list_item_t; 3173 typedef kmp_team_list_item_t * kmp_team_list_t; 3174 3175 3176 static void 3177 __kmp_print_structure_team_accum( // Add team to list of teams. 3178 kmp_team_list_t list, // List of teams. 3179 kmp_team_p const * team // Team to add. 3180 ) { 3181 3182 // List must terminate with item where both entry and next are NULL. 3183 // Team is added to the list only once. 3184 // List is sorted in ascending order by team id. 3185 // Team id is *not* a key. 3186 3187 kmp_team_list_t l; 3188 3189 KMP_DEBUG_ASSERT( list != NULL ); 3190 if ( team == NULL ) { 3191 return; 3192 }; // if 3193 3194 __kmp_print_structure_team_accum( list, team->t.t_parent ); 3195 __kmp_print_structure_team_accum( list, team->t.t_next_pool ); 3196 3197 // Search list for the team. 3198 l = list; 3199 while ( l->next != NULL && l->entry != team ) { 3200 l = l->next; 3201 }; // while 3202 if ( l->next != NULL ) { 3203 return; // Team has been added before, exit. 3204 }; // if 3205 3206 // Team is not found. Search list again for insertion point. 3207 l = list; 3208 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) { 3209 l = l->next; 3210 }; // while 3211 3212 // Insert team. 3213 { 3214 kmp_team_list_item_t * item = 3215 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); 3216 * item = * l; 3217 l->entry = team; 3218 l->next = item; 3219 } 3220 3221 } 3222 3223 static void 3224 __kmp_print_structure_team( 3225 char const * title, 3226 kmp_team_p const * team 3227 3228 ) { 3229 __kmp_printf( "%s", title ); 3230 if ( team != NULL ) { 3231 __kmp_printf( "%2x %p\n", team->t.t_id, team ); 3232 } else { 3233 __kmp_printf( " - (nil)\n" ); 3234 }; // if 3235 } 3236 3237 static void 3238 __kmp_print_structure_thread( 3239 char const * title, 3240 kmp_info_p const * thread 3241 3242 ) { 3243 __kmp_printf( "%s", title ); 3244 if ( thread != NULL ) { 3245 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread ); 3246 } else { 3247 __kmp_printf( " - (nil)\n" ); 3248 }; // if 3249 } 3250 3251 void 3252 __kmp_print_structure( 3253 void 3254 ) { 3255 3256 kmp_team_list_t list; 3257 3258 // Initialize list of teams. 3259 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); 3260 list->entry = NULL; 3261 list->next = NULL; 3262 3263 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" ); 3264 { 3265 int gtid; 3266 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { 3267 __kmp_printf( "%2d", gtid ); 3268 if ( __kmp_threads != NULL ) { 3269 __kmp_printf( " %p", __kmp_threads[ gtid ] ); 3270 }; // if 3271 if ( __kmp_root != NULL ) { 3272 __kmp_printf( " %p", __kmp_root[ gtid ] ); 3273 }; // if 3274 __kmp_printf( "\n" ); 3275 }; // for gtid 3276 } 3277 3278 // Print out __kmp_threads array. 
3279 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" ); 3280 if ( __kmp_threads != NULL ) { 3281 int gtid; 3282 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { 3283 kmp_info_t const * thread = __kmp_threads[ gtid ]; 3284 if ( thread != NULL ) { 3285 __kmp_printf( "GTID %2d %p:\n", gtid, thread ); 3286 __kmp_printf( " Our Root: %p\n", thread->th.th_root ); 3287 __kmp_print_structure_team( " Our Team: ", thread->th.th_team ); 3288 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team ); 3289 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc ); 3290 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master ); 3291 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized ); 3292 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc ); 3293 #if OMP_40_ENABLED 3294 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind ); 3295 #endif 3296 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool ); 3297 __kmp_printf( "\n" ); 3298 __kmp_print_structure_team_accum( list, thread->th.th_team ); 3299 __kmp_print_structure_team_accum( list, thread->th.th_serial_team ); 3300 }; // if 3301 }; // for gtid 3302 } else { 3303 __kmp_printf( "Threads array is not allocated.\n" ); 3304 }; // if 3305 3306 // Print out __kmp_root array. 3307 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" ); 3308 if ( __kmp_root != NULL ) { 3309 int gtid; 3310 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { 3311 kmp_root_t const * root = __kmp_root[ gtid ]; 3312 if ( root != NULL ) { 3313 __kmp_printf( "GTID %2d %p:\n", gtid, root ); 3314 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team ); 3315 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team ); 3316 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread ); 3317 __kmp_printf( " Active?: %2d\n", root->r.r_active ); 3318 __kmp_printf( " Nested?: %2d\n", root->r.r_nested ); 3319 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel ); 3320 __kmp_printf( "\n" ); 3321 __kmp_print_structure_team_accum( list, root->r.r_root_team ); 3322 __kmp_print_structure_team_accum( list, root->r.r_hot_team ); 3323 }; // if 3324 }; // for gtid 3325 } else { 3326 __kmp_printf( "Ubers array is not allocated.\n" ); 3327 }; // if 3328 3329 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" ); 3330 while ( list->next != NULL ) { 3331 kmp_team_p const * team = list->entry; 3332 int i; 3333 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team ); 3334 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent ); 3335 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid ); 3336 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc ); 3337 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized ); 3338 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc ); 3339 for ( i = 0; i < team->t.t_nproc; ++ i ) { 3340 __kmp_printf( " Thread %2d: ", i ); 3341 __kmp_print_structure_thread( "", team->t.t_threads[ i ] ); 3342 }; // for i 3343 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool ); 3344 __kmp_printf( "\n" ); 3345 list = list->next; 3346 }; // while 3347 3348 // Print out __kmp_thread_pool and __kmp_team_pool. 
3349 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" ); 3350 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool ); 3351 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool ); 3352 __kmp_printf( "\n" ); 3353 3354 // Free team list. 3355 while ( list != NULL ) { 3356 kmp_team_list_item_t * item = list; 3357 list = list->next; 3358 KMP_INTERNAL_FREE( item ); 3359 }; // while 3360 3361 } 3362 3363 #endif 3364 3365 3366 //--------------------------------------------------------------------------- 3367 // Stuff for per-thread fast random number generator 3368 // Table of primes 3369 3370 static const unsigned __kmp_primes[] = { 3371 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 3372 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b, 3373 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, 3374 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 3375 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801, 3376 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, 3377 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 3378 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b, 3379 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, 3380 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 3381 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7, 3382 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, 3383 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 3384 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b, 3385 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, 3386 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f 3387 }; 3388 3389 //--------------------------------------------------------------------------- 3390 // __kmp_get_random: Get a random number using a linear congruential method. 3391 3392 unsigned short 3393 __kmp_get_random( kmp_info_t * thread ) 3394 { 3395 unsigned x = thread->th.th_x; 3396 unsigned short r = x>>16; 3397 3398 thread->th.th_x = x*thread->th.th_a+1; 3399 3400 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n", 3401 thread->th.th_info.ds.ds_tid, r) ); 3402 3403 return r; 3404 } 3405 //-------------------------------------------------------- 3406 // __kmp_init_random: Initialize a random number generator 3407 3408 void 3409 __kmp_init_random( kmp_info_t * thread ) 3410 { 3411 unsigned seed = thread->th.th_info.ds.ds_tid; 3412 3413 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))]; 3414 thread->th.th_x = (seed+1)*thread->th.th_a+1; 3415 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) ); 3416 } 3417 3418 3419 #if KMP_OS_WINDOWS 3420 /* reclaim array entries for root threads that are already dead, returns number reclaimed */ 3421 static int 3422 __kmp_reclaim_dead_roots(void) { 3423 int i, r = 0; 3424 3425 for(i = 0; i < __kmp_threads_capacity; ++i) { 3426 if( KMP_UBER_GTID( i ) && 3427 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && 3428 !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state 3429 r += __kmp_unregister_root_other_thread(i); 3430 } 3431 } 3432 return r; 3433 } 3434 #endif 3435 3436 /* 3437 This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of 3438 free entries generated. 3439 3440 For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are 3441 already dead. 
3442 3443 On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate 3444 update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to 3445 __kmp_tp_capacity, if threadprivate cache array has been created. 3446 Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock. 3447 3448 After any dead root reclamation, if the clipping value allows array expansion to result in the generation 3449 of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows 3450 array expansion to result in the generation of a total of nNeed free slots, the function does that expansion. 3451 Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero, 3452 a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create 3453 as many free slots as possible up to nWish. 3454 3455 If any argument is negative, the behavior is undefined. 3456 */ 3457 static int 3458 __kmp_expand_threads(int nWish, int nNeed) { 3459 int added = 0; 3460 int old_tp_cached; 3461 int __kmp_actual_max_nth; 3462 3463 if(nNeed > nWish) /* normalize the arguments */ 3464 nWish = nNeed; 3465 #if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB 3466 /* only for Windows static library */ 3467 /* reclaim array entries for root threads that are already dead */ 3468 added = __kmp_reclaim_dead_roots(); 3469 3470 if(nNeed) { 3471 nNeed -= added; 3472 if(nNeed < 0) 3473 nNeed = 0; 3474 } 3475 if(nWish) { 3476 nWish -= added; 3477 if(nWish < 0) 3478 nWish = 0; 3479 } 3480 #endif 3481 if(nWish <= 0) 3482 return added; 3483 3484 while(1) { 3485 int nTarget; 3486 int minimumRequiredCapacity; 3487 int newCapacity; 3488 kmp_info_t **newThreads; 3489 kmp_root_t **newRoot; 3490 3491 // 3492 // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. 3493 // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth 3494 // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may 3495 // become > __kmp_max_nth in one of two ways: 3496 // 3497 // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0] 3498 // may not be resused by another thread, so we may need to increase 3499 // __kmp_threads_capacity to __kmp_max_threads + 1. 3500 // 3501 // 2) New foreign root(s) are encountered. We always register new 3502 // foreign roots. This may cause a smaller # of threads to be 3503 // allocated at subsequent parallel regions, but the worker threads 3504 // hang around (and eventually go to sleep) and need slots in the 3505 // __kmp_threads[] array. 3506 // 3507 // Anyway, that is the reason for moving the check to see if 3508 // __kmp_max_threads was exceeded into __kmp_reseerve_threads() 3509 // instead of having it performed here. -BB 3510 // 3511 old_tp_cached = __kmp_tp_cached; 3512 __kmp_actual_max_nth = old_tp_cached ? 
__kmp_tp_capacity : __kmp_sys_max_nth; 3513 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity); 3514 3515 /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */ 3516 nTarget = nWish; 3517 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { 3518 /* can't fulfil nWish, so try nNeed */ 3519 if(nNeed) { 3520 nTarget = nNeed; 3521 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { 3522 /* possible expansion too small -- give up */ 3523 break; 3524 } 3525 } else { 3526 /* best-effort */ 3527 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity; 3528 if(!nTarget) { 3529 /* can't expand at all -- give up */ 3530 break; 3531 } 3532 } 3533 } 3534 minimumRequiredCapacity = __kmp_threads_capacity + nTarget; 3535 3536 newCapacity = __kmp_threads_capacity; 3537 do{ 3538 newCapacity = 3539 newCapacity <= (__kmp_actual_max_nth >> 1) ? 3540 (newCapacity << 1) : 3541 __kmp_actual_max_nth; 3542 } while(newCapacity < minimumRequiredCapacity); 3543 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE); 3544 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity ); 3545 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*)); 3546 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*)); 3547 memset(newThreads + __kmp_threads_capacity, 0, 3548 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*)); 3549 memset(newRoot + __kmp_threads_capacity, 0, 3550 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*)); 3551 3552 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { 3553 /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache 3554 while we were allocating the expanded array, and our new capacity is larger than the threadprivate 3555 cache capacity, so we should deallocate the expanded arrays and try again. This is the first check 3556 of a double-check pair. 3557 */ 3558 __kmp_free(newThreads); 3559 continue; /* start over and try again */ 3560 } 3561 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); 3562 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { 3563 /* Same check as above, but this time with the lock held, so we can be sure whether we will succeed. */ 3564 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); 3565 __kmp_free(newThreads); 3566 continue; /* start over and try again */ 3567 } else { 3568 /* success */ 3569 // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated.
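/* The do/while above grows newCapacity by repeated doubling until it covers
   minimumRequiredCapacity, clipping at __kmp_actual_max_nth (__kmp_tp_capacity once a
   threadprivate cache exists, __kmp_sys_max_nth otherwise).  A standalone sketch of just
   that growth policy; grow_capacity is an illustrative name, not a runtime entry point: */
#if 0
static int grow_capacity( int current, int required_extra, int hard_max ) {
    int required = current + required_extra;
    int cap = current;
    while ( cap < required ) {
        /* double while that cannot overshoot hard_max, otherwise clip */
        cap = ( cap <= hard_max / 2 ) ? cap * 2 : hard_max;
        if ( cap == hard_max )
            break; /* clipped; cannot grow any further */
    }
    return cap; /* may still be < required if we were clipped */
}
/* e.g. grow_capacity( 32, 10, 1024 ) == 64, grow_capacity( 32, 100, 100 ) == 100 */
#endif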
3570 // 3571 *(kmp_info_t**volatile*)&__kmp_threads = newThreads; 3572 *(kmp_root_t**volatile*)&__kmp_root = newRoot; 3573 added += newCapacity - __kmp_threads_capacity; 3574 *(volatile int*)&__kmp_threads_capacity = newCapacity; 3575 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); 3576 break; /* succeeded, so we can exit the loop */ 3577 } 3578 } 3579 return added; 3580 } 3581 3582 /* register the current thread as a root thread and obtain our gtid */ 3583 /* we must have the __kmp_initz_lock held at this point */ 3584 /* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */ 3585 int 3586 __kmp_register_root( int initial_thread ) 3587 { 3588 kmp_info_t *root_thread; 3589 kmp_root_t *root; 3590 int gtid; 3591 int capacity; 3592 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 3593 KA_TRACE( 20, ("__kmp_register_root: entered\n")); 3594 KMP_MB(); 3595 3596 3597 /* 3598 2007-03-02: 3599 3600 If initial thread did not invoke OpenMP RTL yet, and this thread is not an initial one, 3601 "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may 3602 return false (that means there is at least one empty slot in __kmp_threads array), but it 3603 is possible the only free slot is #0, which is reserved for initial thread and so cannot be 3604 used for this one. Following code workarounds this bug. 3605 3606 However, right solution seems to be not reserving slot #0 for initial thread because: 3607 (1) there is no magic in slot #0, 3608 (2) we cannot detect initial thread reliably (the first thread which does serial 3609 initialization may be not a real initial thread). 3610 */ 3611 capacity = __kmp_threads_capacity; 3612 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) { 3613 -- capacity; 3614 }; // if 3615 3616 /* see if there are too many threads */ 3617 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) { 3618 if ( __kmp_tp_cached ) { 3619 __kmp_msg( 3620 kmp_ms_fatal, 3621 KMP_MSG( CantRegisterNewThread ), 3622 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ), 3623 KMP_HNT( PossibleSystemLimitOnThreads ), 3624 __kmp_msg_null 3625 ); 3626 } 3627 else { 3628 __kmp_msg( 3629 kmp_ms_fatal, 3630 KMP_MSG( CantRegisterNewThread ), 3631 KMP_HNT( SystemLimitOnThreads ), 3632 __kmp_msg_null 3633 ); 3634 } 3635 }; // if 3636 3637 /* find an available thread slot */ 3638 /* Don't reassign the zero slot since we need that to only be used by initial 3639 thread */ 3640 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ ) 3641 ; 3642 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid )); 3643 KMP_ASSERT( gtid < __kmp_threads_capacity ); 3644 3645 /* update global accounting */ 3646 __kmp_all_nth ++; 3647 TCW_4(__kmp_nth, __kmp_nth + 1); 3648 3649 // 3650 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) 3651 // for low numbers of procs, and method #2 (keyed API call) for higher 3652 // numbers of procs. 
3653 // 3654 if ( __kmp_adjust_gtid_mode ) { 3655 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { 3656 if ( TCR_4(__kmp_gtid_mode) != 2) { 3657 TCW_4(__kmp_gtid_mode, 2); 3658 } 3659 } 3660 else { 3661 if (TCR_4(__kmp_gtid_mode) != 1 ) { 3662 TCW_4(__kmp_gtid_mode, 1); 3663 } 3664 } 3665 } 3666 3667 #ifdef KMP_ADJUST_BLOCKTIME 3668 /* Adjust blocktime to zero if necessary */ 3669 /* Middle initialization might not have occurred yet */ 3670 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 3671 if ( __kmp_nth > __kmp_avail_proc ) { 3672 __kmp_zero_bt = TRUE; 3673 } 3674 } 3675 #endif /* KMP_ADJUST_BLOCKTIME */ 3676 3677 /* setup this new hierarchy */ 3678 if( ! ( root = __kmp_root[gtid] )) { 3679 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) ); 3680 KMP_DEBUG_ASSERT( ! root->r.r_root_team ); 3681 } 3682 3683 #if KMP_STATS_ENABLED 3684 // Initialize stats as soon as possible (right after gtid assignment). 3685 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); 3686 KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); 3687 KMP_SET_THREAD_STATE(SERIAL_REGION); 3688 KMP_INIT_PARTITIONED_TIMERS(OMP_serial); 3689 #endif 3690 __kmp_initialize_root( root ); 3691 3692 /* setup new root thread structure */ 3693 if( root->r.r_uber_thread ) { 3694 root_thread = root->r.r_uber_thread; 3695 } else { 3696 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); 3697 if ( __kmp_storage_map ) { 3698 __kmp_print_thread_storage_map( root_thread, gtid ); 3699 } 3700 root_thread->th.th_info .ds.ds_gtid = gtid; 3701 root_thread->th.th_root = root; 3702 if( __kmp_env_consistency_check ) { 3703 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid ); 3704 } 3705 #if USE_FAST_MEMORY 3706 __kmp_initialize_fast_memory( root_thread ); 3707 #endif /* USE_FAST_MEMORY */ 3708 3709 #if KMP_USE_BGET 3710 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL ); 3711 __kmp_initialize_bget( root_thread ); 3712 #endif 3713 __kmp_init_random( root_thread ); // Initialize random number generator 3714 } 3715 3716 /* setup the serial team held in reserve by the root thread */ 3717 if( ! root_thread->th.th_serial_team ) { 3718 kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); 3719 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) ); 3720 3721 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1, 3722 #if OMPT_SUPPORT 3723 0, // root parallel id 3724 #endif 3725 #if OMP_40_ENABLED 3726 proc_bind_default, 3727 #endif 3728 &r_icvs, 3729 0 USE_NESTED_HOT_ARG(NULL) ); 3730 } 3731 KMP_ASSERT( root_thread->th.th_serial_team ); 3732 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n", 3733 root_thread->th.th_serial_team ) ); 3734 3735 /* drop root_thread into place */ 3736 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); 3737 3738 root->r.r_root_team->t.t_threads[0] = root_thread; 3739 root->r.r_hot_team ->t.t_threads[0] = root_thread; 3740 root_thread->th.th_serial_team->t.t_threads[0] = root_thread; 3741 root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). 
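/* __kmp_init_random() above seeds the per-thread linear congruential generator defined
   earlier in this file: each thread gets a multiplier from __kmp_primes[] keyed by its
   tid, the state advances as x = a*x + 1, and __kmp_get_random() hands back the high
   16 bits of the current state.  A self-contained sketch of the same scheme; the toy_rng
   names are illustrative only: */
#if 0
#include <stdio.h>

/* a few multipliers in the spirit of __kmp_primes[] */
static const unsigned toy_primes[] = { 0x9e3779b1u, 0xffe6cc59u, 0x2109f6ddu };

typedef struct toy_rng { unsigned a, x; } toy_rng_t;

static void toy_rng_init( toy_rng_t *r, unsigned tid ) {
    r->a = toy_primes[ tid % ( sizeof(toy_primes) / sizeof(toy_primes[0]) ) ];
    r->x = ( tid + 1 ) * r->a + 1; /* same seeding rule as __kmp_init_random */
}

static unsigned short toy_rng_next( toy_rng_t *r ) {
    unsigned short out = (unsigned short)( r->x >> 16 ); /* high bits of the state  */
    r->x = r->x * r->a + 1;                              /* x_{n+1} = a * x_n + 1   */
    return out;
}

int main( void ) {
    toy_rng_t r;
    toy_rng_init( &r, 0 );
    for ( int i = 0; i < 4; ++i )
        printf( "%u\n", toy_rng_next( &r ) ); /* a short per-thread sequence */
    return 0;
}
#endif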
3742 root->r.r_uber_thread = root_thread; 3743 3744 /* initialize the thread, get it ready to go */ 3745 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid ); 3746 TCW_4(__kmp_init_gtid, TRUE); 3747 3748 /* prepare the master thread for get_gtid() */ 3749 __kmp_gtid_set_specific( gtid ); 3750 3751 #if USE_ITT_BUILD 3752 __kmp_itt_thread_name( gtid ); 3753 #endif /* USE_ITT_BUILD */ 3754 3755 #ifdef KMP_TDATA_GTID 3756 __kmp_gtid = gtid; 3757 #endif 3758 __kmp_create_worker( gtid, root_thread, __kmp_stksize ); 3759 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid ); 3760 3761 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n", 3762 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ), 3763 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, 3764 KMP_INIT_BARRIER_STATE ) ); 3765 { // Initialize barrier data. 3766 int b; 3767 for ( b = 0; b < bs_last_barrier; ++ b ) { 3768 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE; 3769 #if USE_DEBUGGER 3770 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0; 3771 #endif 3772 }; // for 3773 } 3774 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE ); 3775 3776 #if KMP_AFFINITY_SUPPORTED 3777 # if OMP_40_ENABLED 3778 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; 3779 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; 3780 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; 3781 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; 3782 # endif 3783 3784 if ( TCR_4(__kmp_init_middle) ) { 3785 __kmp_affinity_set_init_mask( gtid, TRUE ); 3786 } 3787 #endif /* KMP_AFFINITY_SUPPORTED */ 3788 3789 __kmp_root_counter ++; 3790 3791 KMP_MB(); 3792 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 3793 3794 return gtid; 3795 } 3796 3797 #if KMP_NESTED_HOT_TEAMS 3798 static int 3799 __kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level ) 3800 { 3801 int i, n, nth; 3802 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; 3803 if( !hot_teams || !hot_teams[level].hot_team ) { 3804 return 0; 3805 } 3806 KMP_DEBUG_ASSERT( level < max_level ); 3807 kmp_team_t *team = hot_teams[level].hot_team; 3808 nth = hot_teams[level].hot_team_nth; 3809 n = nth - 1; // master is not freed 3810 if( level < max_level - 1 ) { 3811 for( i = 0; i < nth; ++i ) { 3812 kmp_info_t *th = team->t.t_threads[i]; 3813 n += __kmp_free_hot_teams( root, th, level + 1, max_level ); 3814 if( i > 0 && th->th.th_hot_teams ) { 3815 __kmp_free( th->th.th_hot_teams ); 3816 th->th.th_hot_teams = NULL; 3817 } 3818 } 3819 } 3820 __kmp_free_team( root, team, NULL ); 3821 return n; 3822 } 3823 #endif 3824 3825 /* Resets a root thread and clear its root and hot teams. 3826 Returns the number of __kmp_threads entries directly and indirectly freed. 3827 */ 3828 static int 3829 __kmp_reset_root(int gtid, kmp_root_t *root) 3830 { 3831 kmp_team_t * root_team = root->r.r_root_team; 3832 kmp_team_t * hot_team = root->r.r_hot_team; 3833 int n = hot_team->t.t_nproc; 3834 int i; 3835 3836 KMP_DEBUG_ASSERT( ! root->r.r_active ); 3837 3838 root->r.r_root_team = NULL; 3839 root->r.r_hot_team = NULL; 3840 // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call 3841 // to __kmp_free_team(). 
__kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) ); 3843 #if KMP_NESTED_HOT_TEAMS 3844 if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any 3845 for( i = 0; i < hot_team->t.t_nproc; ++i ) { 3846 kmp_info_t *th = hot_team->t.t_threads[i]; 3847 if( __kmp_hot_teams_max_level > 1 ) { 3848 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level ); 3849 } 3850 if( th->th.th_hot_teams ) { 3851 __kmp_free( th->th.th_hot_teams ); 3852 th->th.th_hot_teams = NULL; 3853 } 3854 } 3855 } 3856 #endif 3857 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) ); 3858 3859 // 3860 // Before we can reap the thread, we need to make certain that all 3861 // other threads in the teams that had this root as ancestor have stopped trying to steal tasks. 3862 // 3863 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 3864 __kmp_wait_to_unref_task_teams(); 3865 } 3866 3867 #if KMP_OS_WINDOWS 3868 /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */ 3869 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n", 3870 (LPVOID)&(root->r.r_uber_thread->th), 3871 root->r.r_uber_thread->th.th_info.ds.ds_thread ) ); 3872 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread ); 3873 #endif /* KMP_OS_WINDOWS */ 3874 3875 #if OMPT_SUPPORT 3876 if (ompt_enabled && 3877 ompt_callbacks.ompt_callback(ompt_event_thread_end)) { 3878 int gtid = __kmp_get_gtid(); 3879 __ompt_thread_end(ompt_thread_initial, gtid); 3880 } 3881 #endif 3882 3883 TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth. 3884 __kmp_reap_thread( root->r.r_uber_thread, 1 ); 3885 3886 // We cannot put the root thread into __kmp_thread_pool, so we have to reap it instead of freeing it. 3887 root->r.r_uber_thread = NULL; 3888 /* mark root as no longer in use */ 3889 root->r.r_begin = FALSE; 3890 3891 return n; 3892 } 3893 3894 void 3895 __kmp_unregister_root_current_thread( int gtid ) 3896 { 3897 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid )); 3898 /* this lock should be ok, since unregister_root_current_thread is never called during 3899 * an abort, only during a normal close.
furthermore, if you have the 3900 * forkjoin lock, you should never try to get the initz lock */ 3901 3902 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 3903 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 3904 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid )); 3905 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 3906 return; 3907 } 3908 kmp_root_t *root = __kmp_root[gtid]; 3909 3910 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); 3911 KMP_ASSERT( KMP_UBER_GTID( gtid )); 3912 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); 3913 KMP_ASSERT( root->r.r_active == FALSE ); 3914 3915 3916 KMP_MB(); 3917 3918 #if OMP_45_ENABLED 3919 kmp_info_t * thread = __kmp_threads[gtid]; 3920 kmp_team_t * team = thread->th.th_team; 3921 kmp_task_team_t * task_team = thread->th.th_task_team; 3922 3923 // we need to wait for the proxy tasks before finishing the thread 3924 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) { 3925 #if OMPT_SUPPORT 3926 // the runtime is shutting down so we won't report any events 3927 thread->th.ompt_thread_info.state = ompt_state_undefined; 3928 #endif 3929 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); 3930 } 3931 #endif 3932 3933 __kmp_reset_root(gtid, root); 3934 3935 /* free up this thread slot */ 3936 __kmp_gtid_set_specific( KMP_GTID_DNE ); 3937 #ifdef KMP_TDATA_GTID 3938 __kmp_gtid = KMP_GTID_DNE; 3939 #endif 3940 3941 KMP_MB(); 3942 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid )); 3943 3944 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 3945 } 3946 3947 #if KMP_OS_WINDOWS 3948 /* __kmp_forkjoin_lock must be already held 3949 Unregisters a root thread that is not the current thread. Returns the number of 3950 __kmp_threads entries freed as a result. 
3951 */ 3952 static int 3953 __kmp_unregister_root_other_thread( int gtid ) 3954 { 3955 kmp_root_t *root = __kmp_root[gtid]; 3956 int r; 3957 3958 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid )); 3959 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); 3960 KMP_ASSERT( KMP_UBER_GTID( gtid )); 3961 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); 3962 KMP_ASSERT( root->r.r_active == FALSE ); 3963 3964 r = __kmp_reset_root(gtid, root); 3965 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid )); 3966 return r; 3967 } 3968 #endif 3969 3970 #if KMP_DEBUG 3971 void __kmp_task_info() { 3972 3973 kmp_int32 gtid = __kmp_entry_gtid(); 3974 kmp_int32 tid = __kmp_tid_from_gtid( gtid ); 3975 kmp_info_t *this_thr = __kmp_threads[ gtid ]; 3976 kmp_team_t *steam = this_thr->th.th_serial_team; 3977 kmp_team_t *team = this_thr->th.th_team; 3978 3979 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n", 3980 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent ); 3981 } 3982 #endif // KMP_DEBUG 3983 3984 /* TODO optimize with one big memclr, take out what isn't needed, 3985 * split responsibility to workers as much as possible, and delay 3986 * initialization of features as much as possible */ 3987 static void 3988 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid ) 3989 { 3990 /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker 3991 * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ 3992 kmp_info_t *master = team->t.t_threads[0]; 3993 KMP_DEBUG_ASSERT( this_thr != NULL ); 3994 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team ); 3995 KMP_DEBUG_ASSERT( team ); 3996 KMP_DEBUG_ASSERT( team->t.t_threads ); 3997 KMP_DEBUG_ASSERT( team->t.t_dispatch ); 3998 KMP_DEBUG_ASSERT( master ); 3999 KMP_DEBUG_ASSERT( master->th.th_root ); 4000 4001 KMP_MB(); 4002 4003 TCW_SYNC_PTR(this_thr->th.th_team, team); 4004 4005 this_thr->th.th_info.ds.ds_tid = tid; 4006 this_thr->th.th_set_nproc = 0; 4007 if (__kmp_tasking_mode != tskm_immediate_exec) 4008 // When tasking is possible, threads are not safe to reap until they are 4009 // done tasking; this will be set when tasking code is exited in wait 4010 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; 4011 else // no tasking --> always safe to reap 4012 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; 4013 #if OMP_40_ENABLED 4014 this_thr->th.th_set_proc_bind = proc_bind_default; 4015 # if KMP_AFFINITY_SUPPORTED 4016 this_thr->th.th_new_place = this_thr->th.th_current_place; 4017 # endif 4018 #endif 4019 this_thr->th.th_root = master->th.th_root; 4020 4021 /* setup the thread's cache of the team structure */ 4022 this_thr->th.th_team_nproc = team->t.t_nproc; 4023 this_thr->th.th_team_master = master; 4024 this_thr->th.th_team_serialized = team->t.t_serialized; 4025 TCW_PTR(this_thr->th.th_sleep_loc, NULL); 4026 4027 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata ); 4028 4029 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", 4030 tid, gtid, this_thr, this_thr->th.th_current_task ) ); 4031 4032 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE ); 4033 4034 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", 4035 tid, gtid, this_thr, this_thr->th.th_current_task ) ); 4036 // TODO: Initialize ICVs from parent; GEH - isn't that already done in 
__kmp_initialize_team()? 4037 4038 /* TODO no worksharing in speculative threads */ 4039 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ]; 4040 4041 this_thr->th.th_local.this_construct = 0; 4042 4043 #ifdef BUILD_TV 4044 this_thr->th.th_local.tv_data = 0; 4045 #endif 4046 4047 if ( ! this_thr->th.th_pri_common ) { 4048 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) ); 4049 if ( __kmp_storage_map ) { 4050 __kmp_print_storage_map_gtid( 4051 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, 4052 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid 4053 ); 4054 }; // if 4055 this_thr->th.th_pri_head = NULL; 4056 }; // if 4057 4058 /* Initialize dynamic dispatch */ 4059 { 4060 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; 4061 /* 4062 * Use team max_nproc since this will never change for the team. 4063 */ 4064 size_t disp_size = sizeof( dispatch_private_info_t ) * 4065 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ); 4066 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) ); 4067 KMP_ASSERT( dispatch ); 4068 KMP_DEBUG_ASSERT( team->t.t_dispatch ); 4069 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] ); 4070 4071 dispatch->th_disp_index = 0; 4072 #if OMP_45_ENABLED 4073 dispatch->th_doacross_buf_idx = 0; 4074 #endif 4075 if( ! dispatch->th_disp_buffer ) { 4076 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size ); 4077 4078 if ( __kmp_storage_map ) { 4079 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ], 4080 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ], 4081 disp_size, "th_%d.th_dispatch.th_disp_buffer " 4082 "(team_%d.t_dispatch[%d].th_disp_buffer)", 4083 gtid, team->t.t_id, gtid ); 4084 } 4085 } else { 4086 memset( & dispatch->th_disp_buffer[0], '\0', disp_size ); 4087 } 4088 4089 dispatch->th_dispatch_pr_current = 0; 4090 dispatch->th_dispatch_sh_current = 0; 4091 4092 dispatch->th_deo_fcn = 0; /* ORDERED */ 4093 dispatch->th_dxo_fcn = 0; /* END ORDERED */ 4094 } 4095 4096 this_thr->th.th_next_pool = NULL; 4097 4098 if (!this_thr->th.th_task_state_memo_stack) { 4099 size_t i; 4100 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) ); 4101 this_thr->th.th_task_state_top = 0; 4102 this_thr->th.th_task_state_stack_sz = 4; 4103 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack 4104 this_thr->th.th_task_state_memo_stack[i] = 0; 4105 } 4106 4107 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 4108 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 4109 4110 KMP_MB(); 4111 } 4112 4113 4114 /* allocate a new thread for the requesting team. this is only called from within a 4115 * forkjoin critical section. we will first try to get an available thread from the 4116 * thread pool. if none is available, we will fork a new one assuming we are able 4117 * to create a new one. this should be assured, as the caller should check on this 4118 * first. 
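 * In outline (matching the code that follows): pop the head of __kmp_thread_pool and
 * re-run __kmp_initialize_info() on it; otherwise scan __kmp_threads[] for the first
 * free gtid, allocate a fresh kmp_info_t together with its reserve serial team, and
 * start it with __kmp_create_worker().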
4119 */ 4120 kmp_info_t * 4121 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid ) 4122 { 4123 kmp_team_t *serial_team; 4124 kmp_info_t *new_thr; 4125 int new_gtid; 4126 4127 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() )); 4128 KMP_DEBUG_ASSERT( root && team ); 4129 #if !KMP_NESTED_HOT_TEAMS 4130 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() )); 4131 #endif 4132 KMP_MB(); 4133 4134 /* first, try to get one from the thread pool */ 4135 if ( __kmp_thread_pool ) { 4136 4137 new_thr = (kmp_info_t*)__kmp_thread_pool; 4138 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool; 4139 if ( new_thr == __kmp_thread_pool_insert_pt ) { 4140 __kmp_thread_pool_insert_pt = NULL; 4141 } 4142 TCW_4(new_thr->th.th_in_pool, FALSE); 4143 // 4144 // Don't touch th_active_in_pool or th_active. 4145 // The worker thread adjusts those flags as it sleeps/awakens. 4146 // 4147 __kmp_thread_pool_nth--; 4148 4149 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n", 4150 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid )); 4151 KMP_ASSERT( ! new_thr->th.th_team ); 4152 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity ); 4153 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 ); 4154 4155 /* setup the thread structure */ 4156 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid ); 4157 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team ); 4158 4159 TCW_4(__kmp_nth, __kmp_nth + 1); 4160 4161 new_thr->th.th_task_state = 0; 4162 new_thr->th.th_task_state_top = 0; 4163 new_thr->th.th_task_state_stack_sz = 4; 4164 4165 #ifdef KMP_ADJUST_BLOCKTIME 4166 /* Adjust blocktime back to zero if necessar y */ 4167 /* Middle initialization might not have occurred yet */ 4168 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 4169 if ( __kmp_nth > __kmp_avail_proc ) { 4170 __kmp_zero_bt = TRUE; 4171 } 4172 } 4173 #endif /* KMP_ADJUST_BLOCKTIME */ 4174 4175 #if KMP_DEBUG 4176 // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG. 4177 int b; 4178 kmp_balign_t * balign = new_thr->th.th_bar; 4179 for( b = 0; b < bs_last_barrier; ++ b ) 4180 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4181 #endif 4182 4183 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n", 4184 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid )); 4185 4186 KMP_MB(); 4187 return new_thr; 4188 } 4189 4190 4191 /* no, well fork a new one */ 4192 KMP_ASSERT( __kmp_nth == __kmp_all_nth ); 4193 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity ); 4194 4195 #if KMP_USE_MONITOR 4196 // 4197 // If this is the first worker thread the RTL is creating, then also 4198 // launch the monitor thread. We try to do this as early as possible. 4199 // 4200 if ( ! TCR_4( __kmp_init_monitor ) ) { 4201 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); 4202 if ( ! TCR_4( __kmp_init_monitor ) ) { 4203 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) ); 4204 TCW_4( __kmp_init_monitor, 1 ); 4205 __kmp_create_monitor( & __kmp_monitor ); 4206 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) ); 4207 #if KMP_OS_WINDOWS 4208 // AC: wait until monitor has started. This is a fix for CQ232808. 4209 // The reason is that if the library is loaded/unloaded in a loop with small (parallel) 4210 // work in between, then there is high probability that monitor thread started after 4211 // the library shutdown. 
At shutdown it is too late to cope with the problem, because 4212 // when the master is in DllMain (process detach) the monitor has no chances to start 4213 // (it is blocked), and master has no means to inform the monitor that the library has gone, 4214 // because all the memory which the monitor can access is going to be released/reset. 4215 while ( TCR_4(__kmp_init_monitor) < 2 ) { 4216 KMP_YIELD( TRUE ); 4217 } 4218 KF_TRACE( 10, ( "after monitor thread has started\n" ) ); 4219 #endif 4220 } 4221 __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); 4222 } 4223 #endif 4224 4225 KMP_MB(); 4226 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) { 4227 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity ); 4228 } 4229 4230 /* allocate space for it. */ 4231 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); 4232 4233 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr); 4234 4235 if ( __kmp_storage_map ) { 4236 __kmp_print_thread_storage_map( new_thr, new_gtid ); 4237 } 4238 4239 /* add the reserve serialized team, initialized from the team's master thread */ 4240 { 4241 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team ); 4242 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) ); 4243 4244 new_thr->th.th_serial_team = serial_team = 4245 (kmp_team_t*) __kmp_allocate_team( root, 1, 1, 4246 #if OMPT_SUPPORT 4247 0, // root parallel id 4248 #endif 4249 #if OMP_40_ENABLED 4250 proc_bind_default, 4251 #endif 4252 &r_icvs, 4253 0 USE_NESTED_HOT_ARG(NULL) ); 4254 } 4255 KMP_ASSERT ( serial_team ); 4256 serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). 4257 serial_team->t.t_threads[0] = new_thr; 4258 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", 4259 new_thr ) ); 4260 4261 /* setup the thread structures */ 4262 __kmp_initialize_info( new_thr, team, new_tid, new_gtid ); 4263 4264 #if USE_FAST_MEMORY 4265 __kmp_initialize_fast_memory( new_thr ); 4266 #endif /* USE_FAST_MEMORY */ 4267 4268 #if KMP_USE_BGET 4269 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL ); 4270 __kmp_initialize_bget( new_thr ); 4271 #endif 4272 4273 __kmp_init_random( new_thr ); // Initialize random number generator 4274 4275 /* Initialize these only once when thread is grabbed for a team allocation */ 4276 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n", 4277 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); 4278 4279 int b; 4280 kmp_balign_t * balign = new_thr->th.th_bar; 4281 for(b=0; b<bs_last_barrier; ++b) { 4282 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE; 4283 balign[b].bb.team = NULL; 4284 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING; 4285 balign[b].bb.use_oncore_barrier = 0; 4286 } 4287 4288 new_thr->th.th_spin_here = FALSE; 4289 new_thr->th.th_next_waiting = 0; 4290 4291 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4292 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED; 4293 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED; 4294 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED; 4295 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED; 4296 #endif 4297 4298 TCW_4(new_thr->th.th_in_pool, FALSE); 4299 new_thr->th.th_active_in_pool = FALSE; 4300 TCW_4(new_thr->th.th_active, TRUE); 4301 4302 /* adjust the global counters */ 4303 __kmp_all_nth ++; 4304 __kmp_nth ++; 4305 4306 // 4307 // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) 4308 // for low numbers of procs, and method #2 (keyed API 
call) for higher 4309 // numbers of procs. 4310 // 4311 if ( __kmp_adjust_gtid_mode ) { 4312 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { 4313 if ( TCR_4(__kmp_gtid_mode) != 2) { 4314 TCW_4(__kmp_gtid_mode, 2); 4315 } 4316 } 4317 else { 4318 if (TCR_4(__kmp_gtid_mode) != 1 ) { 4319 TCW_4(__kmp_gtid_mode, 1); 4320 } 4321 } 4322 } 4323 4324 #ifdef KMP_ADJUST_BLOCKTIME 4325 /* Adjust blocktime back to zero if necessary */ 4326 /* Middle initialization might not have occurred yet */ 4327 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 4328 if ( __kmp_nth > __kmp_avail_proc ) { 4329 __kmp_zero_bt = TRUE; 4330 } 4331 } 4332 #endif /* KMP_ADJUST_BLOCKTIME */ 4333 4334 /* actually fork it and create the new worker thread */ 4335 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr )); 4336 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize ); 4337 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr )); 4338 4339 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid )); 4340 KMP_MB(); 4341 return new_thr; 4342 } 4343 4344 /* 4345 * reinitialize team for reuse. 4346 * 4347 * The hot team code calls this case at every fork barrier, so EPCC barrier 4348 * test are extremely sensitive to changes in it, esp. writes to the team 4349 * struct, which cause a cache invalidation in all threads. 4350 * 4351 * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! 4352 */ 4353 static void 4354 __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) { 4355 KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n", 4356 team->t.t_threads[0], team ) ); 4357 KMP_DEBUG_ASSERT( team && new_icvs); 4358 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc ); 4359 KMP_CHECK_UPDATE(team->t.t_ident, loc); 4360 4361 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); 4362 4363 // Copy ICVs to the master thread's implicit taskdata 4364 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE ); 4365 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); 4366 4367 KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n", 4368 team->t.t_threads[0], team ) ); 4369 } 4370 4371 4372 /* initialize the team data structure 4373 * this assumes the t_threads and t_max_nproc are already set 4374 * also, we don't touch the arguments */ 4375 static void 4376 __kmp_initialize_team( 4377 kmp_team_t * team, 4378 int new_nproc, 4379 kmp_internal_control_t * new_icvs, 4380 ident_t * loc 4381 ) { 4382 KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) ); 4383 4384 /* verify */ 4385 KMP_DEBUG_ASSERT( team ); 4386 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc ); 4387 KMP_DEBUG_ASSERT( team->t.t_threads ); 4388 KMP_MB(); 4389 4390 team->t.t_master_tid = 0; /* not needed */ 4391 /* team->t.t_master_bar; not needed */ 4392 team->t.t_serialized = new_nproc > 1 ? 
0 : 1; 4393 team->t.t_nproc = new_nproc; 4394 4395 /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */ 4396 team->t.t_next_pool = NULL; 4397 /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */ 4398 4399 TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */ 4400 team->t.t_invoke = NULL; /* not needed */ 4401 4402 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4403 team->t.t_sched = new_icvs->sched; 4404 4405 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 4406 team->t.t_fp_control_saved = FALSE; /* not needed */ 4407 team->t.t_x87_fpu_control_word = 0; /* not needed */ 4408 team->t.t_mxcsr = 0; /* not needed */ 4409 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 4410 4411 team->t.t_construct = 0; 4412 __kmp_init_lock( & team->t.t_single_lock ); 4413 4414 team->t.t_ordered .dt.t_value = 0; 4415 team->t.t_master_active = FALSE; 4416 4417 memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t )); 4418 4419 #ifdef KMP_DEBUG 4420 team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */ 4421 #endif 4422 team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */ 4423 4424 team->t.t_control_stack_top = NULL; 4425 4426 __kmp_reinitialize_team( team, new_icvs, loc ); 4427 4428 KMP_MB(); 4429 KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) ); 4430 } 4431 4432 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4433 /* Sets full mask for thread and returns old mask, no changes to structures. */ 4434 static void 4435 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask ) 4436 { 4437 if ( KMP_AFFINITY_CAPABLE() ) { 4438 int status; 4439 if ( old_mask != NULL ) { 4440 status = __kmp_get_system_affinity( old_mask, TRUE ); 4441 int error = errno; 4442 if ( status != 0 ) { 4443 __kmp_msg( 4444 kmp_ms_fatal, 4445 KMP_MSG( ChangeThreadAffMaskError ), 4446 KMP_ERR( error ), 4447 __kmp_msg_null 4448 ); 4449 } 4450 } 4451 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE ); 4452 } 4453 } 4454 #endif 4455 4456 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4457 4458 // 4459 // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 4460 // It calculats the worker + master thread's partition based upon the parent 4461 // thread's partition, and binds each worker to a thread in their partition. 4462 // The master thread's partition should already include its current binding. 4463 // 4464 static void 4465 __kmp_partition_places( kmp_team_t *team, int update_master_only ) 4466 { 4467 // 4468 // Copy the master thread's place partion to the team struct 4469 // 4470 kmp_info_t *master_th = team->t.t_threads[0]; 4471 KMP_DEBUG_ASSERT( master_th != NULL ); 4472 kmp_proc_bind_t proc_bind = team->t.t_proc_bind; 4473 int first_place = master_th->th.th_first_place; 4474 int last_place = master_th->th.th_last_place; 4475 int masters_place = master_th->th.th_current_place; 4476 team->t.t_first_place = first_place; 4477 team->t.t_last_place = last_place; 4478 4479 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n", 4480 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id, 4481 masters_place, first_place, last_place ) ); 4482 4483 switch ( proc_bind ) { 4484 4485 case proc_bind_default: 4486 // 4487 // serial teams might have the proc_bind policy set to 4488 // proc_bind_default. It doesn't matter, as we don't 4489 // rebind the master thread for any proc_bind policy. 
4490 // 4491 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 ); 4492 break; 4493 4494 case proc_bind_master: 4495 { 4496 int f; 4497 int n_th = team->t.t_nproc; 4498 for ( f = 1; f < n_th; f++ ) { 4499 kmp_info_t *th = team->t.t_threads[f]; 4500 KMP_DEBUG_ASSERT( th != NULL ); 4501 th->th.th_first_place = first_place; 4502 th->th.th_last_place = last_place; 4503 th->th.th_new_place = masters_place; 4504 4505 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4506 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4507 team->t.t_id, f, masters_place, first_place, last_place ) ); 4508 } 4509 } 4510 break; 4511 4512 case proc_bind_close: 4513 { 4514 int f; 4515 int n_th = team->t.t_nproc; 4516 int n_places; 4517 if ( first_place <= last_place ) { 4518 n_places = last_place - first_place + 1; 4519 } 4520 else { 4521 n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 4522 } 4523 if ( n_th <= n_places ) { 4524 int place = masters_place; 4525 for ( f = 1; f < n_th; f++ ) { 4526 kmp_info_t *th = team->t.t_threads[f]; 4527 KMP_DEBUG_ASSERT( th != NULL ); 4528 4529 if ( place == last_place ) { 4530 place = first_place; 4531 } 4532 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4533 place = 0; 4534 } 4535 else { 4536 place++; 4537 } 4538 th->th.th_first_place = first_place; 4539 th->th.th_last_place = last_place; 4540 th->th.th_new_place = place; 4541 4542 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4543 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4544 team->t.t_id, f, place, first_place, last_place ) ); 4545 } 4546 } 4547 else { 4548 int S, rem, gap, s_count; 4549 S = n_th / n_places; 4550 s_count = 0; 4551 rem = n_th - ( S * n_places ); 4552 gap = rem > 0 ? 
n_places/rem : n_places; 4553 int place = masters_place; 4554 int gap_ct = gap; 4555 for ( f = 0; f < n_th; f++ ) { 4556 kmp_info_t *th = team->t.t_threads[f]; 4557 KMP_DEBUG_ASSERT( th != NULL ); 4558 4559 th->th.th_first_place = first_place; 4560 th->th.th_last_place = last_place; 4561 th->th.th_new_place = place; 4562 s_count++; 4563 4564 if ( (s_count == S) && rem && (gap_ct == gap) ) { 4565 // do nothing, add an extra thread to place on next iteration 4566 } 4567 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { 4568 // we added an extra thread to this place; move to next place 4569 if ( place == last_place ) { 4570 place = first_place; 4571 } 4572 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4573 place = 0; 4574 } 4575 else { 4576 place++; 4577 } 4578 s_count = 0; 4579 gap_ct = 1; 4580 rem--; 4581 } 4582 else if (s_count == S) { // place full; don't add extra 4583 if ( place == last_place ) { 4584 place = first_place; 4585 } 4586 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4587 place = 0; 4588 } 4589 else { 4590 place++; 4591 } 4592 gap_ct++; 4593 s_count = 0; 4594 } 4595 4596 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4597 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4598 team->t.t_id, f, th->th.th_new_place, first_place, 4599 last_place ) ); 4600 } 4601 KMP_DEBUG_ASSERT( place == masters_place ); 4602 } 4603 } 4604 break; 4605 4606 case proc_bind_spread: 4607 { 4608 int f; 4609 int n_th = team->t.t_nproc; 4610 int n_places; 4611 int thidx; 4612 if ( first_place <= last_place ) { 4613 n_places = last_place - first_place + 1; 4614 } 4615 else { 4616 n_places = __kmp_affinity_num_masks - first_place + last_place + 1; 4617 } 4618 if ( n_th <= n_places ) { 4619 int place = masters_place; 4620 int S = n_places/n_th; 4621 int s_count, rem, gap, gap_ct; 4622 rem = n_places - n_th*S; 4623 gap = rem ? n_th/rem : 1; 4624 gap_ct = gap; 4625 thidx = n_th; 4626 if (update_master_only == 1) 4627 thidx = 1; 4628 for ( f = 0; f < thidx; f++ ) { 4629 kmp_info_t *th = team->t.t_threads[f]; 4630 KMP_DEBUG_ASSERT( th != NULL ); 4631 4632 th->th.th_first_place = place; 4633 th->th.th_new_place = place; 4634 s_count = 1; 4635 while (s_count < S) { 4636 if ( place == last_place ) { 4637 place = first_place; 4638 } 4639 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4640 place = 0; 4641 } 4642 else { 4643 place++; 4644 } 4645 s_count++; 4646 } 4647 if (rem && (gap_ct == gap)) { 4648 if ( place == last_place ) { 4649 place = first_place; 4650 } 4651 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4652 place = 0; 4653 } 4654 else { 4655 place++; 4656 } 4657 rem--; 4658 gap_ct = 0; 4659 } 4660 th->th.th_last_place = place; 4661 gap_ct++; 4662 4663 if ( place == last_place ) { 4664 place = first_place; 4665 } 4666 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4667 place = 0; 4668 } 4669 else { 4670 place++; 4671 } 4672 4673 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4674 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4675 team->t.t_id, f, th->th.th_new_place, 4676 th->th.th_first_place, th->th.th_last_place ) ); 4677 } 4678 KMP_DEBUG_ASSERT( update_master_only || place == masters_place ); 4679 } 4680 else { 4681 int S, rem, gap, s_count; 4682 S = n_th / n_places; 4683 s_count = 0; 4684 rem = n_th - ( S * n_places ); 4685 gap = rem > 0 ? 
n_places/rem : n_places; 4686 int place = masters_place; 4687 int gap_ct = gap; 4688 thidx = n_th; 4689 if (update_master_only == 1) 4690 thidx = 1; 4691 for ( f = 0; f < thidx; f++ ) { 4692 kmp_info_t *th = team->t.t_threads[f]; 4693 KMP_DEBUG_ASSERT( th != NULL ); 4694 4695 th->th.th_first_place = place; 4696 th->th.th_last_place = place; 4697 th->th.th_new_place = place; 4698 s_count++; 4699 4700 if ( (s_count == S) && rem && (gap_ct == gap) ) { 4701 // do nothing, add an extra thread to place on next iteration 4702 } 4703 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { 4704 // we added an extra thread to this place; move on to next place 4705 if ( place == last_place ) { 4706 place = first_place; 4707 } 4708 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4709 place = 0; 4710 } 4711 else { 4712 place++; 4713 } 4714 s_count = 0; 4715 gap_ct = 1; 4716 rem--; 4717 } 4718 else if (s_count == S) { // place is full; don't add extra thread 4719 if ( place == last_place ) { 4720 place = first_place; 4721 } 4722 else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { 4723 place = 0; 4724 } 4725 else { 4726 place++; 4727 } 4728 gap_ct++; 4729 s_count = 0; 4730 } 4731 4732 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", 4733 __kmp_gtid_from_thread( team->t.t_threads[f] ), 4734 team->t.t_id, f, th->th.th_new_place, 4735 th->th.th_first_place, th->th.th_last_place) ); 4736 } 4737 KMP_DEBUG_ASSERT( update_master_only || place == masters_place ); 4738 } 4739 } 4740 break; 4741 4742 default: 4743 break; 4744 } 4745 4746 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) ); 4747 } 4748 4749 #endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */ 4750 4751 /* allocate a new team data structure to use. take one off of the free pool if available */ 4752 kmp_team_t * 4753 __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, 4754 #if OMPT_SUPPORT 4755 ompt_parallel_id_t ompt_parallel_id, 4756 #endif 4757 #if OMP_40_ENABLED 4758 kmp_proc_bind_t new_proc_bind, 4759 #endif 4760 kmp_internal_control_t *new_icvs, 4761 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) ) 4762 { 4763 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); 4764 int f; 4765 kmp_team_t *team; 4766 int use_hot_team = ! root->r.r_active; 4767 int level = 0; 4768 4769 KA_TRACE( 20, ("__kmp_allocate_team: called\n")); 4770 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 ); 4771 KMP_DEBUG_ASSERT( max_nproc >= new_nproc ); 4772 KMP_MB(); 4773 4774 #if KMP_NESTED_HOT_TEAMS 4775 kmp_hot_team_ptr_t *hot_teams; 4776 if( master ) { 4777 team = master->th.th_team; 4778 level = team->t.t_active_level; 4779 if( master->th.th_teams_microtask ) { // in teams construct? 
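// For example (illustrative): with #pragma omp teams num_teams(2), the inner fork of the
// teams construct, or a #pragma omp parallel nested inside it, picks a hot team one level
// deeper than the enclosing one; with num_teams(1), or for the outer fork that creates the
// team masters themselves, the level is left unchanged -- which is what the condition below
// checks.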
4780 if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1 4781 team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams 4782 master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams 4783 ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise 4784 } 4785 } 4786 hot_teams = master->th.th_hot_teams; 4787 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team ) 4788 { // hot team has already been allocated for given level 4789 use_hot_team = 1; 4790 } else { 4791 use_hot_team = 0; 4792 } 4793 } 4794 #endif 4795 // Optimization to use a "hot" team 4796 if( use_hot_team && new_nproc > 1 ) { 4797 KMP_DEBUG_ASSERT( new_nproc == max_nproc ); 4798 #if KMP_NESTED_HOT_TEAMS 4799 team = hot_teams[level].hot_team; 4800 #else 4801 team = root->r.r_hot_team; 4802 #endif 4803 #if KMP_DEBUG 4804 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 4805 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n", 4806 team->t.t_task_team[0], team->t.t_task_team[1] )); 4807 } 4808 #endif 4809 4810 // Has the number of threads changed? 4811 /* Let's assume the most common case is that the number of threads is unchanged, and 4812 put that case first. */ 4813 if (team->t.t_nproc == new_nproc) { // Check changes in number of threads 4814 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" )); 4815 // This case can mean that omp_set_num_threads() was called and the hot team size 4816 // was already reduced, so we check the special flag 4817 if ( team->t.t_size_changed == -1 ) { 4818 team->t.t_size_changed = 1; 4819 } else { 4820 KMP_CHECK_UPDATE(team->t.t_size_changed, 0); 4821 } 4822 4823 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4824 kmp_r_sched_t new_sched = new_icvs->sched; 4825 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || 4826 team->t.t_sched.chunk != new_sched.chunk) 4827 team->t.t_sched = new_sched; // set master's schedule as new run-time schedule 4828 4829 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); 4830 4831 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 4832 0, team->t.t_threads[0], team ) ); 4833 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); 4834 4835 #if OMP_40_ENABLED 4836 # if KMP_AFFINITY_SUPPORTED 4837 if ( ( team->t.t_size_changed == 0 ) 4838 && ( team->t.t_proc_bind == new_proc_bind ) ) { 4839 if (new_proc_bind == proc_bind_spread) { 4840 __kmp_partition_places(team, 1); // add flag to update only master for spread 4841 } 4842 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n", 4843 team->t.t_id, new_proc_bind, team->t.t_first_place, 4844 team->t.t_last_place ) ); 4845 } 4846 else { 4847 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4848 __kmp_partition_places( team ); 4849 } 4850 # else 4851 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4852 # endif /* KMP_AFFINITY_SUPPORTED */ 4853 #endif /* OMP_40_ENABLED */ 4854 } 4855 else if( team->t.t_nproc > new_nproc ) { 4856 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc )); 4857 4858 team->t.t_size_changed = 1; 4859 #if KMP_NESTED_HOT_TEAMS 4860 if( __kmp_hot_teams_mode == 0 ) { 4861 // AC: saved number of threads should correspond to team's value in this mode, 4862 // can be bigger in mode 1, when hot team has some threads in reserve 4863 
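// Mode 0 (this branch): the hot team is trimmed to the requested size and
// the surplus threads are handed back to the thread pool.  Mode 1 (the else
// branch below): the surplus threads stay in the team as a reserve, parked
// on their own b_go flag, so a later size increase can reuse them without
// allocating from the pool again.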
KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); 4864 hot_teams[level].hot_team_nth = new_nproc; 4865 #endif // KMP_NESTED_HOT_TEAMS 4866 /* release the extra threads we don't need any more */ 4867 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) { 4868 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); 4869 if ( __kmp_tasking_mode != tskm_immediate_exec) { 4870 // When decreasing team size, threads no longer in the team should unref task team. 4871 team->t.t_threads[f]->th.th_task_team = NULL; 4872 } 4873 __kmp_free_thread( team->t.t_threads[ f ] ); 4874 team->t.t_threads[ f ] = NULL; 4875 } 4876 #if KMP_NESTED_HOT_TEAMS 4877 } // (__kmp_hot_teams_mode == 0) 4878 else { 4879 // When keeping extra threads in team, switch threads to wait on own b_go flag 4880 for (f=new_nproc; f<team->t.t_nproc; ++f) { 4881 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 4882 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; 4883 for (int b=0; b<bs_last_barrier; ++b) { 4884 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { 4885 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 4886 } 4887 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); 4888 } 4889 } 4890 } 4891 #endif // KMP_NESTED_HOT_TEAMS 4892 team->t.t_nproc = new_nproc; 4893 // TODO???: team->t.t_max_active_levels = new_max_active_levels; 4894 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type || 4895 team->t.t_sched.chunk != new_icvs->sched.chunk) 4896 team->t.t_sched = new_icvs->sched; 4897 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); 4898 4899 /* update the remaining threads */ 4900 for(f = 0; f < new_nproc; ++f) { 4901 team->t.t_threads[f]->th.th_team_nproc = new_nproc; 4902 } 4903 // restore the current task state of the master thread: should be the implicit task 4904 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 4905 0, team->t.t_threads[0], team ) ); 4906 4907 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); 4908 4909 #ifdef KMP_DEBUG 4910 for ( f = 0; f < team->t.t_nproc; f++ ) { 4911 KMP_DEBUG_ASSERT( team->t.t_threads[f] && 4912 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); 4913 } 4914 #endif 4915 4916 #if OMP_40_ENABLED 4917 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 4918 # if KMP_AFFINITY_SUPPORTED 4919 __kmp_partition_places( team ); 4920 # endif 4921 #endif 4922 } 4923 else { // team->t.t_nproc < new_nproc 4924 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4925 kmp_affin_mask_t *old_mask; 4926 if ( KMP_AFFINITY_CAPABLE() ) { 4927 KMP_CPU_ALLOC(old_mask); 4928 } 4929 #endif 4930 4931 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc )); 4932 4933 team->t.t_size_changed = 1; 4934 4935 #if KMP_NESTED_HOT_TEAMS 4936 int avail_threads = hot_teams[level].hot_team_nth; 4937 if( new_nproc < avail_threads ) 4938 avail_threads = new_nproc; 4939 kmp_info_t **other_threads = team->t.t_threads; 4940 for ( f = team->t.t_nproc; f < avail_threads; ++f ) { 4941 // Adjust barrier data of reserved threads (if any) of the team 4942 // Other data will be set in __kmp_initialize_info() below. 
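// Bring each reserved thread's per-barrier b_arrived counter up to the
// team's current value, so a thread that sat out some regions does not
// rejoin its next barrier with a stale arrived count.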
4943 int b; 4944 kmp_balign_t * balign = other_threads[f]->th.th_bar; 4945 for ( b = 0; b < bs_last_barrier; ++ b ) { 4946 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; 4947 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4948 #if USE_DEBUGGER 4949 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; 4950 #endif 4951 } 4952 } 4953 if( hot_teams[level].hot_team_nth >= new_nproc ) { 4954 // we have all needed threads in reserve, no need to allocate any 4955 // this only possible in mode 1, cannot have reserved threads in mode 0 4956 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); 4957 team->t.t_nproc = new_nproc; // just get reserved threads involved 4958 } else { 4959 // we may have some threads in reserve, but not enough 4960 team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any 4961 hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size 4962 #endif // KMP_NESTED_HOT_TEAMS 4963 if(team->t.t_max_nproc < new_nproc) { 4964 /* reallocate larger arrays */ 4965 __kmp_reallocate_team_arrays(team, new_nproc); 4966 __kmp_reinitialize_team( team, new_icvs, NULL ); 4967 } 4968 4969 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4970 /* Temporarily set full mask for master thread before 4971 creation of workers. The reason is that workers inherit 4972 the affinity from master, so if a lot of workers are 4973 created on the single core quickly, they don't get 4974 a chance to set their own affinity for a long time. 4975 */ 4976 __kmp_set_thread_affinity_mask_full_tmp( old_mask ); 4977 #endif 4978 4979 /* allocate new threads for the hot team */ 4980 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) { 4981 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f ); 4982 KMP_DEBUG_ASSERT( new_worker ); 4983 team->t.t_threads[ f ] = new_worker; 4984 4985 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n", 4986 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f, 4987 team->t.t_bar[bs_forkjoin_barrier].b_arrived, 4988 team->t.t_bar[bs_plain_barrier].b_arrived ) ); 4989 4990 { // Initialize barrier data for new threads. 4991 int b; 4992 kmp_balign_t * balign = new_worker->th.th_bar; 4993 for( b = 0; b < bs_last_barrier; ++ b ) { 4994 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; 4995 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 4996 #if USE_DEBUGGER 4997 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; 4998 #endif 4999 } 5000 } 5001 } 5002 5003 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5004 if ( KMP_AFFINITY_CAPABLE() ) { 5005 /* Restore initial master thread's affinity mask */ 5006 __kmp_set_system_affinity( old_mask, TRUE ); 5007 KMP_CPU_FREE(old_mask); 5008 } 5009 #endif 5010 #if KMP_NESTED_HOT_TEAMS 5011 } // end of check of t_nproc vs. new_nproc vs. 
hot_team_nth 5012 #endif // KMP_NESTED_HOT_TEAMS 5013 /* make sure everyone is syncronized */ 5014 int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below 5015 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident ); 5016 5017 /* reinitialize the threads */ 5018 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); 5019 for (f=0; f < team->t.t_nproc; ++f) 5020 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) ); 5021 if (level) { // set th_task_state for new threads in nested hot team 5022 // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the 5023 // th_task_state for the new threads. th_task_state for master thread will not be accurate until 5024 // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value. 5025 for (f=old_nproc; f < team->t.t_nproc; ++f) 5026 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level]; 5027 } 5028 else { // set th_task_state for new threads in non-nested hot team 5029 int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state 5030 for (f=old_nproc; f < team->t.t_nproc; ++f) 5031 team->t.t_threads[f]->th.th_task_state = old_state; 5032 } 5033 5034 #ifdef KMP_DEBUG 5035 for ( f = 0; f < team->t.t_nproc; ++ f ) { 5036 KMP_DEBUG_ASSERT( team->t.t_threads[f] && 5037 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); 5038 } 5039 #endif 5040 5041 #if OMP_40_ENABLED 5042 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); 5043 # if KMP_AFFINITY_SUPPORTED 5044 __kmp_partition_places( team ); 5045 # endif 5046 #endif 5047 } // Check changes in number of threads 5048 5049 #if OMP_40_ENABLED 5050 kmp_info_t *master = team->t.t_threads[0]; 5051 if( master->th.th_teams_microtask ) { 5052 for( f = 1; f < new_nproc; ++f ) { 5053 // propagate teams construct specific info to workers 5054 kmp_info_t *thr = team->t.t_threads[f]; 5055 thr->th.th_teams_microtask = master->th.th_teams_microtask; 5056 thr->th.th_teams_level = master->th.th_teams_level; 5057 thr->th.th_teams_size = master->th.th_teams_size; 5058 } 5059 } 5060 #endif /* OMP_40_ENABLED */ 5061 #if KMP_NESTED_HOT_TEAMS 5062 if( level ) { 5063 // Sync barrier state for nested hot teams, not needed for outermost hot team. 5064 for( f = 1; f < new_nproc; ++f ) { 5065 kmp_info_t *thr = team->t.t_threads[f]; 5066 int b; 5067 kmp_balign_t * balign = thr->th.th_bar; 5068 for( b = 0; b < bs_last_barrier; ++ b ) { 5069 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; 5070 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); 5071 #if USE_DEBUGGER 5072 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; 5073 #endif 5074 } 5075 } 5076 } 5077 #endif // KMP_NESTED_HOT_TEAMS 5078 5079 /* reallocate space for arguments if necessary */ 5080 __kmp_alloc_argv_entries( argc, team, TRUE ); 5081 KMP_CHECK_UPDATE(team->t.t_argc, argc); 5082 // 5083 // The hot team re-uses the previous task team, 5084 // if untouched during the previous release->gather phase. 
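// (Teams taken from the free pool or allocated from scratch below get both
// t_task_team slots reset to NULL instead; only the hot team carries its
// task teams over from one parallel region to the next.)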
5085 // 5086 5087 KF_TRACE( 10, ( " hot_team = %p\n", team ) ); 5088 5089 #if KMP_DEBUG 5090 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 5091 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n", 5092 team->t.t_task_team[0], team->t.t_task_team[1] )); 5093 } 5094 #endif 5095 5096 #if OMPT_SUPPORT 5097 __ompt_team_assign_id(team, ompt_parallel_id); 5098 #endif 5099 5100 KMP_MB(); 5101 5102 return team; 5103 } 5104 5105 /* next, let's try to take one from the team pool */ 5106 KMP_MB(); 5107 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; ) 5108 { 5109 /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */ 5110 if ( team->t.t_max_nproc >= max_nproc ) { 5111 /* take this team from the team pool */ 5112 __kmp_team_pool = team->t.t_next_pool; 5113 5114 /* setup the team for fresh use */ 5115 __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); 5116 5117 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", 5118 &team->t.t_task_team[0], &team->t.t_task_team[1]) ); 5119 team->t.t_task_team[0] = NULL; 5120 team->t.t_task_team[1] = NULL; 5121 5122 /* reallocate space for arguments if necessary */ 5123 __kmp_alloc_argv_entries( argc, team, TRUE ); 5124 KMP_CHECK_UPDATE(team->t.t_argc, argc); 5125 5126 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 5127 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); 5128 { // Initialize barrier data. 5129 int b; 5130 for ( b = 0; b < bs_last_barrier; ++ b) { 5131 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; 5132 #if USE_DEBUGGER 5133 team->t.t_bar[ b ].b_master_arrived = 0; 5134 team->t.t_bar[ b ].b_team_arrived = 0; 5135 #endif 5136 } 5137 } 5138 5139 #if OMP_40_ENABLED 5140 team->t.t_proc_bind = new_proc_bind; 5141 #endif 5142 5143 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id )); 5144 5145 #if OMPT_SUPPORT 5146 __ompt_team_assign_id(team, ompt_parallel_id); 5147 #endif 5148 5149 KMP_MB(); 5150 5151 return team; 5152 } 5153 5154 /* reap team if it is too small, then loop back and check the next one */ 5155 /* not sure if this is wise, but, will be redone during the hot-teams rewrite. */ 5156 /* TODO: Use technique to find the right size hot-team, don't reap them */ 5157 team = __kmp_reap_team( team ); 5158 __kmp_team_pool = team; 5159 } 5160 5161 /* nothing available in the pool, no matter, make a new team! */ 5162 KMP_MB(); 5163 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) ); 5164 5165 /* and set it up */ 5166 team->t.t_max_nproc = max_nproc; 5167 /* NOTE well, for some reason allocating one big buffer and dividing it 5168 * up seems to really hurt performance a lot on the P4, so, let's not use 5169 * this... 
*/ 5170 __kmp_allocate_team_arrays( team, max_nproc ); 5171 5172 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) ); 5173 __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); 5174 5175 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", 5176 &team->t.t_task_team[0], &team->t.t_task_team[1] ) ); 5177 team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate 5178 team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate 5179 5180 if ( __kmp_storage_map ) { 5181 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc ); 5182 } 5183 5184 /* allocate space for arguments */ 5185 __kmp_alloc_argv_entries( argc, team, FALSE ); 5186 team->t.t_argc = argc; 5187 5188 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", 5189 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); 5190 { // Initialize barrier data. 5191 int b; 5192 for ( b = 0; b < bs_last_barrier; ++ b ) { 5193 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; 5194 #if USE_DEBUGGER 5195 team->t.t_bar[ b ].b_master_arrived = 0; 5196 team->t.t_bar[ b ].b_team_arrived = 0; 5197 #endif 5198 } 5199 } 5200 5201 #if OMP_40_ENABLED 5202 team->t.t_proc_bind = new_proc_bind; 5203 #endif 5204 5205 #if OMPT_SUPPORT 5206 __ompt_team_assign_id(team, ompt_parallel_id); 5207 team->t.ompt_serialized_team_info = NULL; 5208 #endif 5209 5210 KMP_MB(); 5211 5212 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id )); 5213 5214 return team; 5215 } 5216 5217 /* TODO implement hot-teams at all levels */ 5218 /* TODO implement lazy thread release on demand (disband request) */ 5219 5220 /* free the team. return it to the team pool. release all the threads 5221 * associated with it */ 5222 void 5223 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) ) 5224 { 5225 int f; 5226 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id )); 5227 5228 /* verify state */ 5229 KMP_DEBUG_ASSERT( root ); 5230 KMP_DEBUG_ASSERT( team ); 5231 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc ); 5232 KMP_DEBUG_ASSERT( team->t.t_threads ); 5233 5234 int use_hot_team = team == root->r.r_hot_team; 5235 #if KMP_NESTED_HOT_TEAMS 5236 int level; 5237 kmp_hot_team_ptr_t *hot_teams; 5238 if( master ) { 5239 level = team->t.t_active_level - 1; 5240 if( master->th.th_teams_microtask ) { // in teams construct? 5241 if( master->th.th_teams_size.nteams > 1 ) { 5242 ++level; // level was not increased in teams construct for team_of_masters 5243 } 5244 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && 5245 master->th.th_teams_level == team->t.t_level ) { 5246 ++level; // level was not increased in teams construct for team_of_workers before the parallel 5247 } // team->t.t_level will be increased inside parallel 5248 } 5249 hot_teams = master->th.th_hot_teams; 5250 if( level < __kmp_hot_teams_max_level ) { 5251 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team ); 5252 use_hot_team = 1; 5253 } 5254 } 5255 #endif // KMP_NESTED_HOT_TEAMS 5256 5257 /* team is done working */ 5258 TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library. 5259 team->t.t_copyin_counter = 0; // init counter for possible reuse 5260 // Do not reset pointer to parent team to NULL for hot teams. 5261 5262 /* if we are non-hot team, release our threads */ 5263 if( ! 
use_hot_team ) { 5264 if (__kmp_tasking_mode != tskm_immediate_exec) { 5265 // Wait for threads to reach reapable state 5266 for (f = 1; f < team->t.t_nproc; ++f) { 5267 KMP_DEBUG_ASSERT(team->t.t_threads[f]); 5268 volatile kmp_uint32 *state = &team->t.t_threads[f]->th.th_reap_state; 5269 while (*state != KMP_SAFE_TO_REAP) { 5270 #if KMP_OS_WINDOWS 5271 // On Windows a thread can be killed at any time, check this 5272 DWORD ecode; 5273 if (__kmp_is_thread_alive(team->t.t_threads[f], &ecode)) 5274 KMP_CPU_PAUSE(); 5275 else 5276 *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread 5277 #else 5278 KMP_CPU_PAUSE(); 5279 #endif 5280 } 5281 } 5282 5283 // Delete task teams 5284 int tt_idx; 5285 for (tt_idx=0; tt_idx<2; ++tt_idx) { 5286 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; 5287 if ( task_team != NULL ) { 5288 for (f=0; f<team->t.t_nproc; ++f) { // Have all threads unref task teams 5289 team->t.t_threads[f]->th.th_task_team = NULL; 5290 } 5291 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) ); 5292 #if KMP_NESTED_HOT_TEAMS 5293 __kmp_free_task_team( master, task_team ); 5294 #endif 5295 team->t.t_task_team[tt_idx] = NULL; 5296 } 5297 } 5298 } 5299 5300 // Reset pointer to parent team only for non-hot teams. 5301 team->t.t_parent = NULL; 5302 team->t.t_level = 0; 5303 team->t.t_active_level = 0; 5304 5305 /* free the worker threads */ 5306 for ( f = 1; f < team->t.t_nproc; ++ f ) { 5307 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); 5308 __kmp_free_thread( team->t.t_threads[ f ] ); 5309 team->t.t_threads[ f ] = NULL; 5310 } 5311 5312 /* put the team back in the team pool */ 5313 /* TODO limit size of team pool, call reap_team if pool too large */ 5314 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool; 5315 __kmp_team_pool = (volatile kmp_team_t*) team; 5316 } 5317 5318 KMP_MB(); 5319 } 5320 5321 5322 /* reap the team. destroy it, reclaim all its resources and free its memory */ 5323 kmp_team_t * 5324 __kmp_reap_team( kmp_team_t *team ) 5325 { 5326 kmp_team_t *next_pool = team->t.t_next_pool; 5327 5328 KMP_DEBUG_ASSERT( team ); 5329 KMP_DEBUG_ASSERT( team->t.t_dispatch ); 5330 KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); 5331 KMP_DEBUG_ASSERT( team->t.t_threads ); 5332 KMP_DEBUG_ASSERT( team->t.t_argv ); 5333 5334 /* TODO clean the threads that are a part of this? */ 5335 5336 /* free stuff */ 5337 5338 __kmp_free_team_arrays( team ); 5339 if ( team->t.t_argv != &team->t.t_inline_argv[0] ) 5340 __kmp_free( (void*) team->t.t_argv ); 5341 __kmp_free( team ); 5342 5343 KMP_MB(); 5344 return next_pool; 5345 } 5346 5347 // 5348 // Free the thread. Don't reap it, just place it on the pool of available 5349 // threads. 5350 // 5351 // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid 5352 // binding for the affinity mechanism to be useful. 5353 // 5354 // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid. 5355 // However, we want to avoid a potential performance problem by always 5356 // scanning through the list to find the correct point at which to insert 5357 // the thread (potential N**2 behavior). To do this we keep track of the 5358 // last place a thread struct was inserted (__kmp_thread_pool_insert_pt). 5359 // With single-level parallelism, threads will always be added to the tail 5360 // of the list, kept track of by __kmp_thread_pool_insert_pt. With nested 5361 // parallelism, all bets are off and we may need to scan through the entire 5362 // free list. 
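//
// A minimal standalone sketch of that insertion scheme (illustrative only:
// simplified types, no TCW_PTR/volatile handling, not the runtime's real
// kmp_info_t fields):
//
#if 0
struct node { int gtid; node *next; };
static node *pool = NULL;        // list kept sorted by ascending gtid
static node *insert_pt = NULL;   // last insertion point, or NULL

static void pool_insert(node *n) {
    // If the cached point is already past the new gtid, re-scan from the head.
    if (insert_pt != NULL && insert_pt->gtid > n->gtid)
        insert_pt = NULL;
    node **scan = insert_pt ? &insert_pt->next : &pool;
    while (*scan != NULL && (*scan)->gtid < n->gtid)
        scan = &(*scan)->next;   // 0 iterations with single-level parallelism
    n->next = *scan;             // splice in, keeping the list sorted
    *scan = n;
    insert_pt = n;               // remember where we inserted
}
#endif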
5363 // 5364 // This change also has a potentially large performance benefit, for some 5365 // applications. Previously, as threads were freed from the hot team, they 5366 // would be placed back on the free list in inverse order. If the hot team 5367 // grew back to it's original size, then the freed thread would be placed 5368 // back on the hot team in reverse order. This could cause bad cache 5369 // locality problems on programs where the size of the hot team regularly 5370 // grew and shrunk. 5371 // 5372 // Now, for single-level parallelism, the OMP tid is alway == gtid. 5373 // 5374 void 5375 __kmp_free_thread( kmp_info_t *this_th ) 5376 { 5377 int gtid; 5378 kmp_info_t **scan; 5379 5380 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n", 5381 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid )); 5382 5383 KMP_DEBUG_ASSERT( this_th ); 5384 5385 // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team). 5386 int b; 5387 kmp_balign_t *balign = this_th->th.th_bar; 5388 for (b=0; b<bs_last_barrier; ++b) { 5389 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) 5390 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; 5391 balign[b].bb.team = NULL; 5392 balign[b].bb.leaf_kids = 0; 5393 } 5394 this_th->th.th_task_state = 0; 5395 5396 /* put thread back on the free pool */ 5397 TCW_PTR(this_th->th.th_team, NULL); 5398 TCW_PTR(this_th->th.th_root, NULL); 5399 TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */ 5400 5401 // 5402 // If the __kmp_thread_pool_insert_pt is already past the new insert 5403 // point, then we need to re-scan the entire list. 5404 // 5405 gtid = this_th->th.th_info.ds.ds_gtid; 5406 if ( __kmp_thread_pool_insert_pt != NULL ) { 5407 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL ); 5408 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) { 5409 __kmp_thread_pool_insert_pt = NULL; 5410 } 5411 } 5412 5413 // 5414 // Scan down the list to find the place to insert the thread. 5415 // scan is the address of a link in the list, possibly the address of 5416 // __kmp_thread_pool itself. 5417 // 5418 // In the absence of nested parallism, the for loop will have 0 iterations. 5419 // 5420 if ( __kmp_thread_pool_insert_pt != NULL ) { 5421 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool ); 5422 } 5423 else { 5424 scan = (kmp_info_t **)&__kmp_thread_pool; 5425 } 5426 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid ); 5427 scan = &( (*scan)->th.th_next_pool ) ); 5428 5429 // 5430 // Insert the new element on the list, and set __kmp_thread_pool_insert_pt 5431 // to its address. 
5432 // 5433 TCW_PTR(this_th->th.th_next_pool, *scan); 5434 __kmp_thread_pool_insert_pt = *scan = this_th; 5435 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL ) 5436 || ( this_th->th.th_info.ds.ds_gtid 5437 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) ); 5438 TCW_4(this_th->th.th_in_pool, TRUE); 5439 __kmp_thread_pool_nth++; 5440 5441 TCW_4(__kmp_nth, __kmp_nth - 1); 5442 5443 #ifdef KMP_ADJUST_BLOCKTIME 5444 /* Adjust blocktime back to user setting or default if necessary */ 5445 /* Middle initialization might never have occurred */ 5446 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 5447 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); 5448 if ( __kmp_nth <= __kmp_avail_proc ) { 5449 __kmp_zero_bt = FALSE; 5450 } 5451 } 5452 #endif /* KMP_ADJUST_BLOCKTIME */ 5453 5454 KMP_MB(); 5455 } 5456 5457 5458 /* ------------------------------------------------------------------------ */ 5459 5460 void * 5461 __kmp_launch_thread( kmp_info_t *this_thr ) 5462 { 5463 int gtid = this_thr->th.th_info.ds.ds_gtid; 5464 /* void *stack_data;*/ 5465 kmp_team_t *(*volatile pteam); 5466 5467 KMP_MB(); 5468 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) ); 5469 5470 if( __kmp_env_consistency_check ) { 5471 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak? 5472 } 5473 5474 #if OMPT_SUPPORT 5475 if (ompt_enabled) { 5476 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5477 this_thr->th.ompt_thread_info.wait_id = 0; 5478 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); 5479 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { 5480 __ompt_thread_begin(ompt_thread_worker, gtid); 5481 } 5482 } 5483 #endif 5484 5485 /* This is the place where threads wait for work */ 5486 while( ! TCR_4(__kmp_global.g.g_done) ) { 5487 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] ); 5488 KMP_MB(); 5489 5490 /* wait for work to do */ 5491 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid )); 5492 5493 #if OMPT_SUPPORT 5494 if (ompt_enabled) { 5495 this_thr->th.ompt_thread_info.state = ompt_state_idle; 5496 } 5497 #endif 5498 5499 /* No tid yet since not part of a team */ 5500 __kmp_fork_barrier( gtid, KMP_GTID_DNE ); 5501 5502 #if OMPT_SUPPORT 5503 if (ompt_enabled) { 5504 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5505 } 5506 #endif 5507 5508 pteam = (kmp_team_t *(*))(& this_thr->th.th_team); 5509 5510 /* have we been allocated? */ 5511 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) { 5512 #if OMPT_SUPPORT 5513 ompt_task_info_t *task_info; 5514 ompt_parallel_id_t my_parallel_id; 5515 if (ompt_enabled) { 5516 task_info = __ompt_get_taskinfo(0); 5517 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id; 5518 } 5519 #endif 5520 /* we were just woken up, so run our new task */ 5521 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) { 5522 int rc; 5523 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n", 5524 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); 5525 5526 updateHWFPControl (*pteam); 5527 5528 #if OMPT_SUPPORT 5529 if (ompt_enabled) { 5530 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; 5531 // Initialize OMPT task id for implicit task. 
5532 int tid = __kmp_tid_from_gtid(gtid); 5533 task_info->task_id = __ompt_task_id_new(tid); 5534 } 5535 #endif 5536 5537 { 5538 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 5539 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 5540 rc = (*pteam)->t.t_invoke( gtid ); 5541 } 5542 KMP_ASSERT( rc ); 5543 5544 #if OMPT_SUPPORT 5545 if (ompt_enabled) { 5546 /* no frame set while outside task */ 5547 task_info->frame.exit_runtime_frame = NULL; 5548 5549 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 5550 } 5551 #endif 5552 KMP_MB(); 5553 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", 5554 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); 5555 } 5556 /* join barrier after parallel region */ 5557 __kmp_join_barrier( gtid ); 5558 #if OMPT_SUPPORT && OMPT_TRACE 5559 if (ompt_enabled) { 5560 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { 5561 // don't access *pteam here: it may have already been freed 5562 // by the master thread behind the barrier (possible race) 5563 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( 5564 my_parallel_id, task_info->task_id); 5565 } 5566 task_info->frame.exit_runtime_frame = NULL; 5567 task_info->task_id = 0; 5568 } 5569 #endif 5570 } 5571 } 5572 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); 5573 5574 #if OMPT_SUPPORT 5575 if (ompt_enabled && 5576 ompt_callbacks.ompt_callback(ompt_event_thread_end)) { 5577 __ompt_thread_end(ompt_thread_worker, gtid); 5578 } 5579 #endif 5580 5581 this_thr->th.th_task_team = NULL; 5582 /* run the destructors for the threadprivate data for this thread */ 5583 __kmp_common_destroy_gtid( gtid ); 5584 5585 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) ); 5586 KMP_MB(); 5587 return this_thr; 5588 } 5589 5590 /* ------------------------------------------------------------------------ */ 5591 /* ------------------------------------------------------------------------ */ 5592 5593 void 5594 __kmp_internal_end_dest( void *specific_gtid ) 5595 { 5596 #if KMP_COMPILER_ICC 5597 #pragma warning( push ) 5598 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits 5599 #endif 5600 // Make sure no significant bits are lost 5601 int gtid = (kmp_intptr_t)specific_gtid - 1; 5602 #if KMP_COMPILER_ICC 5603 #pragma warning( pop ) 5604 #endif 5605 5606 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid)); 5607 /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage 5608 * this is because 0 is reserved for the nothing-stored case */ 5609 5610 /* josh: One reason for setting the gtid specific data even when it is being 5611 destroyed by pthread is to allow gtid lookup through thread specific data 5612 (__kmp_gtid_get_specific). Some of the code, especially stat code, 5613 that gets executed in the call to __kmp_internal_end_thread, actually 5614 gets the gtid through the thread specific data. Setting it here seems 5615 rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread 5616 to run smoothly. 5617 todo: get rid of this after we remove the dependence on 5618 __kmp_gtid_get_specific 5619 */ 5620 if(gtid >= 0 && KMP_UBER_GTID(gtid)) 5621 __kmp_gtid_set_specific( gtid ); 5622 #ifdef KMP_TDATA_GTID 5623 __kmp_gtid = gtid; 5624 #endif 5625 __kmp_internal_end_thread( gtid ); 5626 } 5627 5628 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB 5629 5630 // 2009-09-08 (lev): It looks the destructor does not work. 
In simple test cases destructors work 5631 // perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker 5632 // option in makefile.mk works fine. 5633 5634 __attribute__(( destructor )) 5635 void 5636 __kmp_internal_end_dtor( void ) 5637 { 5638 __kmp_internal_end_atexit(); 5639 } 5640 5641 void 5642 __kmp_internal_end_fini( void ) 5643 { 5644 __kmp_internal_end_atexit(); 5645 } 5646 5647 #endif 5648 5649 /* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */ 5650 void 5651 __kmp_internal_end_atexit( void ) 5652 { 5653 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) ); 5654 /* [Windows] 5655 josh: ideally, we want to completely shutdown the library in this atexit handler, but 5656 stat code that depends on thread specific data for gtid fails because that data becomes 5657 unavailable at some point during the shutdown, so we call __kmp_internal_end_thread 5658 instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the 5659 stat code and use __kmp_internal_end_library to cleanly shutdown the library. 5660 5661 // TODO: Can some of this comment about GVS be removed? 5662 I suspect that the offending stat code is executed when the calling thread tries to 5663 clean up a dead root thread's data structures, resulting in GVS code trying to close 5664 the GVS structures for that thread, but since the stat code uses 5665 __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is 5666 cleaning up itself instead of another thread, it gets confused. This happens because 5667 allowing a thread to unregister and cleanup another thread is a recent modification for 5668 addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a 5669 thread may end up trying to unregister another thread only if thread death does not 5670 trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread 5671 specific data destructor function to detect thread death. For Windows dynamic, there 5672 is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the 5673 workaround is applicable only for Windows static stat library. 5674 */ 5675 __kmp_internal_end_library( -1 ); 5676 #if KMP_OS_WINDOWS 5677 __kmp_close_console(); 5678 #endif 5679 } 5680 5681 static void 5682 __kmp_reap_thread( 5683 kmp_info_t * thread, 5684 int is_root 5685 ) { 5686 5687 // It is assumed __kmp_forkjoin_lock is acquired. 5688 5689 int gtid; 5690 5691 KMP_DEBUG_ASSERT( thread != NULL ); 5692 5693 gtid = thread->th.th_info.ds.ds_gtid; 5694 5695 if ( ! is_root ) { 5696 5697 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) { 5698 /* Assume the threads are at the fork barrier here */ 5699 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) ); 5700 /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */ 5701 ANNOTATE_HAPPENS_BEFORE(thread); 5702 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread); 5703 __kmp_release_64(&flag); 5704 }; // if 5705 5706 // Terminate OS thread. 5707 __kmp_reap_worker( thread ); 5708 5709 // 5710 // The thread was killed asynchronously. If it was actively 5711 // spinning in the thread pool, decrement the global count. 
5712 // 5713 // There is a small timing hole here - if the worker thread was 5714 // just waking up after sleeping in the pool, had reset it's 5715 // th_active_in_pool flag but not decremented the global counter 5716 // __kmp_thread_pool_active_nth yet, then the global counter 5717 // might not get updated. 5718 // 5719 // Currently, this can only happen as the library is unloaded, 5720 // so there are no harmful side effects. 5721 // 5722 if ( thread->th.th_active_in_pool ) { 5723 thread->th.th_active_in_pool = FALSE; 5724 KMP_TEST_THEN_DEC32( 5725 (kmp_int32 *) &__kmp_thread_pool_active_nth ); 5726 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); 5727 } 5728 5729 // Decrement # of [worker] threads in the pool. 5730 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 ); 5731 --__kmp_thread_pool_nth; 5732 }; // if 5733 5734 __kmp_free_implicit_task(thread); 5735 5736 // Free the fast memory for tasking 5737 #if USE_FAST_MEMORY 5738 __kmp_free_fast_memory( thread ); 5739 #endif /* USE_FAST_MEMORY */ 5740 5741 __kmp_suspend_uninitialize_thread( thread ); 5742 5743 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread ); 5744 TCW_SYNC_PTR(__kmp_threads[gtid], NULL); 5745 5746 -- __kmp_all_nth; 5747 // __kmp_nth was decremented when thread is added to the pool. 5748 5749 #ifdef KMP_ADJUST_BLOCKTIME 5750 /* Adjust blocktime back to user setting or default if necessary */ 5751 /* Middle initialization might never have occurred */ 5752 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 5753 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); 5754 if ( __kmp_nth <= __kmp_avail_proc ) { 5755 __kmp_zero_bt = FALSE; 5756 } 5757 } 5758 #endif /* KMP_ADJUST_BLOCKTIME */ 5759 5760 /* free the memory being used */ 5761 if( __kmp_env_consistency_check ) { 5762 if ( thread->th.th_cons ) { 5763 __kmp_free_cons_stack( thread->th.th_cons ); 5764 thread->th.th_cons = NULL; 5765 }; // if 5766 } 5767 5768 if ( thread->th.th_pri_common != NULL ) { 5769 __kmp_free( thread->th.th_pri_common ); 5770 thread->th.th_pri_common = NULL; 5771 }; // if 5772 5773 if (thread->th.th_task_state_memo_stack != NULL) { 5774 __kmp_free(thread->th.th_task_state_memo_stack); 5775 thread->th.th_task_state_memo_stack = NULL; 5776 } 5777 5778 #if KMP_USE_BGET 5779 if ( thread->th.th_local.bget_data != NULL ) { 5780 __kmp_finalize_bget( thread ); 5781 }; // if 5782 #endif 5783 5784 #if KMP_AFFINITY_SUPPORTED 5785 if ( thread->th.th_affin_mask != NULL ) { 5786 KMP_CPU_FREE( thread->th.th_affin_mask ); 5787 thread->th.th_affin_mask = NULL; 5788 }; // if 5789 #endif /* KMP_AFFINITY_SUPPORTED */ 5790 5791 __kmp_reap_team( thread->th.th_serial_team ); 5792 thread->th.th_serial_team = NULL; 5793 __kmp_free( thread ); 5794 5795 KMP_MB(); 5796 5797 } // __kmp_reap_thread 5798 5799 static void 5800 __kmp_internal_end(void) 5801 { 5802 int i; 5803 5804 /* First, unregister the library */ 5805 __kmp_unregister_library(); 5806 5807 #if KMP_OS_WINDOWS 5808 /* In Win static library, we can't tell when a root actually dies, so we 5809 reclaim the data structures for any root threads that have died but not 5810 unregistered themselves, in order to shut down cleanly. 5811 In Win dynamic library we also can't tell when a thread dies. 5812 */ 5813 __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots 5814 #endif 5815 5816 for( i=0 ; i<__kmp_threads_capacity ; i++ ) 5817 if( __kmp_root[i] ) 5818 if( __kmp_root[i]->r.r_active ) 5819 break; 5820 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 5821 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 5822 5823 if ( i < __kmp_threads_capacity ) { 5824 #if KMP_USE_MONITOR 5825 // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? 5826 KMP_MB(); /* Flush all pending memory write invalidates. */ 5827 5828 // 5829 // Need to check that monitor was initialized before reaping it. 5830 // If we are called form __kmp_atfork_child (which sets 5831 // __kmp_init_parallel = 0), then __kmp_monitor will appear to 5832 // contain valid data, but it is only valid in the parent process, 5833 // not the child. 5834 // 5835 // New behavior (201008): instead of keying off of the flag 5836 // __kmp_init_parallel, the monitor thread creation is keyed off 5837 // of the new flag __kmp_init_monitor. 5838 // 5839 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); 5840 if ( TCR_4( __kmp_init_monitor ) ) { 5841 __kmp_reap_monitor( & __kmp_monitor ); 5842 TCW_4( __kmp_init_monitor, 0 ); 5843 } 5844 __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); 5845 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); 5846 #endif // KMP_USE_MONITOR 5847 } else { 5848 /* TODO move this to cleanup code */ 5849 #ifdef KMP_DEBUG 5850 /* make sure that everything has properly ended */ 5851 for ( i = 0; i < __kmp_threads_capacity; i++ ) { 5852 if( __kmp_root[i] ) { 5853 // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here 5854 KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active? 5855 } 5856 } 5857 #endif 5858 5859 KMP_MB(); 5860 5861 // Reap the worker threads. 5862 // This is valid for now, but be careful if threads are reaped sooner. 5863 while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool. 5864 // Get the next thread from the pool. 5865 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool; 5866 __kmp_thread_pool = thread->th.th_next_pool; 5867 // Reap it. 5868 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP); 5869 thread->th.th_next_pool = NULL; 5870 thread->th.th_in_pool = FALSE; 5871 __kmp_reap_thread( thread, 0 ); 5872 }; // while 5873 __kmp_thread_pool_insert_pt = NULL; 5874 5875 // Reap teams. 5876 while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool. 5877 // Get the next team from the pool. 5878 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool; 5879 __kmp_team_pool = team->t.t_next_pool; 5880 // Reap it. 5881 team->t.t_next_pool = NULL; 5882 __kmp_reap_team( team ); 5883 }; // while 5884 5885 __kmp_reap_task_teams( ); 5886 5887 for ( i = 0; i < __kmp_threads_capacity; ++ i ) { 5888 // TBD: Add some checking... 5889 // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL ); 5890 } 5891 5892 /* Make sure all threadprivate destructors get run by joining with all worker 5893 threads before resetting this flag */ 5894 TCW_SYNC_4(__kmp_init_common, FALSE); 5895 5896 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) ); 5897 KMP_MB(); 5898 5899 #if KMP_USE_MONITOR 5900 // 5901 // See note above: One of the possible fixes for CQ138434 / CQ140126 5902 // 5903 // FIXME: push both code fragments down and CSE them? 5904 // push them into __kmp_cleanup() ? 
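// (This is a duplicate of the monitor-reap sequence in the branch above;
// the FIXME refers to factoring the two copies into one place, e.g. into
// __kmp_cleanup().)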
5905 // 5906 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); 5907 if ( TCR_4( __kmp_init_monitor ) ) { 5908 __kmp_reap_monitor( & __kmp_monitor ); 5909 TCW_4( __kmp_init_monitor, 0 ); 5910 } 5911 __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); 5912 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); 5913 #endif 5914 } /* else !__kmp_global.t_active */ 5915 TCW_4(__kmp_init_gtid, FALSE); 5916 KMP_MB(); /* Flush all pending memory write invalidates. */ 5917 5918 __kmp_cleanup(); 5919 #if OMPT_SUPPORT 5920 ompt_fini(); 5921 #endif 5922 } 5923 5924 void 5925 __kmp_internal_end_library( int gtid_req ) 5926 { 5927 /* if we have already cleaned up, don't try again, it wouldn't be pretty */ 5928 /* this shouldn't be a race condition because __kmp_internal_end() is the 5929 * only place to clear __kmp_serial_init */ 5930 /* we'll check this later too, after we get the lock */ 5931 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundaant, 5932 // because the next check will work in any case. 5933 if( __kmp_global.g.g_abort ) { 5934 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" )); 5935 /* TODO abort? */ 5936 return; 5937 } 5938 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 5939 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" )); 5940 return; 5941 } 5942 5943 5944 KMP_MB(); /* Flush all pending memory write invalidates. */ 5945 5946 /* find out who we are and what we should do */ 5947 { 5948 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); 5949 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req )); 5950 if( gtid == KMP_GTID_SHUTDOWN ) { 5951 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" )); 5952 return; 5953 } else if( gtid == KMP_GTID_MONITOR ) { 5954 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" )); 5955 return; 5956 } else if( gtid == KMP_GTID_DNE ) { 5957 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" )); 5958 /* we don't know who we are, but we may still shutdown the library */ 5959 } else if( KMP_UBER_GTID( gtid )) { 5960 /* unregister ourselves as an uber thread. gtid is no longer valid */ 5961 if( __kmp_root[gtid]->r.r_active ) { 5962 __kmp_global.g.g_abort = -1; 5963 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 5964 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid )); 5965 return; 5966 } else { 5967 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid )); 5968 __kmp_unregister_root_current_thread( gtid ); 5969 } 5970 } else { 5971 /* worker threads may call this function through the atexit handler, if they call exit() */ 5972 /* For now, skip the usual subsequent processing and just dump the debug buffer. 5973 TODO: do a thorough shutdown instead 5974 */ 5975 #ifdef DUMP_DEBUG_ON_EXIT 5976 if ( __kmp_debug_buf ) 5977 __kmp_dump_debug_buffer( ); 5978 #endif 5979 return; 5980 } 5981 } 5982 /* synchronize the termination process */ 5983 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 5984 5985 /* have we already finished */ 5986 if( __kmp_global.g.g_abort ) { 5987 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" )); 5988 /* TODO abort? 
*/ 5989 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 5990 return; 5991 } 5992 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 5993 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 5994 return; 5995 } 5996 5997 /* We need this lock to enforce mutex between this reading of 5998 __kmp_threads_capacity and the writing by __kmp_register_root. 5999 Alternatively, we can use a counter of roots that is 6000 atomically updated by __kmp_get_global_thread_id_reg, 6001 __kmp_do_serial_initialize and __kmp_internal_end_*. 6002 */ 6003 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 6004 6005 /* now we can safely conduct the actual termination */ 6006 __kmp_internal_end(); 6007 6008 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 6009 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6010 6011 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) ); 6012 6013 #ifdef DUMP_DEBUG_ON_EXIT 6014 if ( __kmp_debug_buf ) 6015 __kmp_dump_debug_buffer(); 6016 #endif 6017 6018 #if KMP_OS_WINDOWS 6019 __kmp_close_console(); 6020 #endif 6021 6022 __kmp_fini_allocator(); 6023 6024 } // __kmp_internal_end_library 6025 6026 void 6027 __kmp_internal_end_thread( int gtid_req ) 6028 { 6029 int i; 6030 6031 /* if we have already cleaned up, don't try again, it wouldn't be pretty */ 6032 /* this shouldn't be a race condition because __kmp_internal_end() is the 6033 * only place to clear __kmp_serial_init */ 6034 /* we'll check this later too, after we get the lock */ 6035 // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant, 6036 // because the next check will work in any case. 6037 if( __kmp_global.g.g_abort ) { 6038 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" )); 6039 /* TODO abort? */ 6040 return; 6041 } 6042 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 6043 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" )); 6044 return; 6045 } 6046 6047 KMP_MB(); /* Flush all pending memory write invalidates. */ 6048 6049 /* find out who we are and what we should do */ 6050 { 6051 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); 6052 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req )); 6053 if( gtid == KMP_GTID_SHUTDOWN ) { 6054 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" )); 6055 return; 6056 } else if( gtid == KMP_GTID_MONITOR ) { 6057 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" )); 6058 return; 6059 } else if( gtid == KMP_GTID_DNE ) { 6060 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" )); 6061 return; 6062 /* we don't know who we are */ 6063 } else if( KMP_UBER_GTID( gtid )) { 6064 /* unregister ourselves as an uber thread. 
gtid is no longer valid */ 6065 if( __kmp_root[gtid]->r.r_active ) { 6066 __kmp_global.g.g_abort = -1; 6067 TCW_SYNC_4(__kmp_global.g.g_done, TRUE); 6068 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid )); 6069 return; 6070 } else { 6071 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid )); 6072 __kmp_unregister_root_current_thread( gtid ); 6073 } 6074 } else { 6075 /* just a worker thread, let's leave */ 6076 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid )); 6077 6078 if ( gtid >= 0 ) { 6079 __kmp_threads[gtid]->th.th_task_team = NULL; 6080 } 6081 6082 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid )); 6083 return; 6084 } 6085 } 6086 #if defined KMP_DYNAMIC_LIB 6087 // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber thread, 6088 // because we will better shutdown later in the library destructor. 6089 // The reason of this change is performance problem when non-openmp thread 6090 // in a loop forks and joins many openmp threads. We can save a lot of time 6091 // keeping worker threads alive until the program shutdown. 6092 // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and 6093 // Windows(DPD200287443) that occurs when using critical sections from foreign threads. 6094 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) ); 6095 return; 6096 #endif 6097 /* synchronize the termination process */ 6098 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 6099 6100 /* have we already finished */ 6101 if( __kmp_global.g.g_abort ) { 6102 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" )); 6103 /* TODO abort? */ 6104 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6105 return; 6106 } 6107 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { 6108 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6109 return; 6110 } 6111 6112 /* We need this lock to enforce mutex between this reading of 6113 __kmp_threads_capacity and the writing by __kmp_register_root. 6114 Alternatively, we can use a counter of roots that is 6115 atomically updated by __kmp_get_global_thread_id_reg, 6116 __kmp_do_serial_initialize and __kmp_internal_end_*. 6117 */ 6118 6119 /* should we finish the run-time? are all siblings done? */ 6120 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); 6121 6122 for ( i = 0; i < __kmp_threads_capacity; ++ i ) { 6123 if ( KMP_UBER_GTID( i ) ) { 6124 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i )); 6125 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 6126 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6127 return; 6128 }; 6129 } 6130 6131 /* now we can safely conduct the actual termination */ 6132 6133 __kmp_internal_end(); 6134 6135 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); 6136 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6137 6138 KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) ); 6139 6140 #ifdef DUMP_DEBUG_ON_EXIT 6141 if ( __kmp_debug_buf ) 6142 __kmp_dump_debug_buffer(); 6143 #endif 6144 } // __kmp_internal_end_thread 6145 6146 // ------------------------------------------------------------------------------------------------- 6147 // Library registration stuff. 6148 6149 static long __kmp_registration_flag = 0; 6150 // Random value used to indicate library initialization. 6151 static char * __kmp_registration_str = NULL; 6152 // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>. 
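// How the registration value is built and probed -- a simplified,
// self-contained sketch for illustration only; the real code below uses
// __kmp_str_format, __kmp_env_set with overwrite disabled, __kmp_str_split
// and __kmp_is_address_mapped, and "libomp.so" here stands in for
// KMP_LIBRARY_FILE:
#if 0
#include <stdio.h>
#include <string.h>

// The env var holds "<flag address>-<flag value>-<library file>".  Another
// copy of the runtime that finds this value can check whether the address
// is still mapped and still holds the same value: if so, the registering
// copy is alive (duplicate library); if not, the entry is stale and may be
// unset so registration can be retried.
static unsigned long reg_flag;   // stand-in for __kmp_registration_flag

static void build_value(char *buf, size_t n) {
    snprintf(buf, n, "%p-%lx-%s", (void *)&reg_flag, reg_flag, "libomp.so");
}

static int parse_value(const char *value, void **addr, unsigned long *val,
                       const char **file) {
    const char *p1 = strchr(value, '-');
    const char *p2 = p1 ? strchr(p1 + 1, '-') : NULL;
    if (p1 == NULL || p2 == NULL)
        return 0;                // unknown format: treat the owner as alive
    char a[64] = "", v[64] = "";
    snprintf(a, sizeof a, "%.*s", (int)(p1 - value), value);
    snprintf(v, sizeof v, "%.*s", (int)(p2 - p1 - 1), p1 + 1);
    *file = p2 + 1;
    return sscanf(a, "%p", addr) == 1 && sscanf(v, "%lx", val) == 1;
}
#endif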
6153 6154 6155 static inline 6156 char * 6157 __kmp_reg_status_name() { 6158 /* 6159 On RHEL 3u5 if linked statically, getpid() returns different values in each thread. 6160 If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case), 6161 the name of registered_lib_env env var can not be found, because the name will contain different pid. 6162 */ 6163 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() ); 6164 } // __kmp_reg_status_get 6165 6166 6167 void 6168 __kmp_register_library_startup( 6169 void 6170 ) { 6171 6172 char * name = __kmp_reg_status_name(); // Name of the environment variable. 6173 int done = 0; 6174 union { 6175 double dtime; 6176 long ltime; 6177 } time; 6178 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6179 __kmp_initialize_system_tick(); 6180 #endif 6181 __kmp_read_system_time( & time.dtime ); 6182 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL ); 6183 __kmp_registration_str = 6184 __kmp_str_format( 6185 "%p-%lx-%s", 6186 & __kmp_registration_flag, 6187 __kmp_registration_flag, 6188 KMP_LIBRARY_FILE 6189 ); 6190 6191 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) ); 6192 6193 while ( ! done ) { 6194 6195 char * value = NULL; // Actual value of the environment variable. 6196 6197 // Set environment variable, but do not overwrite if it is exist. 6198 __kmp_env_set( name, __kmp_registration_str, 0 ); 6199 // Check the variable is written. 6200 value = __kmp_env_get( name ); 6201 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { 6202 6203 done = 1; // Ok, environment variable set successfully, exit the loop. 6204 6205 } else { 6206 6207 // Oops. Write failed. Another copy of OpenMP RTL is in memory. 6208 // Check whether it alive or dead. 6209 int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. 6210 char * tail = value; 6211 char * flag_addr_str = NULL; 6212 char * flag_val_str = NULL; 6213 char const * file_name = NULL; 6214 __kmp_str_split( tail, '-', & flag_addr_str, & tail ); 6215 __kmp_str_split( tail, '-', & flag_val_str, & tail ); 6216 file_name = tail; 6217 if ( tail != NULL ) { 6218 long * flag_addr = 0; 6219 long flag_val = 0; 6220 KMP_SSCANF( flag_addr_str, "%p", & flag_addr ); 6221 KMP_SSCANF( flag_val_str, "%lx", & flag_val ); 6222 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) { 6223 // First, check whether environment-encoded address is mapped into addr space. 6224 // If so, dereference it to see if it still has the right value. 6225 6226 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) { 6227 neighbor = 1; 6228 } else { 6229 // If not, then we know the other copy of the library is no longer running. 6230 neighbor = 2; 6231 }; // if 6232 }; // if 6233 }; // if 6234 switch ( neighbor ) { 6235 case 0 : // Cannot parse environment variable -- neighbor status unknown. 6236 // Assume it is the incompatible format of future version of the library. 6237 // Assume the other library is alive. 6238 // WARN( ... ); // TODO: Issue a warning. 6239 file_name = "unknown library"; 6240 // Attention! Falling to the next case. That's intentional. 6241 case 1 : { // Neighbor is alive. 6242 // Check it is allowed. 6243 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" ); 6244 if ( ! __kmp_str_match_true( duplicate_ok ) ) { 6245 // That's not allowed. Issue fatal error. 
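// (Setting KMP_DUPLICATE_LIB_OK to a true value skips this fatal error and
// lets the duplicate copy proceed; __kmp_duplicate_library_ok is recorded
// just below.)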
6246 __kmp_msg( 6247 kmp_ms_fatal, 6248 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ), 6249 KMP_HNT( DuplicateLibrary ), 6250 __kmp_msg_null 6251 ); 6252 }; // if 6253 KMP_INTERNAL_FREE( duplicate_ok ); 6254 __kmp_duplicate_library_ok = 1; 6255 done = 1; // Exit the loop. 6256 } break; 6257 case 2 : { // Neighbor is dead. 6258 // Clear the variable and try to register library again. 6259 __kmp_env_unset( name ); 6260 } break; 6261 default : { 6262 KMP_DEBUG_ASSERT( 0 ); 6263 } break; 6264 }; // switch 6265 6266 }; // if 6267 KMP_INTERNAL_FREE( (void *) value ); 6268 6269 }; // while 6270 KMP_INTERNAL_FREE( (void *) name ); 6271 6272 } // func __kmp_register_library_startup 6273 6274 6275 void 6276 __kmp_unregister_library( void ) { 6277 6278 char * name = __kmp_reg_status_name(); 6279 char * value = __kmp_env_get( name ); 6280 6281 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 ); 6282 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL ); 6283 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { 6284 // Ok, this is our variable. Delete it. 6285 __kmp_env_unset( name ); 6286 }; // if 6287 6288 KMP_INTERNAL_FREE( __kmp_registration_str ); 6289 KMP_INTERNAL_FREE( value ); 6290 KMP_INTERNAL_FREE( name ); 6291 6292 __kmp_registration_flag = 0; 6293 __kmp_registration_str = NULL; 6294 6295 } // __kmp_unregister_library 6296 6297 6298 // End of Library registration stuff. 6299 // ------------------------------------------------------------------------------------------------- 6300 6301 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6302 6303 static void __kmp_check_mic_type() 6304 { 6305 kmp_cpuid_t cpuid_state = {0}; 6306 kmp_cpuid_t * cs_p = &cpuid_state; 6307 __kmp_x86_cpuid(1, 0, cs_p); 6308 // We don't support mic1 at the moment 6309 if( (cs_p->eax & 0xff0) == 0xB10 ) { 6310 __kmp_mic_type = mic2; 6311 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) { 6312 __kmp_mic_type = mic3; 6313 } else { 6314 __kmp_mic_type = non_mic; 6315 } 6316 } 6317 6318 #endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */ 6319 6320 static void 6321 __kmp_do_serial_initialize( void ) 6322 { 6323 int i, gtid; 6324 int size; 6325 6326 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) ); 6327 6328 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 ); 6329 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 ); 6330 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 ); 6331 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 ); 6332 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) ); 6333 6334 #if OMPT_SUPPORT 6335 ompt_pre_init(); 6336 #endif 6337 6338 __kmp_validate_locks(); 6339 6340 /* Initialize internal memory allocator */ 6341 __kmp_init_allocator(); 6342 6343 /* Register the library startup via an environment variable 6344 and check to see whether another copy of the library is already 6345 registered. 
*/ 6346 6347 __kmp_register_library_startup( ); 6348 6349 /* TODO reinitialization of library */ 6350 if( TCR_4(__kmp_global.g.g_done) ) { 6351 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) ); 6352 } 6353 6354 __kmp_global.g.g_abort = 0; 6355 TCW_SYNC_4(__kmp_global.g.g_done, FALSE); 6356 6357 /* initialize the locks */ 6358 #if KMP_USE_ADAPTIVE_LOCKS 6359 #if KMP_DEBUG_ADAPTIVE_LOCKS 6360 __kmp_init_speculative_stats(); 6361 #endif 6362 #endif 6363 #if KMP_STATS_ENABLED 6364 __kmp_stats_init(); 6365 #endif 6366 __kmp_init_lock( & __kmp_global_lock ); 6367 __kmp_init_queuing_lock( & __kmp_dispatch_lock ); 6368 __kmp_init_lock( & __kmp_debug_lock ); 6369 __kmp_init_atomic_lock( & __kmp_atomic_lock ); 6370 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i ); 6371 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i ); 6372 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i ); 6373 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r ); 6374 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i ); 6375 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r ); 6376 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c ); 6377 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r ); 6378 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r ); 6379 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c ); 6380 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c ); 6381 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c ); 6382 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock ); 6383 __kmp_init_bootstrap_lock( & __kmp_exit_lock ); 6384 #if KMP_USE_MONITOR 6385 __kmp_init_bootstrap_lock( & __kmp_monitor_lock ); 6386 #endif 6387 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock ); 6388 6389 /* conduct initialization and initial setup of configuration */ 6390 6391 __kmp_runtime_initialize(); 6392 6393 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6394 __kmp_check_mic_type(); 6395 #endif 6396 6397 // Some global variable initialization moved here from kmp_env_initialize() 6398 #ifdef KMP_DEBUG 6399 kmp_diag = 0; 6400 #endif 6401 __kmp_abort_delay = 0; 6402 6403 // From __kmp_init_dflt_team_nth() 6404 /* assume the entire machine will be used */ 6405 __kmp_dflt_team_nth_ub = __kmp_xproc; 6406 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) { 6407 __kmp_dflt_team_nth_ub = KMP_MIN_NTH; 6408 } 6409 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) { 6410 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; 6411 } 6412 __kmp_max_nth = __kmp_sys_max_nth; 6413 6414 // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part 6415 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; 6416 #if KMP_USE_MONITOR 6417 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); 6418 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); 6419 #endif 6420 // From "KMP_LIBRARY" part of __kmp_env_initialize() 6421 __kmp_library = library_throughput; 6422 // From KMP_SCHEDULE initialization 6423 __kmp_static = kmp_sch_static_balanced; 6424 // AC: do not use analytical here, because it is non-monotonous 6425 //__kmp_guided = kmp_sch_guided_iterative_chunked; 6426 //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeate assignment 6427 // Barrier initialization. 
Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method 6428 // control parts 6429 #if KMP_FAST_REDUCTION_BARRIER 6430 #define kmp_reduction_barrier_gather_bb ((int)1) 6431 #define kmp_reduction_barrier_release_bb ((int)1) 6432 #define kmp_reduction_barrier_gather_pat bp_hyper_bar 6433 #define kmp_reduction_barrier_release_pat bp_hyper_bar 6434 #endif // KMP_FAST_REDUCTION_BARRIER 6435 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) { 6436 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt; 6437 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt; 6438 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt; 6439 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt; 6440 #if KMP_FAST_REDUCTION_BARRIER 6441 if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1 6442 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb; 6443 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb; 6444 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat; 6445 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat; 6446 } 6447 #endif // KMP_FAST_REDUCTION_BARRIER 6448 } 6449 #if KMP_FAST_REDUCTION_BARRIER 6450 #undef kmp_reduction_barrier_release_pat 6451 #undef kmp_reduction_barrier_gather_pat 6452 #undef kmp_reduction_barrier_release_bb 6453 #undef kmp_reduction_barrier_gather_bb 6454 #endif // KMP_FAST_REDUCTION_BARRIER 6455 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 6456 if (__kmp_mic_type == mic2) { // KNC 6457 // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC 6458 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3; // plain gather 6459 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1; // forkjoin release 6460 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar; 6461 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar; 6462 } 6463 #if KMP_FAST_REDUCTION_BARRIER 6464 if (__kmp_mic_type == mic2) { // KNC 6465 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar; 6466 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar; 6467 } 6468 #endif 6469 #endif 6470 6471 // From KMP_CHECKS initialization 6472 #ifdef KMP_DEBUG 6473 __kmp_env_checks = TRUE; /* development versions have the extra checks */ 6474 #else 6475 __kmp_env_checks = FALSE; /* port versions do not have the extra checks */ 6476 #endif 6477 6478 // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization 6479 __kmp_foreign_tp = TRUE; 6480 6481 __kmp_global.g.g_dynamic = FALSE; 6482 __kmp_global.g.g_dynamic_mode = dynamic_default; 6483 6484 __kmp_env_initialize( NULL ); 6485 6486 // Print all messages in message catalog for testing purposes. 
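    // For example (debug builds only; the program name is illustrative and
    // __kmp_str_match_true() is assumed to accept the usual "1"/"true"
    // spellings):
    //     KMP_DUMP_CATALOG=1 ./my_omp_app
    // makes the block below dump the whole i18n message catalog during serial
    // initialization.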
6487 #ifdef KMP_DEBUG 6488 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" ); 6489 if ( __kmp_str_match_true( val ) ) { 6490 kmp_str_buf_t buffer; 6491 __kmp_str_buf_init( & buffer ); 6492 __kmp_i18n_dump_catalog( & buffer ); 6493 __kmp_printf( "%s", buffer.str ); 6494 __kmp_str_buf_free( & buffer ); 6495 }; // if 6496 __kmp_env_free( & val ); 6497 #endif 6498 6499 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub ); 6500 // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part 6501 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified); 6502 6503 // If the library is shut down properly, both pools must be NULL. Just in case, set them 6504 // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed. 6505 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL ); 6506 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL ); 6507 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL ); 6508 __kmp_thread_pool = NULL; 6509 __kmp_thread_pool_insert_pt = NULL; 6510 __kmp_team_pool = NULL; 6511 6512 /* Allocate all of the variable sized records */ 6513 /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */ 6514 /* Since allocation is cache-aligned, just add extra padding at the end */ 6515 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE; 6516 __kmp_threads = (kmp_info_t**) __kmp_allocate( size ); 6517 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity ); 6518 6519 /* init thread counts */ 6520 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and 6521 KMP_DEBUG_ASSERT( __kmp_nth == 0 ); // something was wrong in termination. 6522 __kmp_all_nth = 0; 6523 __kmp_nth = 0; 6524 6525 /* setup the uber master thread and hierarchy */ 6526 gtid = __kmp_register_root( TRUE ); 6527 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid )); 6528 KMP_ASSERT( KMP_UBER_GTID( gtid ) ); 6529 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) ); 6530 6531 KMP_MB(); /* Flush all pending memory write invalidates. */ 6532 6533 __kmp_common_initialize(); 6534 6535 #if KMP_OS_UNIX 6536 /* invoke the child fork handler */ 6537 __kmp_register_atfork(); 6538 #endif 6539 6540 #if ! defined KMP_DYNAMIC_LIB 6541 { 6542 /* Invoke the exit handler when the program finishes, only for static library. 6543 For dynamic library, we already have _fini and DllMain. 6544 */ 6545 int rc = atexit( __kmp_internal_end_atexit ); 6546 if ( rc != 0 ) { 6547 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null ); 6548 }; // if 6549 } 6550 #endif 6551 6552 #if KMP_HANDLE_SIGNALS 6553 #if KMP_OS_UNIX 6554 /* NOTE: make sure that this is called before the user installs 6555 * their own signal handlers so that the user handlers 6556 * are called first. this way they can return false, 6557 * not call our handler, avoid terminating the library, 6558 * and continue execution where they left off. 
*/ 6559 __kmp_install_signals( FALSE ); 6560 #endif /* KMP_OS_UNIX */ 6561 #if KMP_OS_WINDOWS 6562 __kmp_install_signals( TRUE ); 6563 #endif /* KMP_OS_WINDOWS */ 6564 #endif 6565 6566 /* we have finished the serial initialization */ 6567 __kmp_init_counter ++; 6568 6569 __kmp_init_serial = TRUE; 6570 6571 if (__kmp_settings) { 6572 __kmp_env_print(); 6573 } 6574 6575 #if OMP_40_ENABLED 6576 if (__kmp_display_env || __kmp_display_env_verbose) { 6577 __kmp_env_print_2(); 6578 } 6579 #endif // OMP_40_ENABLED 6580 6581 #if OMPT_SUPPORT 6582 ompt_post_init(); 6583 #endif 6584 6585 KMP_MB(); 6586 6587 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) ); 6588 } 6589 6590 void 6591 __kmp_serial_initialize( void ) 6592 { 6593 if ( __kmp_init_serial ) { 6594 return; 6595 } 6596 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 6597 if ( __kmp_init_serial ) { 6598 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6599 return; 6600 } 6601 __kmp_do_serial_initialize(); 6602 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6603 } 6604 6605 static void 6606 __kmp_do_middle_initialize( void ) 6607 { 6608 int i, j; 6609 int prev_dflt_team_nth; 6610 6611 if( !__kmp_init_serial ) { 6612 __kmp_do_serial_initialize(); 6613 } 6614 6615 KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) ); 6616 6617 // 6618 // Save the previous value for the __kmp_dflt_team_nth so that 6619 // we can avoid some reinitialization if it hasn't changed. 6620 // 6621 prev_dflt_team_nth = __kmp_dflt_team_nth; 6622 6623 #if KMP_AFFINITY_SUPPORTED 6624 // 6625 // __kmp_affinity_initialize() will try to set __kmp_ncores to the 6626 // number of cores on the machine. 6627 // 6628 __kmp_affinity_initialize(); 6629 6630 // 6631 // Run through the __kmp_threads array and set the affinity mask 6632 // for each root thread that is currently registered with the RTL. 6633 // 6634 for ( i = 0; i < __kmp_threads_capacity; i++ ) { 6635 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) { 6636 __kmp_affinity_set_init_mask( i, TRUE ); 6637 } 6638 } 6639 #endif /* KMP_AFFINITY_SUPPORTED */ 6640 6641 KMP_ASSERT( __kmp_xproc > 0 ); 6642 if ( __kmp_avail_proc == 0 ) { 6643 __kmp_avail_proc = __kmp_xproc; 6644 } 6645 6646 // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now 6647 j = 0; 6648 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) { 6649 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc; 6650 j++; 6651 } 6652 6653 if ( __kmp_dflt_team_nth == 0 ) { 6654 #ifdef KMP_DFLT_NTH_CORES 6655 // 6656 // Default #threads = #cores 6657 // 6658 __kmp_dflt_team_nth = __kmp_ncores; 6659 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n", 6660 __kmp_dflt_team_nth ) ); 6661 #else 6662 // 6663 // Default #threads = #available OS procs 6664 // 6665 __kmp_dflt_team_nth = __kmp_avail_proc; 6666 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n", 6667 __kmp_dflt_team_nth ) ); 6668 #endif /* KMP_DFLT_NTH_CORES */ 6669 } 6670 6671 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) { 6672 __kmp_dflt_team_nth = KMP_MIN_NTH; 6673 } 6674 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) { 6675 __kmp_dflt_team_nth = __kmp_sys_max_nth; 6676 } 6677 6678 // 6679 // There's no harm in continuing if the following check fails, 6680 // but it indicates an error in the previous logic. 
6681 // 6682 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub ); 6683 6684 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) { 6685 // 6686 // Run through the __kmp_threads array and set the num threads icv 6687 // for each root thread that is currently registered with the RTL 6688 // (which has not already explicitly set its nthreads-var with a 6689 // call to omp_set_num_threads()). 6690 // 6691 for ( i = 0; i < __kmp_threads_capacity; i++ ) { 6692 kmp_info_t *thread = __kmp_threads[ i ]; 6693 if ( thread == NULL ) continue; 6694 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue; 6695 6696 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth ); 6697 } 6698 } 6699 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", 6700 __kmp_dflt_team_nth) ); 6701 6702 #ifdef KMP_ADJUST_BLOCKTIME 6703 /* Adjust blocktime to zero if necessary */ 6704 /* now that __kmp_avail_proc is set */ 6705 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { 6706 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); 6707 if ( __kmp_nth > __kmp_avail_proc ) { 6708 __kmp_zero_bt = TRUE; 6709 } 6710 } 6711 #endif /* KMP_ADJUST_BLOCKTIME */ 6712 6713 /* we have finished middle initialization */ 6714 TCW_SYNC_4(__kmp_init_middle, TRUE); 6715 6716 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) ); 6717 } 6718 6719 void 6720 __kmp_middle_initialize( void ) 6721 { 6722 if ( __kmp_init_middle ) { 6723 return; 6724 } 6725 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 6726 if ( __kmp_init_middle ) { 6727 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6728 return; 6729 } 6730 __kmp_do_middle_initialize(); 6731 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6732 } 6733 6734 void 6735 __kmp_parallel_initialize( void ) 6736 { 6737 int gtid = __kmp_entry_gtid(); // this might be a new root 6738 6739 /* synchronize parallel initialization (for sibling) */ 6740 if( TCR_4(__kmp_init_parallel) ) return; 6741 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 6742 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; } 6743 6744 /* TODO reinitialization after we have already shut down */ 6745 if( TCR_4(__kmp_global.g.g_done) ) { 6746 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) ); 6747 __kmp_infinite_loop(); 6748 } 6749 6750 /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize 6751 would cause a deadlock. So we call __kmp_do_serial_initialize directly. 6752 */ 6753 if( !__kmp_init_middle ) { 6754 __kmp_do_middle_initialize(); 6755 } 6756 6757 /* begin initialization */ 6758 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) ); 6759 KMP_ASSERT( KMP_UBER_GTID( gtid ) ); 6760 6761 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6762 // 6763 // Save the FP control regs. 6764 // Worker threads will set theirs to these values at thread startup. 
6765 // 6766 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); 6767 __kmp_store_mxcsr( &__kmp_init_mxcsr ); 6768 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; 6769 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 6770 6771 #if KMP_OS_UNIX 6772 # if KMP_HANDLE_SIGNALS 6773 /* must be after __kmp_serial_initialize */ 6774 __kmp_install_signals( TRUE ); 6775 # endif 6776 #endif 6777 6778 __kmp_suspend_initialize(); 6779 6780 #if defined(USE_LOAD_BALANCE) 6781 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { 6782 __kmp_global.g.g_dynamic_mode = dynamic_load_balance; 6783 } 6784 #else 6785 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { 6786 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 6787 } 6788 #endif 6789 6790 if ( __kmp_version ) { 6791 __kmp_print_version_2(); 6792 } 6793 6794 /* we have finished parallel initialization */ 6795 TCW_SYNC_4(__kmp_init_parallel, TRUE); 6796 6797 KMP_MB(); 6798 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) ); 6799 6800 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 6801 } 6802 6803 6804 /* ------------------------------------------------------------------------ */ 6805 6806 void 6807 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, 6808 kmp_team_t *team ) 6809 { 6810 kmp_disp_t *dispatch; 6811 6812 KMP_MB(); 6813 6814 /* none of the threads have encountered any constructs, yet. */ 6815 this_thr->th.th_local.this_construct = 0; 6816 #if KMP_CACHE_MANAGE 6817 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived ); 6818 #endif /* KMP_CACHE_MANAGE */ 6819 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); 6820 KMP_DEBUG_ASSERT( dispatch ); 6821 KMP_DEBUG_ASSERT( team->t.t_dispatch ); 6822 //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] ); 6823 6824 dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ 6825 #if OMP_45_ENABLED 6826 dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */ 6827 #endif 6828 if( __kmp_env_consistency_check ) 6829 __kmp_push_parallel( gtid, team->t.t_ident ); 6830 6831 KMP_MB(); /* Flush all pending memory write invalidates. */ 6832 } 6833 6834 void 6835 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, 6836 kmp_team_t *team ) 6837 { 6838 if( __kmp_env_consistency_check ) 6839 __kmp_pop_parallel( gtid, team->t.t_ident ); 6840 6841 __kmp_finish_implicit_task(this_thr); 6842 } 6843 6844 int 6845 __kmp_invoke_task_func( int gtid ) 6846 { 6847 int rc; 6848 int tid = __kmp_tid_from_gtid( gtid ); 6849 kmp_info_t *this_thr = __kmp_threads[ gtid ]; 6850 kmp_team_t *team = this_thr->th.th_team; 6851 6852 __kmp_run_before_invoked_task( gtid, tid, this_thr, team ); 6853 #if USE_ITT_BUILD 6854 if ( __itt_stack_caller_create_ptr ) { 6855 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code 6856 } 6857 #endif /* USE_ITT_BUILD */ 6858 #if INCLUDE_SSC_MARKS 6859 SSC_MARK_INVOKING(); 6860 #endif 6861 6862 #if OMPT_SUPPORT 6863 void *dummy; 6864 void **exit_runtime_p; 6865 ompt_task_id_t my_task_id; 6866 ompt_parallel_id_t my_parallel_id; 6867 6868 if (ompt_enabled) { 6869 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]. 
6870 ompt_task_info.frame.exit_runtime_frame); 6871 } else { 6872 exit_runtime_p = &dummy; 6873 } 6874 6875 #if OMPT_TRACE 6876 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; 6877 my_parallel_id = team->t.ompt_team_info.parallel_id; 6878 if (ompt_enabled && 6879 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { 6880 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( 6881 my_parallel_id, my_task_id); 6882 } 6883 #endif 6884 #endif 6885 6886 { 6887 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); 6888 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); 6889 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn), 6890 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv 6891 #if OMPT_SUPPORT 6892 , exit_runtime_p 6893 #endif 6894 ); 6895 #if OMPT_SUPPORT 6896 *exit_runtime_p = NULL; 6897 #endif 6898 } 6899 6900 #if USE_ITT_BUILD 6901 if ( __itt_stack_caller_create_ptr ) { 6902 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code 6903 } 6904 #endif /* USE_ITT_BUILD */ 6905 __kmp_run_after_invoked_task( gtid, tid, this_thr, team ); 6906 6907 return rc; 6908 } 6909 6910 #if OMP_40_ENABLED 6911 void 6912 __kmp_teams_master( int gtid ) 6913 { 6914 // This routine is called by all master threads in teams construct 6915 kmp_info_t *thr = __kmp_threads[ gtid ]; 6916 kmp_team_t *team = thr->th.th_team; 6917 ident_t *loc = team->t.t_ident; 6918 thr->th.th_set_nproc = thr->th.th_teams_size.nth; 6919 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask ); 6920 KMP_DEBUG_ASSERT( thr->th.th_set_nproc ); 6921 KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", 6922 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) ); 6923 // Launch league of teams now, but not let workers execute 6924 // (they hang on fork barrier until next parallel) 6925 #if INCLUDE_SSC_MARKS 6926 SSC_MARK_FORKING(); 6927 #endif 6928 __kmp_fork_call( loc, gtid, fork_context_intel, 6929 team->t.t_argc, 6930 #if OMPT_SUPPORT 6931 (void *)thr->th.th_teams_microtask, // "unwrapped" task 6932 #endif 6933 (microtask_t)thr->th.th_teams_microtask, // "wrapped" task 6934 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 6935 NULL ); 6936 #if INCLUDE_SSC_MARKS 6937 SSC_MARK_JOINING(); 6938 #endif 6939 6940 // AC: last parameter "1" eliminates join barrier which won't work because 6941 // worker threads are in a fork barrier waiting for more parallel regions 6942 __kmp_join_call( loc, gtid 6943 #if OMPT_SUPPORT 6944 , fork_context_intel 6945 #endif 6946 , 1 ); 6947 } 6948 6949 int 6950 __kmp_invoke_teams_master( int gtid ) 6951 { 6952 kmp_info_t *this_thr = __kmp_threads[ gtid ]; 6953 kmp_team_t *team = this_thr->th.th_team; 6954 #if KMP_DEBUG 6955 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized ) 6956 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master ); 6957 #endif 6958 __kmp_run_before_invoked_task( gtid, 0, this_thr, team ); 6959 __kmp_teams_master( gtid ); 6960 __kmp_run_after_invoked_task( gtid, 0, this_thr, team ); 6961 return 1; 6962 } 6963 #endif /* OMP_40_ENABLED */ 6964 6965 /* this sets the requested number of threads for the next parallel region 6966 * encountered by this team */ 6967 /* since this should be enclosed in the forkjoin critical section it 6968 * should avoid race conditions with assymmetrical nested parallelism */ 6969 6970 void 6971 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads ) 6972 { 6973 kmp_info_t *thr = 
__kmp_threads[gtid]; 6974 6975 if( num_threads > 0 ) 6976 thr->th.th_set_nproc = num_threads; 6977 } 6978 6979 #if OMP_40_ENABLED 6980 6981 /* this sets the requested number of teams for the teams region and/or 6982 * the number of threads for the next parallel region encountered */ 6983 void 6984 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads ) 6985 { 6986 kmp_info_t *thr = __kmp_threads[gtid]; 6987 KMP_DEBUG_ASSERT(num_teams >= 0); 6988 KMP_DEBUG_ASSERT(num_threads >= 0); 6989 6990 if( num_teams == 0 ) 6991 num_teams = 1; // default number of teams is 1. 6992 if( num_teams > __kmp_max_nth ) { // if too many teams requested? 6993 if ( !__kmp_reserve_warn ) { 6994 __kmp_reserve_warn = 1; 6995 __kmp_msg( 6996 kmp_ms_warning, 6997 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ), 6998 KMP_HNT( Unset_ALL_THREADS ), 6999 __kmp_msg_null 7000 ); 7001 } 7002 num_teams = __kmp_max_nth; 7003 } 7004 // Set number of teams (number of threads in the outer "parallel" of the teams) 7005 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; 7006 7007 // Remember the number of threads for inner parallel regions 7008 if( num_threads == 0 ) { 7009 if( !TCR_4(__kmp_init_middle) ) 7010 __kmp_middle_initialize(); // get __kmp_avail_proc calculated 7011 num_threads = __kmp_avail_proc / num_teams; 7012 if( num_teams * num_threads > __kmp_max_nth ) { 7013 // adjust num_threads w/o warning as it is not user setting 7014 num_threads = __kmp_max_nth / num_teams; 7015 } 7016 } else { 7017 if( num_teams * num_threads > __kmp_max_nth ) { 7018 int new_threads = __kmp_max_nth / num_teams; 7019 if ( !__kmp_reserve_warn ) { // user asked for too many threads 7020 __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT 7021 __kmp_msg( 7022 kmp_ms_warning, 7023 KMP_MSG( CantFormThrTeam, num_threads, new_threads ), 7024 KMP_HNT( Unset_ALL_THREADS ), 7025 __kmp_msg_null 7026 ); 7027 } 7028 num_threads = new_threads; 7029 } 7030 } 7031 thr->th.th_teams_size.nth = num_threads; 7032 } 7033 7034 7035 // 7036 // Set the proc_bind var to use in the following parallel region. 7037 // 7038 void 7039 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind ) 7040 { 7041 kmp_info_t *thr = __kmp_threads[gtid]; 7042 thr->th.th_set_proc_bind = proc_bind; 7043 } 7044 7045 #endif /* OMP_40_ENABLED */ 7046 7047 /* Launch the worker threads into the microtask. */ 7048 7049 void 7050 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team ) 7051 { 7052 kmp_info_t *this_thr = __kmp_threads[gtid]; 7053 7054 #ifdef KMP_DEBUG 7055 int f; 7056 #endif /* KMP_DEBUG */ 7057 7058 KMP_DEBUG_ASSERT( team ); 7059 KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); 7060 KMP_ASSERT( KMP_MASTER_GTID(gtid) ); 7061 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 7062 7063 team->t.t_construct = 0; /* no single directives seen yet */ 7064 team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */ 7065 7066 /* Reset the identifiers on the dispatch buffer */ 7067 KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); 7068 if ( team->t.t_max_nproc > 1 ) { 7069 int i; 7070 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) { 7071 team->t.t_disp_buffer[ i ].buffer_index = i; 7072 #if OMP_45_ENABLED 7073 team->t.t_disp_buffer[i].doacross_buf_idx = i; 7074 #endif 7075 } 7076 } else { 7077 team->t.t_disp_buffer[ 0 ].buffer_index = 0; 7078 #if OMP_45_ENABLED 7079 team->t.t_disp_buffer[0].doacross_buf_idx = 0; 7080 #endif 7081 } 7082 7083 KMP_MB(); /* Flush all pending memory write invalidates. */ 7084 KMP_ASSERT( this_thr->th.th_team == team ); 7085 7086 #ifdef KMP_DEBUG 7087 for( f=0 ; f<team->t.t_nproc ; f++ ) { 7088 KMP_DEBUG_ASSERT( team->t.t_threads[f] && 7089 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); 7090 } 7091 #endif /* KMP_DEBUG */ 7092 7093 /* release the worker threads so they may begin working */ 7094 __kmp_fork_barrier( gtid, 0 ); 7095 } 7096 7097 7098 void 7099 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team ) 7100 { 7101 kmp_info_t *this_thr = __kmp_threads[gtid]; 7102 7103 KMP_DEBUG_ASSERT( team ); 7104 KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); 7105 KMP_ASSERT( KMP_MASTER_GTID(gtid) ); 7106 KMP_MB(); /* Flush all pending memory write invalidates. */ 7107 7108 /* Join barrier after fork */ 7109 7110 #ifdef KMP_DEBUG 7111 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) { 7112 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]); 7113 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n", 7114 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc); 7115 __kmp_print_structure(); 7116 } 7117 KMP_DEBUG_ASSERT( __kmp_threads[gtid] && 7118 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc ); 7119 #endif /* KMP_DEBUG */ 7120 7121 __kmp_join_barrier( gtid ); /* wait for everyone */ 7122 7123 KMP_MB(); /* Flush all pending memory write invalidates. */ 7124 KMP_ASSERT( this_thr->th.th_team == team ); 7125 } 7126 7127 7128 /* ------------------------------------------------------------------------ */ 7129 /* ------------------------------------------------------------------------ */ 7130 7131 #ifdef USE_LOAD_BALANCE 7132 7133 // 7134 // Return the worker threads actively spinning in the hot team, if we 7135 // are at the outermost level of parallelism. Otherwise, return 0. 7136 // 7137 static int 7138 __kmp_active_hot_team_nproc( kmp_root_t *root ) 7139 { 7140 int i; 7141 int retval; 7142 kmp_team_t *hot_team; 7143 7144 if ( root->r.r_active ) { 7145 return 0; 7146 } 7147 hot_team = root->r.r_hot_team; 7148 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { 7149 return hot_team->t.t_nproc - 1; // Don't count master thread 7150 } 7151 7152 // 7153 // Skip the master thread - it is accounted for elsewhere. 7154 // 7155 retval = 0; 7156 for ( i = 1; i < hot_team->t.t_nproc; i++ ) { 7157 if ( hot_team->t.t_threads[i]->th.th_active ) { 7158 retval++; 7159 } 7160 } 7161 return retval; 7162 } 7163 7164 // 7165 // Perform an automatic adjustment to the number of 7166 // threads used by the next parallel region. 
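// A worked example with illustrative numbers (not measurements): if
// __kmp_avail_proc = 16, two threads are active in the thread pool and three
// are active in the hot team, then team_curr_active = 2 + 3 + 1 = 6. Should
// __kmp_get_load_balance() report 10 running threads system-wide, the
// suggested team size is 16 - 10 + 6 = 12, which is then clamped to the
// requested set_nproc and to at least KMP_MIN_NTH.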
7167 // 7168 static int 7169 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc ) 7170 { 7171 int retval; 7172 int pool_active; 7173 int hot_team_active; 7174 int team_curr_active; 7175 int system_active; 7176 7177 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", 7178 root, set_nproc ) ); 7179 KMP_DEBUG_ASSERT( root ); 7180 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE ); 7181 KMP_DEBUG_ASSERT( set_nproc > 1 ); 7182 7183 if ( set_nproc == 1) { 7184 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) ); 7185 return 1; 7186 } 7187 7188 // 7189 // Threads that are active in the thread pool, active in the hot team 7190 // for this particular root (if we are at the outer par level), and 7191 // the currently executing thread (to become the master) are available 7192 // to add to the new team, but are currently contributing to the system 7193 // load, and must be accounted for. 7194 // 7195 pool_active = TCR_4(__kmp_thread_pool_active_nth); 7196 hot_team_active = __kmp_active_hot_team_nproc( root ); 7197 team_curr_active = pool_active + hot_team_active + 1; 7198 7199 // 7200 // Check the system load. 7201 // 7202 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active ); 7203 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n", 7204 system_active, pool_active, hot_team_active ) ); 7205 7206 if ( system_active < 0 ) { 7207 // 7208 // There was an error reading the necessary info from /proc, 7209 // so use the thread limit algorithm instead. Once we set 7210 // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, 7211 // we shouldn't wind up getting back here. 7212 // 7213 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; 7214 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" ); 7215 7216 // 7217 // Make this call behave like the thread limit algorithm. 7218 // 7219 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1 7220 : root->r.r_hot_team->t.t_nproc); 7221 if ( retval > set_nproc ) { 7222 retval = set_nproc; 7223 } 7224 if ( retval < KMP_MIN_NTH ) { 7225 retval = KMP_MIN_NTH; 7226 } 7227 7228 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) ); 7229 return retval; 7230 } 7231 7232 // 7233 // There is a slight delay in the load balance algorithm in detecting 7234 // new running procs. The real system load at this instant should be 7235 // at least as large as the #active omp thread that are available to 7236 // add to the team. 7237 // 7238 if ( system_active < team_curr_active ) { 7239 system_active = team_curr_active; 7240 } 7241 retval = __kmp_avail_proc - system_active + team_curr_active; 7242 if ( retval > set_nproc ) { 7243 retval = set_nproc; 7244 } 7245 if ( retval < KMP_MIN_NTH ) { 7246 retval = KMP_MIN_NTH; 7247 } 7248 7249 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. 
retval:%d\n", retval ) ); 7250 return retval; 7251 } // __kmp_load_balance_nproc() 7252 7253 #endif /* USE_LOAD_BALANCE */ 7254 7255 /* ------------------------------------------------------------------------ */ 7256 /* ------------------------------------------------------------------------ */ 7257 7258 /* NOTE: this is called with the __kmp_init_lock held */ 7259 void 7260 __kmp_cleanup( void ) 7261 { 7262 int f; 7263 7264 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) ); 7265 7266 if (TCR_4(__kmp_init_parallel)) { 7267 #if KMP_HANDLE_SIGNALS 7268 __kmp_remove_signals(); 7269 #endif 7270 TCW_4(__kmp_init_parallel, FALSE); 7271 } 7272 7273 if (TCR_4(__kmp_init_middle)) { 7274 #if KMP_AFFINITY_SUPPORTED 7275 __kmp_affinity_uninitialize(); 7276 #endif /* KMP_AFFINITY_SUPPORTED */ 7277 __kmp_cleanup_hierarchy(); 7278 TCW_4(__kmp_init_middle, FALSE); 7279 } 7280 7281 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) ); 7282 7283 if (__kmp_init_serial) { 7284 __kmp_runtime_destroy(); 7285 __kmp_init_serial = FALSE; 7286 } 7287 7288 for ( f = 0; f < __kmp_threads_capacity; f++ ) { 7289 if ( __kmp_root[ f ] != NULL ) { 7290 __kmp_free( __kmp_root[ f ] ); 7291 __kmp_root[ f ] = NULL; 7292 } 7293 } 7294 __kmp_free( __kmp_threads ); 7295 // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in 7296 // freeing __kmp_root. 7297 __kmp_threads = NULL; 7298 __kmp_root = NULL; 7299 __kmp_threads_capacity = 0; 7300 7301 #if KMP_USE_DYNAMIC_LOCK 7302 __kmp_cleanup_indirect_user_locks(); 7303 #else 7304 __kmp_cleanup_user_locks(); 7305 #endif 7306 7307 #if KMP_AFFINITY_SUPPORTED 7308 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file ); 7309 __kmp_cpuinfo_file = NULL; 7310 #endif /* KMP_AFFINITY_SUPPORTED */ 7311 7312 #if KMP_USE_ADAPTIVE_LOCKS 7313 #if KMP_DEBUG_ADAPTIVE_LOCKS 7314 __kmp_print_speculative_stats(); 7315 #endif 7316 #endif 7317 KMP_INTERNAL_FREE( __kmp_nested_nth.nth ); 7318 __kmp_nested_nth.nth = NULL; 7319 __kmp_nested_nth.size = 0; 7320 __kmp_nested_nth.used = 0; 7321 KMP_INTERNAL_FREE( __kmp_nested_proc_bind.bind_types ); 7322 __kmp_nested_proc_bind.bind_types = NULL; 7323 __kmp_nested_proc_bind.size = 0; 7324 __kmp_nested_proc_bind.used = 0; 7325 7326 __kmp_i18n_catclose(); 7327 7328 #if KMP_STATS_ENABLED 7329 __kmp_stats_fini(); 7330 #endif 7331 7332 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) ); 7333 } 7334 7335 /* ------------------------------------------------------------------------ */ 7336 /* ------------------------------------------------------------------------ */ 7337 7338 int 7339 __kmp_ignore_mppbeg( void ) 7340 { 7341 char *env; 7342 7343 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) { 7344 if (__kmp_str_match_false( env )) 7345 return FALSE; 7346 } 7347 // By default __kmpc_begin() is no-op. 7348 return TRUE; 7349 } 7350 7351 int 7352 __kmp_ignore_mppend( void ) 7353 { 7354 char *env; 7355 7356 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) { 7357 if (__kmp_str_match_false( env )) 7358 return FALSE; 7359 } 7360 // By default __kmpc_end() is no-op. 
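    // For example (illustrative; assumes __kmp_str_match_false() accepts the
    // usual "false"/"0" spellings), running with
    //     KMP_IGNORE_MPPEND=false
    // makes the check above return FALSE, so __kmpc_end() is no longer treated
    // as a no-op. KMP_IGNORE_MPPBEG works the same way for __kmpc_begin().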
7361 return TRUE; 7362 } 7363 7364 void 7365 __kmp_internal_begin( void ) 7366 { 7367 int gtid; 7368 kmp_root_t *root; 7369 7370 /* this is a very important step as it will register new sibling threads 7371 * and assign these new uber threads a new gtid */ 7372 gtid = __kmp_entry_gtid(); 7373 root = __kmp_threads[ gtid ]->th.th_root; 7374 KMP_ASSERT( KMP_UBER_GTID( gtid )); 7375 7376 if( root->r.r_begin ) return; 7377 __kmp_acquire_lock( &root->r.r_begin_lock, gtid ); 7378 if( root->r.r_begin ) { 7379 __kmp_release_lock( & root->r.r_begin_lock, gtid ); 7380 return; 7381 } 7382 7383 root->r.r_begin = TRUE; 7384 7385 __kmp_release_lock( & root->r.r_begin_lock, gtid ); 7386 } 7387 7388 7389 /* ------------------------------------------------------------------------ */ 7390 /* ------------------------------------------------------------------------ */ 7391 7392 void 7393 __kmp_user_set_library (enum library_type arg) 7394 { 7395 int gtid; 7396 kmp_root_t *root; 7397 kmp_info_t *thread; 7398 7399 /* first, make sure we are initialized so we can get our gtid */ 7400 7401 gtid = __kmp_entry_gtid(); 7402 thread = __kmp_threads[ gtid ]; 7403 7404 root = thread->th.th_root; 7405 7406 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial )); 7407 if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */ 7408 KMP_WARNING( SetLibraryIncorrectCall ); 7409 return; 7410 } 7411 7412 switch ( arg ) { 7413 case library_serial : 7414 thread->th.th_set_nproc = 0; 7415 set__nproc( thread, 1 ); 7416 break; 7417 case library_turnaround : 7418 thread->th.th_set_nproc = 0; 7419 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); 7420 break; 7421 case library_throughput : 7422 thread->th.th_set_nproc = 0; 7423 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); 7424 break; 7425 default: 7426 KMP_FATAL( UnknownLibraryType, arg ); 7427 } 7428 7429 __kmp_aux_set_library ( arg ); 7430 } 7431 7432 void 7433 __kmp_aux_set_stacksize( size_t arg ) 7434 { 7435 if (! __kmp_init_serial) 7436 __kmp_serial_initialize(); 7437 7438 #if KMP_OS_DARWIN 7439 if (arg & (0x1000 - 1)) { 7440 arg &= ~(0x1000 - 1); 7441 if(arg + 0x1000) /* check for overflow if we round up */ 7442 arg += 0x1000; 7443 } 7444 #endif 7445 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); 7446 7447 /* only change the default stacksize before the first parallel region */ 7448 if (! TCR_4(__kmp_init_parallel)) { 7449 size_t value = arg; /* argument is in bytes */ 7450 7451 if (value < __kmp_sys_min_stksize ) 7452 value = __kmp_sys_min_stksize ; 7453 else if (value > KMP_MAX_STKSIZE) 7454 value = KMP_MAX_STKSIZE; 7455 7456 __kmp_stksize = value; 7457 7458 __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */ 7459 } 7460 7461 __kmp_release_bootstrap_lock( &__kmp_initz_lock ); 7462 } 7463 7464 /* set the behaviour of the runtime library */ 7465 /* TODO this can cause some odd behaviour with sibling parallelism... 
*/ 7466 void 7467 __kmp_aux_set_library (enum library_type arg) 7468 { 7469 __kmp_library = arg; 7470 7471 switch ( __kmp_library ) { 7472 case library_serial : 7473 { 7474 KMP_INFORM( LibraryIsSerial ); 7475 (void) __kmp_change_library( TRUE ); 7476 } 7477 break; 7478 case library_turnaround : 7479 (void) __kmp_change_library( TRUE ); 7480 break; 7481 case library_throughput : 7482 (void) __kmp_change_library( FALSE ); 7483 break; 7484 default: 7485 KMP_FATAL( UnknownLibraryType, arg ); 7486 } 7487 } 7488 7489 /* ------------------------------------------------------------------------ */ 7490 /* ------------------------------------------------------------------------ */ 7491 7492 void 7493 __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid) 7494 { 7495 int blocktime = arg; /* argument is in milliseconds */ 7496 #if KMP_USE_MONITOR 7497 int bt_intervals; 7498 #endif 7499 int bt_set; 7500 7501 __kmp_save_internal_controls( thread ); 7502 7503 /* Normalize and set blocktime for the teams */ 7504 if (blocktime < KMP_MIN_BLOCKTIME) 7505 blocktime = KMP_MIN_BLOCKTIME; 7506 else if (blocktime > KMP_MAX_BLOCKTIME) 7507 blocktime = KMP_MAX_BLOCKTIME; 7508 7509 set__blocktime_team( thread->th.th_team, tid, blocktime ); 7510 set__blocktime_team( thread->th.th_serial_team, 0, blocktime ); 7511 7512 #if KMP_USE_MONITOR 7513 /* Calculate and set blocktime intervals for the teams */ 7514 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); 7515 7516 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals ); 7517 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals ); 7518 #endif 7519 7520 /* Set whether blocktime has been set to "TRUE" */ 7521 bt_set = TRUE; 7522 7523 set__bt_set_team( thread->th.th_team, tid, bt_set ); 7524 set__bt_set_team( thread->th.th_serial_team, 0, bt_set ); 7525 #if KMP_USE_MONITOR 7526 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " 7527 "bt_intervals=%d, monitor_updates=%d\n", 7528 __kmp_gtid_from_tid(tid, thread->th.th_team), 7529 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, 7530 __kmp_monitor_wakeups)); 7531 #else 7532 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", 7533 __kmp_gtid_from_tid(tid, thread->th.th_team), 7534 thread->th.th_team->t.t_id, tid, blocktime)); 7535 #endif 7536 } 7537 7538 void 7539 __kmp_aux_set_defaults( 7540 char const * str, 7541 int len 7542 ) { 7543 if ( ! 
__kmp_init_serial ) { 7544 __kmp_serial_initialize(); 7545 }; 7546 __kmp_env_initialize( str ); 7547 7548 if (__kmp_settings 7549 #if OMP_40_ENABLED 7550 || __kmp_display_env || __kmp_display_env_verbose 7551 #endif // OMP_40_ENABLED 7552 ) { 7553 __kmp_env_print(); 7554 } 7555 } // __kmp_aux_set_defaults 7556 7557 /* ------------------------------------------------------------------------ */ 7558 7559 /* 7560 * internal fast reduction routines 7561 */ 7562 7563 PACKED_REDUCTION_METHOD_T 7564 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, 7565 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 7566 kmp_critical_name *lck ) 7567 { 7568 7569 // Default reduction method: critical construct ( lck != NULL, like in current PAROPT ) 7570 // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL 7571 // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL 7572 // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT. 7573 7574 PACKED_REDUCTION_METHOD_T retval; 7575 7576 int team_size; 7577 7578 KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 ) 7579 KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 ) 7580 7581 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) ) 7582 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) ) 7583 7584 retval = critical_reduce_block; 7585 7586 team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic deference ) is slower 7587 7588 if( team_size == 1 ) { 7589 7590 retval = empty_reduce_block; 7591 7592 } else { 7593 7594 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; 7595 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 7596 7597 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 7598 7599 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN 7600 7601 int teamsize_cutoff = 4; 7602 7603 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 7604 if( __kmp_mic_type != non_mic ) { 7605 teamsize_cutoff = 8; 7606 } 7607 #endif 7608 if( tree_available ) { 7609 if( team_size <= teamsize_cutoff ) { 7610 if ( atomic_available ) { 7611 retval = atomic_reduce_block; 7612 } 7613 } else { 7614 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; 7615 } 7616 } else if ( atomic_available ) { 7617 retval = atomic_reduce_block; 7618 } 7619 #else 7620 #error "Unknown or unsupported OS" 7621 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN 7622 7623 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS 7624 7625 #if KMP_OS_LINUX || KMP_OS_WINDOWS 7626 7627 // basic tuning 7628 7629 if( atomic_available ) { 7630 if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ??? 
7631 retval = atomic_reduce_block; 7632 } 7633 } // otherwise: use critical section 7634 7635 #elif KMP_OS_DARWIN 7636 7637 if( atomic_available && ( num_vars <= 3 ) ) { 7638 retval = atomic_reduce_block; 7639 } else if( tree_available ) { 7640 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) { 7641 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; 7642 } 7643 } // otherwise: use critical section 7644 7645 #else 7646 #error "Unknown or unsupported OS" 7647 #endif 7648 7649 #else 7650 #error "Unknown or unsupported architecture" 7651 #endif 7652 7653 } 7654 7655 // KMP_FORCE_REDUCTION 7656 7657 // If the team is serialized (team_size == 1), ignore the forced reduction 7658 // method and stay with the unsynchronized method (empty_reduce_block) 7659 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) { 7660 7661 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block; 7662 7663 int atomic_available, tree_available; 7664 7665 switch( ( forced_retval = __kmp_force_reduction_method ) ) 7666 { 7667 case critical_reduce_block: 7668 KMP_ASSERT( lck ); // lck should be != 0 7669 break; 7670 7671 case atomic_reduce_block: 7672 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; 7673 if( ! atomic_available ) { 7674 KMP_WARNING(RedMethodNotSupported, "atomic"); 7675 forced_retval = critical_reduce_block; 7676 } 7677 break; 7678 7679 case tree_reduce_block: 7680 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; 7681 if( ! tree_available ) { 7682 KMP_WARNING(RedMethodNotSupported, "tree"); 7683 forced_retval = critical_reduce_block; 7684 } else { 7685 #if KMP_FAST_REDUCTION_BARRIER 7686 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; 7687 #endif 7688 } 7689 break; 7690 7691 default: 7692 KMP_ASSERT( 0 ); // "unsupported method specified" 7693 } 7694 7695 retval = forced_retval; 7696 } 7697 7698 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) ); 7699 7700 #undef FAST_REDUCTION_TREE_METHOD_GENERATED 7701 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED 7702 7703 return ( retval ); 7704 } 7705 7706 // this function is for testing set/get/determine reduce method 7707 kmp_int32 7708 __kmp_get_reduce_method( void ) { 7709 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 ); 7710 } 7711 7712 /* ------------------------------------------------------------------------ */ 7713
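/*
 * Illustrative summary (not normative) of __kmp_determine_reduction_method()
 * above for the 64-bit branch; the cutoff values come from the code, and the
 * ordering below is just a restatement of it:
 *
 *   team_size == 1                                   -> empty_reduce_block
 *   tree reduction generated (reduce_data && reduce_func)
 *     and team_size > teamsize_cutoff (4, or 8 on MIC)
 *                                                    -> TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER
 *   atomic reduction generated (KMP_IDENT_ATOMIC_REDUCE in loc->flags)
 *                                                    -> atomic_reduce_block
 *   otherwise                                        -> critical_reduce_block
 *
 * KMP_FORCE_REDUCTION (__kmp_force_reduction_method) overrides this choice
 * unless the team is serialized, and the testing hook __kmp_get_reduce_method()
 * returns the selected packed method shifted right by 8 bits.
 */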