/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, because the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
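 *
 * In debug builds it additionally applies the parallel-range filter
 * (__kmp_par_range and related settings), which parses the location string.
 * The @c psource field is expected to be a semicolon-separated record; an
 * illustrative (not normative) example of the layout the parser assumes:
 * @code
 * loc->psource == ";file.c;routine_name;42;0;;"
 * @endcode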
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
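
The compiler is expected to outline the body of the parallel region into a
microtask and pass the addresses of the shared variables after @p microtask.
A minimal, illustrative sketch of the expected lowering (identifier names are
hypothetical, not tied to any particular compiler):
@code
// #pragma omp parallel shared(a)
//   a += 1;
void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *a) { *a += 1; }
// ... at the call site:
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &a);
@endcode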
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
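
A hedged sketch of the expected lowering of a teams construct with clauses
(identifier names are illustrative):
@code
// #pragma omp teams num_teams(4) thread_limit(8)
__kmpc_push_num_teams(&loc, gtid, 4, 8);
__kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams);
@endcode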
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  __kmp_free(tmp);
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
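
A hedged sketch of how a compiler might use this pair for the serial branch
(the outlined routine and variable names are illustrative):
@code
if (condition) {
  __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
} else {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  kmp_int32 zero = 0;
  __kmpc_serialized_parallel(&loc, gtid);
  outlined(&gtid, &zero); // body executed directly by the encountering thread
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode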
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset/clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
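
The expected compiler lowering is a single runtime call; a minimal,
illustrative sketch:
@code
// #pragma omp flush
__kmpc_flush(&loc);
@endcode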
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction also needs to be addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  //   this function is called when 'barrier' directive is present or
  //   implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
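
A typical lowering brackets the guarded block with this call and
__kmpc_end_master (illustrative sketch):
@code
if (__kmpc_master(&loc, gtid)) {
  // body of #pragma omp master
  __kmpc_end_master(&loc, gtid);
}
@endcode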
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }
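
// Note on the encoding used by the futex fast path above: while the lock is
// held the poll word contains (gtid + 1) << 1, and bit 0 is set once a waiter
// has published its intent to sleep in FUTEX_WAIT, so the release path only
// needs to issue a FUTEX_WAKE syscall when that bit is set.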

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// is a chance the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
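
A compiler typically materializes one zero-initialized kmp_critical_name per
named critical section and brackets the protected region; a hedged,
illustrative sketch:
@code
static kmp_critical_name crit_for_name; // one per critical name
__kmpc_critical(&loc, gtid, &crit_for_name);
// body of #pragma omp critical(name)
__kmpc_end_critical(&loc, gtid, &crit_for_name);
@endcode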
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
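
A hedged sketch of the expected lowering of a hinted critical construct
(identifier names are illustrative):
@code
static kmp_critical_name crit_for_name;
// #pragma omp critical(name) hint(omp_lock_hint_speculative)
__kmpc_critical_with_hint(&loc, gtid, &crit_for_name,
                          omp_lock_hint_speculative);
// ... protected region ...
__kmpc_end_critical(&loc, gtid, &crit_for_name);
@endcode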
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
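
A hedged sketch of the expected pairing (illustrative only):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // code executed only by the master thread
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode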
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the barrier here is not split:
all threads are released from it as soon as it completes, so no later release
call is required.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */

    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */

      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls, rather the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
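
The expected pairing with __kmpc_single is (illustrative sketch):
@code
if (__kmpc_single(&loc, gtid)) {
  // body of #pragma omp single
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // omitted when a nowait clause is present
@endcode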
1804 */ 1805 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1806 __kmp_exit_single(global_tid); 1807 KMP_POP_PARTITIONED_TIMER(); 1808 1809 #if OMPT_SUPPORT && OMPT_OPTIONAL 1810 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1811 kmp_team_t *team = this_thr->th.th_team; 1812 int tid = __kmp_tid_from_gtid(global_tid); 1813 1814 if (ompt_enabled.ompt_callback_work) { 1815 ompt_callbacks.ompt_callback(ompt_callback_work)( 1816 ompt_work_single_executor, ompt_scope_end, 1817 &(team->t.ompt_team_info.parallel_data), 1818 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1819 OMPT_GET_RETURN_ADDRESS(0)); 1820 } 1821 #endif 1822 } 1823 1824 /*! 1825 @ingroup WORK_SHARING 1826 @param loc Source location 1827 @param global_tid Global thread id 1828 1829 Mark the end of a statically scheduled loop. 1830 */ 1831 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1832 KMP_POP_PARTITIONED_TIMER(); 1833 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1834 1835 #if OMPT_SUPPORT && OMPT_OPTIONAL 1836 if (ompt_enabled.ompt_callback_work) { 1837 ompt_work_t ompt_work_type = ompt_work_loop; 1838 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1839 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1840 // Determine workshare type 1841 if (loc != NULL) { 1842 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1843 ompt_work_type = ompt_work_loop; 1844 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1845 ompt_work_type = ompt_work_sections; 1846 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1847 ompt_work_type = ompt_work_distribute; 1848 } else { 1849 // use default set above. 1850 // a warning about this case is provided in __kmpc_for_static_init 1851 } 1852 KMP_DEBUG_ASSERT(ompt_work_type); 1853 } 1854 ompt_callbacks.ompt_callback(ompt_callback_work)( 1855 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1856 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1857 } 1858 #endif 1859 if (__kmp_env_consistency_check) 1860 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1861 } 1862 1863 // User routines which take C-style arguments (call by value) 1864 // different from the Fortran equivalent routines 1865 1866 void ompc_set_num_threads(int arg) { 1867 // !!!!! TODO: check the per-task binding 1868 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1869 } 1870 1871 void ompc_set_dynamic(int flag) { 1872 kmp_info_t *thread; 1873 1874 /* For the thread-private implementation of the internal controls */ 1875 thread = __kmp_entry_thread(); 1876 1877 __kmp_save_internal_controls(thread); 1878 1879 set__dynamic(thread, flag ? TRUE : FALSE); 1880 } 1881 1882 void ompc_set_nested(int flag) { 1883 kmp_info_t *thread; 1884 1885 /* For the thread-private internal controls implementation */ 1886 thread = __kmp_entry_thread(); 1887 1888 __kmp_save_internal_controls(thread); 1889 1890 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1891 } 1892 1893 void ompc_set_max_active_levels(int max_active_levels) { 1894 /* TO DO */ 1895 /* we want per-task implementation of this internal control */ 1896 1897 /* For the per-thread internal controls implementation */ 1898 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1899 } 1900 1901 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1902 // !!!!! 
TODO: check the per-task binding 1903 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1904 } 1905 1906 int ompc_get_ancestor_thread_num(int level) { 1907 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1908 } 1909 1910 int ompc_get_team_size(int level) { 1911 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1912 } 1913 1914 #if OMP_50_ENABLED 1915 /* OpenMP 5.0 Affinity Format API */ 1916 1917 void ompc_set_affinity_format(char const *format) { 1918 if (!__kmp_init_serial) { 1919 __kmp_serial_initialize(); 1920 } 1921 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1922 format, KMP_STRLEN(format) + 1); 1923 } 1924 1925 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1926 size_t format_size; 1927 if (!__kmp_init_serial) { 1928 __kmp_serial_initialize(); 1929 } 1930 format_size = KMP_STRLEN(__kmp_affinity_format); 1931 if (buffer && size) { 1932 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1933 format_size + 1); 1934 } 1935 return format_size; 1936 } 1937 1938 void ompc_display_affinity(char const *format) { 1939 int gtid; 1940 if (!TCR_4(__kmp_init_middle)) { 1941 __kmp_middle_initialize(); 1942 } 1943 gtid = __kmp_get_gtid(); 1944 __kmp_aux_display_affinity(gtid, format); 1945 } 1946 1947 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1948 char const *format) { 1949 int gtid; 1950 size_t num_required; 1951 kmp_str_buf_t capture_buf; 1952 if (!TCR_4(__kmp_init_middle)) { 1953 __kmp_middle_initialize(); 1954 } 1955 gtid = __kmp_get_gtid(); 1956 __kmp_str_buf_init(&capture_buf); 1957 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1958 if (buffer && buf_size) { 1959 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1960 capture_buf.used + 1); 1961 } 1962 __kmp_str_buf_free(&capture_buf); 1963 return num_required; 1964 } 1965 #endif /* OMP_50_ENABLED */ 1966 1967 void kmpc_set_stacksize(int arg) { 1968 // __kmp_aux_set_stacksize initializes the library if needed 1969 __kmp_aux_set_stacksize(arg); 1970 } 1971 1972 void kmpc_set_stacksize_s(size_t arg) { 1973 // __kmp_aux_set_stacksize initializes the library if needed 1974 __kmp_aux_set_stacksize(arg); 1975 } 1976 1977 void kmpc_set_blocktime(int arg) { 1978 int gtid, tid; 1979 kmp_info_t *thread; 1980 1981 gtid = __kmp_entry_gtid(); 1982 tid = __kmp_tid_from_gtid(gtid); 1983 thread = __kmp_thread_from_gtid(gtid); 1984 1985 __kmp_aux_set_blocktime(arg, thread, tid); 1986 } 1987 1988 void kmpc_set_library(int arg) { 1989 // __kmp_user_set_library initializes the library if needed 1990 __kmp_user_set_library((enum library_type)arg); 1991 } 1992 1993 void kmpc_set_defaults(char const *str) { 1994 // __kmp_aux_set_defaults initializes the library if needed 1995 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1996 } 1997 1998 void kmpc_set_disp_num_buffers(int arg) { 1999 // ignore after initialization because some teams have already 2000 // allocated dispatch buffers 2001 if (__kmp_init_serial == 0 && arg > 0) 2002 __kmp_dispatch_num_buffers = arg; 2003 } 2004 2005 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2006 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2007 return -1; 2008 #else 2009 if (!TCR_4(__kmp_init_middle)) { 2010 __kmp_middle_initialize(); 2011 } 2012 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2013 #endif 2014 } 2015 2016 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2017 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2018 return -1; 2019 #else 2020 if 
(!TCR_4(__kmp_init_middle)) { 2021 __kmp_middle_initialize(); 2022 } 2023 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2024 #endif 2025 } 2026 2027 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2028 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2029 return -1; 2030 #else 2031 if (!TCR_4(__kmp_init_middle)) { 2032 __kmp_middle_initialize(); 2033 } 2034 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2035 #endif 2036 } 2037 2038 /* -------------------------------------------------------------------------- */ 2039 /*! 2040 @ingroup THREADPRIVATE 2041 @param loc source location information 2042 @param gtid global thread number 2043 @param cpy_size size of the cpy_data buffer 2044 @param cpy_data pointer to data to be copied 2045 @param cpy_func helper function to call for copying data 2046 @param didit flag variable: 1=single thread; 0=not single thread 2047 2048 __kmpc_copyprivate implements the interface for the private data broadcast 2049 needed for the copyprivate clause associated with a single region in an 2050 OpenMP<sup>*</sup> program (both C and Fortran). 2051 All threads participating in the parallel region call this routine. 2052 One of the threads (called the single thread) should have the <tt>didit</tt> 2053 variable set to 1 and all other threads should have that variable set to 0. 2054 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2055 2056 The OpenMP specification forbids the use of nowait on the single region when a 2057 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2058 barrier internally to avoid race conditions, so the code generation for the 2059 single region should avoid generating a barrier after the call to @ref 2060 __kmpc_copyprivate. 2061 2062 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2063 The <tt>loc</tt> parameter is a pointer to source location information. 2064 2065 Internal implementation: The single thread will first copy its descriptor 2066 address (cpy_data) to a team-private location, then the other threads will each 2067 call the function pointed to by the parameter cpy_func, which carries out the 2068 copy by copying the data using the cpy_data buffer. 2069 2070 The cpy_func routine used for the copy and the contents of the data area defined 2071 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2072 to be done. For instance, the cpy_data buffer can hold the actual data to be 2073 copied or it may hold a list of pointers to the data. The cpy_func routine must 2074 interpret the cpy_data buffer appropriately. 2075 2076 The interface to cpy_func is as follows: 2077 @code 2078 void cpy_func( void *destination, void *source ) 2079 @endcode 2080 where void *destination is the cpy_data pointer for the thread being copied to 2081 and void *source is the cpy_data pointer for the thread being copied from. 
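For illustration only, a minimal broadcast of a single integer might look as
follows; loc, gtid, compute() and the variable names are placeholders, not part
of the interface:
@code
static void copy_int(void *destination, void *source) {
  *(int *)destination = *(int *)source; // install the value from the single thread
}

int my_copy;           // each thread's private copy
kmp_int32 didit = 0;
if (__kmpc_single(&loc, gtid)) {
  my_copy = compute(); // produced by the single thread only
  didit = 1;
  __kmpc_end_single(&loc, gtid);
}
__kmpc_copyprivate(&loc, gtid, sizeof(my_copy), &my_copy, copy_int, didit);
@endcode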
2082 */ 2083 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2084 void *cpy_data, void (*cpy_func)(void *, void *), 2085 kmp_int32 didit) { 2086 void **data_ptr; 2087 2088 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2089 2090 KMP_MB(); 2091 2092 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2093 2094 if (__kmp_env_consistency_check) { 2095 if (loc == 0) { 2096 KMP_WARNING(ConstructIdentInvalid); 2097 } 2098 } 2099 2100 // ToDo: Optimize the following two barriers into some kind of split barrier 2101 2102 if (didit) 2103 *data_ptr = cpy_data; 2104 2105 #if OMPT_SUPPORT 2106 ompt_frame_t *ompt_frame; 2107 if (ompt_enabled.enabled) { 2108 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2109 if (ompt_frame->enter_frame.ptr == NULL) 2110 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2111 OMPT_STORE_RETURN_ADDRESS(gtid); 2112 } 2113 #endif 2114 /* This barrier is not a barrier region boundary */ 2115 #if USE_ITT_NOTIFY 2116 __kmp_threads[gtid]->th.th_ident = loc; 2117 #endif 2118 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2119 2120 if (!didit) 2121 (*cpy_func)(cpy_data, *data_ptr); 2122 2123 // Consider next barrier a user-visible barrier for barrier region boundaries 2124 // Nesting checks are already handled by the single construct checks 2125 2126 #if OMPT_SUPPORT 2127 if (ompt_enabled.enabled) { 2128 OMPT_STORE_RETURN_ADDRESS(gtid); 2129 } 2130 #endif 2131 #if USE_ITT_NOTIFY 2132 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2133 // tasks can overwrite the location) 2134 #endif 2135 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2136 #if OMPT_SUPPORT && OMPT_OPTIONAL 2137 if (ompt_enabled.enabled) { 2138 ompt_frame->enter_frame = ompt_data_none; 2139 } 2140 #endif 2141 } 2142 2143 /* -------------------------------------------------------------------------- */ 2144 2145 #define INIT_LOCK __kmp_init_user_lock_with_checks 2146 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2147 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2148 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2149 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2150 #define ACQUIRE_NESTED_LOCK_TIMED \ 2151 __kmp_acquire_nested_user_lock_with_checks_timed 2152 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2153 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2154 #define TEST_LOCK __kmp_test_user_lock_with_checks 2155 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2156 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2157 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2158 2159 // TODO: Make check abort messages use location info & pass it into 2160 // with_checks routines 2161 2162 #if KMP_USE_DYNAMIC_LOCK 2163 2164 // internal lock initializer 2165 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2166 kmp_dyna_lockseq_t seq) { 2167 if (KMP_IS_D_LOCK(seq)) { 2168 KMP_INIT_D_LOCK(lock, seq); 2169 #if USE_ITT_BUILD 2170 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2171 #endif 2172 } else { 2173 KMP_INIT_I_LOCK(lock, seq); 2174 #if USE_ITT_BUILD 2175 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2176 __kmp_itt_lock_creating(ilk->lock, loc); 2177 #endif 2178 } 2179 } 2180 2181 // internal nest lock initializer 2182 static __forceinline void 2183 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2184 kmp_dyna_lockseq_t seq) { 2185 #if KMP_USE_TSX 2186 // Don't have nested lock implementation for speculative locks 2187 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2188 seq = __kmp_user_lock_seq; 2189 #endif 2190 switch (seq) { 2191 case lockseq_tas: 2192 seq = lockseq_nested_tas; 2193 break; 2194 #if KMP_USE_FUTEX 2195 case lockseq_futex: 2196 seq = lockseq_nested_futex; 2197 break; 2198 #endif 2199 case lockseq_ticket: 2200 seq = lockseq_nested_ticket; 2201 break; 2202 case lockseq_queuing: 2203 seq = lockseq_nested_queuing; 2204 break; 2205 case lockseq_drdpa: 2206 seq = lockseq_nested_drdpa; 2207 break; 2208 default: 2209 seq = lockseq_nested_queuing; 2210 } 2211 KMP_INIT_I_LOCK(lock, seq); 2212 #if USE_ITT_BUILD 2213 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2214 __kmp_itt_lock_creating(ilk->lock, loc); 2215 #endif 2216 } 2217 2218 /* initialize the lock with a hint */ 2219 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2220 uintptr_t hint) { 2221 KMP_DEBUG_ASSERT(__kmp_init_serial); 2222 if (__kmp_env_consistency_check && user_lock == NULL) { 2223 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2224 } 2225 2226 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2227 2228 #if OMPT_SUPPORT && OMPT_OPTIONAL 2229 // This is the case, if called from omp_init_lock_with_hint: 2230 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2231 if (!codeptr) 2232 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2233 if (ompt_enabled.ompt_callback_lock_init) { 2234 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2235 ompt_mutex_lock, (omp_lock_hint_t)hint, 2236 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2237 codeptr); 2238 } 2239 #endif 2240 } 2241 2242 /* initialize the lock with a hint */ 2243 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2244 void **user_lock, uintptr_t hint) { 2245 KMP_DEBUG_ASSERT(__kmp_init_serial); 2246 if (__kmp_env_consistency_check && user_lock == NULL) { 2247 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2248 } 2249 2250 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2251 2252 #if OMPT_SUPPORT && OMPT_OPTIONAL 2253 // This is the case, if called from omp_init_lock_with_hint: 2254 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2255 if (!codeptr) 2256 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2257 if (ompt_enabled.ompt_callback_lock_init) { 2258 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2259 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2260 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2261 codeptr); 2262 } 2263 #endif 2264 } 2265 2266 #endif // KMP_USE_DYNAMIC_LOCK 2267 2268 /* initialize the lock */ 2269 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2270 #if KMP_USE_DYNAMIC_LOCK 2271 2272 KMP_DEBUG_ASSERT(__kmp_init_serial); 2273 if (__kmp_env_consistency_check && user_lock == NULL) { 2274 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2275 } 2276 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2277 2278 #if OMPT_SUPPORT && OMPT_OPTIONAL 2279 // This is the case, if called from omp_init_lock_with_hint: 2280 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2281 if (!codeptr) 2282 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2283 if (ompt_enabled.ompt_callback_lock_init) { 2284 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2285 ompt_mutex_lock, omp_lock_hint_none, 2286 
__ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2287 codeptr); 2288 } 2289 #endif 2290 2291 #else // KMP_USE_DYNAMIC_LOCK 2292 2293 static char const *const func = "omp_init_lock"; 2294 kmp_user_lock_p lck; 2295 KMP_DEBUG_ASSERT(__kmp_init_serial); 2296 2297 if (__kmp_env_consistency_check) { 2298 if (user_lock == NULL) { 2299 KMP_FATAL(LockIsUninitialized, func); 2300 } 2301 } 2302 2303 KMP_CHECK_USER_LOCK_INIT(); 2304 2305 if ((__kmp_user_lock_kind == lk_tas) && 2306 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2307 lck = (kmp_user_lock_p)user_lock; 2308 } 2309 #if KMP_USE_FUTEX 2310 else if ((__kmp_user_lock_kind == lk_futex) && 2311 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2312 lck = (kmp_user_lock_p)user_lock; 2313 } 2314 #endif 2315 else { 2316 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2317 } 2318 INIT_LOCK(lck); 2319 __kmp_set_user_lock_location(lck, loc); 2320 2321 #if OMPT_SUPPORT && OMPT_OPTIONAL 2322 // This is the case, if called from omp_init_lock_with_hint: 2323 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2324 if (!codeptr) 2325 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2326 if (ompt_enabled.ompt_callback_lock_init) { 2327 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2328 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2329 (ompt_wait_id_t)user_lock, codeptr); 2330 } 2331 #endif 2332 2333 #if USE_ITT_BUILD 2334 __kmp_itt_lock_creating(lck); 2335 #endif /* USE_ITT_BUILD */ 2336 2337 #endif // KMP_USE_DYNAMIC_LOCK 2338 } // __kmpc_init_lock 2339 2340 /* initialize the lock */ 2341 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2342 #if KMP_USE_DYNAMIC_LOCK 2343 2344 KMP_DEBUG_ASSERT(__kmp_init_serial); 2345 if (__kmp_env_consistency_check && user_lock == NULL) { 2346 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2347 } 2348 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2349 2350 #if OMPT_SUPPORT && OMPT_OPTIONAL 2351 // This is the case, if called from omp_init_lock_with_hint: 2352 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2353 if (!codeptr) 2354 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2355 if (ompt_enabled.ompt_callback_lock_init) { 2356 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2357 ompt_mutex_nest_lock, omp_lock_hint_none, 2358 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2359 codeptr); 2360 } 2361 #endif 2362 2363 #else // KMP_USE_DYNAMIC_LOCK 2364 2365 static char const *const func = "omp_init_nest_lock"; 2366 kmp_user_lock_p lck; 2367 KMP_DEBUG_ASSERT(__kmp_init_serial); 2368 2369 if (__kmp_env_consistency_check) { 2370 if (user_lock == NULL) { 2371 KMP_FATAL(LockIsUninitialized, func); 2372 } 2373 } 2374 2375 KMP_CHECK_USER_LOCK_INIT(); 2376 2377 if ((__kmp_user_lock_kind == lk_tas) && 2378 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2379 OMP_NEST_LOCK_T_SIZE)) { 2380 lck = (kmp_user_lock_p)user_lock; 2381 } 2382 #if KMP_USE_FUTEX 2383 else if ((__kmp_user_lock_kind == lk_futex) && 2384 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2385 OMP_NEST_LOCK_T_SIZE)) { 2386 lck = (kmp_user_lock_p)user_lock; 2387 } 2388 #endif 2389 else { 2390 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2391 } 2392 2393 INIT_NESTED_LOCK(lck); 2394 __kmp_set_user_lock_location(lck, loc); 2395 2396 #if OMPT_SUPPORT && OMPT_OPTIONAL 2397 // This is the case, if called from omp_init_lock_with_hint: 2398 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2399 if (!codeptr) 2400 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2401 if (ompt_enabled.ompt_callback_lock_init) { 2402 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2403 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2404 (ompt_wait_id_t)user_lock, codeptr); 2405 } 2406 #endif 2407 2408 #if USE_ITT_BUILD 2409 __kmp_itt_lock_creating(lck); 2410 #endif /* USE_ITT_BUILD */ 2411 2412 #endif // KMP_USE_DYNAMIC_LOCK 2413 } // __kmpc_init_nest_lock 2414 2415 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2416 #if KMP_USE_DYNAMIC_LOCK 2417 2418 #if USE_ITT_BUILD 2419 kmp_user_lock_p lck; 2420 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2421 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2422 } else { 2423 lck = (kmp_user_lock_p)user_lock; 2424 } 2425 __kmp_itt_lock_destroyed(lck); 2426 #endif 2427 #if OMPT_SUPPORT && OMPT_OPTIONAL 2428 // This is the case, if called from omp_init_lock_with_hint: 2429 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2430 if (!codeptr) 2431 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2432 if (ompt_enabled.ompt_callback_lock_destroy) { 2433 kmp_user_lock_p lck; 2434 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2435 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2436 } else { 2437 lck = (kmp_user_lock_p)user_lock; 2438 } 2439 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2440 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2441 } 2442 #endif 2443 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2444 #else 2445 kmp_user_lock_p lck; 2446 2447 if ((__kmp_user_lock_kind == lk_tas) && 2448 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2449 lck = (kmp_user_lock_p)user_lock; 2450 } 2451 #if KMP_USE_FUTEX 2452 else if ((__kmp_user_lock_kind == lk_futex) && 2453 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2454 lck = (kmp_user_lock_p)user_lock; 2455 } 2456 #endif 2457 else { 2458 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2459 } 2460 2461 #if OMPT_SUPPORT && OMPT_OPTIONAL 2462 // This is the case, if called from omp_init_lock_with_hint: 2463 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2464 if (!codeptr) 2465 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2466 if (ompt_enabled.ompt_callback_lock_destroy) { 2467 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2468 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2469 } 2470 #endif 2471 2472 #if USE_ITT_BUILD 2473 __kmp_itt_lock_destroyed(lck); 2474 #endif /* USE_ITT_BUILD */ 2475 DESTROY_LOCK(lck); 2476 2477 if ((__kmp_user_lock_kind == lk_tas) && 2478 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2479 ; 2480 } 2481 #if KMP_USE_FUTEX 2482 else if ((__kmp_user_lock_kind == lk_futex) && 2483 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2484 ; 2485 } 2486 #endif 2487 else { 2488 __kmp_user_lock_free(user_lock, gtid, lck); 2489 } 2490 #endif // KMP_USE_DYNAMIC_LOCK 2491 } // __kmpc_destroy_lock 2492 2493 /* destroy the lock */ 2494 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2495 #if KMP_USE_DYNAMIC_LOCK 2496 2497 #if USE_ITT_BUILD 2498 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2499 __kmp_itt_lock_destroyed(ilk->lock); 2500 #endif 2501 #if OMPT_SUPPORT && OMPT_OPTIONAL 2502 // This is the case, if called from omp_init_lock_with_hint: 2503 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2504 if (!codeptr) 2505 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2506 if (ompt_enabled.ompt_callback_lock_destroy) { 2507 
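    // Note: the tool callback below reports the user-visible address
    // (user_lock), while ITT above was given the underlying indirect lock.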
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2508 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2509 } 2510 #endif 2511 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2512 2513 #else // KMP_USE_DYNAMIC_LOCK 2514 2515 kmp_user_lock_p lck; 2516 2517 if ((__kmp_user_lock_kind == lk_tas) && 2518 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2519 OMP_NEST_LOCK_T_SIZE)) { 2520 lck = (kmp_user_lock_p)user_lock; 2521 } 2522 #if KMP_USE_FUTEX 2523 else if ((__kmp_user_lock_kind == lk_futex) && 2524 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2525 OMP_NEST_LOCK_T_SIZE)) { 2526 lck = (kmp_user_lock_p)user_lock; 2527 } 2528 #endif 2529 else { 2530 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2531 } 2532 2533 #if OMPT_SUPPORT && OMPT_OPTIONAL 2534 // This is the case, if called from omp_init_lock_with_hint: 2535 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2536 if (!codeptr) 2537 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2538 if (ompt_enabled.ompt_callback_lock_destroy) { 2539 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2540 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2541 } 2542 #endif 2543 2544 #if USE_ITT_BUILD 2545 __kmp_itt_lock_destroyed(lck); 2546 #endif /* USE_ITT_BUILD */ 2547 2548 DESTROY_NESTED_LOCK(lck); 2549 2550 if ((__kmp_user_lock_kind == lk_tas) && 2551 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2552 OMP_NEST_LOCK_T_SIZE)) { 2553 ; 2554 } 2555 #if KMP_USE_FUTEX 2556 else if ((__kmp_user_lock_kind == lk_futex) && 2557 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2558 OMP_NEST_LOCK_T_SIZE)) { 2559 ; 2560 } 2561 #endif 2562 else { 2563 __kmp_user_lock_free(user_lock, gtid, lck); 2564 } 2565 #endif // KMP_USE_DYNAMIC_LOCK 2566 } // __kmpc_destroy_nest_lock 2567 2568 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2569 KMP_COUNT_BLOCK(OMP_set_lock); 2570 #if KMP_USE_DYNAMIC_LOCK 2571 int tag = KMP_EXTRACT_D_TAG(user_lock); 2572 #if USE_ITT_BUILD 2573 __kmp_itt_lock_acquiring( 2574 (kmp_user_lock_p) 2575 user_lock); // itt function will get to the right lock object. 
2576 #endif 2577 #if OMPT_SUPPORT && OMPT_OPTIONAL 2578 // This is the case, if called from omp_init_lock_with_hint: 2579 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2580 if (!codeptr) 2581 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2582 if (ompt_enabled.ompt_callback_mutex_acquire) { 2583 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2584 ompt_mutex_lock, omp_lock_hint_none, 2585 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2586 codeptr); 2587 } 2588 #endif 2589 #if KMP_USE_INLINED_TAS 2590 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2591 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2592 } else 2593 #elif KMP_USE_INLINED_FUTEX 2594 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2595 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2596 } else 2597 #endif 2598 { 2599 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2600 } 2601 #if USE_ITT_BUILD 2602 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2603 #endif 2604 #if OMPT_SUPPORT && OMPT_OPTIONAL 2605 if (ompt_enabled.ompt_callback_mutex_acquired) { 2606 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2607 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2608 } 2609 #endif 2610 2611 #else // KMP_USE_DYNAMIC_LOCK 2612 2613 kmp_user_lock_p lck; 2614 2615 if ((__kmp_user_lock_kind == lk_tas) && 2616 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2617 lck = (kmp_user_lock_p)user_lock; 2618 } 2619 #if KMP_USE_FUTEX 2620 else if ((__kmp_user_lock_kind == lk_futex) && 2621 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2622 lck = (kmp_user_lock_p)user_lock; 2623 } 2624 #endif 2625 else { 2626 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2627 } 2628 2629 #if USE_ITT_BUILD 2630 __kmp_itt_lock_acquiring(lck); 2631 #endif /* USE_ITT_BUILD */ 2632 #if OMPT_SUPPORT && OMPT_OPTIONAL 2633 // This is the case, if called from omp_init_lock_with_hint: 2634 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2635 if (!codeptr) 2636 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2637 if (ompt_enabled.ompt_callback_mutex_acquire) { 2638 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2639 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2640 (ompt_wait_id_t)lck, codeptr); 2641 } 2642 #endif 2643 2644 ACQUIRE_LOCK(lck, gtid); 2645 2646 #if USE_ITT_BUILD 2647 __kmp_itt_lock_acquired(lck); 2648 #endif /* USE_ITT_BUILD */ 2649 2650 #if OMPT_SUPPORT && OMPT_OPTIONAL 2651 if (ompt_enabled.ompt_callback_mutex_acquired) { 2652 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2653 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 2654 } 2655 #endif 2656 2657 #endif // KMP_USE_DYNAMIC_LOCK 2658 } 2659 2660 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2661 #if KMP_USE_DYNAMIC_LOCK 2662 2663 #if USE_ITT_BUILD 2664 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2665 #endif 2666 #if OMPT_SUPPORT && OMPT_OPTIONAL 2667 // This is the case, if called from omp_init_lock_with_hint: 2668 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2669 if (!codeptr) 2670 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2671 if (ompt_enabled.enabled) { 2672 if (ompt_enabled.ompt_callback_mutex_acquire) { 2673 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2674 ompt_mutex_nest_lock, omp_lock_hint_none, 2675 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2676 codeptr); 2677 } 2678 } 2679 #endif 2680 int acquire_status = 2681 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2682 (void) acquire_status; 
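  // acquire_status is consumed only by the OMPT callbacks below; the (void)
  // cast silences unused-variable warnings when OMPT support is compiled out.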
2683 #if USE_ITT_BUILD 2684 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2685 #endif 2686 2687 #if OMPT_SUPPORT && OMPT_OPTIONAL 2688 if (ompt_enabled.enabled) { 2689 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2690 if (ompt_enabled.ompt_callback_mutex_acquired) { 2691 // lock_first 2692 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2693 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2694 } 2695 } else { 2696 if (ompt_enabled.ompt_callback_nest_lock) { 2697 // lock_next 2698 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2699 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); 2700 } 2701 } 2702 } 2703 #endif 2704 2705 #else // KMP_USE_DYNAMIC_LOCK 2706 int acquire_status; 2707 kmp_user_lock_p lck; 2708 2709 if ((__kmp_user_lock_kind == lk_tas) && 2710 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2711 OMP_NEST_LOCK_T_SIZE)) { 2712 lck = (kmp_user_lock_p)user_lock; 2713 } 2714 #if KMP_USE_FUTEX 2715 else if ((__kmp_user_lock_kind == lk_futex) && 2716 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2717 OMP_NEST_LOCK_T_SIZE)) { 2718 lck = (kmp_user_lock_p)user_lock; 2719 } 2720 #endif 2721 else { 2722 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2723 } 2724 2725 #if USE_ITT_BUILD 2726 __kmp_itt_lock_acquiring(lck); 2727 #endif /* USE_ITT_BUILD */ 2728 #if OMPT_SUPPORT && OMPT_OPTIONAL 2729 // This is the case, if called from omp_init_lock_with_hint: 2730 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2731 if (!codeptr) 2732 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2733 if (ompt_enabled.enabled) { 2734 if (ompt_enabled.ompt_callback_mutex_acquire) { 2735 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2736 ompt_mutex_nest_lock, omp_lock_hint_none, 2737 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); 2738 } 2739 } 2740 #endif 2741 2742 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2743 2744 #if USE_ITT_BUILD 2745 __kmp_itt_lock_acquired(lck); 2746 #endif /* USE_ITT_BUILD */ 2747 2748 #if OMPT_SUPPORT && OMPT_OPTIONAL 2749 if (ompt_enabled.enabled) { 2750 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2751 if (ompt_enabled.ompt_callback_mutex_acquired) { 2752 // lock_first 2753 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2754 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 2755 } 2756 } else { 2757 if (ompt_enabled.ompt_callback_nest_lock) { 2758 // lock_next 2759 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2760 ompt_scope_begin, (ompt_wait_id_t)lck, codeptr); 2761 } 2762 } 2763 } 2764 #endif 2765 2766 #endif // KMP_USE_DYNAMIC_LOCK 2767 } 2768 2769 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2770 #if KMP_USE_DYNAMIC_LOCK 2771 2772 int tag = KMP_EXTRACT_D_TAG(user_lock); 2773 #if USE_ITT_BUILD 2774 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2775 #endif 2776 #if KMP_USE_INLINED_TAS 2777 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2778 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2779 } else 2780 #elif KMP_USE_INLINED_FUTEX 2781 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2782 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2783 } else 2784 #endif 2785 { 2786 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2787 } 2788 2789 #if OMPT_SUPPORT && OMPT_OPTIONAL 2790 // This is the case, if called from omp_init_lock_with_hint: 2791 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2792 if (!codeptr) 2793 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2794 if 
(ompt_enabled.ompt_callback_mutex_released) { 2795 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2796 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2797 } 2798 #endif 2799 2800 #else // KMP_USE_DYNAMIC_LOCK 2801 2802 kmp_user_lock_p lck; 2803 2804 /* Can't use serial interval since not block structured */ 2805 /* release the lock */ 2806 2807 if ((__kmp_user_lock_kind == lk_tas) && 2808 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2809 #if KMP_OS_LINUX && \ 2810 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2811 // "fast" path implemented to fix customer performance issue 2812 #if USE_ITT_BUILD 2813 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2814 #endif /* USE_ITT_BUILD */ 2815 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2816 KMP_MB(); 2817 2818 #if OMPT_SUPPORT && OMPT_OPTIONAL 2819 // This is the case, if called from omp_init_lock_with_hint: 2820 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2821 if (!codeptr) 2822 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2823 if (ompt_enabled.ompt_callback_mutex_released) { 2824 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2825 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 2826 } 2827 #endif 2828 2829 return; 2830 #else 2831 lck = (kmp_user_lock_p)user_lock; 2832 #endif 2833 } 2834 #if KMP_USE_FUTEX 2835 else if ((__kmp_user_lock_kind == lk_futex) && 2836 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2837 lck = (kmp_user_lock_p)user_lock; 2838 } 2839 #endif 2840 else { 2841 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2842 } 2843 2844 #if USE_ITT_BUILD 2845 __kmp_itt_lock_releasing(lck); 2846 #endif /* USE_ITT_BUILD */ 2847 2848 RELEASE_LOCK(lck, gtid); 2849 2850 #if OMPT_SUPPORT && OMPT_OPTIONAL 2851 // This is the case, if called from omp_init_lock_with_hint: 2852 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2853 if (!codeptr) 2854 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2855 if (ompt_enabled.ompt_callback_mutex_released) { 2856 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2857 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 2858 } 2859 #endif 2860 2861 #endif // KMP_USE_DYNAMIC_LOCK 2862 } 2863 2864 /* release the lock */ 2865 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2866 #if KMP_USE_DYNAMIC_LOCK 2867 2868 #if USE_ITT_BUILD 2869 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2870 #endif 2871 int release_status = 2872 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2873 (void) release_status; 2874 2875 #if OMPT_SUPPORT && OMPT_OPTIONAL 2876 // This is the case, if called from omp_init_lock_with_hint: 2877 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2878 if (!codeptr) 2879 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2880 if (ompt_enabled.enabled) { 2881 if (release_status == KMP_LOCK_RELEASED) { 2882 if (ompt_enabled.ompt_callback_mutex_released) { 2883 // release_lock_last 2884 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2885 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2886 } 2887 } else if (ompt_enabled.ompt_callback_nest_lock) { 2888 // release_lock_prev 2889 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2890 ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr); 2891 } 2892 } 2893 #endif 2894 2895 #else // KMP_USE_DYNAMIC_LOCK 2896 2897 kmp_user_lock_p lck; 2898 2899 /* Can't use serial interval since not block structured */ 2900 2901 if ((__kmp_user_lock_kind == lk_tas) && 2902 (sizeof(lck->tas.lk.poll) + 
sizeof(lck->tas.lk.depth_locked) <= 2903 OMP_NEST_LOCK_T_SIZE)) { 2904 #if KMP_OS_LINUX && \ 2905 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2906 // "fast" path implemented to fix customer performance issue 2907 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2908 #if USE_ITT_BUILD 2909 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2910 #endif /* USE_ITT_BUILD */ 2911 2912 #if OMPT_SUPPORT && OMPT_OPTIONAL 2913 int release_status = KMP_LOCK_STILL_HELD; 2914 #endif 2915 2916 if (--(tl->lk.depth_locked) == 0) { 2917 TCW_4(tl->lk.poll, 0); 2918 #if OMPT_SUPPORT && OMPT_OPTIONAL 2919 release_status = KMP_LOCK_RELEASED; 2920 #endif 2921 } 2922 KMP_MB(); 2923 2924 #if OMPT_SUPPORT && OMPT_OPTIONAL 2925 // This is the case, if called from omp_init_lock_with_hint: 2926 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2927 if (!codeptr) 2928 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2929 if (ompt_enabled.enabled) { 2930 if (release_status == KMP_LOCK_RELEASED) { 2931 if (ompt_enabled.ompt_callback_mutex_released) { 2932 // release_lock_last 2933 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2934 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 2935 } 2936 } else if (ompt_enabled.ompt_callback_nest_lock) { 2937 // release_lock_previous 2938 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2939 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); 2940 } 2941 } 2942 #endif 2943 2944 return; 2945 #else 2946 lck = (kmp_user_lock_p)user_lock; 2947 #endif 2948 } 2949 #if KMP_USE_FUTEX 2950 else if ((__kmp_user_lock_kind == lk_futex) && 2951 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2952 OMP_NEST_LOCK_T_SIZE)) { 2953 lck = (kmp_user_lock_p)user_lock; 2954 } 2955 #endif 2956 else { 2957 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2958 } 2959 2960 #if USE_ITT_BUILD 2961 __kmp_itt_lock_releasing(lck); 2962 #endif /* USE_ITT_BUILD */ 2963 2964 int release_status; 2965 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2966 #if OMPT_SUPPORT && OMPT_OPTIONAL 2967 // This is the case, if called from omp_init_lock_with_hint: 2968 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2969 if (!codeptr) 2970 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2971 if (ompt_enabled.enabled) { 2972 if (release_status == KMP_LOCK_RELEASED) { 2973 if (ompt_enabled.ompt_callback_mutex_released) { 2974 // release_lock_last 2975 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2976 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 2977 } 2978 } else if (ompt_enabled.ompt_callback_nest_lock) { 2979 // release_lock_previous 2980 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2981 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); 2982 } 2983 } 2984 #endif 2985 2986 #endif // KMP_USE_DYNAMIC_LOCK 2987 } 2988 2989 /* try to acquire the lock */ 2990 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2991 KMP_COUNT_BLOCK(OMP_test_lock); 2992 2993 #if KMP_USE_DYNAMIC_LOCK 2994 int rc; 2995 int tag = KMP_EXTRACT_D_TAG(user_lock); 2996 #if USE_ITT_BUILD 2997 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2998 #endif 2999 #if OMPT_SUPPORT && OMPT_OPTIONAL 3000 // This is the case, if called from omp_init_lock_with_hint: 3001 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3002 if (!codeptr) 3003 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3004 if (ompt_enabled.ompt_callback_mutex_acquire) { 3005 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3006 ompt_mutex_lock, omp_lock_hint_none, 3007 
__ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 3008 codeptr); 3009 } 3010 #endif 3011 #if KMP_USE_INLINED_TAS 3012 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3013 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3014 } else 3015 #elif KMP_USE_INLINED_FUTEX 3016 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3017 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3018 } else 3019 #endif 3020 { 3021 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3022 } 3023 if (rc) { 3024 #if USE_ITT_BUILD 3025 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3026 #endif 3027 #if OMPT_SUPPORT && OMPT_OPTIONAL 3028 if (ompt_enabled.ompt_callback_mutex_acquired) { 3029 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3030 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 3031 } 3032 #endif 3033 return FTN_TRUE; 3034 } else { 3035 #if USE_ITT_BUILD 3036 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3037 #endif 3038 return FTN_FALSE; 3039 } 3040 3041 #else // KMP_USE_DYNAMIC_LOCK 3042 3043 kmp_user_lock_p lck; 3044 int rc; 3045 3046 if ((__kmp_user_lock_kind == lk_tas) && 3047 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3048 lck = (kmp_user_lock_p)user_lock; 3049 } 3050 #if KMP_USE_FUTEX 3051 else if ((__kmp_user_lock_kind == lk_futex) && 3052 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3053 lck = (kmp_user_lock_p)user_lock; 3054 } 3055 #endif 3056 else { 3057 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3058 } 3059 3060 #if USE_ITT_BUILD 3061 __kmp_itt_lock_acquiring(lck); 3062 #endif /* USE_ITT_BUILD */ 3063 #if OMPT_SUPPORT && OMPT_OPTIONAL 3064 // This is the case, if called from omp_init_lock_with_hint: 3065 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3066 if (!codeptr) 3067 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3068 if (ompt_enabled.ompt_callback_mutex_acquire) { 3069 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3070 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3071 (ompt_wait_id_t)lck, codeptr); 3072 } 3073 #endif 3074 3075 rc = TEST_LOCK(lck, gtid); 3076 #if USE_ITT_BUILD 3077 if (rc) { 3078 __kmp_itt_lock_acquired(lck); 3079 } else { 3080 __kmp_itt_lock_cancelled(lck); 3081 } 3082 #endif /* USE_ITT_BUILD */ 3083 #if OMPT_SUPPORT && OMPT_OPTIONAL 3084 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3085 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3086 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 3087 } 3088 #endif 3089 3090 return (rc ? 
FTN_TRUE : FTN_FALSE); 3091 3092 /* Can't use serial interval since not block structured */ 3093 3094 #endif // KMP_USE_DYNAMIC_LOCK 3095 } 3096 3097 /* try to acquire the lock */ 3098 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3099 #if KMP_USE_DYNAMIC_LOCK 3100 int rc; 3101 #if USE_ITT_BUILD 3102 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3103 #endif 3104 #if OMPT_SUPPORT && OMPT_OPTIONAL 3105 // This is the case, if called from omp_init_lock_with_hint: 3106 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3107 if (!codeptr) 3108 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3109 if (ompt_enabled.ompt_callback_mutex_acquire) { 3110 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3111 ompt_mutex_nest_lock, omp_lock_hint_none, 3112 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 3113 codeptr); 3114 } 3115 #endif 3116 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3117 #if USE_ITT_BUILD 3118 if (rc) { 3119 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3120 } else { 3121 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3122 } 3123 #endif 3124 #if OMPT_SUPPORT && OMPT_OPTIONAL 3125 if (ompt_enabled.enabled && rc) { 3126 if (rc == 1) { 3127 if (ompt_enabled.ompt_callback_mutex_acquired) { 3128 // lock_first 3129 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3130 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 3131 } 3132 } else { 3133 if (ompt_enabled.ompt_callback_nest_lock) { 3134 // lock_next 3135 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3136 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); 3137 } 3138 } 3139 } 3140 #endif 3141 return rc; 3142 3143 #else // KMP_USE_DYNAMIC_LOCK 3144 3145 kmp_user_lock_p lck; 3146 int rc; 3147 3148 if ((__kmp_user_lock_kind == lk_tas) && 3149 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3150 OMP_NEST_LOCK_T_SIZE)) { 3151 lck = (kmp_user_lock_p)user_lock; 3152 } 3153 #if KMP_USE_FUTEX 3154 else if ((__kmp_user_lock_kind == lk_futex) && 3155 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3156 OMP_NEST_LOCK_T_SIZE)) { 3157 lck = (kmp_user_lock_p)user_lock; 3158 } 3159 #endif 3160 else { 3161 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3162 } 3163 3164 #if USE_ITT_BUILD 3165 __kmp_itt_lock_acquiring(lck); 3166 #endif /* USE_ITT_BUILD */ 3167 3168 #if OMPT_SUPPORT && OMPT_OPTIONAL 3169 // This is the case, if called from omp_init_lock_with_hint: 3170 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3171 if (!codeptr) 3172 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3173 if (ompt_enabled.enabled && 3174 ompt_enabled.ompt_callback_mutex_acquire) { 3175 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3176 ompt_mutex_nest_lock, omp_lock_hint_none, 3177 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); 3178 } 3179 #endif 3180 3181 rc = TEST_NESTED_LOCK(lck, gtid); 3182 #if USE_ITT_BUILD 3183 if (rc) { 3184 __kmp_itt_lock_acquired(lck); 3185 } else { 3186 __kmp_itt_lock_cancelled(lck); 3187 } 3188 #endif /* USE_ITT_BUILD */ 3189 #if OMPT_SUPPORT && OMPT_OPTIONAL 3190 if (ompt_enabled.enabled && rc) { 3191 if (rc == 1) { 3192 if (ompt_enabled.ompt_callback_mutex_acquired) { 3193 // lock_first 3194 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3195 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 3196 } 3197 } else { 3198 if (ompt_enabled.ompt_callback_nest_lock) { 3199 // lock_next 3200
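        // rc > 1: the owning thread re-acquired the nested lock, so a
        // nest-lock scope event is reported instead of a fresh acquisition.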
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3201 ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr); 3202 } 3203 } 3204 } 3205 #endif 3206 return rc; 3207 3208 /* Can't use serial interval since not block structured */ 3209 3210 #endif // KMP_USE_DYNAMIC_LOCK 3211 } 3212 3213 // Interface to fast scalable reduce methods routines 3214 3215 // keep the selected method in a thread local structure for cross-function 3216 // usage: will be used in __kmpc_end_reduce* functions; 3217 // another solution: to re-determine the method one more time in 3218 // __kmpc_end_reduce* functions (new prototype required then) 3219 // AT: which solution is better? 3220 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3221 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3222 3223 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3224 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3225 3226 // description of the packed_reduction_method variable: look at the macros in 3227 // kmp.h 3228 3229 // used in a critical section reduce block 3230 static __forceinline void 3231 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3232 kmp_critical_name *crit) { 3233 3234 // this lock was visible to a customer and to the threading profile tool as a 3235 // serial overhead span (although it's used for an internal purpose only) 3236 // why was it visible in previous implementation? 3237 // should we keep it visible in new reduce block? 3238 kmp_user_lock_p lck; 3239 3240 #if KMP_USE_DYNAMIC_LOCK 3241 3242 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3243 // Check if it is initialized. 3244 if (*lk == 0) { 3245 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3246 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3247 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3248 } else { 3249 __kmp_init_indirect_csptr(crit, loc, global_tid, 3250 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3251 } 3252 } 3253 // Branch for accessing the actual lock object and set operation. This 3254 // branching is inevitable since this lock initialization does not follow the 3255 // normal dispatch path (lock table is not used). 3256 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3257 lck = (kmp_user_lock_p)lk; 3258 KMP_DEBUG_ASSERT(lck != NULL); 3259 if (__kmp_env_consistency_check) { 3260 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3261 } 3262 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3263 } else { 3264 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3265 lck = ilk->lock; 3266 KMP_DEBUG_ASSERT(lck != NULL); 3267 if (__kmp_env_consistency_check) { 3268 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3269 } 3270 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3271 } 3272 3273 #else // KMP_USE_DYNAMIC_LOCK 3274 3275 // We know that the fast reduction code is only emitted by Intel compilers 3276 // with 32 byte critical sections. If there isn't enough space, then we 3277 // have to use a pointer. 
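  // (INTEL_CRITICAL_SIZE presumably names that 32-byte in-place storage; the
  // matching "end" routine below spells the same threshold as the literal 32.)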
3278 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3279 lck = (kmp_user_lock_p)crit; 3280 } else { 3281 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3282 } 3283 KMP_DEBUG_ASSERT(lck != NULL); 3284 3285 if (__kmp_env_consistency_check) 3286 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3287 3288 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3289 3290 #endif // KMP_USE_DYNAMIC_LOCK 3291 } 3292 3293 // used in a critical section reduce block 3294 static __forceinline void 3295 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3296 kmp_critical_name *crit) { 3297 3298 kmp_user_lock_p lck; 3299 3300 #if KMP_USE_DYNAMIC_LOCK 3301 3302 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3303 lck = (kmp_user_lock_p)crit; 3304 if (__kmp_env_consistency_check) 3305 __kmp_pop_sync(global_tid, ct_critical, loc); 3306 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3307 } else { 3308 kmp_indirect_lock_t *ilk = 3309 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3310 if (__kmp_env_consistency_check) 3311 __kmp_pop_sync(global_tid, ct_critical, loc); 3312 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3313 } 3314 3315 #else // KMP_USE_DYNAMIC_LOCK 3316 3317 // We know that the fast reduction code is only emitted by Intel compilers 3318 // with 32 byte critical sections. If there isn't enough space, then we have 3319 // to use a pointer. 3320 if (__kmp_base_user_lock_size > 32) { 3321 lck = *((kmp_user_lock_p *)crit); 3322 KMP_ASSERT(lck != NULL); 3323 } else { 3324 lck = (kmp_user_lock_p)crit; 3325 } 3326 3327 if (__kmp_env_consistency_check) 3328 __kmp_pop_sync(global_tid, ct_critical, loc); 3329 3330 __kmp_release_user_lock_with_checks(lck, global_tid); 3331 3332 #endif // KMP_USE_DYNAMIC_LOCK 3333 } // __kmp_end_critical_section_reduce_block 3334 3335 #if OMP_40_ENABLED 3336 static __forceinline int 3337 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3338 int *task_state) { 3339 kmp_team_t *team; 3340 3341 // Check if we are inside the teams construct? 3342 if (th->th.th_teams_microtask) { 3343 *team_p = team = th->th.th_team; 3344 if (team->t.t_level == th->th.th_teams_level) { 3345 // This is reduction at teams construct. 3346 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3347 // Let's swap teams temporarily for the reduction. 3348 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3349 th->th.th_team = team->t.t_parent; 3350 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3351 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3352 *task_state = th->th.th_task_state; 3353 th->th.th_task_state = 0; 3354 3355 return 1; 3356 } 3357 } 3358 return 0; 3359 } 3360 3361 static __forceinline void 3362 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3363 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3364 th->th.th_info.ds.ds_tid = 0; 3365 th->th.th_team = team; 3366 th->th.th_team_nproc = team->t.t_nproc; 3367 th->th.th_task_team = team->t.t_task_team[task_state]; 3368 th->th.th_task_state = task_state; 3369 } 3370 #endif 3371 3372 /* 2.a.i. Reduce Block without a terminating barrier */ 3373 /*! 
3374 @ingroup SYNCHRONIZATION 3375 @param loc source location information 3376 @param global_tid global thread number 3377 @param num_vars number of items (variables) to be reduced 3378 @param reduce_size size of data in bytes to be reduced 3379 @param reduce_data pointer to data to be reduced 3380 @param reduce_func callback function providing reduction operation on two 3381 operands and returning result of reduction in lhs_data 3382 @param lck pointer to the unique lock data structure 3383 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3384 threads if atomic reduction needed 3385 3386 The nowait version is used for a reduce clause with the nowait argument. 3387 */ 3388 kmp_int32 3389 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3390 size_t reduce_size, void *reduce_data, 3391 void (*reduce_func)(void *lhs_data, void *rhs_data), 3392 kmp_critical_name *lck) { 3393 3394 KMP_COUNT_BLOCK(REDUCE_nowait); 3395 int retval = 0; 3396 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3397 #if OMP_40_ENABLED 3398 kmp_info_t *th; 3399 kmp_team_t *team; 3400 int teams_swapped = 0, task_state; 3401 #endif 3402 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3403 3404 // why do we need this initialization here at all? 3405 // Reduction clause can not be used as a stand-alone directive. 3406 3407 // do not call __kmp_serial_initialize(), it will be called by 3408 // __kmp_parallel_initialize() if needed 3409 // possible detection of false-positive race by the threadchecker ??? 3410 if (!TCR_4(__kmp_init_parallel)) 3411 __kmp_parallel_initialize(); 3412 3413 #if OMP_50_ENABLED 3414 __kmp_resume_if_soft_paused(); 3415 #endif 3416 3417 // check correctness of reduce block nesting 3418 #if KMP_USE_DYNAMIC_LOCK 3419 if (__kmp_env_consistency_check) 3420 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3421 #else 3422 if (__kmp_env_consistency_check) 3423 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3424 #endif 3425 3426 #if OMP_40_ENABLED 3427 th = __kmp_thread_from_gtid(global_tid); 3428 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3429 #endif // OMP_40_ENABLED 3430 3431 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3432 // the value should be kept in a variable 3433 // the variable should be either a construct-specific or thread-specific 3434 // property, not a team specific property 3435 // (a thread can reach the next reduce block on the next construct, reduce 3436 // method may differ on the next construct) 3437 // an ident_t "loc" parameter could be used as a construct-specific property 3438 // (what if loc == 0?) 
3439 // (if both construct-specific and team-specific variables were shared, 3440 // then unness extra syncs should be needed) 3441 // a thread-specific variable is better regarding two issues above (next 3442 // construct and extra syncs) 3443 // a thread-specific "th_local.reduction_method" variable is used currently 3444 // each thread executes 'determine' and 'set' lines (no need to execute by one 3445 // thread, to avoid unness extra syncs) 3446 3447 packed_reduction_method = __kmp_determine_reduction_method( 3448 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3449 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3450 3451 if (packed_reduction_method == critical_reduce_block) { 3452 3453 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3454 retval = 1; 3455 3456 } else if (packed_reduction_method == empty_reduce_block) { 3457 3458 // usage: if team size == 1, no synchronization is required ( Intel 3459 // platforms only ) 3460 retval = 1; 3461 3462 } else if (packed_reduction_method == atomic_reduce_block) { 3463 3464 retval = 2; 3465 3466 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3467 // won't be called by the code gen) 3468 // (it's not quite good, because the checking block has been closed by 3469 // this 'pop', 3470 // but atomic operation has not been executed yet, will be executed 3471 // slightly later, literally on next instruction) 3472 if (__kmp_env_consistency_check) 3473 __kmp_pop_sync(global_tid, ct_reduce, loc); 3474 3475 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3476 tree_reduce_block)) { 3477 3478 // AT: performance issue: a real barrier here 3479 // AT: (if master goes slow, other threads are blocked here waiting for the 3480 // master to come and release them) 3481 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3482 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3483 // be confusing to a customer) 3484 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3485 // might go faster and be more in line with sense of NOWAIT 3486 // AT: TO DO: do epcc test and compare times 3487 3488 // this barrier should be invisible to a customer and to the threading profile 3489 // tool (it's neither a terminating barrier nor customer's code, it's 3490 // used for an internal purpose) 3491 #if OMPT_SUPPORT 3492 // JP: can this barrier potentially leed to task scheduling? 3493 // JP: as long as there is a barrier in the implementation, OMPT should and 3494 // will provide the barrier events 3495 // so we set-up the necessary frame/return addresses. 3496 ompt_frame_t *ompt_frame; 3497 if (ompt_enabled.enabled) { 3498 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3499 if (ompt_frame->enter_frame.ptr == NULL) 3500 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3501 OMPT_STORE_RETURN_ADDRESS(global_tid); 3502 } 3503 #endif 3504 #if USE_ITT_NOTIFY 3505 __kmp_threads[global_tid]->th.th_ident = loc; 3506 #endif 3507 retval = 3508 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3509 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3510 retval = (retval != 0) ? 
(0) : (1); 3511 #if OMPT_SUPPORT && OMPT_OPTIONAL 3512 if (ompt_enabled.enabled) { 3513 ompt_frame->enter_frame = ompt_data_none; 3514 } 3515 #endif 3516 3517 // all other workers except master should do this pop here 3518 // ( none of other workers will get to __kmpc_end_reduce_nowait() ) 3519 if (__kmp_env_consistency_check) { 3520 if (retval == 0) { 3521 __kmp_pop_sync(global_tid, ct_reduce, loc); 3522 } 3523 } 3524 3525 } else { 3526 3527 // should never reach this block 3528 KMP_ASSERT(0); // "unexpected method" 3529 } 3530 #if OMP_40_ENABLED 3531 if (teams_swapped) { 3532 __kmp_restore_swapped_teams(th, team, task_state); 3533 } 3534 #endif 3535 KA_TRACE( 3536 10, 3537 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3538 global_tid, packed_reduction_method, retval)); 3539 3540 return retval; 3541 } 3542 3543 /*! 3544 @ingroup SYNCHRONIZATION 3545 @param loc source location information 3546 @param global_tid global thread id. 3547 @param lck pointer to the unique lock data structure 3548 3549 Finish the execution of a reduce nowait. 3550 */ 3551 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3552 kmp_critical_name *lck) { 3553 3554 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3555 3556 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3557 3558 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3559 3560 if (packed_reduction_method == critical_reduce_block) { 3561 3562 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3563 3564 } else if (packed_reduction_method == empty_reduce_block) { 3565 3566 // usage: if team size == 1, no synchronization is required ( on Intel 3567 // platforms only ) 3568 3569 } else if (packed_reduction_method == atomic_reduce_block) { 3570 3571 // neither master nor other workers should get here 3572 // (code gen does not generate this call in case 2: atomic reduce block) 3573 // actually it's better to remove this elseif at all; 3574 // after removal this value will checked by the 'else' and will assert 3575 3576 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3577 tree_reduce_block)) { 3578 3579 // only master gets here 3580 3581 } else { 3582 3583 // should never reach this block 3584 KMP_ASSERT(0); // "unexpected method" 3585 } 3586 3587 if (__kmp_env_consistency_check) 3588 __kmp_pop_sync(global_tid, ct_reduce, loc); 3589 3590 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3591 global_tid, packed_reduction_method)); 3592 3593 return; 3594 } 3595 3596 /* 2.a.ii. Reduce Block with a terminating barrier */ 3597 3598 /*! 3599 @ingroup SYNCHRONIZATION 3600 @param loc source location information 3601 @param global_tid global thread number 3602 @param num_vars number of items (variables) to be reduced 3603 @param reduce_size size of data in bytes to be reduced 3604 @param reduce_data pointer to data to be reduced 3605 @param reduce_func callback function providing reduction operation on two 3606 operands and returning result of reduction in lhs_data 3607 @param lck pointer to the unique lock data structure 3608 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3609 threads if atomic reduction needed 3610 3611 A blocking reduce that includes an implicit barrier. 
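
As an illustration only (a sketch of plausible code generation, not the literal
sequence emitted by any particular compiler), a caller is expected to dispatch
on the return value roughly as follows; my_data, my_reduce_func, global_result
and my_atomic_update are hypothetical names, and crit is a hypothetical
statically allocated kmp_critical_name passed by address:

    switch (__kmpc_reduce(loc, gtid, 1, sizeof(my_data), &my_data,
                          my_reduce_func, &crit)) {
    case 1: // this thread folds its partial result into the shared value
      // ... combine my_data into global_result ...
      __kmpc_end_reduce(loc, gtid, &crit);
      break;
    case 2: // every thread updates the shared value atomically
      my_atomic_update(&global_result, &my_data);
      break;
    default: // 0: another thread performs the combination; nothing to do here
      break;
    }

Whether __kmpc_end_reduce() is also invoked on the atomic path (case 2) is a
code-generation choice; the corresponding branch is handled in
__kmpc_end_reduce() below.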
3612 */ 3613 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3614 size_t reduce_size, void *reduce_data, 3615 void (*reduce_func)(void *lhs_data, void *rhs_data), 3616 kmp_critical_name *lck) { 3617 KMP_COUNT_BLOCK(REDUCE_wait); 3618 int retval = 0; 3619 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3620 #if OMP_40_ENABLED 3621 kmp_info_t *th; 3622 kmp_team_t *team; 3623 int teams_swapped = 0, task_state; 3624 #endif 3625 3626 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3627 3628 // why do we need this initialization here at all? 3629 // Reduction clause can not be a stand-alone directive. 3630 3631 // do not call __kmp_serial_initialize(), it will be called by 3632 // __kmp_parallel_initialize() if needed 3633 // possible detection of false-positive race by the threadchecker ??? 3634 if (!TCR_4(__kmp_init_parallel)) 3635 __kmp_parallel_initialize(); 3636 3637 #if OMP_50_ENABLED 3638 __kmp_resume_if_soft_paused(); 3639 #endif 3640 3641 // check correctness of reduce block nesting 3642 #if KMP_USE_DYNAMIC_LOCK 3643 if (__kmp_env_consistency_check) 3644 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3645 #else 3646 if (__kmp_env_consistency_check) 3647 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3648 #endif 3649 3650 #if OMP_40_ENABLED 3651 th = __kmp_thread_from_gtid(global_tid); 3652 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3653 #endif // OMP_40_ENABLED 3654 3655 packed_reduction_method = __kmp_determine_reduction_method( 3656 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3657 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3658 3659 if (packed_reduction_method == critical_reduce_block) { 3660 3661 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3662 retval = 1; 3663 3664 } else if (packed_reduction_method == empty_reduce_block) { 3665 3666 // usage: if team size == 1, no synchronization is required ( Intel 3667 // platforms only ) 3668 retval = 1; 3669 3670 } else if (packed_reduction_method == atomic_reduce_block) { 3671 3672 retval = 2; 3673 3674 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3675 tree_reduce_block)) { 3676 3677 // case tree_reduce_block: 3678 // this barrier should be visible to a customer and to the threading profile 3679 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3680 #if OMPT_SUPPORT 3681 ompt_frame_t *ompt_frame; 3682 if (ompt_enabled.enabled) { 3683 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3684 if (ompt_frame->enter_frame.ptr == NULL) 3685 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3686 OMPT_STORE_RETURN_ADDRESS(global_tid); 3687 } 3688 #endif 3689 #if USE_ITT_NOTIFY 3690 __kmp_threads[global_tid]->th.th_ident = 3691 loc; // needed for correct notification of frames 3692 #endif 3693 retval = 3694 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3695 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3696 retval = (retval != 0) ? 
(0) : (1); 3697 #if OMPT_SUPPORT && OMPT_OPTIONAL 3698 if (ompt_enabled.enabled) { 3699 ompt_frame->enter_frame = ompt_data_none; 3700 } 3701 #endif 3702 3703 // all other workers except master should do this pop here 3704 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3705 if (__kmp_env_consistency_check) { 3706 if (retval == 0) { // 0: all other workers; 1: master 3707 __kmp_pop_sync(global_tid, ct_reduce, loc); 3708 } 3709 } 3710 3711 } else { 3712 3713 // should never reach this block 3714 KMP_ASSERT(0); // "unexpected method" 3715 } 3716 #if OMP_40_ENABLED 3717 if (teams_swapped) { 3718 __kmp_restore_swapped_teams(th, team, task_state); 3719 } 3720 #endif 3721 3722 KA_TRACE(10, 3723 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3724 global_tid, packed_reduction_method, retval)); 3725 3726 return retval; 3727 } 3728 3729 /*! 3730 @ingroup SYNCHRONIZATION 3731 @param loc source location information 3732 @param global_tid global thread id. 3733 @param lck pointer to the unique lock data structure 3734 3735 Finish the execution of a blocking reduce. 3736 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3737 start function. 3738 */ 3739 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3740 kmp_critical_name *lck) { 3741 3742 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3743 #if OMP_40_ENABLED 3744 kmp_info_t *th; 3745 kmp_team_t *team; 3746 int teams_swapped = 0, task_state; 3747 #endif 3748 3749 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3750 3751 #if OMP_40_ENABLED 3752 th = __kmp_thread_from_gtid(global_tid); 3753 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3754 #endif // OMP_40_ENABLED 3755 3756 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3757 3758 // this barrier should be visible to a customer and to the threading profile 3759 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3760 3761 if (packed_reduction_method == critical_reduce_block) { 3762 3763 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3764 3765 // TODO: implicit barrier: should be exposed 3766 #if OMPT_SUPPORT 3767 ompt_frame_t *ompt_frame; 3768 if (ompt_enabled.enabled) { 3769 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3770 if (ompt_frame->enter_frame.ptr == NULL) 3771 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3772 OMPT_STORE_RETURN_ADDRESS(global_tid); 3773 } 3774 #endif 3775 #if USE_ITT_NOTIFY 3776 __kmp_threads[global_tid]->th.th_ident = loc; 3777 #endif 3778 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3779 #if OMPT_SUPPORT && OMPT_OPTIONAL 3780 if (ompt_enabled.enabled) { 3781 ompt_frame->enter_frame = ompt_data_none; 3782 } 3783 #endif 3784 3785 } else if (packed_reduction_method == empty_reduce_block) { 3786 3787 // usage: if team size==1, no synchronization is required (Intel platforms only) 3788 3789 // TODO: implicit barrier: should be exposed 3790 #if OMPT_SUPPORT 3791 ompt_frame_t *ompt_frame; 3792 if (ompt_enabled.enabled) { 3793 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3794 if (ompt_frame->enter_frame.ptr == NULL) 3795 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3796 OMPT_STORE_RETURN_ADDRESS(global_tid); 3797 } 3798 #endif 3799 #if USE_ITT_NOTIFY 3800 __kmp_threads[global_tid]->th.th_ident = loc; 3801 #endif 3802 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3803 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 3804 if (ompt_enabled.enabled) { 3805 ompt_frame->enter_frame = ompt_data_none; 3806 } 3807 #endif 3808 3809 } else if (packed_reduction_method == atomic_reduce_block) { 3810 3811 #if OMPT_SUPPORT 3812 ompt_frame_t *ompt_frame; 3813 if (ompt_enabled.enabled) { 3814 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3815 if (ompt_frame->enter_frame.ptr == NULL) 3816 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3817 OMPT_STORE_RETURN_ADDRESS(global_tid); 3818 } 3819 #endif 3820 // TODO: implicit barrier: should be exposed 3821 #if USE_ITT_NOTIFY 3822 __kmp_threads[global_tid]->th.th_ident = loc; 3823 #endif 3824 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3825 #if OMPT_SUPPORT && OMPT_OPTIONAL 3826 if (ompt_enabled.enabled) { 3827 ompt_frame->enter_frame = ompt_data_none; 3828 } 3829 #endif 3830 3831 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3832 tree_reduce_block)) { 3833 3834 // only master executes here (master releases all other workers) 3835 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3836 global_tid); 3837 3838 } else { 3839 3840 // should never reach this block 3841 KMP_ASSERT(0); // "unexpected method" 3842 } 3843 #if OMP_40_ENABLED 3844 if (teams_swapped) { 3845 __kmp_restore_swapped_teams(th, team, task_state); 3846 } 3847 #endif 3848 3849 if (__kmp_env_consistency_check) 3850 __kmp_pop_sync(global_tid, ct_reduce, loc); 3851 3852 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3853 global_tid, packed_reduction_method)); 3854 3855 return; 3856 } 3857 3858 #undef __KMP_GET_REDUCTION_METHOD 3859 #undef __KMP_SET_REDUCTION_METHOD 3860 3861 /* end of interface to fast scalable reduce routines */ 3862 3863 kmp_uint64 __kmpc_get_taskid() { 3864 3865 kmp_int32 gtid; 3866 kmp_info_t *thread; 3867 3868 gtid = __kmp_get_gtid(); 3869 if (gtid < 0) { 3870 return 0; 3871 } 3872 thread = __kmp_thread_from_gtid(gtid); 3873 return thread->th.th_current_task->td_task_id; 3874 3875 } // __kmpc_get_taskid 3876 3877 kmp_uint64 __kmpc_get_parent_taskid() { 3878 3879 kmp_int32 gtid; 3880 kmp_info_t *thread; 3881 kmp_taskdata_t *parent_task; 3882 3883 gtid = __kmp_get_gtid(); 3884 if (gtid < 0) { 3885 return 0; 3886 } 3887 thread = __kmp_thread_from_gtid(gtid); 3888 parent_task = thread->th.th_current_task->td_parent; 3889 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3890 3891 } // __kmpc_get_parent_taskid 3892 3893 #if OMP_45_ENABLED 3894 /*! 3895 @ingroup WORK_SHARING 3896 @param loc source location information. 3897 @param gtid global thread number. 3898 @param num_dims number of associated doacross loops. 3899 @param dims info on loops bounds. 3900 3901 Initialize doacross loop information. 3902 Expect compiler send us inclusive bounds, 3903 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
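
As a fuller illustration (a hedged sketch only, not literal compiler output), a
doacross loop such as

    #pragma omp for ordered(1)
    for (i = 2; i < 9; i += 2) {
      #pragma omp ordered depend(sink: i - 2)
      // ... consume data produced by iteration i - 2 ...
      #pragma omp ordered depend(source)
      // ... produce data for later iterations ...
    }

is expected to be lowered so that each thread in the team calls
__kmpc_doacross_init() once with num_dims == 1 and a single kmp_dim entry of
{lo = 2, up = 8, st = 2}, calls __kmpc_doacross_wait() with the sink iteration
vector {i - 2} and __kmpc_doacross_post() with the current iteration vector
{i} inside the loop body, and calls __kmpc_doacross_fini() after the loop.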
3904 */ 3905 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3906 const struct kmp_dim *dims) { 3907 int j, idx; 3908 kmp_int64 last, trace_count; 3909 kmp_info_t *th = __kmp_threads[gtid]; 3910 kmp_team_t *team = th->th.th_team; 3911 kmp_uint32 *flags; 3912 kmp_disp_t *pr_buf = th->th.th_dispatch; 3913 dispatch_shared_info_t *sh_buf; 3914 3915 KA_TRACE( 3916 20, 3917 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3918 gtid, num_dims, !team->t.t_serialized)); 3919 KMP_DEBUG_ASSERT(dims != NULL); 3920 KMP_DEBUG_ASSERT(num_dims > 0); 3921 3922 if (team->t.t_serialized) { 3923 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3924 return; // no dependencies if team is serialized 3925 } 3926 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3927 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3928 // the next loop 3929 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3930 3931 // Save bounds info into allocated private buffer 3932 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3933 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3934 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3935 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3936 pr_buf->th_doacross_info[0] = 3937 (kmp_int64)num_dims; // first element is number of dimensions 3938 // Save also address of num_done in order to access it later without knowing 3939 // the buffer index 3940 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3941 pr_buf->th_doacross_info[2] = dims[0].lo; 3942 pr_buf->th_doacross_info[3] = dims[0].up; 3943 pr_buf->th_doacross_info[4] = dims[0].st; 3944 last = 5; 3945 for (j = 1; j < num_dims; ++j) { 3946 kmp_int64 3947 range_length; // To keep ranges of all dimensions but the first dims[0] 3948 if (dims[j].st == 1) { // most common case 3949 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3950 range_length = dims[j].up - dims[j].lo + 1; 3951 } else { 3952 if (dims[j].st > 0) { 3953 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3954 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3955 } else { // negative increment 3956 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3957 range_length = 3958 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3959 } 3960 } 3961 pr_buf->th_doacross_info[last++] = range_length; 3962 pr_buf->th_doacross_info[last++] = dims[j].lo; 3963 pr_buf->th_doacross_info[last++] = dims[j].up; 3964 pr_buf->th_doacross_info[last++] = dims[j].st; 3965 } 3966 3967 // Compute total trip count. 3968 // Start with range of dims[0] which we don't need to keep in the buffer. 
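  // Worked example (illustrative values only, matching the inclusive-bounds
  // example in the function's description): dims[0] = {lo=2, up=8, st=2}
  // contributes (8 - 2) / 2 + 1 = 4 iterations; a second dimension of
  // {lo=0, up=9, st=1} would have stored range_length = 10 above, giving
  // trace_count = 4 * 10 = 40. One flag bit per iteration is then allocated
  // in the shared doacross_flags array below.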
3969 if (dims[0].st == 1) { // most common case 3970 trace_count = dims[0].up - dims[0].lo + 1; 3971 } else if (dims[0].st > 0) { 3972 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3973 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3974 } else { // negative increment 3975 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3976 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3977 } 3978 for (j = 1; j < num_dims; ++j) { 3979 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3980 } 3981 KMP_DEBUG_ASSERT(trace_count > 0); 3982 3983 // Check if shared buffer is not occupied by other loop (idx - 3984 // __kmp_dispatch_num_buffers) 3985 if (idx != sh_buf->doacross_buf_idx) { 3986 // Shared buffer is occupied, wait for it to be free 3987 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3988 __kmp_eq_4, NULL); 3989 } 3990 #if KMP_32_BIT_ARCH 3991 // Check if we are the first thread. After the CAS the first thread gets 0, 3992 // others get 1 if initialization is in progress, allocated pointer otherwise. 3993 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3994 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3995 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3996 #else 3997 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3998 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3999 #endif 4000 if (flags == NULL) { 4001 // we are the first thread, allocate the array of flags 4002 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 4003 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4004 KMP_MB(); 4005 sh_buf->doacross_flags = flags; 4006 } else if (flags == (kmp_uint32 *)1) { 4007 #if KMP_32_BIT_ARCH 4008 // initialization is still in progress, need to wait 4009 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4010 #else 4011 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4012 #endif 4013 KMP_YIELD(TRUE); 4014 KMP_MB(); 4015 } else { 4016 KMP_MB(); 4017 } 4018 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4019 pr_buf->th_doacross_flags = 4020 sh_buf->doacross_flags; // save private copy in order to not 4021 // touch shared buffer on each iteration 4022 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4023 } 4024 4025 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4026 kmp_int32 shft, num_dims, i; 4027 kmp_uint32 flag; 4028 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4029 kmp_info_t *th = __kmp_threads[gtid]; 4030 kmp_team_t *team = th->th.th_team; 4031 kmp_disp_t *pr_buf; 4032 kmp_int64 lo, up, st; 4033 4034 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4035 if (team->t.t_serialized) { 4036 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4037 return; // no dependencies if team is serialized 4038 } 4039 4040 // calculate sequential iteration number and check out-of-bounds condition 4041 pr_buf = th->th.th_dispatch; 4042 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4043 num_dims = pr_buf->th_doacross_info[0]; 4044 lo = pr_buf->th_doacross_info[2]; 4045 up = pr_buf->th_doacross_info[3]; 4046 st = pr_buf->th_doacross_info[4]; 4047 if (st == 1) { // most common case 4048 if (vec[0] < lo || vec[0] > up) { 4049 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4050 "bounds [%lld,%lld]\n", 4051 gtid, vec[0], lo, up)); 4052 return; 4053 } 4054 iter_number = 
vec[0] - lo; 4055 } else if (st > 0) { 4056 if (vec[0] < lo || vec[0] > up) { 4057 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4058 "bounds [%lld,%lld]\n", 4059 gtid, vec[0], lo, up)); 4060 return; 4061 } 4062 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4063 } else { // negative increment 4064 if (vec[0] > lo || vec[0] < up) { 4065 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4066 "bounds [%lld,%lld]\n", 4067 gtid, vec[0], lo, up)); 4068 return; 4069 } 4070 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4071 } 4072 for (i = 1; i < num_dims; ++i) { 4073 kmp_int64 iter, ln; 4074 kmp_int32 j = i * 4; 4075 ln = pr_buf->th_doacross_info[j + 1]; 4076 lo = pr_buf->th_doacross_info[j + 2]; 4077 up = pr_buf->th_doacross_info[j + 3]; 4078 st = pr_buf->th_doacross_info[j + 4]; 4079 if (st == 1) { 4080 if (vec[i] < lo || vec[i] > up) { 4081 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4082 "bounds [%lld,%lld]\n", 4083 gtid, vec[i], lo, up)); 4084 return; 4085 } 4086 iter = vec[i] - lo; 4087 } else if (st > 0) { 4088 if (vec[i] < lo || vec[i] > up) { 4089 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4090 "bounds [%lld,%lld]\n", 4091 gtid, vec[i], lo, up)); 4092 return; 4093 } 4094 iter = (kmp_uint64)(vec[i] - lo) / st; 4095 } else { // st < 0 4096 if (vec[i] > lo || vec[i] < up) { 4097 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4098 "bounds [%lld,%lld]\n", 4099 gtid, vec[i], lo, up)); 4100 return; 4101 } 4102 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4103 } 4104 iter_number = iter + ln * iter_number; 4105 } 4106 shft = iter_number % 32; // use 32-bit granularity 4107 iter_number >>= 5; // divided by 32 4108 flag = 1 << shft; 4109 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4110 KMP_YIELD(TRUE); 4111 } 4112 KMP_MB(); 4113 KA_TRACE(20, 4114 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4115 gtid, (iter_number << 5) + shft)); 4116 } 4117 4118 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4119 kmp_int32 shft, num_dims, i; 4120 kmp_uint32 flag; 4121 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4122 kmp_info_t *th = __kmp_threads[gtid]; 4123 kmp_team_t *team = th->th.th_team; 4124 kmp_disp_t *pr_buf; 4125 kmp_int64 lo, st; 4126 4127 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4128 if (team->t.t_serialized) { 4129 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4130 return; // no dependencies if team is serialized 4131 } 4132 4133 // calculate sequential iteration number (same as in "wait" but no 4134 // out-of-bounds checks) 4135 pr_buf = th->th.th_dispatch; 4136 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4137 num_dims = pr_buf->th_doacross_info[0]; 4138 lo = pr_buf->th_doacross_info[2]; 4139 st = pr_buf->th_doacross_info[4]; 4140 if (st == 1) { // most common case 4141 iter_number = vec[0] - lo; 4142 } else if (st > 0) { 4143 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4144 } else { // negative increment 4145 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4146 } 4147 for (i = 1; i < num_dims; ++i) { 4148 kmp_int64 iter, ln; 4149 kmp_int32 j = i * 4; 4150 ln = pr_buf->th_doacross_info[j + 1]; 4151 lo = pr_buf->th_doacross_info[j + 2]; 4152 st = pr_buf->th_doacross_info[j + 4]; 4153 if (st == 1) { 4154 iter = vec[i] - lo; 4155 } else if (st > 0) { 4156 iter = (kmp_uint64)(vec[i] - lo) / st; 4157 } else { // st < 0 4158 iter = 
(kmp_uint64)(lo - vec[i]) / (-st); 4159 } 4160 iter_number = iter + ln * iter_number; 4161 } 4162 shft = iter_number % 32; // use 32-bit granularity 4163 iter_number >>= 5; // divided by 32 4164 flag = 1 << shft; 4165 KMP_MB(); 4166 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4167 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4168 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4169 (iter_number << 5) + shft)); 4170 } 4171 4172 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4173 kmp_int32 num_done; 4174 kmp_info_t *th = __kmp_threads[gtid]; 4175 kmp_team_t *team = th->th.th_team; 4176 kmp_disp_t *pr_buf = th->th.th_dispatch; 4177 4178 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4179 if (team->t.t_serialized) { 4180 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4181 return; // nothing to do 4182 } 4183 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4184 if (num_done == th->th.th_team_nproc) { 4185 // we are the last thread, need to free shared resources 4186 int idx = pr_buf->th_doacross_buf_idx - 1; 4187 dispatch_shared_info_t *sh_buf = 4188 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4189 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4190 (kmp_int64)&sh_buf->doacross_num_done); 4191 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4192 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4193 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4194 sh_buf->doacross_flags = NULL; 4195 sh_buf->doacross_num_done = 0; 4196 sh_buf->doacross_buf_idx += 4197 __kmp_dispatch_num_buffers; // free buffer for future re-use 4198 } 4199 // free private resources (need to keep buffer index forever) 4200 pr_buf->th_doacross_flags = NULL; 4201 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4202 pr_buf->th_doacross_info = NULL; 4203 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4204 } 4205 #endif 4206 4207 #if OMP_50_ENABLED 4208 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */ 4209 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4210 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4211 } 4212 4213 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4214 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4215 } 4216 4217 int __kmpc_get_target_offload(void) { 4218 if (!__kmp_init_serial) { 4219 __kmp_serial_initialize(); 4220 } 4221 return __kmp_target_offload; 4222 } 4223 4224 int __kmpc_pause_resource(kmp_pause_status_t level) { 4225 if (!__kmp_init_serial) { 4226 return 1; // Can't pause if runtime is not initialized 4227 } 4228 return __kmp_pause_resource(level); 4229 } 4230 #endif // OMP_50_ENABLED 4231 4232 // end of file // 4233
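
/* Illustrative usage (user code, not part of this library): the omp_alloc and
   omp_free entry points above implement the OpenMP 5.0 allocator API that
   applications reach through <omp.h>. A minimal sketch, assuming the
   predefined omp_default_mem_alloc handle:

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }

   Any other valid omp_allocator_handle_t (predefined or created with
   omp_init_allocator()) may be passed instead. */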