/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), so this call counts all such threads even
if they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active
parallel construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
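
/* Illustrative note (not part of the runtime): the entry points above answer
   different questions. Assuming they are called from inside an active
   parallel region,

     int gtid = __kmpc_global_thread_num(&loc); // runtime-wide id, unique
                                                // across all runtime threads
     int tid  = __kmpc_bound_thread_num(&loc);  // same value as
                                                // omp_get_thread_num()

   `loc` stands for the compiler-generated ident_t describing the call site;
   the variable names here are illustrative only. */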

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
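
/* Illustrative sketch (not part of the runtime): for a construct such as

     #pragma omp parallel num_threads(4)
     { use(&x); }

   a compiler typically emits an outlined routine plus a fork, roughly:

     static void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *x) {
       use(x); // parallel body; shared 'x' is passed by reference
     }
     ...
     __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x);

   An `if (cond)` clause that evaluates to false is instead handled with
   __kmpc_serialized_parallel()/__kmpc_end_serialized_parallel() around the
   outlined body. The names `outlined`, `x`, and `loc` are illustrative; the
   exact lowering is compiler-dependent. */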

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
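
/* Illustrative sketch (not part of the runtime): for

     #pragma omp teams num_teams(8) thread_limit(64)
     { team_body(&data); }

   a compiler typically outlines the body and emits roughly

     __kmpc_push_num_teams(&loc, __kmpc_global_thread_num(&loc), 8, 64);
     __kmpc_fork_teams(&loc, 1, (kmpc_micro)outlined_teams_body, &data);

   with the OpenMP 5.1 bounds form (`num_teams(lo:hi)`) using
   __kmpc_push_num_teams_51() instead. Names are illustrative; the exact
   lowering is compiler-dependent. */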

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset/clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is addressed as well (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
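
/* Illustrative sketch (not part of the runtime): a bare

     #pragma omp barrier

   is lowered to a single call,

     __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));

   and the same entry point implements the implicit barrier at the end of
   worksharing constructs unless a `nowait` clause removes it. `loc` is the
   compiler-generated ident_t for the call site. */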

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
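
/* Illustrative sketch (not part of the runtime): a

     #pragma omp master
     { body(); }

   block becomes a guarded region with no implied barrier:

     if (__kmpc_master(&loc, gtid)) {
       body();
       __kmpc_end_master(&loc, gtid);
     }

   Only the thread for which __kmpc_master() returned 1 may call
   __kmpc_end_master(). */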

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
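
/* Illustrative sketch (not part of the runtime): inside a loop compiled with
   an ordered clause,

     #pragma omp for ordered
     for (...) {
       ...
       #pragma omp ordered
       { ordered_body(); }
     }

   the ordered block is bracketed by

     __kmpc_ordered(&loc, gtid);
     ordered_body();
     __kmpc_end_ordered(&loc, gtid);

   which hands threads through the block in iteration order. Names are
   illustrative; the exact lowering is compiler-dependent. */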

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      kmp_uint32 spins; \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      do { \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD(TRUE); \
        } else { \
          KMP_YIELD_SPIN(spins); \
        } \
        __kmp_spin_backoff(&backoff); \
      } while ( \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    } \
    KMP_FSYNC_ACQUIRED(l); \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!cond) { \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | KMP_LOCK_BUSY(1, futex))) { \
          continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      } \
      kmp_int32 rc; \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
        continue; \
      } \
      gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed with no usage, but that is not
// a problem, because this is not a real event seen by the user but rather the
// setting of a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK
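
/* Illustrative sketch (not part of the runtime): a named critical section

     #pragma omp critical(name)
     { body(); }

   is lowered around a per-name kmp_critical_name object:

     static kmp_critical_name crit_name; // zero-initialized, one per name
     __kmpc_critical(&loc, gtid, &crit_name);
     body();
     __kmpc_end_critical(&loc, gtid, &crit_name);

   With a `hint(...)` clause the first call becomes
   __kmpc_critical_with_hint(&loc, gtid, &crit_name, hint). Names are
   illustrative; the compiler chooses the actual symbol. */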

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the master block has no implied
barrier and nothing needs to be released when it completes.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1693 } 1694 __kmp_check_barrier(global_tid, ct_barrier, loc); 1695 } 1696 1697 #if OMPT_SUPPORT 1698 ompt_frame_t *ompt_frame; 1699 if (ompt_enabled.enabled) { 1700 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1701 if (ompt_frame->enter_frame.ptr == NULL) 1702 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1703 } 1704 OMPT_STORE_RETURN_ADDRESS(global_tid); 1705 #endif 1706 #if USE_ITT_NOTIFY 1707 __kmp_threads[global_tid]->th.th_ident = loc; 1708 #endif 1709 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1710 #if OMPT_SUPPORT && OMPT_OPTIONAL 1711 if (ompt_enabled.enabled) { 1712 ompt_frame->enter_frame = ompt_data_none; 1713 } 1714 #endif 1715 1716 ret = __kmpc_master(loc, global_tid); 1717 1718 if (__kmp_env_consistency_check) { 1719 /* there's no __kmpc_end_master called; so the (stats) */ 1720 /* actions of __kmpc_end_master are done here */ 1721 if (ret) { 1722 /* only one thread should do the pop since only */ 1723 /* one did the push (see __kmpc_master()) */ 1724 __kmp_pop_sync(global_tid, ct_master, loc); 1725 } 1726 } 1727 1728 return (ret); 1729 } 1730 1731 /* The BARRIER for a SINGLE process section is always explicit */ 1732 /*! 1733 @ingroup WORK_SHARING 1734 @param loc source location information 1735 @param global_tid global thread number 1736 @return One if this thread should execute the single construct, zero otherwise. 1737 1738 Test whether to execute a <tt>single</tt> construct. 1739 There are no implicit barriers in the two "single" calls, rather the compiler 1740 should introduce an explicit barrier if it is required. 1741 */ 1742 1743 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1744 __kmp_assert_valid_gtid(global_tid); 1745 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1746 1747 if (rc) { 1748 // We are going to execute the single statement, so we should count it. 1749 KMP_COUNT_BLOCK(OMP_SINGLE); 1750 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1751 } 1752 1753 #if OMPT_SUPPORT && OMPT_OPTIONAL 1754 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1755 kmp_team_t *team = this_thr->th.th_team; 1756 int tid = __kmp_tid_from_gtid(global_tid); 1757 1758 if (ompt_enabled.enabled) { 1759 if (rc) { 1760 if (ompt_enabled.ompt_callback_work) { 1761 ompt_callbacks.ompt_callback(ompt_callback_work)( 1762 ompt_work_single_executor, ompt_scope_begin, 1763 &(team->t.ompt_team_info.parallel_data), 1764 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1765 1, OMPT_GET_RETURN_ADDRESS(0)); 1766 } 1767 } else { 1768 if (ompt_enabled.ompt_callback_work) { 1769 ompt_callbacks.ompt_callback(ompt_callback_work)( 1770 ompt_work_single_other, ompt_scope_begin, 1771 &(team->t.ompt_team_info.parallel_data), 1772 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1773 1, OMPT_GET_RETURN_ADDRESS(0)); 1774 ompt_callbacks.ompt_callback(ompt_callback_work)( 1775 ompt_work_single_other, ompt_scope_end, 1776 &(team->t.ompt_team_info.parallel_data), 1777 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1778 1, OMPT_GET_RETURN_ADDRESS(0)); 1779 } 1780 } 1781 } 1782 #endif 1783 1784 return rc; 1785 } 1786 1787 /*! 1788 @ingroup WORK_SHARING 1789 @param loc source location information 1790 @param global_tid global thread number 1791 1792 Mark the end of a <tt>single</tt> construct. This function should 1793 only be called by the thread that executed the block of code protected 1794 by the `single` construct. 
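As an illustrative sketch only (not the exact code any particular compiler
emits), a <tt>single</tt> construct without a nowait clause might be lowered
as:
@code
if (__kmpc_single(&loc, gtid)) {
  // ... body of the single region, executed by exactly one thread ...
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // explicit barrier inserted by the compiler
@endcode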
1795 */ 1796 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1797 __kmp_assert_valid_gtid(global_tid); 1798 __kmp_exit_single(global_tid); 1799 KMP_POP_PARTITIONED_TIMER(); 1800 1801 #if OMPT_SUPPORT && OMPT_OPTIONAL 1802 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1803 kmp_team_t *team = this_thr->th.th_team; 1804 int tid = __kmp_tid_from_gtid(global_tid); 1805 1806 if (ompt_enabled.ompt_callback_work) { 1807 ompt_callbacks.ompt_callback(ompt_callback_work)( 1808 ompt_work_single_executor, ompt_scope_end, 1809 &(team->t.ompt_team_info.parallel_data), 1810 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1811 OMPT_GET_RETURN_ADDRESS(0)); 1812 } 1813 #endif 1814 } 1815 1816 /*! 1817 @ingroup WORK_SHARING 1818 @param loc Source location 1819 @param global_tid Global thread id 1820 1821 Mark the end of a statically scheduled loop. 1822 */ 1823 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1824 KMP_POP_PARTITIONED_TIMER(); 1825 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1826 1827 #if OMPT_SUPPORT && OMPT_OPTIONAL 1828 if (ompt_enabled.ompt_callback_work) { 1829 ompt_work_t ompt_work_type = ompt_work_loop; 1830 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1831 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1832 // Determine workshare type 1833 if (loc != NULL) { 1834 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1835 ompt_work_type = ompt_work_loop; 1836 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1837 ompt_work_type = ompt_work_sections; 1838 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1839 ompt_work_type = ompt_work_distribute; 1840 } else { 1841 // use default set above. 1842 // a warning about this case is provided in __kmpc_for_static_init 1843 } 1844 KMP_DEBUG_ASSERT(ompt_work_type); 1845 } 1846 ompt_callbacks.ompt_callback(ompt_callback_work)( 1847 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1848 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1849 } 1850 #endif 1851 if (__kmp_env_consistency_check) 1852 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1853 } 1854 1855 // User routines which take C-style arguments (call by value) 1856 // different from the Fortran equivalent routines 1857 1858 void ompc_set_num_threads(int arg) { 1859 // !!!!! TODO: check the per-task binding 1860 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1861 } 1862 1863 void ompc_set_dynamic(int flag) { 1864 kmp_info_t *thread; 1865 1866 /* For the thread-private implementation of the internal controls */ 1867 thread = __kmp_entry_thread(); 1868 1869 __kmp_save_internal_controls(thread); 1870 1871 set__dynamic(thread, flag ? true : false); 1872 } 1873 1874 void ompc_set_nested(int flag) { 1875 kmp_info_t *thread; 1876 1877 /* For the thread-private internal controls implementation */ 1878 thread = __kmp_entry_thread(); 1879 1880 __kmp_save_internal_controls(thread); 1881 1882 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1883 } 1884 1885 void ompc_set_max_active_levels(int max_active_levels) { 1886 /* TO DO */ 1887 /* we want per-task implementation of this internal control */ 1888 1889 /* For the per-thread internal controls implementation */ 1890 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1891 } 1892 1893 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1894 // !!!!! 
TODO: check the per-task binding 1895 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1896 } 1897 1898 int ompc_get_ancestor_thread_num(int level) { 1899 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1900 } 1901 1902 int ompc_get_team_size(int level) { 1903 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1904 } 1905 1906 /* OpenMP 5.0 Affinity Format API */ 1907 1908 void ompc_set_affinity_format(char const *format) { 1909 if (!__kmp_init_serial) { 1910 __kmp_serial_initialize(); 1911 } 1912 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1913 format, KMP_STRLEN(format) + 1); 1914 } 1915 1916 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1917 size_t format_size; 1918 if (!__kmp_init_serial) { 1919 __kmp_serial_initialize(); 1920 } 1921 format_size = KMP_STRLEN(__kmp_affinity_format); 1922 if (buffer && size) { 1923 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1924 format_size + 1); 1925 } 1926 return format_size; 1927 } 1928 1929 void ompc_display_affinity(char const *format) { 1930 int gtid; 1931 if (!TCR_4(__kmp_init_middle)) { 1932 __kmp_middle_initialize(); 1933 } 1934 gtid = __kmp_get_gtid(); 1935 __kmp_aux_display_affinity(gtid, format); 1936 } 1937 1938 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1939 char const *format) { 1940 int gtid; 1941 size_t num_required; 1942 kmp_str_buf_t capture_buf; 1943 if (!TCR_4(__kmp_init_middle)) { 1944 __kmp_middle_initialize(); 1945 } 1946 gtid = __kmp_get_gtid(); 1947 __kmp_str_buf_init(&capture_buf); 1948 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1949 if (buffer && buf_size) { 1950 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1951 capture_buf.used + 1); 1952 } 1953 __kmp_str_buf_free(&capture_buf); 1954 return num_required; 1955 } 1956 1957 void kmpc_set_stacksize(int arg) { 1958 // __kmp_aux_set_stacksize initializes the library if needed 1959 __kmp_aux_set_stacksize(arg); 1960 } 1961 1962 void kmpc_set_stacksize_s(size_t arg) { 1963 // __kmp_aux_set_stacksize initializes the library if needed 1964 __kmp_aux_set_stacksize(arg); 1965 } 1966 1967 void kmpc_set_blocktime(int arg) { 1968 int gtid, tid; 1969 kmp_info_t *thread; 1970 1971 gtid = __kmp_entry_gtid(); 1972 tid = __kmp_tid_from_gtid(gtid); 1973 thread = __kmp_thread_from_gtid(gtid); 1974 1975 __kmp_aux_set_blocktime(arg, thread, tid); 1976 } 1977 1978 void kmpc_set_library(int arg) { 1979 // __kmp_user_set_library initializes the library if needed 1980 __kmp_user_set_library((enum library_type)arg); 1981 } 1982 1983 void kmpc_set_defaults(char const *str) { 1984 // __kmp_aux_set_defaults initializes the library if needed 1985 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1986 } 1987 1988 void kmpc_set_disp_num_buffers(int arg) { 1989 // ignore after initialization because some teams have already 1990 // allocated dispatch buffers 1991 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 1992 arg <= KMP_MAX_DISP_NUM_BUFF) { 1993 __kmp_dispatch_num_buffers = arg; 1994 } 1995 } 1996 1997 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1998 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1999 return -1; 2000 #else 2001 if (!TCR_4(__kmp_init_middle)) { 2002 __kmp_middle_initialize(); 2003 } 2004 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2005 #endif 2006 } 2007 2008 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2009 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2010 return -1; 2011 #else 
2012 if (!TCR_4(__kmp_init_middle)) { 2013 __kmp_middle_initialize(); 2014 } 2015 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2016 #endif 2017 } 2018 2019 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2020 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2021 return -1; 2022 #else 2023 if (!TCR_4(__kmp_init_middle)) { 2024 __kmp_middle_initialize(); 2025 } 2026 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2027 #endif 2028 } 2029 2030 /* -------------------------------------------------------------------------- */ 2031 /*! 2032 @ingroup THREADPRIVATE 2033 @param loc source location information 2034 @param gtid global thread number 2035 @param cpy_size size of the cpy_data buffer 2036 @param cpy_data pointer to data to be copied 2037 @param cpy_func helper function to call for copying data 2038 @param didit flag variable: 1=single thread; 0=not single thread 2039 2040 __kmpc_copyprivate implements the interface for the private data broadcast 2041 needed for the copyprivate clause associated with a single region in an 2042 OpenMP<sup>*</sup> program (both C and Fortran). 2043 All threads participating in the parallel region call this routine. 2044 One of the threads (called the single thread) should have the <tt>didit</tt> 2045 variable set to 1 and all other threads should have that variable set to 0. 2046 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2047 2048 The OpenMP specification forbids the use of nowait on the single region when a 2049 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2050 barrier internally to avoid race conditions, so the code generation for the 2051 single region should avoid generating a barrier after the call to @ref 2052 __kmpc_copyprivate. 2053 2054 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2055 The <tt>loc</tt> parameter is a pointer to source location information. 2056 2057 Internal implementation: The single thread will first copy its descriptor 2058 address (cpy_data) to a team-private location, then the other threads will each 2059 call the function pointed to by the parameter cpy_func, which carries out the 2060 copy by copying the data using the cpy_data buffer. 2061 2062 The cpy_func routine used for the copy and the contents of the data area defined 2063 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2064 to be done. For instance, the cpy_data buffer can hold the actual data to be 2065 copied or it may hold a list of pointers to the data. The cpy_func routine must 2066 interpret the cpy_data buffer appropriately. 2067 2068 The interface to cpy_func is as follows: 2069 @code 2070 void cpy_func( void *destination, void *source ) 2071 @endcode 2072 where void *destination is the cpy_data pointer for the thread being copied to 2073 and void *source is the cpy_data pointer for the thread being copied from. 
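As an illustrative sketch only (the helper name, the use of a plain int and
the value being broadcast are placeholders, not part of the interface), the
generated code could look like:
@code
static void copy_int(void *destination, void *source) {
  *(int *)destination = *(int *)source; // copy the broadcast value
}

// ... inside the parallel region, executed by every thread ...
int priv;                        // this thread's copy of the variable
kmp_int32 didit = 0;
if (__kmpc_single(&loc, gtid)) { // exactly one thread enters
  priv = 42;                     // value to be broadcast
  didit = 1;
  __kmpc_end_single(&loc, gtid);
}
__kmpc_copyprivate(&loc, gtid, sizeof(int), &priv, copy_int, didit);
// no barrier is emitted here; __kmpc_copyprivate synchronizes internally
@endcode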
2074 */ 2075 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2076 void *cpy_data, void (*cpy_func)(void *, void *), 2077 kmp_int32 didit) { 2078 void **data_ptr; 2079 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2080 __kmp_assert_valid_gtid(gtid); 2081 2082 KMP_MB(); 2083 2084 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2085 2086 if (__kmp_env_consistency_check) { 2087 if (loc == 0) { 2088 KMP_WARNING(ConstructIdentInvalid); 2089 } 2090 } 2091 2092 // ToDo: Optimize the following two barriers into some kind of split barrier 2093 2094 if (didit) 2095 *data_ptr = cpy_data; 2096 2097 #if OMPT_SUPPORT 2098 ompt_frame_t *ompt_frame; 2099 if (ompt_enabled.enabled) { 2100 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2101 if (ompt_frame->enter_frame.ptr == NULL) 2102 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2103 } 2104 OMPT_STORE_RETURN_ADDRESS(gtid); 2105 #endif 2106 /* This barrier is not a barrier region boundary */ 2107 #if USE_ITT_NOTIFY 2108 __kmp_threads[gtid]->th.th_ident = loc; 2109 #endif 2110 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2111 2112 if (!didit) 2113 (*cpy_func)(cpy_data, *data_ptr); 2114 2115 // Consider next barrier a user-visible barrier for barrier region boundaries 2116 // Nesting checks are already handled by the single construct checks 2117 { 2118 #if OMPT_SUPPORT 2119 OMPT_STORE_RETURN_ADDRESS(gtid); 2120 #endif 2121 #if USE_ITT_NOTIFY 2122 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2123 // tasks can overwrite the location) 2124 #endif 2125 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2126 #if OMPT_SUPPORT && OMPT_OPTIONAL 2127 if (ompt_enabled.enabled) { 2128 ompt_frame->enter_frame = ompt_data_none; 2129 } 2130 #endif 2131 } 2132 } 2133 2134 /* -------------------------------------------------------------------------- */ 2135 2136 #define INIT_LOCK __kmp_init_user_lock_with_checks 2137 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2138 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2139 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2140 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2141 #define ACQUIRE_NESTED_LOCK_TIMED \ 2142 __kmp_acquire_nested_user_lock_with_checks_timed 2143 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2144 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2145 #define TEST_LOCK __kmp_test_user_lock_with_checks 2146 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2147 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2148 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2149 2150 // TODO: Make check abort messages use location info & pass it into 2151 // with_checks routines 2152 2153 #if KMP_USE_DYNAMIC_LOCK 2154 2155 // internal lock initializer 2156 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2157 kmp_dyna_lockseq_t seq) { 2158 if (KMP_IS_D_LOCK(seq)) { 2159 KMP_INIT_D_LOCK(lock, seq); 2160 #if USE_ITT_BUILD 2161 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2162 #endif 2163 } else { 2164 KMP_INIT_I_LOCK(lock, seq); 2165 #if USE_ITT_BUILD 2166 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2167 __kmp_itt_lock_creating(ilk->lock, loc); 2168 #endif 2169 } 2170 } 2171 2172 // internal nest lock initializer 2173 static __forceinline void 2174 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2175 kmp_dyna_lockseq_t seq) { 2176 #if KMP_USE_TSX 2177 // Don't have nested lock implementation for speculative locks 2178 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2179 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2180 seq = __kmp_user_lock_seq; 2181 #endif 2182 switch (seq) { 2183 case lockseq_tas: 2184 seq = lockseq_nested_tas; 2185 break; 2186 #if KMP_USE_FUTEX 2187 case lockseq_futex: 2188 seq = lockseq_nested_futex; 2189 break; 2190 #endif 2191 case lockseq_ticket: 2192 seq = lockseq_nested_ticket; 2193 break; 2194 case lockseq_queuing: 2195 seq = lockseq_nested_queuing; 2196 break; 2197 case lockseq_drdpa: 2198 seq = lockseq_nested_drdpa; 2199 break; 2200 default: 2201 seq = lockseq_nested_queuing; 2202 } 2203 KMP_INIT_I_LOCK(lock, seq); 2204 #if USE_ITT_BUILD 2205 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2206 __kmp_itt_lock_creating(ilk->lock, loc); 2207 #endif 2208 } 2209 2210 /* initialize the lock with a hint */ 2211 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2212 uintptr_t hint) { 2213 KMP_DEBUG_ASSERT(__kmp_init_serial); 2214 if (__kmp_env_consistency_check && user_lock == NULL) { 2215 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2216 } 2217 2218 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2219 2220 #if OMPT_SUPPORT && OMPT_OPTIONAL 2221 // This is the case, if called from omp_init_lock_with_hint: 2222 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2223 if (!codeptr) 2224 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2225 if (ompt_enabled.ompt_callback_lock_init) { 2226 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2227 ompt_mutex_lock, (omp_lock_hint_t)hint, 2228 __ompt_get_mutex_impl_type(user_lock), 2229 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2230 } 2231 #endif 2232 } 2233 2234 /* initialize the lock with a hint */ 2235 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2236 void **user_lock, uintptr_t hint) { 2237 KMP_DEBUG_ASSERT(__kmp_init_serial); 2238 if (__kmp_env_consistency_check && user_lock == NULL) { 2239 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2240 } 2241 2242 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2243 2244 #if OMPT_SUPPORT && OMPT_OPTIONAL 2245 // This is the case, if called from omp_init_lock_with_hint: 2246 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2247 if (!codeptr) 2248 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2249 if (ompt_enabled.ompt_callback_lock_init) { 2250 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2251 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2252 __ompt_get_mutex_impl_type(user_lock), 2253 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2254 } 2255 #endif 2256 } 2257 2258 #endif // KMP_USE_DYNAMIC_LOCK 2259 2260 /* initialize the lock */ 2261 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2262 #if KMP_USE_DYNAMIC_LOCK 2263 2264 KMP_DEBUG_ASSERT(__kmp_init_serial); 2265 if (__kmp_env_consistency_check && user_lock == NULL) { 2266 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2267 } 2268 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2269 2270 #if OMPT_SUPPORT && OMPT_OPTIONAL 2271 // This is the case, if called from omp_init_lock_with_hint: 2272 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2273 if (!codeptr) 2274 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2275 if (ompt_enabled.ompt_callback_lock_init) { 2276 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2277 
ompt_mutex_lock, omp_lock_hint_none, 2278 __ompt_get_mutex_impl_type(user_lock), 2279 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2280 } 2281 #endif 2282 2283 #else // KMP_USE_DYNAMIC_LOCK 2284 2285 static char const *const func = "omp_init_lock"; 2286 kmp_user_lock_p lck; 2287 KMP_DEBUG_ASSERT(__kmp_init_serial); 2288 2289 if (__kmp_env_consistency_check) { 2290 if (user_lock == NULL) { 2291 KMP_FATAL(LockIsUninitialized, func); 2292 } 2293 } 2294 2295 KMP_CHECK_USER_LOCK_INIT(); 2296 2297 if ((__kmp_user_lock_kind == lk_tas) && 2298 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2299 lck = (kmp_user_lock_p)user_lock; 2300 } 2301 #if KMP_USE_FUTEX 2302 else if ((__kmp_user_lock_kind == lk_futex) && 2303 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2304 lck = (kmp_user_lock_p)user_lock; 2305 } 2306 #endif 2307 else { 2308 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2309 } 2310 INIT_LOCK(lck); 2311 __kmp_set_user_lock_location(lck, loc); 2312 2313 #if OMPT_SUPPORT && OMPT_OPTIONAL 2314 // This is the case, if called from omp_init_lock_with_hint: 2315 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2316 if (!codeptr) 2317 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2318 if (ompt_enabled.ompt_callback_lock_init) { 2319 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2320 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2321 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2322 } 2323 #endif 2324 2325 #if USE_ITT_BUILD 2326 __kmp_itt_lock_creating(lck); 2327 #endif /* USE_ITT_BUILD */ 2328 2329 #endif // KMP_USE_DYNAMIC_LOCK 2330 } // __kmpc_init_lock 2331 2332 /* initialize the lock */ 2333 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2334 #if KMP_USE_DYNAMIC_LOCK 2335 2336 KMP_DEBUG_ASSERT(__kmp_init_serial); 2337 if (__kmp_env_consistency_check && user_lock == NULL) { 2338 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2339 } 2340 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2341 2342 #if OMPT_SUPPORT && OMPT_OPTIONAL 2343 // This is the case, if called from omp_init_lock_with_hint: 2344 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2345 if (!codeptr) 2346 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2347 if (ompt_enabled.ompt_callback_lock_init) { 2348 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2349 ompt_mutex_nest_lock, omp_lock_hint_none, 2350 __ompt_get_mutex_impl_type(user_lock), 2351 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2352 } 2353 #endif 2354 2355 #else // KMP_USE_DYNAMIC_LOCK 2356 2357 static char const *const func = "omp_init_nest_lock"; 2358 kmp_user_lock_p lck; 2359 KMP_DEBUG_ASSERT(__kmp_init_serial); 2360 2361 if (__kmp_env_consistency_check) { 2362 if (user_lock == NULL) { 2363 KMP_FATAL(LockIsUninitialized, func); 2364 } 2365 } 2366 2367 KMP_CHECK_USER_LOCK_INIT(); 2368 2369 if ((__kmp_user_lock_kind == lk_tas) && 2370 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2371 OMP_NEST_LOCK_T_SIZE)) { 2372 lck = (kmp_user_lock_p)user_lock; 2373 } 2374 #if KMP_USE_FUTEX 2375 else if ((__kmp_user_lock_kind == lk_futex) && 2376 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2377 OMP_NEST_LOCK_T_SIZE)) { 2378 lck = (kmp_user_lock_p)user_lock; 2379 } 2380 #endif 2381 else { 2382 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2383 } 2384 2385 INIT_NESTED_LOCK(lck); 2386 __kmp_set_user_lock_location(lck, loc); 2387 2388 #if OMPT_SUPPORT && OMPT_OPTIONAL 2389 // This is the case, if called from omp_init_lock_with_hint: 2390 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2391 if (!codeptr) 2392 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2393 if (ompt_enabled.ompt_callback_lock_init) { 2394 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2395 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2396 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2397 } 2398 #endif 2399 2400 #if USE_ITT_BUILD 2401 __kmp_itt_lock_creating(lck); 2402 #endif /* USE_ITT_BUILD */ 2403 2404 #endif // KMP_USE_DYNAMIC_LOCK 2405 } // __kmpc_init_nest_lock 2406 2407 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2408 #if KMP_USE_DYNAMIC_LOCK 2409 2410 #if USE_ITT_BUILD 2411 kmp_user_lock_p lck; 2412 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2413 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2414 } else { 2415 lck = (kmp_user_lock_p)user_lock; 2416 } 2417 __kmp_itt_lock_destroyed(lck); 2418 #endif 2419 #if OMPT_SUPPORT && OMPT_OPTIONAL 2420 // This is the case, if called from omp_init_lock_with_hint: 2421 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2422 if (!codeptr) 2423 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2424 if (ompt_enabled.ompt_callback_lock_destroy) { 2425 kmp_user_lock_p lck; 2426 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2427 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2428 } else { 2429 lck = (kmp_user_lock_p)user_lock; 2430 } 2431 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2432 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2433 } 2434 #endif 2435 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2436 #else 2437 kmp_user_lock_p lck; 2438 2439 if ((__kmp_user_lock_kind == lk_tas) && 2440 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2441 lck = (kmp_user_lock_p)user_lock; 2442 } 2443 #if KMP_USE_FUTEX 2444 else if ((__kmp_user_lock_kind == lk_futex) && 2445 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2446 lck = (kmp_user_lock_p)user_lock; 2447 } 2448 #endif 2449 else { 2450 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2451 } 2452 2453 #if OMPT_SUPPORT && OMPT_OPTIONAL 2454 // This is the case, if called from omp_init_lock_with_hint: 2455 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2456 if (!codeptr) 2457 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2458 if (ompt_enabled.ompt_callback_lock_destroy) { 2459 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2460 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2461 } 2462 #endif 2463 2464 #if USE_ITT_BUILD 2465 __kmp_itt_lock_destroyed(lck); 2466 #endif /* USE_ITT_BUILD */ 2467 DESTROY_LOCK(lck); 2468 2469 if ((__kmp_user_lock_kind == lk_tas) && 2470 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2471 ; 2472 } 2473 #if KMP_USE_FUTEX 2474 else if ((__kmp_user_lock_kind == lk_futex) && 2475 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2476 ; 2477 } 2478 #endif 2479 else { 2480 __kmp_user_lock_free(user_lock, gtid, lck); 2481 } 2482 #endif // KMP_USE_DYNAMIC_LOCK 2483 } // __kmpc_destroy_lock 2484 2485 /* destroy the lock */ 2486 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2487 #if KMP_USE_DYNAMIC_LOCK 2488 2489 #if USE_ITT_BUILD 2490 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2491 __kmp_itt_lock_destroyed(ilk->lock); 2492 #endif 2493 #if OMPT_SUPPORT && OMPT_OPTIONAL 2494 // This is the case, if called from omp_init_lock_with_hint: 2495 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2496 if (!codeptr) 2497 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2498 if (ompt_enabled.ompt_callback_lock_destroy) { 2499 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2500 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2501 } 2502 #endif 2503 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2504 2505 #else // KMP_USE_DYNAMIC_LOCK 2506 2507 kmp_user_lock_p lck; 2508 2509 if ((__kmp_user_lock_kind == lk_tas) && 2510 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2511 OMP_NEST_LOCK_T_SIZE)) { 2512 lck = (kmp_user_lock_p)user_lock; 2513 } 2514 #if KMP_USE_FUTEX 2515 else if ((__kmp_user_lock_kind == lk_futex) && 2516 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2517 OMP_NEST_LOCK_T_SIZE)) { 2518 lck = (kmp_user_lock_p)user_lock; 2519 } 2520 #endif 2521 else { 2522 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2523 } 2524 2525 #if OMPT_SUPPORT && OMPT_OPTIONAL 2526 // This is the case, if called from omp_init_lock_with_hint: 2527 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2528 if (!codeptr) 2529 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2530 if (ompt_enabled.ompt_callback_lock_destroy) { 2531 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2532 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2533 } 2534 #endif 2535 2536 #if USE_ITT_BUILD 2537 __kmp_itt_lock_destroyed(lck); 2538 #endif /* USE_ITT_BUILD */ 2539 2540 DESTROY_NESTED_LOCK(lck); 2541 2542 if ((__kmp_user_lock_kind == lk_tas) && 2543 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2544 OMP_NEST_LOCK_T_SIZE)) { 2545 ; 2546 } 2547 #if KMP_USE_FUTEX 2548 else if ((__kmp_user_lock_kind == lk_futex) && 2549 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2550 OMP_NEST_LOCK_T_SIZE)) { 2551 ; 2552 } 2553 #endif 2554 else { 2555 __kmp_user_lock_free(user_lock, gtid, lck); 2556 } 2557 #endif // KMP_USE_DYNAMIC_LOCK 2558 } // __kmpc_destroy_nest_lock 2559 2560 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2561 KMP_COUNT_BLOCK(OMP_set_lock); 2562 #if KMP_USE_DYNAMIC_LOCK 2563 int tag = KMP_EXTRACT_D_TAG(user_lock); 2564 #if USE_ITT_BUILD 2565 __kmp_itt_lock_acquiring( 2566 (kmp_user_lock_p) 2567 user_lock); // itt function will get to the right lock object. 
2568 #endif 2569 #if OMPT_SUPPORT && OMPT_OPTIONAL 2570 // This is the case, if called from omp_init_lock_with_hint: 2571 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2572 if (!codeptr) 2573 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2574 if (ompt_enabled.ompt_callback_mutex_acquire) { 2575 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2576 ompt_mutex_lock, omp_lock_hint_none, 2577 __ompt_get_mutex_impl_type(user_lock), 2578 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2579 } 2580 #endif 2581 #if KMP_USE_INLINED_TAS 2582 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2583 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2584 } else 2585 #elif KMP_USE_INLINED_FUTEX 2586 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2587 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2588 } else 2589 #endif 2590 { 2591 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2592 } 2593 #if USE_ITT_BUILD 2594 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2595 #endif 2596 #if OMPT_SUPPORT && OMPT_OPTIONAL 2597 if (ompt_enabled.ompt_callback_mutex_acquired) { 2598 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2599 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2600 } 2601 #endif 2602 2603 #else // KMP_USE_DYNAMIC_LOCK 2604 2605 kmp_user_lock_p lck; 2606 2607 if ((__kmp_user_lock_kind == lk_tas) && 2608 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2609 lck = (kmp_user_lock_p)user_lock; 2610 } 2611 #if KMP_USE_FUTEX 2612 else if ((__kmp_user_lock_kind == lk_futex) && 2613 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2614 lck = (kmp_user_lock_p)user_lock; 2615 } 2616 #endif 2617 else { 2618 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2619 } 2620 2621 #if USE_ITT_BUILD 2622 __kmp_itt_lock_acquiring(lck); 2623 #endif /* USE_ITT_BUILD */ 2624 #if OMPT_SUPPORT && OMPT_OPTIONAL 2625 // This is the case, if called from omp_init_lock_with_hint: 2626 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2627 if (!codeptr) 2628 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2629 if (ompt_enabled.ompt_callback_mutex_acquire) { 2630 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2631 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2632 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2633 } 2634 #endif 2635 2636 ACQUIRE_LOCK(lck, gtid); 2637 2638 #if USE_ITT_BUILD 2639 __kmp_itt_lock_acquired(lck); 2640 #endif /* USE_ITT_BUILD */ 2641 2642 #if OMPT_SUPPORT && OMPT_OPTIONAL 2643 if (ompt_enabled.ompt_callback_mutex_acquired) { 2644 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2645 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2646 } 2647 #endif 2648 2649 #endif // KMP_USE_DYNAMIC_LOCK 2650 } 2651 2652 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2653 #if KMP_USE_DYNAMIC_LOCK 2654 2655 #if USE_ITT_BUILD 2656 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2657 #endif 2658 #if OMPT_SUPPORT && OMPT_OPTIONAL 2659 // This is the case, if called from omp_init_lock_with_hint: 2660 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2661 if (!codeptr) 2662 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2663 if (ompt_enabled.enabled) { 2664 if (ompt_enabled.ompt_callback_mutex_acquire) { 2665 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2666 ompt_mutex_nest_lock, omp_lock_hint_none, 2667 __ompt_get_mutex_impl_type(user_lock), 2668 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2669 } 2670 } 2671 #endif 2672 int acquire_status = 2673 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2674 (void)acquire_status; 2675 #if USE_ITT_BUILD 2676 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2677 #endif 2678 2679 #if OMPT_SUPPORT && OMPT_OPTIONAL 2680 if (ompt_enabled.enabled) { 2681 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2682 if (ompt_enabled.ompt_callback_mutex_acquired) { 2683 // lock_first 2684 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2685 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2686 codeptr); 2687 } 2688 } else { 2689 if (ompt_enabled.ompt_callback_nest_lock) { 2690 // lock_next 2691 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2692 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2693 } 2694 } 2695 } 2696 #endif 2697 2698 #else // KMP_USE_DYNAMIC_LOCK 2699 int acquire_status; 2700 kmp_user_lock_p lck; 2701 2702 if ((__kmp_user_lock_kind == lk_tas) && 2703 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2704 OMP_NEST_LOCK_T_SIZE)) { 2705 lck = (kmp_user_lock_p)user_lock; 2706 } 2707 #if KMP_USE_FUTEX 2708 else if ((__kmp_user_lock_kind == lk_futex) && 2709 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2710 OMP_NEST_LOCK_T_SIZE)) { 2711 lck = (kmp_user_lock_p)user_lock; 2712 } 2713 #endif 2714 else { 2715 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2716 } 2717 2718 #if USE_ITT_BUILD 2719 __kmp_itt_lock_acquiring(lck); 2720 #endif /* USE_ITT_BUILD */ 2721 #if OMPT_SUPPORT && OMPT_OPTIONAL 2722 // This is the case, if called from omp_init_lock_with_hint: 2723 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2724 if (!codeptr) 2725 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2726 if (ompt_enabled.enabled) { 2727 if (ompt_enabled.ompt_callback_mutex_acquire) { 2728 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2729 ompt_mutex_nest_lock, omp_lock_hint_none, 2730 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2731 codeptr); 2732 } 2733 } 2734 #endif 2735 2736 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2737 2738 #if USE_ITT_BUILD 2739 __kmp_itt_lock_acquired(lck); 2740 #endif /* USE_ITT_BUILD */ 2741 2742 #if OMPT_SUPPORT && OMPT_OPTIONAL 2743 if (ompt_enabled.enabled) { 2744 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2745 if (ompt_enabled.ompt_callback_mutex_acquired) { 2746 // lock_first 2747 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2748 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2749 } 2750 } else { 2751 if (ompt_enabled.ompt_callback_nest_lock) { 2752 // lock_next 2753 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2754 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2755 } 2756 } 2757 } 2758 #endif 2759 2760 #endif // KMP_USE_DYNAMIC_LOCK 2761 } 2762 2763 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2764 #if KMP_USE_DYNAMIC_LOCK 2765 2766 int tag = KMP_EXTRACT_D_TAG(user_lock); 2767 #if USE_ITT_BUILD 2768 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2769 #endif 2770 #if KMP_USE_INLINED_TAS 2771 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2772 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2773 } else 2774 #elif KMP_USE_INLINED_FUTEX 2775 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2776 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2777 } else 2778 #endif 2779 { 2780 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2781 } 2782 2783 #if OMPT_SUPPORT && OMPT_OPTIONAL 2784 // This is the case, if called from omp_init_lock_with_hint: 2785 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2786 if (!codeptr) 2787 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2788 if (ompt_enabled.ompt_callback_mutex_released) { 2789 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2790 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2791 } 2792 #endif 2793 2794 #else // KMP_USE_DYNAMIC_LOCK 2795 2796 kmp_user_lock_p lck; 2797 2798 /* Can't use serial interval since not block structured */ 2799 /* release the lock */ 2800 2801 if ((__kmp_user_lock_kind == lk_tas) && 2802 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2803 #if KMP_OS_LINUX && \ 2804 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2805 // "fast" path implemented to fix customer performance issue 2806 #if USE_ITT_BUILD 2807 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2808 #endif /* USE_ITT_BUILD */ 2809 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2810 KMP_MB(); 2811 2812 #if OMPT_SUPPORT && OMPT_OPTIONAL 2813 // This is the case, if called from omp_init_lock_with_hint: 2814 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2815 if (!codeptr) 2816 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2817 if (ompt_enabled.ompt_callback_mutex_released) { 2818 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2819 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2820 } 2821 #endif 2822 2823 return; 2824 #else 2825 lck = (kmp_user_lock_p)user_lock; 2826 #endif 2827 } 2828 #if KMP_USE_FUTEX 2829 else if ((__kmp_user_lock_kind == lk_futex) && 2830 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2831 lck = (kmp_user_lock_p)user_lock; 2832 } 2833 #endif 2834 else { 2835 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2836 } 2837 2838 #if USE_ITT_BUILD 2839 __kmp_itt_lock_releasing(lck); 2840 #endif /* USE_ITT_BUILD */ 2841 2842 RELEASE_LOCK(lck, gtid); 2843 2844 #if OMPT_SUPPORT && OMPT_OPTIONAL 2845 // This is the case, if called from omp_init_lock_with_hint: 2846 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2847 if (!codeptr) 2848 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2849 if (ompt_enabled.ompt_callback_mutex_released) { 2850 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2851 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2852 } 2853 #endif 2854 2855 #endif // KMP_USE_DYNAMIC_LOCK 2856 } 2857 2858 /* release the lock */ 2859 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2860 #if KMP_USE_DYNAMIC_LOCK 2861 2862 #if USE_ITT_BUILD 2863 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2864 #endif 2865 int release_status = 2866 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2867 (void)release_status; 2868 2869 #if OMPT_SUPPORT && OMPT_OPTIONAL 2870 // This is the case, if called from omp_init_lock_with_hint: 2871 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2872 if (!codeptr) 2873 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2874 if (ompt_enabled.enabled) { 2875 if (release_status == KMP_LOCK_RELEASED) { 2876 if (ompt_enabled.ompt_callback_mutex_released) { 2877 // release_lock_last 2878 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2879 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2880 codeptr); 2881 } 2882 } else if (ompt_enabled.ompt_callback_nest_lock) { 2883 // release_lock_prev 2884 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2885 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2886 } 2887 } 2888 #endif 2889 2890 #else // KMP_USE_DYNAMIC_LOCK 2891 2892 kmp_user_lock_p lck; 
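  // Select the lock implementation: a nest lock whose TAS (or futex) fields
  // fit in the user's variable is used in place; otherwise the real lock
  // object is obtained through __kmp_lookup_user_lock().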
2893 2894 /* Can't use serial interval since not block structured */ 2895 2896 if ((__kmp_user_lock_kind == lk_tas) && 2897 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2898 OMP_NEST_LOCK_T_SIZE)) { 2899 #if KMP_OS_LINUX && \ 2900 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2901 // "fast" path implemented to fix customer performance issue 2902 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2903 #if USE_ITT_BUILD 2904 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2905 #endif /* USE_ITT_BUILD */ 2906 2907 #if OMPT_SUPPORT && OMPT_OPTIONAL 2908 int release_status = KMP_LOCK_STILL_HELD; 2909 #endif 2910 2911 if (--(tl->lk.depth_locked) == 0) { 2912 TCW_4(tl->lk.poll, 0); 2913 #if OMPT_SUPPORT && OMPT_OPTIONAL 2914 release_status = KMP_LOCK_RELEASED; 2915 #endif 2916 } 2917 KMP_MB(); 2918 2919 #if OMPT_SUPPORT && OMPT_OPTIONAL 2920 // This is the case, if called from omp_init_lock_with_hint: 2921 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2922 if (!codeptr) 2923 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2924 if (ompt_enabled.enabled) { 2925 if (release_status == KMP_LOCK_RELEASED) { 2926 if (ompt_enabled.ompt_callback_mutex_released) { 2927 // release_lock_last 2928 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2929 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2930 } 2931 } else if (ompt_enabled.ompt_callback_nest_lock) { 2932 // release_lock_previous 2933 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2934 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2935 } 2936 } 2937 #endif 2938 2939 return; 2940 #else 2941 lck = (kmp_user_lock_p)user_lock; 2942 #endif 2943 } 2944 #if KMP_USE_FUTEX 2945 else if ((__kmp_user_lock_kind == lk_futex) && 2946 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2947 OMP_NEST_LOCK_T_SIZE)) { 2948 lck = (kmp_user_lock_p)user_lock; 2949 } 2950 #endif 2951 else { 2952 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2953 } 2954 2955 #if USE_ITT_BUILD 2956 __kmp_itt_lock_releasing(lck); 2957 #endif /* USE_ITT_BUILD */ 2958 2959 int release_status; 2960 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2961 #if OMPT_SUPPORT && OMPT_OPTIONAL 2962 // This is the case, if called from omp_init_lock_with_hint: 2963 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2964 if (!codeptr) 2965 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2966 if (ompt_enabled.enabled) { 2967 if (release_status == KMP_LOCK_RELEASED) { 2968 if (ompt_enabled.ompt_callback_mutex_released) { 2969 // release_lock_last 2970 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2971 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2972 } 2973 } else if (ompt_enabled.ompt_callback_nest_lock) { 2974 // release_lock_previous 2975 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2976 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2977 } 2978 } 2979 #endif 2980 2981 #endif // KMP_USE_DYNAMIC_LOCK 2982 } 2983 2984 /* try to acquire the lock */ 2985 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2986 KMP_COUNT_BLOCK(OMP_test_lock); 2987 2988 #if KMP_USE_DYNAMIC_LOCK 2989 int rc; 2990 int tag = KMP_EXTRACT_D_TAG(user_lock); 2991 #if USE_ITT_BUILD 2992 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2993 #endif 2994 #if OMPT_SUPPORT && OMPT_OPTIONAL 2995 // This is the case, if called from omp_init_lock_with_hint: 2996 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2997 if (!codeptr) 2998 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2999 if (ompt_enabled.ompt_callback_mutex_acquire) { 3000 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3001 ompt_mutex_lock, omp_lock_hint_none, 3002 __ompt_get_mutex_impl_type(user_lock), 3003 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3004 } 3005 #endif 3006 #if KMP_USE_INLINED_TAS 3007 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3008 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3009 } else 3010 #elif KMP_USE_INLINED_FUTEX 3011 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3012 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3013 } else 3014 #endif 3015 { 3016 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3017 } 3018 if (rc) { 3019 #if USE_ITT_BUILD 3020 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3021 #endif 3022 #if OMPT_SUPPORT && OMPT_OPTIONAL 3023 if (ompt_enabled.ompt_callback_mutex_acquired) { 3024 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3025 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3026 } 3027 #endif 3028 return FTN_TRUE; 3029 } else { 3030 #if USE_ITT_BUILD 3031 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3032 #endif 3033 return FTN_FALSE; 3034 } 3035 3036 #else // KMP_USE_DYNAMIC_LOCK 3037 3038 kmp_user_lock_p lck; 3039 int rc; 3040 3041 if ((__kmp_user_lock_kind == lk_tas) && 3042 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3043 lck = (kmp_user_lock_p)user_lock; 3044 } 3045 #if KMP_USE_FUTEX 3046 else if ((__kmp_user_lock_kind == lk_futex) && 3047 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3048 lck = (kmp_user_lock_p)user_lock; 3049 } 3050 #endif 3051 else { 3052 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3053 } 3054 3055 #if USE_ITT_BUILD 3056 __kmp_itt_lock_acquiring(lck); 3057 #endif /* USE_ITT_BUILD */ 3058 #if OMPT_SUPPORT && OMPT_OPTIONAL 3059 // This is the case, if called from omp_init_lock_with_hint: 3060 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3061 if (!codeptr) 3062 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3063 if (ompt_enabled.ompt_callback_mutex_acquire) { 3064 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3065 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3066 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3067 } 3068 #endif 3069 3070 rc = TEST_LOCK(lck, gtid); 3071 #if USE_ITT_BUILD 3072 if (rc) { 3073 __kmp_itt_lock_acquired(lck); 3074 } else { 3075 __kmp_itt_lock_cancelled(lck); 3076 } 3077 #endif /* USE_ITT_BUILD */ 3078 #if OMPT_SUPPORT && OMPT_OPTIONAL 3079 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3080 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3081 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3082 } 3083 #endif 3084 3085 return (rc ? 
                  FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next 3197 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3198 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3199 } 3200 } 3201 } 3202 #endif 3203 return rc; 3204 3205 /* Can't use serial interval since not block structured */ 3206 3207 #endif // KMP_USE_DYNAMIC_LOCK 3208 } 3209 3210 // Interface to fast scalable reduce methods routines 3211 3212 // keep the selected method in a thread local structure for cross-function 3213 // usage: will be used in __kmpc_end_reduce* functions; 3214 // another solution: to re-determine the method one more time in 3215 // __kmpc_end_reduce* functions (new prototype required then) 3216 // AT: which solution is better? 3217 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3218 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3219 3220 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3221 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3222 3223 // description of the packed_reduction_method variable: look at the macros in 3224 // kmp.h 3225 3226 // used in a critical section reduce block 3227 static __forceinline void 3228 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3229 kmp_critical_name *crit) { 3230 3231 // this lock was visible to a customer and to the threading profile tool as a 3232 // serial overhead span (although it's used for an internal purpose only) 3233 // why was it visible in previous implementation? 3234 // should we keep it visible in new reduce block? 3235 kmp_user_lock_p lck; 3236 3237 #if KMP_USE_DYNAMIC_LOCK 3238 3239 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3240 // Check if it is initialized. 3241 if (*lk == 0) { 3242 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3243 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3244 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3245 } else { 3246 __kmp_init_indirect_csptr(crit, loc, global_tid, 3247 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3248 } 3249 } 3250 // Branch for accessing the actual lock object and set operation. This 3251 // branching is inevitable since this lock initialization does not follow the 3252 // normal dispatch path (lock table is not used). 3253 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3254 lck = (kmp_user_lock_p)lk; 3255 KMP_DEBUG_ASSERT(lck != NULL); 3256 if (__kmp_env_consistency_check) { 3257 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3258 } 3259 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3260 } else { 3261 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3262 lck = ilk->lock; 3263 KMP_DEBUG_ASSERT(lck != NULL); 3264 if (__kmp_env_consistency_check) { 3265 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3266 } 3267 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3268 } 3269 3270 #else // KMP_USE_DYNAMIC_LOCK 3271 3272 // We know that the fast reduction code is only emitted by Intel compilers 3273 // with 32 byte critical sections. If there isn't enough space, then we 3274 // have to use a pointer. 
3275 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3276 lck = (kmp_user_lock_p)crit; 3277 } else { 3278 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3279 } 3280 KMP_DEBUG_ASSERT(lck != NULL); 3281 3282 if (__kmp_env_consistency_check) 3283 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3284 3285 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3286 3287 #endif // KMP_USE_DYNAMIC_LOCK 3288 } 3289 3290 // used in a critical section reduce block 3291 static __forceinline void 3292 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3293 kmp_critical_name *crit) { 3294 3295 kmp_user_lock_p lck; 3296 3297 #if KMP_USE_DYNAMIC_LOCK 3298 3299 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3300 lck = (kmp_user_lock_p)crit; 3301 if (__kmp_env_consistency_check) 3302 __kmp_pop_sync(global_tid, ct_critical, loc); 3303 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3304 } else { 3305 kmp_indirect_lock_t *ilk = 3306 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3307 if (__kmp_env_consistency_check) 3308 __kmp_pop_sync(global_tid, ct_critical, loc); 3309 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3310 } 3311 3312 #else // KMP_USE_DYNAMIC_LOCK 3313 3314 // We know that the fast reduction code is only emitted by Intel compilers 3315 // with 32 byte critical sections. If there isn't enough space, then we have 3316 // to use a pointer. 3317 if (__kmp_base_user_lock_size > 32) { 3318 lck = *((kmp_user_lock_p *)crit); 3319 KMP_ASSERT(lck != NULL); 3320 } else { 3321 lck = (kmp_user_lock_p)crit; 3322 } 3323 3324 if (__kmp_env_consistency_check) 3325 __kmp_pop_sync(global_tid, ct_critical, loc); 3326 3327 __kmp_release_user_lock_with_checks(lck, global_tid); 3328 3329 #endif // KMP_USE_DYNAMIC_LOCK 3330 } // __kmp_end_critical_section_reduce_block 3331 3332 static __forceinline int 3333 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3334 int *task_state) { 3335 kmp_team_t *team; 3336 3337 // Check if we are inside the teams construct? 3338 if (th->th.th_teams_microtask) { 3339 *team_p = team = th->th.th_team; 3340 if (team->t.t_level == th->th.th_teams_level) { 3341 // This is reduction at teams construct. 3342 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3343 // Let's swap teams temporarily for the reduction. 3344 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3345 th->th.th_team = team->t.t_parent; 3346 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3347 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3348 *task_state = th->th.th_task_state; 3349 th->th.th_task_state = 0; 3350 3351 return 1; 3352 } 3353 } 3354 return 0; 3355 } 3356 3357 static __forceinline void 3358 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3359 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3360 th->th.th_info.ds.ds_tid = 0; 3361 th->th.th_team = team; 3362 th->th.th_team_nproc = team->t.t_nproc; 3363 th->th.th_task_team = team->t.t_task_team[task_state]; 3364 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3365 } 3366 3367 /* 2.a.i. Reduce Block without a terminating barrier */ 3368 /*! 
3369 @ingroup SYNCHRONIZATION 3370 @param loc source location information 3371 @param global_tid global thread number 3372 @param num_vars number of items (variables) to be reduced 3373 @param reduce_size size of data in bytes to be reduced 3374 @param reduce_data pointer to data to be reduced 3375 @param reduce_func callback function providing reduction operation on two 3376 operands and returning result of reduction in lhs_data 3377 @param lck pointer to the unique lock data structure 3378 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3379 threads if atomic reduction needed 3380 3381 The nowait version is used for a reduce clause with the nowait argument. 3382 */ 3383 kmp_int32 3384 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3385 size_t reduce_size, void *reduce_data, 3386 void (*reduce_func)(void *lhs_data, void *rhs_data), 3387 kmp_critical_name *lck) { 3388 3389 KMP_COUNT_BLOCK(REDUCE_nowait); 3390 int retval = 0; 3391 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3392 kmp_info_t *th; 3393 kmp_team_t *team; 3394 int teams_swapped = 0, task_state; 3395 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3396 __kmp_assert_valid_gtid(global_tid); 3397 3398 // why do we need this initialization here at all? 3399 // Reduction clause can not be used as a stand-alone directive. 3400 3401 // do not call __kmp_serial_initialize(), it will be called by 3402 // __kmp_parallel_initialize() if needed 3403 // possible detection of false-positive race by the threadchecker ??? 3404 if (!TCR_4(__kmp_init_parallel)) 3405 __kmp_parallel_initialize(); 3406 3407 __kmp_resume_if_soft_paused(); 3408 3409 // check correctness of reduce block nesting 3410 #if KMP_USE_DYNAMIC_LOCK 3411 if (__kmp_env_consistency_check) 3412 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3413 #else 3414 if (__kmp_env_consistency_check) 3415 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3416 #endif 3417 3418 th = __kmp_thread_from_gtid(global_tid); 3419 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3420 3421 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3422 // the value should be kept in a variable 3423 // the variable should be either a construct-specific or thread-specific 3424 // property, not a team specific property 3425 // (a thread can reach the next reduce block on the next construct, reduce 3426 // method may differ on the next construct) 3427 // an ident_t "loc" parameter could be used as a construct-specific property 3428 // (what if loc == 0?) 
3429 // (if both construct-specific and team-specific variables were shared, 3430 // then unnecessary extra syncs would be needed) 3431 // a thread-specific variable is better regarding two issues above (next 3432 // construct and extra syncs) 3433 // a thread-specific "th_local.reduction_method" variable is used currently 3434 // each thread executes 'determine' and 'set' lines (no need to execute by one 3435 // thread, to avoid unnecessary extra syncs) 3436 3437 packed_reduction_method = __kmp_determine_reduction_method( 3438 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3439 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3440 3441 OMPT_REDUCTION_DECL(th, global_tid); 3442 if (packed_reduction_method == critical_reduce_block) { 3443 3444 OMPT_REDUCTION_BEGIN; 3445 3446 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3447 retval = 1; 3448 3449 } else if (packed_reduction_method == empty_reduce_block) { 3450 3451 OMPT_REDUCTION_BEGIN; 3452 3453 // usage: if team size == 1, no synchronization is required (Intel 3454 // platforms only) 3455 retval = 1; 3456 3457 } else if (packed_reduction_method == atomic_reduce_block) { 3458 3459 retval = 2; 3460 3461 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3462 // won't be called by the code gen) 3463 // (it's not quite good, because the checking block has been closed by 3464 // this 'pop', 3465 // but the atomic operation has not been executed yet; it will be executed 3466 // slightly later, literally on the next instruction) 3467 if (__kmp_env_consistency_check) 3468 __kmp_pop_sync(global_tid, ct_reduce, loc); 3469 3470 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3471 tree_reduce_block)) { 3472 3473 // AT: performance issue: a real barrier here 3474 // AT: (if the master goes slow, other threads are blocked here waiting for the 3475 // master to come and release them) 3476 // AT: (it's not what a customer might expect when specifying the NOWAIT clause) 3477 // AT: (specifying NOWAIT won't result in improvement of performance; it'll 3478 // be confusing to a customer) 3479 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3480 // might go faster and be more in line with the sense of NOWAIT 3481 // AT: TODO: do epcc test and compare times 3482 3483 // this barrier should be invisible to a customer and to the threading profile 3484 // tool (it's neither a terminating barrier nor customer's code; it's 3485 // used for an internal purpose) 3486 #if OMPT_SUPPORT 3487 // JP: can this barrier potentially lead to task scheduling? 3488 // JP: as long as there is a barrier in the implementation, OMPT should and 3489 // will provide the barrier events, 3490 // so we set up the necessary frame/return addresses. 3491 ompt_frame_t *ompt_frame; 3492 if (ompt_enabled.enabled) { 3493 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3494 if (ompt_frame->enter_frame.ptr == NULL) 3495 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3496 } 3497 OMPT_STORE_RETURN_ADDRESS(global_tid); 3498 #endif 3499 #if USE_ITT_NOTIFY 3500 __kmp_threads[global_tid]->th.th_ident = loc; 3501 #endif 3502 retval = 3503 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3504 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3505 retval = (retval != 0) ?
(0) : (1); 3506 #if OMPT_SUPPORT && OMPT_OPTIONAL 3507 if (ompt_enabled.enabled) { 3508 ompt_frame->enter_frame = ompt_data_none; 3509 } 3510 #endif 3511 3512 // all workers except the master should do this pop here 3513 // (none of the other workers will get to __kmpc_end_reduce_nowait()) 3514 if (__kmp_env_consistency_check) { 3515 if (retval == 0) { 3516 __kmp_pop_sync(global_tid, ct_reduce, loc); 3517 } 3518 } 3519 3520 } else { 3521 3522 // should never reach this block 3523 KMP_ASSERT(0); // "unexpected method" 3524 } 3525 if (teams_swapped) { 3526 __kmp_restore_swapped_teams(th, team, task_state); 3527 } 3528 KA_TRACE( 3529 10, 3530 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3531 global_tid, packed_reduction_method, retval)); 3532 3533 return retval; 3534 } 3535 3536 /*! 3537 @ingroup SYNCHRONIZATION 3538 @param loc source location information 3539 @param global_tid global thread id. 3540 @param lck pointer to the unique lock data structure 3541 3542 Finish the execution of a reduce nowait. 3543 */ 3544 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3545 kmp_critical_name *lck) { 3546 3547 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3548 3549 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3550 __kmp_assert_valid_gtid(global_tid); 3551 3552 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3553 3554 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3555 3556 if (packed_reduction_method == critical_reduce_block) { 3557 3558 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3559 OMPT_REDUCTION_END; 3560 3561 } else if (packed_reduction_method == empty_reduce_block) { 3562 3563 // usage: if team size == 1, no synchronization is required (on Intel 3564 // platforms only) 3565 3566 OMPT_REDUCTION_END; 3567 3568 } else if (packed_reduction_method == atomic_reduce_block) { 3569 3570 // neither master nor other workers should get here 3571 // (code gen does not generate this call in case 2: atomic reduce block) 3572 // actually it would be better to remove this else-if entirely; 3573 // after removal this value would be caught by the 'else' branch and assert 3574 3575 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3576 tree_reduce_block)) { 3577 3578 // only the master gets here 3579 // OMPT: tree reduction is annotated in the barrier code 3580 3581 } else { 3582 3583 // should never reach this block 3584 KMP_ASSERT(0); // "unexpected method" 3585 } 3586 3587 if (__kmp_env_consistency_check) 3588 __kmp_pop_sync(global_tid, ct_reduce, loc); 3589 3590 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3591 global_tid, packed_reduction_method)); 3592 3593 return; 3594 } 3595 3596 /* 2.a.ii. Reduce Block with a terminating barrier */ 3597 3598 /*! 3599 @ingroup SYNCHRONIZATION 3600 @param loc source location information 3601 @param global_tid global thread number 3602 @param num_vars number of items (variables) to be reduced 3603 @param reduce_size size of data in bytes to be reduced 3604 @param reduce_data pointer to data to be reduced 3605 @param reduce_func callback function providing reduction operation on two 3606 operands and returning result of reduction in lhs_data 3607 @param lck pointer to the unique lock data structure 3608 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3609 threads if atomic reduction needed 3610 3611 A blocking reduce that includes an implicit barrier.
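For reference, an illustrative source-level construct that a compiler may lower to __kmpc_reduce()/__kmpc_end_reduce() (the exact lowering is compiler specific) is:

@code
#pragma omp parallel for reduction(+ : sum)
for (int i = 0; i < n; ++i)
  sum += a[i];
@endcode

Unlike the nowait variant above, this pair also supplies the implicit barrier at the end of the construct.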
3612 */ 3613 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3614 size_t reduce_size, void *reduce_data, 3615 void (*reduce_func)(void *lhs_data, void *rhs_data), 3616 kmp_critical_name *lck) { 3617 KMP_COUNT_BLOCK(REDUCE_wait); 3618 int retval = 0; 3619 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3620 kmp_info_t *th; 3621 kmp_team_t *team; 3622 int teams_swapped = 0, task_state; 3623 3624 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3625 __kmp_assert_valid_gtid(global_tid); 3626 3627 // why do we need this initialization here at all? 3628 // Reduction clause can not be a stand-alone directive. 3629 3630 // do not call __kmp_serial_initialize(), it will be called by 3631 // __kmp_parallel_initialize() if needed 3632 // possible detection of false-positive race by the threadchecker ??? 3633 if (!TCR_4(__kmp_init_parallel)) 3634 __kmp_parallel_initialize(); 3635 3636 __kmp_resume_if_soft_paused(); 3637 3638 // check correctness of reduce block nesting 3639 #if KMP_USE_DYNAMIC_LOCK 3640 if (__kmp_env_consistency_check) 3641 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3642 #else 3643 if (__kmp_env_consistency_check) 3644 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3645 #endif 3646 3647 th = __kmp_thread_from_gtid(global_tid); 3648 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3649 3650 packed_reduction_method = __kmp_determine_reduction_method( 3651 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3652 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3653 3654 OMPT_REDUCTION_DECL(th, global_tid); 3655 3656 if (packed_reduction_method == critical_reduce_block) { 3657 3658 OMPT_REDUCTION_BEGIN; 3659 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3660 retval = 1; 3661 3662 } else if (packed_reduction_method == empty_reduce_block) { 3663 3664 OMPT_REDUCTION_BEGIN; 3665 // usage: if team size == 1, no synchronization is required ( Intel 3666 // platforms only ) 3667 retval = 1; 3668 3669 } else if (packed_reduction_method == atomic_reduce_block) { 3670 3671 retval = 2; 3672 3673 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3674 tree_reduce_block)) { 3675 3676 // case tree_reduce_block: 3677 // this barrier should be visible to a customer and to the threading profile 3678 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3679 #if OMPT_SUPPORT 3680 ompt_frame_t *ompt_frame; 3681 if (ompt_enabled.enabled) { 3682 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3683 if (ompt_frame->enter_frame.ptr == NULL) 3684 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3685 } 3686 OMPT_STORE_RETURN_ADDRESS(global_tid); 3687 #endif 3688 #if USE_ITT_NOTIFY 3689 __kmp_threads[global_tid]->th.th_ident = 3690 loc; // needed for correct notification of frames 3691 #endif 3692 retval = 3693 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3694 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3695 retval = (retval != 0) ? 
(0) : (1); 3696 #if OMPT_SUPPORT && OMPT_OPTIONAL 3697 if (ompt_enabled.enabled) { 3698 ompt_frame->enter_frame = ompt_data_none; 3699 } 3700 #endif 3701 3702 // all other workers except master should do this pop here 3703 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3704 if (__kmp_env_consistency_check) { 3705 if (retval == 0) { // 0: all other workers; 1: master 3706 __kmp_pop_sync(global_tid, ct_reduce, loc); 3707 } 3708 } 3709 3710 } else { 3711 3712 // should never reach this block 3713 KMP_ASSERT(0); // "unexpected method" 3714 } 3715 if (teams_swapped) { 3716 __kmp_restore_swapped_teams(th, team, task_state); 3717 } 3718 3719 KA_TRACE(10, 3720 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3721 global_tid, packed_reduction_method, retval)); 3722 return retval; 3723 } 3724 3725 /*! 3726 @ingroup SYNCHRONIZATION 3727 @param loc source location information 3728 @param global_tid global thread id. 3729 @param lck pointer to the unique lock data structure 3730 3731 Finish the execution of a blocking reduce. 3732 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3733 start function. 3734 */ 3735 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3736 kmp_critical_name *lck) { 3737 3738 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3739 kmp_info_t *th; 3740 kmp_team_t *team; 3741 int teams_swapped = 0, task_state; 3742 3743 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3744 __kmp_assert_valid_gtid(global_tid); 3745 3746 th = __kmp_thread_from_gtid(global_tid); 3747 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3748 3749 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3750 3751 // this barrier should be visible to a customer and to the threading profile 3752 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3753 OMPT_REDUCTION_DECL(th, global_tid); 3754 3755 if (packed_reduction_method == critical_reduce_block) { 3756 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3757 3758 OMPT_REDUCTION_END; 3759 3760 // TODO: implicit barrier: should be exposed 3761 #if OMPT_SUPPORT 3762 ompt_frame_t *ompt_frame; 3763 if (ompt_enabled.enabled) { 3764 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3765 if (ompt_frame->enter_frame.ptr == NULL) 3766 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3767 } 3768 OMPT_STORE_RETURN_ADDRESS(global_tid); 3769 #endif 3770 #if USE_ITT_NOTIFY 3771 __kmp_threads[global_tid]->th.th_ident = loc; 3772 #endif 3773 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3774 #if OMPT_SUPPORT && OMPT_OPTIONAL 3775 if (ompt_enabled.enabled) { 3776 ompt_frame->enter_frame = ompt_data_none; 3777 } 3778 #endif 3779 3780 } else if (packed_reduction_method == empty_reduce_block) { 3781 3782 OMPT_REDUCTION_END; 3783 3784 // usage: if team size==1, no synchronization is required (Intel platforms only) 3785 3786 // TODO: implicit barrier: should be exposed 3787 #if OMPT_SUPPORT 3788 ompt_frame_t *ompt_frame; 3789 if (ompt_enabled.enabled) { 3790 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3791 if (ompt_frame->enter_frame.ptr == NULL) 3792 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3793 } 3794 OMPT_STORE_RETURN_ADDRESS(global_tid); 3795 #endif 3796 #if USE_ITT_NOTIFY 3797 __kmp_threads[global_tid]->th.th_ident = loc; 3798 #endif 3799 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 
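// as in the critical_reduce_block branch above, this plain barrier provides
// the construct's implicit barrier; the 0/NULL arguments mean no reduction
// data is carried through the barrier itself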
3800 #if OMPT_SUPPORT && OMPT_OPTIONAL 3801 if (ompt_enabled.enabled) { 3802 ompt_frame->enter_frame = ompt_data_none; 3803 } 3804 #endif 3805 3806 } else if (packed_reduction_method == atomic_reduce_block) { 3807 3808 #if OMPT_SUPPORT 3809 ompt_frame_t *ompt_frame; 3810 if (ompt_enabled.enabled) { 3811 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3812 if (ompt_frame->enter_frame.ptr == NULL) 3813 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3814 } 3815 OMPT_STORE_RETURN_ADDRESS(global_tid); 3816 #endif 3817 // TODO: implicit barrier: should be exposed 3818 #if USE_ITT_NOTIFY 3819 __kmp_threads[global_tid]->th.th_ident = loc; 3820 #endif 3821 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3822 #if OMPT_SUPPORT && OMPT_OPTIONAL 3823 if (ompt_enabled.enabled) { 3824 ompt_frame->enter_frame = ompt_data_none; 3825 } 3826 #endif 3827 3828 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3829 tree_reduce_block)) { 3830 3831 // only master executes here (master releases all other workers) 3832 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3833 global_tid); 3834 3835 } else { 3836 3837 // should never reach this block 3838 KMP_ASSERT(0); // "unexpected method" 3839 } 3840 if (teams_swapped) { 3841 __kmp_restore_swapped_teams(th, team, task_state); 3842 } 3843 3844 if (__kmp_env_consistency_check) 3845 __kmp_pop_sync(global_tid, ct_reduce, loc); 3846 3847 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3848 global_tid, packed_reduction_method)); 3849 3850 return; 3851 } 3852 3853 #undef __KMP_GET_REDUCTION_METHOD 3854 #undef __KMP_SET_REDUCTION_METHOD 3855 3856 /* end of interface to fast scalable reduce routines */ 3857 3858 kmp_uint64 __kmpc_get_taskid() { 3859 3860 kmp_int32 gtid; 3861 kmp_info_t *thread; 3862 3863 gtid = __kmp_get_gtid(); 3864 if (gtid < 0) { 3865 return 0; 3866 } 3867 thread = __kmp_thread_from_gtid(gtid); 3868 return thread->th.th_current_task->td_task_id; 3869 3870 } // __kmpc_get_taskid 3871 3872 kmp_uint64 __kmpc_get_parent_taskid() { 3873 3874 kmp_int32 gtid; 3875 kmp_info_t *thread; 3876 kmp_taskdata_t *parent_task; 3877 3878 gtid = __kmp_get_gtid(); 3879 if (gtid < 0) { 3880 return 0; 3881 } 3882 thread = __kmp_thread_from_gtid(gtid); 3883 parent_task = thread->th.th_current_task->td_parent; 3884 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3885 3886 } // __kmpc_get_parent_taskid 3887 3888 /*! 3889 @ingroup WORK_SHARING 3890 @param loc source location information. 3891 @param gtid global thread number. 3892 @param num_dims number of associated doacross loops. 3893 @param dims info on loops bounds. 3894 3895 Initialize doacross loop information. 3896 Expect compiler send us inclusive bounds, 3897 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
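For illustration, a doacross loop nest such as

@code
#pragma omp for ordered(2)
for (i = 2; i < 9; i += 2)
  for (j = 0; j < 10; ++j) {
#pragma omp ordered depend(sink : i - 2, j) depend(sink : i, j - 1)
    // ... consume values produced by earlier iterations ...
#pragma omp ordered depend(source)
    // ... produce values for later iterations ...
  }
@endcode

is the kind of construct a compiler may lower to __kmpc_doacross_init(), __kmpc_doacross_wait(), __kmpc_doacross_post() and __kmpc_doacross_fini(); the exact lowering is compiler specific.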
3898 */ 3899 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3900 const struct kmp_dim *dims) { 3901 __kmp_assert_valid_gtid(gtid); 3902 int j, idx; 3903 kmp_int64 last, trace_count; 3904 kmp_info_t *th = __kmp_threads[gtid]; 3905 kmp_team_t *team = th->th.th_team; 3906 kmp_uint32 *flags; 3907 kmp_disp_t *pr_buf = th->th.th_dispatch; 3908 dispatch_shared_info_t *sh_buf; 3909 3910 KA_TRACE( 3911 20, 3912 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3913 gtid, num_dims, !team->t.t_serialized)); 3914 KMP_DEBUG_ASSERT(dims != NULL); 3915 KMP_DEBUG_ASSERT(num_dims > 0); 3916 3917 if (team->t.t_serialized) { 3918 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3919 return; // no dependencies if team is serialized 3920 } 3921 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3922 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3923 // the next loop 3924 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3925 3926 // Save bounds info into allocated private buffer 3927 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3928 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3929 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3930 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3931 pr_buf->th_doacross_info[0] = 3932 (kmp_int64)num_dims; // first element is number of dimensions 3933 // Save also address of num_done in order to access it later without knowing 3934 // the buffer index 3935 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3936 pr_buf->th_doacross_info[2] = dims[0].lo; 3937 pr_buf->th_doacross_info[3] = dims[0].up; 3938 pr_buf->th_doacross_info[4] = dims[0].st; 3939 last = 5; 3940 for (j = 1; j < num_dims; ++j) { 3941 kmp_int64 3942 range_length; // To keep ranges of all dimensions but the first dims[0] 3943 if (dims[j].st == 1) { // most common case 3944 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3945 range_length = dims[j].up - dims[j].lo + 1; 3946 } else { 3947 if (dims[j].st > 0) { 3948 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3949 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3950 } else { // negative increment 3951 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3952 range_length = 3953 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3954 } 3955 } 3956 pr_buf->th_doacross_info[last++] = range_length; 3957 pr_buf->th_doacross_info[last++] = dims[j].lo; 3958 pr_buf->th_doacross_info[last++] = dims[j].up; 3959 pr_buf->th_doacross_info[last++] = dims[j].st; 3960 } 3961 3962 // Compute total trip count. 3963 // Start with range of dims[0] which we don't need to keep in the buffer. 
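// for example, with the bounds from the documentation above, dims[0] =
// {lo=2, up=8, st=2} gives trace_count = (8 - 2) / 2 + 1 = 4 iterations; a
// second dimension of 10 iterations would give trace_count = 4 * 10 = 40, and
// the shared flags array allocated below reserves one bit per iteration
// (trace_count / 8 bytes plus padding)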
3964 if (dims[0].st == 1) { // most common case 3965 trace_count = dims[0].up - dims[0].lo + 1; 3966 } else if (dims[0].st > 0) { 3967 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3968 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3969 } else { // negative increment 3970 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3971 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3972 } 3973 for (j = 1; j < num_dims; ++j) { 3974 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3975 } 3976 KMP_DEBUG_ASSERT(trace_count > 0); 3977 3978 // Check if shared buffer is not occupied by other loop (idx - 3979 // __kmp_dispatch_num_buffers) 3980 if (idx != sh_buf->doacross_buf_idx) { 3981 // Shared buffer is occupied, wait for it to be free 3982 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3983 __kmp_eq_4, NULL); 3984 } 3985 #if KMP_32_BIT_ARCH 3986 // Check if we are the first thread. After the CAS the first thread gets 0, 3987 // others get 1 if initialization is in progress, allocated pointer otherwise. 3988 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3989 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3990 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3991 #else 3992 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3993 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3994 #endif 3995 if (flags == NULL) { 3996 // we are the first thread, allocate the array of flags 3997 size_t size = 3998 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 3999 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4000 KMP_MB(); 4001 sh_buf->doacross_flags = flags; 4002 } else if (flags == (kmp_uint32 *)1) { 4003 #if KMP_32_BIT_ARCH 4004 // initialization is still in progress, need to wait 4005 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4006 #else 4007 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4008 #endif 4009 KMP_YIELD(TRUE); 4010 KMP_MB(); 4011 } else { 4012 KMP_MB(); 4013 } 4014 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4015 pr_buf->th_doacross_flags = 4016 sh_buf->doacross_flags; // save private copy in order to not 4017 // touch shared buffer on each iteration 4018 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4019 } 4020 4021 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4022 __kmp_assert_valid_gtid(gtid); 4023 kmp_int64 shft; 4024 size_t num_dims, i; 4025 kmp_uint32 flag; 4026 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4027 kmp_info_t *th = __kmp_threads[gtid]; 4028 kmp_team_t *team = th->th.th_team; 4029 kmp_disp_t *pr_buf; 4030 kmp_int64 lo, up, st; 4031 4032 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4033 if (team->t.t_serialized) { 4034 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4035 return; // no dependencies if team is serialized 4036 } 4037 4038 // calculate sequential iteration number and check out-of-bounds condition 4039 pr_buf = th->th.th_dispatch; 4040 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4041 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4042 lo = pr_buf->th_doacross_info[2]; 4043 up = pr_buf->th_doacross_info[3]; 4044 st = pr_buf->th_doacross_info[4]; 4045 #if OMPT_SUPPORT && OMPT_OPTIONAL 4046 ompt_dependence_t deps[num_dims]; 4047 #endif 4048 if (st == 1) { // most common case 4049 if (vec[0] < lo || vec[0] > up) { 4050 
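// a sink iteration vector that falls outside the loop bounds refers to a
// non-existent iteration, so the dependence is treated as satisfied and the
// wait returns immediately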
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4051 "bounds [%lld,%lld]\n", 4052 gtid, vec[0], lo, up)); 4053 return; 4054 } 4055 iter_number = vec[0] - lo; 4056 } else if (st > 0) { 4057 if (vec[0] < lo || vec[0] > up) { 4058 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4059 "bounds [%lld,%lld]\n", 4060 gtid, vec[0], lo, up)); 4061 return; 4062 } 4063 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4064 } else { // negative increment 4065 if (vec[0] > lo || vec[0] < up) { 4066 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4067 "bounds [%lld,%lld]\n", 4068 gtid, vec[0], lo, up)); 4069 return; 4070 } 4071 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4072 } 4073 #if OMPT_SUPPORT && OMPT_OPTIONAL 4074 deps[0].variable.value = iter_number; 4075 deps[0].dependence_type = ompt_dependence_type_sink; 4076 #endif 4077 for (i = 1; i < num_dims; ++i) { 4078 kmp_int64 iter, ln; 4079 size_t j = i * 4; 4080 ln = pr_buf->th_doacross_info[j + 1]; 4081 lo = pr_buf->th_doacross_info[j + 2]; 4082 up = pr_buf->th_doacross_info[j + 3]; 4083 st = pr_buf->th_doacross_info[j + 4]; 4084 if (st == 1) { 4085 if (vec[i] < lo || vec[i] > up) { 4086 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4087 "bounds [%lld,%lld]\n", 4088 gtid, vec[i], lo, up)); 4089 return; 4090 } 4091 iter = vec[i] - lo; 4092 } else if (st > 0) { 4093 if (vec[i] < lo || vec[i] > up) { 4094 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4095 "bounds [%lld,%lld]\n", 4096 gtid, vec[i], lo, up)); 4097 return; 4098 } 4099 iter = (kmp_uint64)(vec[i] - lo) / st; 4100 } else { // st < 0 4101 if (vec[i] > lo || vec[i] < up) { 4102 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4103 "bounds [%lld,%lld]\n", 4104 gtid, vec[i], lo, up)); 4105 return; 4106 } 4107 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4108 } 4109 iter_number = iter + ln * iter_number; 4110 #if OMPT_SUPPORT && OMPT_OPTIONAL 4111 deps[i].variable.value = iter; 4112 deps[i].dependence_type = ompt_dependence_type_sink; 4113 #endif 4114 } 4115 shft = iter_number % 32; // use 32-bit granularity 4116 iter_number >>= 5; // divided by 32 4117 flag = 1 << shft; 4118 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4119 KMP_YIELD(TRUE); 4120 } 4121 KMP_MB(); 4122 #if OMPT_SUPPORT && OMPT_OPTIONAL 4123 if (ompt_enabled.ompt_callback_dependences) { 4124 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4125 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4126 } 4127 #endif 4128 KA_TRACE(20, 4129 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4130 gtid, (iter_number << 5) + shft)); 4131 } 4132 4133 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4134 __kmp_assert_valid_gtid(gtid); 4135 kmp_int64 shft; 4136 size_t num_dims, i; 4137 kmp_uint32 flag; 4138 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4139 kmp_info_t *th = __kmp_threads[gtid]; 4140 kmp_team_t *team = th->th.th_team; 4141 kmp_disp_t *pr_buf; 4142 kmp_int64 lo, st; 4143 4144 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4145 if (team->t.t_serialized) { 4146 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4147 return; // no dependencies if team is serialized 4148 } 4149 4150 // calculate sequential iteration number (same as in "wait" but no 4151 // out-of-bounds checks) 4152 pr_buf = th->th.th_dispatch; 4153 
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4154 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4155 lo = pr_buf->th_doacross_info[2]; 4156 st = pr_buf->th_doacross_info[4]; 4157 #if OMPT_SUPPORT && OMPT_OPTIONAL 4158 ompt_dependence_t deps[num_dims]; 4159 #endif 4160 if (st == 1) { // most common case 4161 iter_number = vec[0] - lo; 4162 } else if (st > 0) { 4163 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4164 } else { // negative increment 4165 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4166 } 4167 #if OMPT_SUPPORT && OMPT_OPTIONAL 4168 deps[0].variable.value = iter_number; 4169 deps[0].dependence_type = ompt_dependence_type_source; 4170 #endif 4171 for (i = 1; i < num_dims; ++i) { 4172 kmp_int64 iter, ln; 4173 size_t j = i * 4; 4174 ln = pr_buf->th_doacross_info[j + 1]; 4175 lo = pr_buf->th_doacross_info[j + 2]; 4176 st = pr_buf->th_doacross_info[j + 4]; 4177 if (st == 1) { 4178 iter = vec[i] - lo; 4179 } else if (st > 0) { 4180 iter = (kmp_uint64)(vec[i] - lo) / st; 4181 } else { // st < 0 4182 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4183 } 4184 iter_number = iter + ln * iter_number; 4185 #if OMPT_SUPPORT && OMPT_OPTIONAL 4186 deps[i].variable.value = iter; 4187 deps[i].dependence_type = ompt_dependence_type_source; 4188 #endif 4189 } 4190 #if OMPT_SUPPORT && OMPT_OPTIONAL 4191 if (ompt_enabled.ompt_callback_dependences) { 4192 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4193 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4194 } 4195 #endif 4196 shft = iter_number % 32; // use 32-bit granularity 4197 iter_number >>= 5; // divided by 32 4198 flag = 1 << shft; 4199 KMP_MB(); 4200 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4201 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4202 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4203 (iter_number << 5) + shft)); 4204 } 4205 4206 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4207 __kmp_assert_valid_gtid(gtid); 4208 kmp_int32 num_done; 4209 kmp_info_t *th = __kmp_threads[gtid]; 4210 kmp_team_t *team = th->th.th_team; 4211 kmp_disp_t *pr_buf = th->th.th_dispatch; 4212 4213 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4214 if (team->t.t_serialized) { 4215 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4216 return; // nothing to do 4217 } 4218 num_done = 4219 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4220 if (num_done == th->th.th_team_nproc) { 4221 // we are the last thread, need to free shared resources 4222 int idx = pr_buf->th_doacross_buf_idx - 1; 4223 dispatch_shared_info_t *sh_buf = 4224 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4225 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4226 (kmp_int64)&sh_buf->doacross_num_done); 4227 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4228 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4229 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4230 sh_buf->doacross_flags = NULL; 4231 sh_buf->doacross_num_done = 0; 4232 sh_buf->doacross_buf_idx += 4233 __kmp_dispatch_num_buffers; // free buffer for future re-use 4234 } 4235 // free private resources (need to keep buffer index forever) 4236 pr_buf->th_doacross_flags = NULL; 4237 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4238 pr_buf->th_doacross_info = NULL; 4239 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4240 } 4241 4242 /* omp_alloc/omp_calloc/omp_free only defined for 
C/C++, not for Fortran */ 4243 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4244 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4245 } 4246 4247 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4248 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); 4249 } 4250 4251 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4252 omp_allocator_handle_t free_allocator) { 4253 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4254 free_allocator); 4255 } 4256 4257 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4258 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4259 } 4260 4261 int __kmpc_get_target_offload(void) { 4262 if (!__kmp_init_serial) { 4263 __kmp_serial_initialize(); 4264 } 4265 return __kmp_target_offload; 4266 } 4267 4268 int __kmpc_pause_resource(kmp_pause_status_t level) { 4269 if (!__kmp_init_serial) { 4270 return 1; // Can't pause if runtime is not initialized 4271 } 4272 return __kmp_pause_resource(level); 4273 } 4274
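/* Illustrative user-level usage of the allocator entry points above (not part
   of the runtime; assumes an OpenMP 5.x compiler/runtime and the predefined
   omp_default_mem_alloc allocator):

     #include <omp.h>

     size_t n = 1024;
     double *a = (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
     double *b = (double *)omp_calloc(n, sizeof(double), omp_default_mem_alloc);
     a = (double *)omp_realloc(a, 2 * n * sizeof(double), omp_default_mem_alloc,
                               omp_default_mem_alloc);
     omp_free(b, omp_default_mem_alloc);
     omp_free(a, omp_default_mem_alloc);
*/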