/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
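As an illustrative sketch only (not part of the contract of this entry point,
and the `loc` argument is assumed to be an ident_t the caller already has), the
returned value can be cached and passed to later runtime entry points instead
of being re-queried:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc); // query once
__kmpc_barrier(&loc, gtid); // reuse the cached gtid for later calls
@endcode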
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
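As a purely illustrative sketch (the outlined function name and the shared
variable are invented here; the exact code a compiler emits differs in detail),
a directive such as `#pragma omp parallel shared(n)` might be lowered roughly
as:
@code
// assumed prototype of the compiler-generated outlined body:
// void outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *n);
int n = 42;
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &n);
@endcode
Here argc is 1 because one shared variable is passed through the ellipsis.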
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p."
                 " cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
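As an illustrative sketch only (the outlined function name is invented and the
exact lowering is compiler-specific), a serialized region is bracketed by the
begin/end pair around a direct call to the outlined body:
@code
__kmpc_serialized_parallel(&loc, gtid);
outlined(&gtid, &bound_tid); // plus any shared arguments
__kmpc_end_serialized_parallel(&loc, gtid);
@endcode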
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset/clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed here (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here; move along.
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
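The following is an illustrative sketch only (the guarded user code is a
placeholder); it shows the pairing a compiler typically emits for
`#pragma omp master`:
@code
if (__kmpc_master(&loc, gtid)) {
  // ... code in the master block, executed by the primary thread only ...
  __kmpc_end_master(&loc, gtid);
}
@endcode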
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param filter result of evaluating filter clause on thread global_tid, or zero
if no filter clause present
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
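As an illustrative sketch only (the filter expression and the guarded code are
placeholders), `#pragma omp masked filter(f)` is typically lowered to a guard
of this shape:
@code
if (__kmpc_masked(&loc, gtid, f)) {
  // ... code in the masked block, executed by the filtered thread only ...
  __kmpc_end_masked(&loc, gtid);
}
@endcode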
*/
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
  int status = 0;
  int tid;
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);
  if (tid == filter) {
    KMP_COUNT_BLOCK(OMP_MASKED);
    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>masked</tt> region. This should only be called by the
thread that executes the <tt>masked</tt> region.
*/
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
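As an illustrative sketch only (the loop body is a placeholder), the region of
an `ordered` directive inside an ordered loop is bracketed as:
@code
__kmpc_ordered(&loc, gtid);
// ... code that must execute in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode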
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      kmp_uint32 spins; \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      do { \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD(TRUE); \
        } else { \
          KMP_YIELD_SPIN(spins); \
        } \
        __kmp_spin_backoff(&backoff); \
      } while ( \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    } \
    KMP_FSYNC_ACQUIRED(l); \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!cond) { \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | KMP_LOCK_BUSY(1, futex))) { \
          continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      } \
      kmp_int32 rc; \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
        continue; \
      } \
      gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
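As an illustrative sketch only (the variable names are invented), the compiler
typically materializes one zero-initialized kmp_critical_name per named
critical section and brackets the protected code with the enter/exit pair:
@code
static kmp_critical_name crit_for_name; // shared, zero-initialized
__kmpc_critical(&loc, gtid, &crit_for_name);
// ... protected user code ...
__kmpc_end_critical(&loc, gtid, &crit_for_name);
@endcode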
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(lockseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lockseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
  if (locktag) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
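As an illustrative sketch only (the master-only work is a placeholder), the
combined construct is typically emitted as:
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... work done by the master thread only ...
  __kmpc_end_barrier_master(&loc, gtid); // releases the other threads
}
@endcode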
1706 */ 1707 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1708 int status; 1709 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1710 __kmp_assert_valid_gtid(global_tid); 1711 1712 if (!TCR_4(__kmp_init_parallel)) 1713 __kmp_parallel_initialize(); 1714 1715 __kmp_resume_if_soft_paused(); 1716 1717 if (__kmp_env_consistency_check) 1718 __kmp_check_barrier(global_tid, ct_barrier, loc); 1719 1720 #if OMPT_SUPPORT 1721 ompt_frame_t *ompt_frame; 1722 if (ompt_enabled.enabled) { 1723 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1724 if (ompt_frame->enter_frame.ptr == NULL) 1725 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1726 } 1727 OMPT_STORE_RETURN_ADDRESS(global_tid); 1728 #endif 1729 #if USE_ITT_NOTIFY 1730 __kmp_threads[global_tid]->th.th_ident = loc; 1731 #endif 1732 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1733 #if OMPT_SUPPORT && OMPT_OPTIONAL 1734 if (ompt_enabled.enabled) { 1735 ompt_frame->enter_frame = ompt_data_none; 1736 } 1737 #endif 1738 1739 return (status != 0) ? 0 : 1; 1740 } 1741 1742 /*! 1743 @ingroup SYNCHRONIZATION 1744 @param loc source location information 1745 @param global_tid thread id. 1746 1747 Complete the execution of a combined barrier and master. This function should 1748 only be called at the completion of the <tt>master</tt> code. Other threads will 1749 still be waiting at the barrier and this call releases them. 1750 */ 1751 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1752 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1753 __kmp_assert_valid_gtid(global_tid); 1754 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1755 } 1756 1757 /*! 1758 @ingroup SYNCHRONIZATION 1759 @param loc source location information 1760 @param global_tid thread id. 1761 @return one if the thread should execute the master block, zero otherwise 1762 1763 Start execution of a combined barrier and master(nowait) construct. 1764 The barrier is executed inside this function. 1765 There is no equivalent "end" function, since the 1766 */ 1767 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1768 kmp_int32 ret; 1769 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1770 __kmp_assert_valid_gtid(global_tid); 1771 1772 if (!TCR_4(__kmp_init_parallel)) 1773 __kmp_parallel_initialize(); 1774 1775 __kmp_resume_if_soft_paused(); 1776 1777 if (__kmp_env_consistency_check) { 1778 if (loc == 0) { 1779 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 
1780 } 1781 __kmp_check_barrier(global_tid, ct_barrier, loc); 1782 } 1783 1784 #if OMPT_SUPPORT 1785 ompt_frame_t *ompt_frame; 1786 if (ompt_enabled.enabled) { 1787 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1788 if (ompt_frame->enter_frame.ptr == NULL) 1789 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1790 } 1791 OMPT_STORE_RETURN_ADDRESS(global_tid); 1792 #endif 1793 #if USE_ITT_NOTIFY 1794 __kmp_threads[global_tid]->th.th_ident = loc; 1795 #endif 1796 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1797 #if OMPT_SUPPORT && OMPT_OPTIONAL 1798 if (ompt_enabled.enabled) { 1799 ompt_frame->enter_frame = ompt_data_none; 1800 } 1801 #endif 1802 1803 ret = __kmpc_master(loc, global_tid); 1804 1805 if (__kmp_env_consistency_check) { 1806 /* there's no __kmpc_end_master called; so the (stats) */ 1807 /* actions of __kmpc_end_master are done here */ 1808 if (ret) { 1809 /* only one thread should do the pop since only */ 1810 /* one did the push (see __kmpc_master()) */ 1811 __kmp_pop_sync(global_tid, ct_master, loc); 1812 } 1813 } 1814 1815 return (ret); 1816 } 1817 1818 /* The BARRIER for a SINGLE process section is always explicit */ 1819 /*! 1820 @ingroup WORK_SHARING 1821 @param loc source location information 1822 @param global_tid global thread number 1823 @return One if this thread should execute the single construct, zero otherwise. 1824 1825 Test whether to execute a <tt>single</tt> construct. 1826 There are no implicit barriers in the two "single" calls, rather the compiler 1827 should introduce an explicit barrier if it is required. 1828 */ 1829 1830 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1831 __kmp_assert_valid_gtid(global_tid); 1832 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1833 1834 if (rc) { 1835 // We are going to execute the single statement, so we should count it. 1836 KMP_COUNT_BLOCK(OMP_SINGLE); 1837 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1838 } 1839 1840 #if OMPT_SUPPORT && OMPT_OPTIONAL 1841 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1842 kmp_team_t *team = this_thr->th.th_team; 1843 int tid = __kmp_tid_from_gtid(global_tid); 1844 1845 if (ompt_enabled.enabled) { 1846 if (rc) { 1847 if (ompt_enabled.ompt_callback_work) { 1848 ompt_callbacks.ompt_callback(ompt_callback_work)( 1849 ompt_work_single_executor, ompt_scope_begin, 1850 &(team->t.ompt_team_info.parallel_data), 1851 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1852 1, OMPT_GET_RETURN_ADDRESS(0)); 1853 } 1854 } else { 1855 if (ompt_enabled.ompt_callback_work) { 1856 ompt_callbacks.ompt_callback(ompt_callback_work)( 1857 ompt_work_single_other, ompt_scope_begin, 1858 &(team->t.ompt_team_info.parallel_data), 1859 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1860 1, OMPT_GET_RETURN_ADDRESS(0)); 1861 ompt_callbacks.ompt_callback(ompt_callback_work)( 1862 ompt_work_single_other, ompt_scope_end, 1863 &(team->t.ompt_team_info.parallel_data), 1864 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1865 1, OMPT_GET_RETURN_ADDRESS(0)); 1866 } 1867 } 1868 } 1869 #endif 1870 1871 return rc; 1872 } 1873 1874 /*! 1875 @ingroup WORK_SHARING 1876 @param loc source location information 1877 @param global_tid global thread number 1878 1879 Mark the end of a <tt>single</tt> construct. This function should 1880 only be called by the thread that executed the block of code protected 1881 by the `single` construct. 
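An illustrative lowering of a <tt>single</tt> construct using these two
entry points (placeholder code, not literal compiler output):
@code
if (__kmpc_single(&loc, gtid)) {
  // ... block executed by exactly one thread of the team ...
  __kmpc_end_single(&loc, gtid);
}
// the compiler emits an explicit barrier here unless "nowait" was specified
@endcode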
1882 */ 1883 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1884 __kmp_assert_valid_gtid(global_tid); 1885 __kmp_exit_single(global_tid); 1886 KMP_POP_PARTITIONED_TIMER(); 1887 1888 #if OMPT_SUPPORT && OMPT_OPTIONAL 1889 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1890 kmp_team_t *team = this_thr->th.th_team; 1891 int tid = __kmp_tid_from_gtid(global_tid); 1892 1893 if (ompt_enabled.ompt_callback_work) { 1894 ompt_callbacks.ompt_callback(ompt_callback_work)( 1895 ompt_work_single_executor, ompt_scope_end, 1896 &(team->t.ompt_team_info.parallel_data), 1897 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1898 OMPT_GET_RETURN_ADDRESS(0)); 1899 } 1900 #endif 1901 } 1902 1903 /*! 1904 @ingroup WORK_SHARING 1905 @param loc Source location 1906 @param global_tid Global thread id 1907 1908 Mark the end of a statically scheduled loop. 1909 */ 1910 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1911 KMP_POP_PARTITIONED_TIMER(); 1912 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1913 1914 #if OMPT_SUPPORT && OMPT_OPTIONAL 1915 if (ompt_enabled.ompt_callback_work) { 1916 ompt_work_t ompt_work_type = ompt_work_loop; 1917 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1918 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1919 // Determine workshare type 1920 if (loc != NULL) { 1921 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1922 ompt_work_type = ompt_work_loop; 1923 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1924 ompt_work_type = ompt_work_sections; 1925 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1926 ompt_work_type = ompt_work_distribute; 1927 } else { 1928 // use default set above. 1929 // a warning about this case is provided in __kmpc_for_static_init 1930 } 1931 KMP_DEBUG_ASSERT(ompt_work_type); 1932 } 1933 ompt_callbacks.ompt_callback(ompt_callback_work)( 1934 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1935 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1936 } 1937 #endif 1938 if (__kmp_env_consistency_check) 1939 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1940 } 1941 1942 // User routines which take C-style arguments (call by value) 1943 // different from the Fortran equivalent routines 1944 1945 void ompc_set_num_threads(int arg) { 1946 // !!!!! TODO: check the per-task binding 1947 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1948 } 1949 1950 void ompc_set_dynamic(int flag) { 1951 kmp_info_t *thread; 1952 1953 /* For the thread-private implementation of the internal controls */ 1954 thread = __kmp_entry_thread(); 1955 1956 __kmp_save_internal_controls(thread); 1957 1958 set__dynamic(thread, flag ? true : false); 1959 } 1960 1961 void ompc_set_nested(int flag) { 1962 kmp_info_t *thread; 1963 1964 /* For the thread-private internal controls implementation */ 1965 thread = __kmp_entry_thread(); 1966 1967 __kmp_save_internal_controls(thread); 1968 1969 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1970 } 1971 1972 void ompc_set_max_active_levels(int max_active_levels) { 1973 /* TO DO */ 1974 /* we want per-task implementation of this internal control */ 1975 1976 /* For the per-thread internal controls implementation */ 1977 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1978 } 1979 1980 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1981 // !!!!! 
TODO: check the per-task binding 1982 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1983 } 1984 1985 int ompc_get_ancestor_thread_num(int level) { 1986 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1987 } 1988 1989 int ompc_get_team_size(int level) { 1990 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1991 } 1992 1993 /* OpenMP 5.0 Affinity Format API */ 1994 1995 void ompc_set_affinity_format(char const *format) { 1996 if (!__kmp_init_serial) { 1997 __kmp_serial_initialize(); 1998 } 1999 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2000 format, KMP_STRLEN(format) + 1); 2001 } 2002 2003 size_t ompc_get_affinity_format(char *buffer, size_t size) { 2004 size_t format_size; 2005 if (!__kmp_init_serial) { 2006 __kmp_serial_initialize(); 2007 } 2008 format_size = KMP_STRLEN(__kmp_affinity_format); 2009 if (buffer && size) { 2010 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2011 format_size + 1); 2012 } 2013 return format_size; 2014 } 2015 2016 void ompc_display_affinity(char const *format) { 2017 int gtid; 2018 if (!TCR_4(__kmp_init_middle)) { 2019 __kmp_middle_initialize(); 2020 } 2021 gtid = __kmp_get_gtid(); 2022 __kmp_aux_display_affinity(gtid, format); 2023 } 2024 2025 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 2026 char const *format) { 2027 int gtid; 2028 size_t num_required; 2029 kmp_str_buf_t capture_buf; 2030 if (!TCR_4(__kmp_init_middle)) { 2031 __kmp_middle_initialize(); 2032 } 2033 gtid = __kmp_get_gtid(); 2034 __kmp_str_buf_init(&capture_buf); 2035 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2036 if (buffer && buf_size) { 2037 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2038 capture_buf.used + 1); 2039 } 2040 __kmp_str_buf_free(&capture_buf); 2041 return num_required; 2042 } 2043 2044 void kmpc_set_stacksize(int arg) { 2045 // __kmp_aux_set_stacksize initializes the library if needed 2046 __kmp_aux_set_stacksize(arg); 2047 } 2048 2049 void kmpc_set_stacksize_s(size_t arg) { 2050 // __kmp_aux_set_stacksize initializes the library if needed 2051 __kmp_aux_set_stacksize(arg); 2052 } 2053 2054 void kmpc_set_blocktime(int arg) { 2055 int gtid, tid; 2056 kmp_info_t *thread; 2057 2058 gtid = __kmp_entry_gtid(); 2059 tid = __kmp_tid_from_gtid(gtid); 2060 thread = __kmp_thread_from_gtid(gtid); 2061 2062 __kmp_aux_set_blocktime(arg, thread, tid); 2063 } 2064 2065 void kmpc_set_library(int arg) { 2066 // __kmp_user_set_library initializes the library if needed 2067 __kmp_user_set_library((enum library_type)arg); 2068 } 2069 2070 void kmpc_set_defaults(char const *str) { 2071 // __kmp_aux_set_defaults initializes the library if needed 2072 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2073 } 2074 2075 void kmpc_set_disp_num_buffers(int arg) { 2076 // ignore after initialization because some teams have already 2077 // allocated dispatch buffers 2078 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2079 arg <= KMP_MAX_DISP_NUM_BUFF) { 2080 __kmp_dispatch_num_buffers = arg; 2081 } 2082 } 2083 2084 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2085 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2086 return -1; 2087 #else 2088 if (!TCR_4(__kmp_init_middle)) { 2089 __kmp_middle_initialize(); 2090 } 2091 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2092 #endif 2093 } 2094 2095 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2096 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2097 return -1; 2098 #else 
2099 if (!TCR_4(__kmp_init_middle)) { 2100 __kmp_middle_initialize(); 2101 } 2102 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2103 #endif 2104 } 2105 2106 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2107 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2108 return -1; 2109 #else 2110 if (!TCR_4(__kmp_init_middle)) { 2111 __kmp_middle_initialize(); 2112 } 2113 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2114 #endif 2115 } 2116 2117 /* -------------------------------------------------------------------------- */ 2118 /*! 2119 @ingroup THREADPRIVATE 2120 @param loc source location information 2121 @param gtid global thread number 2122 @param cpy_size size of the cpy_data buffer 2123 @param cpy_data pointer to data to be copied 2124 @param cpy_func helper function to call for copying data 2125 @param didit flag variable: 1=single thread; 0=not single thread 2126 2127 __kmpc_copyprivate implements the interface for the private data broadcast 2128 needed for the copyprivate clause associated with a single region in an 2129 OpenMP<sup>*</sup> program (both C and Fortran). 2130 All threads participating in the parallel region call this routine. 2131 One of the threads (called the single thread) should have the <tt>didit</tt> 2132 variable set to 1 and all other threads should have that variable set to 0. 2133 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2134 2135 The OpenMP specification forbids the use of nowait on the single region when a 2136 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2137 barrier internally to avoid race conditions, so the code generation for the 2138 single region should avoid generating a barrier after the call to @ref 2139 __kmpc_copyprivate. 2140 2141 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2142 The <tt>loc</tt> parameter is a pointer to source location information. 2143 2144 Internal implementation: The single thread will first copy its descriptor 2145 address (cpy_data) to a team-private location, then the other threads will each 2146 call the function pointed to by the parameter cpy_func, which carries out the 2147 copy by copying the data using the cpy_data buffer. 2148 2149 The cpy_func routine used for the copy and the contents of the data area defined 2150 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2151 to be done. For instance, the cpy_data buffer can hold the actual data to be 2152 copied or it may hold a list of pointers to the data. The cpy_func routine must 2153 interpret the cpy_data buffer appropriately. 2154 2155 The interface to cpy_func is as follows: 2156 @code 2157 void cpy_func( void *destination, void *source ) 2158 @endcode 2159 where void *destination is the cpy_data pointer for the thread being copied to 2160 and void *source is the cpy_data pointer for the thread being copied from. 
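As an illustration only (a hypothetical helper, not part of the runtime), a
cpy_func that broadcasts a single int whose address each thread passes as
cpy_data could be:
@code
static void copy_int(void *destination, void *source) {
  *(int *)destination = *(int *)source; // replicate the single thread's value
}
// each thread: __kmpc_copyprivate(&loc, gtid, sizeof(int), &x, copy_int, didit);
@endcode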
2161 */ 2162 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2163 void *cpy_data, void (*cpy_func)(void *, void *), 2164 kmp_int32 didit) { 2165 void **data_ptr; 2166 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2167 __kmp_assert_valid_gtid(gtid); 2168 2169 KMP_MB(); 2170 2171 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2172 2173 if (__kmp_env_consistency_check) { 2174 if (loc == 0) { 2175 KMP_WARNING(ConstructIdentInvalid); 2176 } 2177 } 2178 2179 // ToDo: Optimize the following two barriers into some kind of split barrier 2180 2181 if (didit) 2182 *data_ptr = cpy_data; 2183 2184 #if OMPT_SUPPORT 2185 ompt_frame_t *ompt_frame; 2186 if (ompt_enabled.enabled) { 2187 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2188 if (ompt_frame->enter_frame.ptr == NULL) 2189 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2190 } 2191 OMPT_STORE_RETURN_ADDRESS(gtid); 2192 #endif 2193 /* This barrier is not a barrier region boundary */ 2194 #if USE_ITT_NOTIFY 2195 __kmp_threads[gtid]->th.th_ident = loc; 2196 #endif 2197 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2198 2199 if (!didit) 2200 (*cpy_func)(cpy_data, *data_ptr); 2201 2202 // Consider next barrier a user-visible barrier for barrier region boundaries 2203 // Nesting checks are already handled by the single construct checks 2204 { 2205 #if OMPT_SUPPORT 2206 OMPT_STORE_RETURN_ADDRESS(gtid); 2207 #endif 2208 #if USE_ITT_NOTIFY 2209 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2210 // tasks can overwrite the location) 2211 #endif 2212 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2213 #if OMPT_SUPPORT && OMPT_OPTIONAL 2214 if (ompt_enabled.enabled) { 2215 ompt_frame->enter_frame = ompt_data_none; 2216 } 2217 #endif 2218 } 2219 } 2220 2221 /* -------------------------------------------------------------------------- */ 2222 2223 #define INIT_LOCK __kmp_init_user_lock_with_checks 2224 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2225 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2226 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2227 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2228 #define ACQUIRE_NESTED_LOCK_TIMED \ 2229 __kmp_acquire_nested_user_lock_with_checks_timed 2230 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2231 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2232 #define TEST_LOCK __kmp_test_user_lock_with_checks 2233 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2234 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2235 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2236 2237 // TODO: Make check abort messages use location info & pass it into 2238 // with_checks routines 2239 2240 #if KMP_USE_DYNAMIC_LOCK 2241 2242 // internal lock initializer 2243 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2244 kmp_dyna_lockseq_t seq) { 2245 if (KMP_IS_D_LOCK(seq)) { 2246 KMP_INIT_D_LOCK(lock, seq); 2247 #if USE_ITT_BUILD 2248 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2249 #endif 2250 } else { 2251 KMP_INIT_I_LOCK(lock, seq); 2252 #if USE_ITT_BUILD 2253 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2254 __kmp_itt_lock_creating(ilk->lock, loc); 2255 #endif 2256 } 2257 } 2258 2259 // internal nest lock initializer 2260 static __forceinline void 2261 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2262 kmp_dyna_lockseq_t seq) { 2263 #if KMP_USE_TSX 2264 // Don't have nested lock implementation for speculative locks 2265 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2266 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2267 seq = __kmp_user_lock_seq; 2268 #endif 2269 switch (seq) { 2270 case lockseq_tas: 2271 seq = lockseq_nested_tas; 2272 break; 2273 #if KMP_USE_FUTEX 2274 case lockseq_futex: 2275 seq = lockseq_nested_futex; 2276 break; 2277 #endif 2278 case lockseq_ticket: 2279 seq = lockseq_nested_ticket; 2280 break; 2281 case lockseq_queuing: 2282 seq = lockseq_nested_queuing; 2283 break; 2284 case lockseq_drdpa: 2285 seq = lockseq_nested_drdpa; 2286 break; 2287 default: 2288 seq = lockseq_nested_queuing; 2289 } 2290 KMP_INIT_I_LOCK(lock, seq); 2291 #if USE_ITT_BUILD 2292 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2293 __kmp_itt_lock_creating(ilk->lock, loc); 2294 #endif 2295 } 2296 2297 /* initialize the lock with a hint */ 2298 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2299 uintptr_t hint) { 2300 KMP_DEBUG_ASSERT(__kmp_init_serial); 2301 if (__kmp_env_consistency_check && user_lock == NULL) { 2302 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2303 } 2304 2305 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2306 2307 #if OMPT_SUPPORT && OMPT_OPTIONAL 2308 // This is the case, if called from omp_init_lock_with_hint: 2309 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2310 if (!codeptr) 2311 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2312 if (ompt_enabled.ompt_callback_lock_init) { 2313 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2314 ompt_mutex_lock, (omp_lock_hint_t)hint, 2315 __ompt_get_mutex_impl_type(user_lock), 2316 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2317 } 2318 #endif 2319 } 2320 2321 /* initialize the lock with a hint */ 2322 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2323 void **user_lock, uintptr_t hint) { 2324 KMP_DEBUG_ASSERT(__kmp_init_serial); 2325 if (__kmp_env_consistency_check && user_lock == NULL) { 2326 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2327 } 2328 2329 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2330 2331 #if OMPT_SUPPORT && OMPT_OPTIONAL 2332 // This is the case, if called from omp_init_lock_with_hint: 2333 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2334 if (!codeptr) 2335 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2336 if (ompt_enabled.ompt_callback_lock_init) { 2337 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2338 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2339 __ompt_get_mutex_impl_type(user_lock), 2340 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2341 } 2342 #endif 2343 } 2344 2345 #endif // KMP_USE_DYNAMIC_LOCK 2346 2347 /* initialize the lock */ 2348 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2349 #if KMP_USE_DYNAMIC_LOCK 2350 2351 KMP_DEBUG_ASSERT(__kmp_init_serial); 2352 if (__kmp_env_consistency_check && user_lock == NULL) { 2353 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2354 } 2355 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2356 2357 #if OMPT_SUPPORT && OMPT_OPTIONAL 2358 // This is the case, if called from omp_init_lock_with_hint: 2359 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2360 if (!codeptr) 2361 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2362 if (ompt_enabled.ompt_callback_lock_init) { 2363 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2364 
ompt_mutex_lock, omp_lock_hint_none, 2365 __ompt_get_mutex_impl_type(user_lock), 2366 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2367 } 2368 #endif 2369 2370 #else // KMP_USE_DYNAMIC_LOCK 2371 2372 static char const *const func = "omp_init_lock"; 2373 kmp_user_lock_p lck; 2374 KMP_DEBUG_ASSERT(__kmp_init_serial); 2375 2376 if (__kmp_env_consistency_check) { 2377 if (user_lock == NULL) { 2378 KMP_FATAL(LockIsUninitialized, func); 2379 } 2380 } 2381 2382 KMP_CHECK_USER_LOCK_INIT(); 2383 2384 if ((__kmp_user_lock_kind == lk_tas) && 2385 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2386 lck = (kmp_user_lock_p)user_lock; 2387 } 2388 #if KMP_USE_FUTEX 2389 else if ((__kmp_user_lock_kind == lk_futex) && 2390 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2391 lck = (kmp_user_lock_p)user_lock; 2392 } 2393 #endif 2394 else { 2395 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2396 } 2397 INIT_LOCK(lck); 2398 __kmp_set_user_lock_location(lck, loc); 2399 2400 #if OMPT_SUPPORT && OMPT_OPTIONAL 2401 // This is the case, if called from omp_init_lock_with_hint: 2402 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2403 if (!codeptr) 2404 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2405 if (ompt_enabled.ompt_callback_lock_init) { 2406 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2407 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2408 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2409 } 2410 #endif 2411 2412 #if USE_ITT_BUILD 2413 __kmp_itt_lock_creating(lck); 2414 #endif /* USE_ITT_BUILD */ 2415 2416 #endif // KMP_USE_DYNAMIC_LOCK 2417 } // __kmpc_init_lock 2418 2419 /* initialize the lock */ 2420 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2421 #if KMP_USE_DYNAMIC_LOCK 2422 2423 KMP_DEBUG_ASSERT(__kmp_init_serial); 2424 if (__kmp_env_consistency_check && user_lock == NULL) { 2425 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2426 } 2427 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2428 2429 #if OMPT_SUPPORT && OMPT_OPTIONAL 2430 // This is the case, if called from omp_init_lock_with_hint: 2431 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2432 if (!codeptr) 2433 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2434 if (ompt_enabled.ompt_callback_lock_init) { 2435 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2436 ompt_mutex_nest_lock, omp_lock_hint_none, 2437 __ompt_get_mutex_impl_type(user_lock), 2438 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2439 } 2440 #endif 2441 2442 #else // KMP_USE_DYNAMIC_LOCK 2443 2444 static char const *const func = "omp_init_nest_lock"; 2445 kmp_user_lock_p lck; 2446 KMP_DEBUG_ASSERT(__kmp_init_serial); 2447 2448 if (__kmp_env_consistency_check) { 2449 if (user_lock == NULL) { 2450 KMP_FATAL(LockIsUninitialized, func); 2451 } 2452 } 2453 2454 KMP_CHECK_USER_LOCK_INIT(); 2455 2456 if ((__kmp_user_lock_kind == lk_tas) && 2457 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2458 OMP_NEST_LOCK_T_SIZE)) { 2459 lck = (kmp_user_lock_p)user_lock; 2460 } 2461 #if KMP_USE_FUTEX 2462 else if ((__kmp_user_lock_kind == lk_futex) && 2463 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2464 OMP_NEST_LOCK_T_SIZE)) { 2465 lck = (kmp_user_lock_p)user_lock; 2466 } 2467 #endif 2468 else { 2469 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2470 } 2471 2472 INIT_NESTED_LOCK(lck); 2473 __kmp_set_user_lock_location(lck, loc); 2474 2475 #if OMPT_SUPPORT && OMPT_OPTIONAL 2476 // This is the case, if called from omp_init_lock_with_hint: 2477 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2478 if (!codeptr) 2479 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2480 if (ompt_enabled.ompt_callback_lock_init) { 2481 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2482 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2483 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2484 } 2485 #endif 2486 2487 #if USE_ITT_BUILD 2488 __kmp_itt_lock_creating(lck); 2489 #endif /* USE_ITT_BUILD */ 2490 2491 #endif // KMP_USE_DYNAMIC_LOCK 2492 } // __kmpc_init_nest_lock 2493 2494 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2495 #if KMP_USE_DYNAMIC_LOCK 2496 2497 #if USE_ITT_BUILD 2498 kmp_user_lock_p lck; 2499 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2500 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2501 } else { 2502 lck = (kmp_user_lock_p)user_lock; 2503 } 2504 __kmp_itt_lock_destroyed(lck); 2505 #endif 2506 #if OMPT_SUPPORT && OMPT_OPTIONAL 2507 // This is the case, if called from omp_init_lock_with_hint: 2508 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2509 if (!codeptr) 2510 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2511 if (ompt_enabled.ompt_callback_lock_destroy) { 2512 kmp_user_lock_p lck; 2513 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2514 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2515 } else { 2516 lck = (kmp_user_lock_p)user_lock; 2517 } 2518 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2519 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2520 } 2521 #endif 2522 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2523 #else 2524 kmp_user_lock_p lck; 2525 2526 if ((__kmp_user_lock_kind == lk_tas) && 2527 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2528 lck = (kmp_user_lock_p)user_lock; 2529 } 2530 #if KMP_USE_FUTEX 2531 else if ((__kmp_user_lock_kind == lk_futex) && 2532 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2533 lck = (kmp_user_lock_p)user_lock; 2534 } 2535 #endif 2536 else { 2537 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2538 } 2539 2540 #if OMPT_SUPPORT && OMPT_OPTIONAL 2541 // This is the case, if called from omp_init_lock_with_hint: 2542 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2543 if (!codeptr) 2544 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2545 if (ompt_enabled.ompt_callback_lock_destroy) { 2546 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2547 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2548 } 2549 #endif 2550 2551 #if USE_ITT_BUILD 2552 __kmp_itt_lock_destroyed(lck); 2553 #endif /* USE_ITT_BUILD */ 2554 DESTROY_LOCK(lck); 2555 2556 if ((__kmp_user_lock_kind == lk_tas) && 2557 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2558 ; 2559 } 2560 #if KMP_USE_FUTEX 2561 else if ((__kmp_user_lock_kind == lk_futex) && 2562 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2563 ; 2564 } 2565 #endif 2566 else { 2567 __kmp_user_lock_free(user_lock, gtid, lck); 2568 } 2569 #endif // KMP_USE_DYNAMIC_LOCK 2570 } // __kmpc_destroy_lock 2571 2572 /* destroy the lock */ 2573 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2574 #if KMP_USE_DYNAMIC_LOCK 2575 2576 #if USE_ITT_BUILD 2577 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2578 __kmp_itt_lock_destroyed(ilk->lock); 2579 #endif 2580 #if OMPT_SUPPORT && OMPT_OPTIONAL 2581 // This is the case, if called from omp_init_lock_with_hint: 2582 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2583 if (!codeptr) 2584 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2585 if (ompt_enabled.ompt_callback_lock_destroy) { 2586 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2587 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2588 } 2589 #endif 2590 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2591 2592 #else // KMP_USE_DYNAMIC_LOCK 2593 2594 kmp_user_lock_p lck; 2595 2596 if ((__kmp_user_lock_kind == lk_tas) && 2597 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2598 OMP_NEST_LOCK_T_SIZE)) { 2599 lck = (kmp_user_lock_p)user_lock; 2600 } 2601 #if KMP_USE_FUTEX 2602 else if ((__kmp_user_lock_kind == lk_futex) && 2603 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2604 OMP_NEST_LOCK_T_SIZE)) { 2605 lck = (kmp_user_lock_p)user_lock; 2606 } 2607 #endif 2608 else { 2609 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2610 } 2611 2612 #if OMPT_SUPPORT && OMPT_OPTIONAL 2613 // This is the case, if called from omp_init_lock_with_hint: 2614 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2615 if (!codeptr) 2616 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2617 if (ompt_enabled.ompt_callback_lock_destroy) { 2618 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2619 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2620 } 2621 #endif 2622 2623 #if USE_ITT_BUILD 2624 __kmp_itt_lock_destroyed(lck); 2625 #endif /* USE_ITT_BUILD */ 2626 2627 DESTROY_NESTED_LOCK(lck); 2628 2629 if ((__kmp_user_lock_kind == lk_tas) && 2630 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2631 OMP_NEST_LOCK_T_SIZE)) { 2632 ; 2633 } 2634 #if KMP_USE_FUTEX 2635 else if ((__kmp_user_lock_kind == lk_futex) && 2636 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2637 OMP_NEST_LOCK_T_SIZE)) { 2638 ; 2639 } 2640 #endif 2641 else { 2642 __kmp_user_lock_free(user_lock, gtid, lck); 2643 } 2644 #endif // KMP_USE_DYNAMIC_LOCK 2645 } // __kmpc_destroy_nest_lock 2646 2647 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2648 KMP_COUNT_BLOCK(OMP_set_lock); 2649 #if KMP_USE_DYNAMIC_LOCK 2650 int tag = KMP_EXTRACT_D_TAG(user_lock); 2651 #if USE_ITT_BUILD 2652 __kmp_itt_lock_acquiring( 2653 (kmp_user_lock_p) 2654 user_lock); // itt function will get to the right lock object. 
2655 #endif 2656 #if OMPT_SUPPORT && OMPT_OPTIONAL 2657 // This is the case, if called from omp_init_lock_with_hint: 2658 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2659 if (!codeptr) 2660 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2661 if (ompt_enabled.ompt_callback_mutex_acquire) { 2662 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2663 ompt_mutex_lock, omp_lock_hint_none, 2664 __ompt_get_mutex_impl_type(user_lock), 2665 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2666 } 2667 #endif 2668 #if KMP_USE_INLINED_TAS 2669 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2670 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2671 } else 2672 #elif KMP_USE_INLINED_FUTEX 2673 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2674 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2675 } else 2676 #endif 2677 { 2678 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2679 } 2680 #if USE_ITT_BUILD 2681 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2682 #endif 2683 #if OMPT_SUPPORT && OMPT_OPTIONAL 2684 if (ompt_enabled.ompt_callback_mutex_acquired) { 2685 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2686 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2687 } 2688 #endif 2689 2690 #else // KMP_USE_DYNAMIC_LOCK 2691 2692 kmp_user_lock_p lck; 2693 2694 if ((__kmp_user_lock_kind == lk_tas) && 2695 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2696 lck = (kmp_user_lock_p)user_lock; 2697 } 2698 #if KMP_USE_FUTEX 2699 else if ((__kmp_user_lock_kind == lk_futex) && 2700 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2701 lck = (kmp_user_lock_p)user_lock; 2702 } 2703 #endif 2704 else { 2705 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2706 } 2707 2708 #if USE_ITT_BUILD 2709 __kmp_itt_lock_acquiring(lck); 2710 #endif /* USE_ITT_BUILD */ 2711 #if OMPT_SUPPORT && OMPT_OPTIONAL 2712 // This is the case, if called from omp_init_lock_with_hint: 2713 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2714 if (!codeptr) 2715 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2716 if (ompt_enabled.ompt_callback_mutex_acquire) { 2717 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2718 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2719 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2720 } 2721 #endif 2722 2723 ACQUIRE_LOCK(lck, gtid); 2724 2725 #if USE_ITT_BUILD 2726 __kmp_itt_lock_acquired(lck); 2727 #endif /* USE_ITT_BUILD */ 2728 2729 #if OMPT_SUPPORT && OMPT_OPTIONAL 2730 if (ompt_enabled.ompt_callback_mutex_acquired) { 2731 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2732 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2733 } 2734 #endif 2735 2736 #endif // KMP_USE_DYNAMIC_LOCK 2737 } 2738 2739 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2740 #if KMP_USE_DYNAMIC_LOCK 2741 2742 #if USE_ITT_BUILD 2743 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2744 #endif 2745 #if OMPT_SUPPORT && OMPT_OPTIONAL 2746 // This is the case, if called from omp_init_lock_with_hint: 2747 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2748 if (!codeptr) 2749 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2750 if (ompt_enabled.enabled) { 2751 if (ompt_enabled.ompt_callback_mutex_acquire) { 2752 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2753 ompt_mutex_nest_lock, omp_lock_hint_none, 2754 __ompt_get_mutex_impl_type(user_lock), 2755 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2756 } 2757 } 2758 #endif 2759 int acquire_status = 2760 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2761 (void)acquire_status; 2762 #if USE_ITT_BUILD 2763 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2764 #endif 2765 2766 #if OMPT_SUPPORT && OMPT_OPTIONAL 2767 if (ompt_enabled.enabled) { 2768 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2769 if (ompt_enabled.ompt_callback_mutex_acquired) { 2770 // lock_first 2771 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2772 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2773 codeptr); 2774 } 2775 } else { 2776 if (ompt_enabled.ompt_callback_nest_lock) { 2777 // lock_next 2778 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2779 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2780 } 2781 } 2782 } 2783 #endif 2784 2785 #else // KMP_USE_DYNAMIC_LOCK 2786 int acquire_status; 2787 kmp_user_lock_p lck; 2788 2789 if ((__kmp_user_lock_kind == lk_tas) && 2790 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2791 OMP_NEST_LOCK_T_SIZE)) { 2792 lck = (kmp_user_lock_p)user_lock; 2793 } 2794 #if KMP_USE_FUTEX 2795 else if ((__kmp_user_lock_kind == lk_futex) && 2796 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2797 OMP_NEST_LOCK_T_SIZE)) { 2798 lck = (kmp_user_lock_p)user_lock; 2799 } 2800 #endif 2801 else { 2802 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2803 } 2804 2805 #if USE_ITT_BUILD 2806 __kmp_itt_lock_acquiring(lck); 2807 #endif /* USE_ITT_BUILD */ 2808 #if OMPT_SUPPORT && OMPT_OPTIONAL 2809 // This is the case, if called from omp_init_lock_with_hint: 2810 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2811 if (!codeptr) 2812 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2813 if (ompt_enabled.enabled) { 2814 if (ompt_enabled.ompt_callback_mutex_acquire) { 2815 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2816 ompt_mutex_nest_lock, omp_lock_hint_none, 2817 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2818 codeptr); 2819 } 2820 } 2821 #endif 2822 2823 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2824 2825 #if USE_ITT_BUILD 2826 __kmp_itt_lock_acquired(lck); 2827 #endif /* USE_ITT_BUILD */ 2828 2829 #if OMPT_SUPPORT && OMPT_OPTIONAL 2830 if (ompt_enabled.enabled) { 2831 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2832 if (ompt_enabled.ompt_callback_mutex_acquired) { 2833 // lock_first 2834 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2835 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2836 } 2837 } else { 2838 if (ompt_enabled.ompt_callback_nest_lock) { 2839 // lock_next 2840 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2841 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2842 } 2843 } 2844 } 2845 #endif 2846 2847 #endif // KMP_USE_DYNAMIC_LOCK 2848 } 2849 2850 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2851 #if KMP_USE_DYNAMIC_LOCK 2852 2853 int tag = KMP_EXTRACT_D_TAG(user_lock); 2854 #if USE_ITT_BUILD 2855 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2856 #endif 2857 #if KMP_USE_INLINED_TAS 2858 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2859 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2860 } else 2861 #elif KMP_USE_INLINED_FUTEX 2862 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2863 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2864 } else 2865 #endif 2866 { 2867 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2868 } 2869 2870 #if OMPT_SUPPORT && OMPT_OPTIONAL 2871 // This is the case, if called from omp_init_lock_with_hint: 2872 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2873 if (!codeptr) 2874 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2875 if (ompt_enabled.ompt_callback_mutex_released) { 2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2877 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2878 } 2879 #endif 2880 2881 #else // KMP_USE_DYNAMIC_LOCK 2882 2883 kmp_user_lock_p lck; 2884 2885 /* Can't use serial interval since not block structured */ 2886 /* release the lock */ 2887 2888 if ((__kmp_user_lock_kind == lk_tas) && 2889 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2890 #if KMP_OS_LINUX && \ 2891 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2892 // "fast" path implemented to fix customer performance issue 2893 #if USE_ITT_BUILD 2894 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2895 #endif /* USE_ITT_BUILD */ 2896 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2897 KMP_MB(); 2898 2899 #if OMPT_SUPPORT && OMPT_OPTIONAL 2900 // This is the case, if called from omp_init_lock_with_hint: 2901 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2902 if (!codeptr) 2903 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2904 if (ompt_enabled.ompt_callback_mutex_released) { 2905 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2906 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2907 } 2908 #endif 2909 2910 return; 2911 #else 2912 lck = (kmp_user_lock_p)user_lock; 2913 #endif 2914 } 2915 #if KMP_USE_FUTEX 2916 else if ((__kmp_user_lock_kind == lk_futex) && 2917 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2918 lck = (kmp_user_lock_p)user_lock; 2919 } 2920 #endif 2921 else { 2922 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2923 } 2924 2925 #if USE_ITT_BUILD 2926 __kmp_itt_lock_releasing(lck); 2927 #endif /* USE_ITT_BUILD */ 2928 2929 RELEASE_LOCK(lck, gtid); 2930 2931 #if OMPT_SUPPORT && OMPT_OPTIONAL 2932 // This is the case, if called from omp_init_lock_with_hint: 2933 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2934 if (!codeptr) 2935 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2936 if (ompt_enabled.ompt_callback_mutex_released) { 2937 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2938 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2939 } 2940 #endif 2941 2942 #endif // KMP_USE_DYNAMIC_LOCK 2943 } 2944 2945 /* release the lock */ 2946 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2947 #if KMP_USE_DYNAMIC_LOCK 2948 2949 #if USE_ITT_BUILD 2950 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2951 #endif 2952 int release_status = 2953 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2954 (void)release_status; 2955 2956 #if OMPT_SUPPORT && OMPT_OPTIONAL 2957 // This is the case, if called from omp_init_lock_with_hint: 2958 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2959 if (!codeptr) 2960 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2961 if (ompt_enabled.enabled) { 2962 if (release_status == KMP_LOCK_RELEASED) { 2963 if (ompt_enabled.ompt_callback_mutex_released) { 2964 // release_lock_last 2965 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2966 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2967 codeptr); 2968 } 2969 } else if (ompt_enabled.ompt_callback_nest_lock) { 2970 // release_lock_prev 2971 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2972 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2973 } 2974 } 2975 #endif 2976 2977 #else // KMP_USE_DYNAMIC_LOCK 2978 2979 kmp_user_lock_p lck; 
2980 2981 /* Can't use serial interval since not block structured */ 2982 2983 if ((__kmp_user_lock_kind == lk_tas) && 2984 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2985 OMP_NEST_LOCK_T_SIZE)) { 2986 #if KMP_OS_LINUX && \ 2987 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2988 // "fast" path implemented to fix customer performance issue 2989 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2990 #if USE_ITT_BUILD 2991 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2992 #endif /* USE_ITT_BUILD */ 2993 2994 #if OMPT_SUPPORT && OMPT_OPTIONAL 2995 int release_status = KMP_LOCK_STILL_HELD; 2996 #endif 2997 2998 if (--(tl->lk.depth_locked) == 0) { 2999 TCW_4(tl->lk.poll, 0); 3000 #if OMPT_SUPPORT && OMPT_OPTIONAL 3001 release_status = KMP_LOCK_RELEASED; 3002 #endif 3003 } 3004 KMP_MB(); 3005 3006 #if OMPT_SUPPORT && OMPT_OPTIONAL 3007 // This is the case, if called from omp_init_lock_with_hint: 3008 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3009 if (!codeptr) 3010 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3011 if (ompt_enabled.enabled) { 3012 if (release_status == KMP_LOCK_RELEASED) { 3013 if (ompt_enabled.ompt_callback_mutex_released) { 3014 // release_lock_last 3015 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3016 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3017 } 3018 } else if (ompt_enabled.ompt_callback_nest_lock) { 3019 // release_lock_previous 3020 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3021 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3022 } 3023 } 3024 #endif 3025 3026 return; 3027 #else 3028 lck = (kmp_user_lock_p)user_lock; 3029 #endif 3030 } 3031 #if KMP_USE_FUTEX 3032 else if ((__kmp_user_lock_kind == lk_futex) && 3033 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3034 OMP_NEST_LOCK_T_SIZE)) { 3035 lck = (kmp_user_lock_p)user_lock; 3036 } 3037 #endif 3038 else { 3039 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3040 } 3041 3042 #if USE_ITT_BUILD 3043 __kmp_itt_lock_releasing(lck); 3044 #endif /* USE_ITT_BUILD */ 3045 3046 int release_status; 3047 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3048 #if OMPT_SUPPORT && OMPT_OPTIONAL 3049 // This is the case, if called from omp_init_lock_with_hint: 3050 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3051 if (!codeptr) 3052 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3053 if (ompt_enabled.enabled) { 3054 if (release_status == KMP_LOCK_RELEASED) { 3055 if (ompt_enabled.ompt_callback_mutex_released) { 3056 // release_lock_last 3057 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3058 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3059 } 3060 } else if (ompt_enabled.ompt_callback_nest_lock) { 3061 // release_lock_previous 3062 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3063 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3064 } 3065 } 3066 #endif 3067 3068 #endif // KMP_USE_DYNAMIC_LOCK 3069 } 3070 3071 /* try to acquire the lock */ 3072 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3073 KMP_COUNT_BLOCK(OMP_test_lock); 3074 3075 #if KMP_USE_DYNAMIC_LOCK 3076 int rc; 3077 int tag = KMP_EXTRACT_D_TAG(user_lock); 3078 #if USE_ITT_BUILD 3079 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3080 #endif 3081 #if OMPT_SUPPORT && OMPT_OPTIONAL 3082 // This is the case, if called from omp_init_lock_with_hint: 3083 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3084 if (!codeptr) 3085 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 3086 if (ompt_enabled.ompt_callback_mutex_acquire) { 3087 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3088 ompt_mutex_lock, omp_lock_hint_none, 3089 __ompt_get_mutex_impl_type(user_lock), 3090 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3091 } 3092 #endif 3093 #if KMP_USE_INLINED_TAS 3094 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3095 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3096 } else 3097 #elif KMP_USE_INLINED_FUTEX 3098 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3099 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3100 } else 3101 #endif 3102 { 3103 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3104 } 3105 if (rc) { 3106 #if USE_ITT_BUILD 3107 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3108 #endif 3109 #if OMPT_SUPPORT && OMPT_OPTIONAL 3110 if (ompt_enabled.ompt_callback_mutex_acquired) { 3111 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3112 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3113 } 3114 #endif 3115 return FTN_TRUE; 3116 } else { 3117 #if USE_ITT_BUILD 3118 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3119 #endif 3120 return FTN_FALSE; 3121 } 3122 3123 #else // KMP_USE_DYNAMIC_LOCK 3124 3125 kmp_user_lock_p lck; 3126 int rc; 3127 3128 if ((__kmp_user_lock_kind == lk_tas) && 3129 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3130 lck = (kmp_user_lock_p)user_lock; 3131 } 3132 #if KMP_USE_FUTEX 3133 else if ((__kmp_user_lock_kind == lk_futex) && 3134 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3135 lck = (kmp_user_lock_p)user_lock; 3136 } 3137 #endif 3138 else { 3139 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3140 } 3141 3142 #if USE_ITT_BUILD 3143 __kmp_itt_lock_acquiring(lck); 3144 #endif /* USE_ITT_BUILD */ 3145 #if OMPT_SUPPORT && OMPT_OPTIONAL 3146 // This is the case, if called from omp_init_lock_with_hint: 3147 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3148 if (!codeptr) 3149 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3150 if (ompt_enabled.ompt_callback_mutex_acquire) { 3151 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3152 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3153 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3154 } 3155 #endif 3156 3157 rc = TEST_LOCK(lck, gtid); 3158 #if USE_ITT_BUILD 3159 if (rc) { 3160 __kmp_itt_lock_acquired(lck); 3161 } else { 3162 __kmp_itt_lock_cancelled(lck); 3163 } 3164 #endif /* USE_ITT_BUILD */ 3165 #if OMPT_SUPPORT && OMPT_OPTIONAL 3166 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3167 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3168 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3169 } 3170 #endif 3171 3172 return (rc ? 
FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next 3284 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3285 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3286 } 3287 } 3288 } 3289 #endif 3290 return rc; 3291 3292 /* Can't use serial interval since not block structured */ 3293 3294 #endif // KMP_USE_DYNAMIC_LOCK 3295 } 3296 3297 // Interface to fast scalable reduce methods routines 3298 3299 // keep the selected method in a thread local structure for cross-function 3300 // usage: will be used in __kmpc_end_reduce* functions; 3301 // another solution: to re-determine the method one more time in 3302 // __kmpc_end_reduce* functions (new prototype required then) 3303 // AT: which solution is better? 3304 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3305 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3306 3307 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3308 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3309 3310 // description of the packed_reduction_method variable: look at the macros in 3311 // kmp.h 3312 3313 // used in a critical section reduce block 3314 static __forceinline void 3315 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3316 kmp_critical_name *crit) { 3317 3318 // this lock was visible to a customer and to the threading profile tool as a 3319 // serial overhead span (although it's used for an internal purpose only) 3320 // why was it visible in previous implementation? 3321 // should we keep it visible in new reduce block? 3322 kmp_user_lock_p lck; 3323 3324 #if KMP_USE_DYNAMIC_LOCK 3325 3326 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3327 // Check if it is initialized. 3328 if (*lk == 0) { 3329 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3330 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3331 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3332 } else { 3333 __kmp_init_indirect_csptr(crit, loc, global_tid, 3334 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3335 } 3336 } 3337 // Branch for accessing the actual lock object and set operation. This 3338 // branching is inevitable since this lock initialization does not follow the 3339 // normal dispatch path (lock table is not used). 3340 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3341 lck = (kmp_user_lock_p)lk; 3342 KMP_DEBUG_ASSERT(lck != NULL); 3343 if (__kmp_env_consistency_check) { 3344 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3345 } 3346 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3347 } else { 3348 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3349 lck = ilk->lock; 3350 KMP_DEBUG_ASSERT(lck != NULL); 3351 if (__kmp_env_consistency_check) { 3352 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3353 } 3354 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3355 } 3356 3357 #else // KMP_USE_DYNAMIC_LOCK 3358 3359 // We know that the fast reduction code is only emitted by Intel compilers 3360 // with 32 byte critical sections. If there isn't enough space, then we 3361 // have to use a pointer. 
3362 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3363 lck = (kmp_user_lock_p)crit; 3364 } else { 3365 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3366 } 3367 KMP_DEBUG_ASSERT(lck != NULL); 3368 3369 if (__kmp_env_consistency_check) 3370 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3371 3372 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3373 3374 #endif // KMP_USE_DYNAMIC_LOCK 3375 } 3376 3377 // used in a critical section reduce block 3378 static __forceinline void 3379 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3380 kmp_critical_name *crit) { 3381 3382 kmp_user_lock_p lck; 3383 3384 #if KMP_USE_DYNAMIC_LOCK 3385 3386 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3387 lck = (kmp_user_lock_p)crit; 3388 if (__kmp_env_consistency_check) 3389 __kmp_pop_sync(global_tid, ct_critical, loc); 3390 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3391 } else { 3392 kmp_indirect_lock_t *ilk = 3393 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3394 if (__kmp_env_consistency_check) 3395 __kmp_pop_sync(global_tid, ct_critical, loc); 3396 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3397 } 3398 3399 #else // KMP_USE_DYNAMIC_LOCK 3400 3401 // We know that the fast reduction code is only emitted by Intel compilers 3402 // with 32 byte critical sections. If there isn't enough space, then we have 3403 // to use a pointer. 3404 if (__kmp_base_user_lock_size > 32) { 3405 lck = *((kmp_user_lock_p *)crit); 3406 KMP_ASSERT(lck != NULL); 3407 } else { 3408 lck = (kmp_user_lock_p)crit; 3409 } 3410 3411 if (__kmp_env_consistency_check) 3412 __kmp_pop_sync(global_tid, ct_critical, loc); 3413 3414 __kmp_release_user_lock_with_checks(lck, global_tid); 3415 3416 #endif // KMP_USE_DYNAMIC_LOCK 3417 } // __kmp_end_critical_section_reduce_block 3418 3419 static __forceinline int 3420 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3421 int *task_state) { 3422 kmp_team_t *team; 3423 3424 // Check if we are inside the teams construct? 3425 if (th->th.th_teams_microtask) { 3426 *team_p = team = th->th.th_team; 3427 if (team->t.t_level == th->th.th_teams_level) { 3428 // This is reduction at teams construct. 3429 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3430 // Let's swap teams temporarily for the reduction. 3431 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3432 th->th.th_team = team->t.t_parent; 3433 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3434 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3435 *task_state = th->th.th_task_state; 3436 th->th.th_task_state = 0; 3437 3438 return 1; 3439 } 3440 } 3441 return 0; 3442 } 3443 3444 static __forceinline void 3445 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3446 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3447 th->th.th_info.ds.ds_tid = 0; 3448 th->th.th_team = team; 3449 th->th.th_team_nproc = team->t.t_nproc; 3450 th->th.th_task_team = team->t.t_task_team[task_state]; 3451 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3452 } 3453 3454 /* 2.a.i. Reduce Block without a terminating barrier */ 3455 /*! 
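An illustrative calling pattern for this entry point and its matching "end"
routine (placeholder names; a sketch of typical code generation, not literal
compiler output):
@code
kmp_int32 res = __kmpc_reduce_nowait(&loc, gtid, num_vars, reduce_size,
                                     reduce_data, reduce_func, &crit);
if (res == 1) {
  // ... this thread combines the partial results directly ...
  __kmpc_end_reduce_nowait(&loc, gtid, &crit);
} else if (res == 2) {
  // ... combine using atomic operations; no "end" call is generated ...
}
// res == 0: nothing to do on this thread
@endcode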
3456 @ingroup SYNCHRONIZATION 3457 @param loc source location information 3458 @param global_tid global thread number 3459 @param num_vars number of items (variables) to be reduced 3460 @param reduce_size size of data in bytes to be reduced 3461 @param reduce_data pointer to data to be reduced 3462 @param reduce_func callback function providing reduction operation on two 3463 operands and returning result of reduction in lhs_data 3464 @param lck pointer to the unique lock data structure 3465 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3466 threads if atomic reduction needed 3467 3468 The nowait version is used for a reduce clause with the nowait argument. 3469 */ 3470 kmp_int32 3471 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3472 size_t reduce_size, void *reduce_data, 3473 void (*reduce_func)(void *lhs_data, void *rhs_data), 3474 kmp_critical_name *lck) { 3475 3476 KMP_COUNT_BLOCK(REDUCE_nowait); 3477 int retval = 0; 3478 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3479 kmp_info_t *th; 3480 kmp_team_t *team; 3481 int teams_swapped = 0, task_state; 3482 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3483 __kmp_assert_valid_gtid(global_tid); 3484 3485 // why do we need this initialization here at all? 3486 // Reduction clause can not be used as a stand-alone directive. 3487 3488 // do not call __kmp_serial_initialize(), it will be called by 3489 // __kmp_parallel_initialize() if needed 3490 // possible detection of false-positive race by the threadchecker ??? 3491 if (!TCR_4(__kmp_init_parallel)) 3492 __kmp_parallel_initialize(); 3493 3494 __kmp_resume_if_soft_paused(); 3495 3496 // check correctness of reduce block nesting 3497 #if KMP_USE_DYNAMIC_LOCK 3498 if (__kmp_env_consistency_check) 3499 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3500 #else 3501 if (__kmp_env_consistency_check) 3502 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3503 #endif 3504 3505 th = __kmp_thread_from_gtid(global_tid); 3506 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3507 3508 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3509 // the value should be kept in a variable 3510 // the variable should be either a construct-specific or thread-specific 3511 // property, not a team specific property 3512 // (a thread can reach the next reduce block on the next construct, reduce 3513 // method may differ on the next construct) 3514 // an ident_t "loc" parameter could be used as a construct-specific property 3515 // (what if loc == 0?) 
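// (note: the method stored below via __KMP_SET_REDUCTION_METHOD() is read back
// by the same thread in __kmpc_end_reduce_nowait() through
// __KMP_GET_REDUCTION_METHOD(), so the 'set' and the later 'get' are always
// paired on one thread)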
// (if both construct-specific and team-specific variables were shared, 3517 // then unnecessary extra syncs would be needed) 3518 // a thread-specific variable is better regarding two issues above (next 3519 // construct and extra syncs) 3520 // a thread-specific "th_local.reduction_method" variable is used currently 3521 // each thread executes 'determine' and 'set' lines (no need to execute them by 3522 // one thread only; this avoids unnecessary extra syncs) 3523 3524 packed_reduction_method = __kmp_determine_reduction_method( 3525 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3526 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3527 3528 OMPT_REDUCTION_DECL(th, global_tid); 3529 if (packed_reduction_method == critical_reduce_block) { 3530 3531 OMPT_REDUCTION_BEGIN; 3532 3533 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3534 retval = 1; 3535 3536 } else if (packed_reduction_method == empty_reduce_block) { 3537 3538 OMPT_REDUCTION_BEGIN; 3539 3540 // usage: if team size == 1, no synchronization is required ( Intel 3541 // platforms only ) 3542 retval = 1; 3543 3544 } else if (packed_reduction_method == atomic_reduce_block) { 3545 3546 retval = 2; 3547 3548 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3549 // won't be called by the code gen) 3550 // (it's not ideal, because the checking block has been closed by 3551 // this 'pop', 3552 // but the atomic operation has not been executed yet; it will be executed 3553 // slightly later, literally on the next instruction) 3554 if (__kmp_env_consistency_check) 3555 __kmp_pop_sync(global_tid, ct_reduce, loc); 3556 3557 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3558 tree_reduce_block)) { 3559 3560 // AT: performance issue: a real barrier here 3561 // AT: (if primary thread is slow, other threads are blocked here waiting for 3562 // the primary thread to come and release them) 3563 // AT: (it's not what a customer might expect when specifying the NOWAIT clause) 3564 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3565 // be confusing to a customer) 3566 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3567 // might go faster and be more in line with the sense of NOWAIT 3568 // AT: TO DO: run the EPCC test and compare times 3569 3570 // this barrier should be invisible to a customer and to the threading profile 3571 // tool (it's neither a terminating barrier nor customer's code, it's 3572 // used for an internal purpose) 3573 #if OMPT_SUPPORT 3574 // JP: can this barrier potentially lead to task scheduling? 3575 // JP: as long as there is a barrier in the implementation, OMPT should and 3576 // will provide the barrier events 3577 // so we set up the necessary frame/return addresses. 3578 ompt_frame_t *ompt_frame; 3579 if (ompt_enabled.enabled) { 3580 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3581 if (ompt_frame->enter_frame.ptr == NULL) 3582 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3583 } 3584 OMPT_STORE_RETURN_ADDRESS(global_tid); 3585 #endif 3586 #if USE_ITT_NOTIFY 3587 __kmp_threads[global_tid]->th.th_ident = loc; 3588 #endif 3589 retval = 3590 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3591 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3592 retval = (retval != 0) ?
(0) : (1); 3593 #if OMPT_SUPPORT && OMPT_OPTIONAL 3594 if (ompt_enabled.enabled) { 3595 ompt_frame->enter_frame = ompt_data_none; 3596 } 3597 #endif 3598 3599 // all other workers except primary thread should do this pop here 3600 // ( none of other workers will get to __kmpc_end_reduce_nowait() ) 3601 if (__kmp_env_consistency_check) { 3602 if (retval == 0) { 3603 __kmp_pop_sync(global_tid, ct_reduce, loc); 3604 } 3605 } 3606 3607 } else { 3608 3609 // should never reach this block 3610 KMP_ASSERT(0); // "unexpected method" 3611 } 3612 if (teams_swapped) { 3613 __kmp_restore_swapped_teams(th, team, task_state); 3614 } 3615 KA_TRACE( 3616 10, 3617 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3618 global_tid, packed_reduction_method, retval)); 3619 3620 return retval; 3621 } 3622 3623 /*! 3624 @ingroup SYNCHRONIZATION 3625 @param loc source location information 3626 @param global_tid global thread id. 3627 @param lck pointer to the unique lock data structure 3628 3629 Finish the execution of a reduce nowait. 3630 */ 3631 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3632 kmp_critical_name *lck) { 3633 3634 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3635 3636 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3637 __kmp_assert_valid_gtid(global_tid); 3638 3639 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3640 3641 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3642 3643 if (packed_reduction_method == critical_reduce_block) { 3644 3645 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3646 OMPT_REDUCTION_END; 3647 3648 } else if (packed_reduction_method == empty_reduce_block) { 3649 3650 // usage: if team size == 1, no synchronization is required ( on Intel 3651 // platforms only ) 3652 3653 OMPT_REDUCTION_END; 3654 3655 } else if (packed_reduction_method == atomic_reduce_block) { 3656 3657 // neither primary thread nor other workers should get here 3658 // (code gen does not generate this call in case 2: atomic reduce block) 3659 // actually it's better to remove this elseif at all; 3660 // after removal this value will checked by the 'else' and will assert 3661 3662 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3663 tree_reduce_block)) { 3664 3665 // only primary thread gets here 3666 // OMPT: tree reduction is annotated in the barrier code 3667 3668 } else { 3669 3670 // should never reach this block 3671 KMP_ASSERT(0); // "unexpected method" 3672 } 3673 3674 if (__kmp_env_consistency_check) 3675 __kmp_pop_sync(global_tid, ct_reduce, loc); 3676 3677 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3678 global_tid, packed_reduction_method)); 3679 3680 return; 3681 } 3682 3683 /* 2.a.ii. Reduce Block with a terminating barrier */ 3684 3685 /*! 3686 @ingroup SYNCHRONIZATION 3687 @param loc source location information 3688 @param global_tid global thread number 3689 @param num_vars number of items (variables) to be reduced 3690 @param reduce_size size of data in bytes to be reduced 3691 @param reduce_data pointer to data to be reduced 3692 @param reduce_func callback function providing reduction operation on two 3693 operands and returning result of reduction in lhs_data 3694 @param lck pointer to the unique lock data structure 3695 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3696 threads if atomic reduction needed 3697 3698 A blocking reduce that includes an implicit barrier. 
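As a rough, hedged sketch (not the output of any particular compiler version),
a <tt>reduction(+:sum)</tt> clause without <tt>nowait</tt> may be lowered along
the following lines. The names <tt>reduce_fn</tt>, <tt>red_crit</tt>,
<tt>loc</tt> and <tt>gtid</tt> are placeholders for the compiler-generated
combiner, the static kmp_critical_name, and the usual location/thread-id
arguments; real codegen usually passes a per-thread list of private copies as
<tt>reduce_data</tt>, while a single scalar is used here for brevity.

@code
static kmp_critical_name red_crit; // zero-initialized lock/critical storage
static void reduce_fn(void *lhs, void *rhs) {
  *(int *)lhs += *(int *)rhs; // combine two partial sums
}
// ... inside the parallel region; sum is the shared reduction variable and
// sum_priv is this thread's private partial sum ...
switch (__kmpc_reduce(&loc, gtid, 1, sizeof(int), &sum_priv, reduce_fn,
                      &red_crit)) {
case 1: // critical/tree path: this thread combines, then finishes the block
  sum += sum_priv;
  __kmpc_end_reduce(&loc, gtid, &red_crit);
  break;
case 2: // atomic path: combine with an atomic update, then finish the block
  __kmpc_atomic_fixed4_add(&loc, gtid, &sum, sum_priv); // or equivalent atomic
  __kmpc_end_reduce(&loc, gtid, &red_crit);
  break;
default: // 0: other workers under the tree method have nothing left to do
  break;
}
@endcode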
3699 */ 3700 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3701 size_t reduce_size, void *reduce_data, 3702 void (*reduce_func)(void *lhs_data, void *rhs_data), 3703 kmp_critical_name *lck) { 3704 KMP_COUNT_BLOCK(REDUCE_wait); 3705 int retval = 0; 3706 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3707 kmp_info_t *th; 3708 kmp_team_t *team; 3709 int teams_swapped = 0, task_state; 3710 3711 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3712 __kmp_assert_valid_gtid(global_tid); 3713 3714 // why do we need this initialization here at all? 3715 // Reduction clause can not be a stand-alone directive. 3716 3717 // do not call __kmp_serial_initialize(), it will be called by 3718 // __kmp_parallel_initialize() if needed 3719 // possible detection of false-positive race by the threadchecker ??? 3720 if (!TCR_4(__kmp_init_parallel)) 3721 __kmp_parallel_initialize(); 3722 3723 __kmp_resume_if_soft_paused(); 3724 3725 // check correctness of reduce block nesting 3726 #if KMP_USE_DYNAMIC_LOCK 3727 if (__kmp_env_consistency_check) 3728 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3729 #else 3730 if (__kmp_env_consistency_check) 3731 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3732 #endif 3733 3734 th = __kmp_thread_from_gtid(global_tid); 3735 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3736 3737 packed_reduction_method = __kmp_determine_reduction_method( 3738 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3739 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3740 3741 OMPT_REDUCTION_DECL(th, global_tid); 3742 3743 if (packed_reduction_method == critical_reduce_block) { 3744 3745 OMPT_REDUCTION_BEGIN; 3746 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3747 retval = 1; 3748 3749 } else if (packed_reduction_method == empty_reduce_block) { 3750 3751 OMPT_REDUCTION_BEGIN; 3752 // usage: if team size == 1, no synchronization is required ( Intel 3753 // platforms only ) 3754 retval = 1; 3755 3756 } else if (packed_reduction_method == atomic_reduce_block) { 3757 3758 retval = 2; 3759 3760 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3761 tree_reduce_block)) { 3762 3763 // case tree_reduce_block: 3764 // this barrier should be visible to a customer and to the threading profile 3765 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3766 #if OMPT_SUPPORT 3767 ompt_frame_t *ompt_frame; 3768 if (ompt_enabled.enabled) { 3769 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3770 if (ompt_frame->enter_frame.ptr == NULL) 3771 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3772 } 3773 OMPT_STORE_RETURN_ADDRESS(global_tid); 3774 #endif 3775 #if USE_ITT_NOTIFY 3776 __kmp_threads[global_tid]->th.th_ident = 3777 loc; // needed for correct notification of frames 3778 #endif 3779 retval = 3780 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3781 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3782 retval = (retval != 0) ? 
(0) : (1); 3783 #if OMPT_SUPPORT && OMPT_OPTIONAL 3784 if (ompt_enabled.enabled) { 3785 ompt_frame->enter_frame = ompt_data_none; 3786 } 3787 #endif 3788 3789 // all other workers except primary thread should do this pop here 3790 // (none of other workers except primary will enter __kmpc_end_reduce()) 3791 if (__kmp_env_consistency_check) { 3792 if (retval == 0) { // 0: all other workers; 1: primary thread 3793 __kmp_pop_sync(global_tid, ct_reduce, loc); 3794 } 3795 } 3796 3797 } else { 3798 3799 // should never reach this block 3800 KMP_ASSERT(0); // "unexpected method" 3801 } 3802 if (teams_swapped) { 3803 __kmp_restore_swapped_teams(th, team, task_state); 3804 } 3805 3806 KA_TRACE(10, 3807 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3808 global_tid, packed_reduction_method, retval)); 3809 return retval; 3810 } 3811 3812 /*! 3813 @ingroup SYNCHRONIZATION 3814 @param loc source location information 3815 @param global_tid global thread id. 3816 @param lck pointer to the unique lock data structure 3817 3818 Finish the execution of a blocking reduce. 3819 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3820 start function. 3821 */ 3822 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3823 kmp_critical_name *lck) { 3824 3825 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3826 kmp_info_t *th; 3827 kmp_team_t *team; 3828 int teams_swapped = 0, task_state; 3829 3830 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3831 __kmp_assert_valid_gtid(global_tid); 3832 3833 th = __kmp_thread_from_gtid(global_tid); 3834 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3835 3836 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3837 3838 // this barrier should be visible to a customer and to the threading profile 3839 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3840 OMPT_REDUCTION_DECL(th, global_tid); 3841 3842 if (packed_reduction_method == critical_reduce_block) { 3843 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3844 3845 OMPT_REDUCTION_END; 3846 3847 // TODO: implicit barrier: should be exposed 3848 #if OMPT_SUPPORT 3849 ompt_frame_t *ompt_frame; 3850 if (ompt_enabled.enabled) { 3851 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3852 if (ompt_frame->enter_frame.ptr == NULL) 3853 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3854 } 3855 OMPT_STORE_RETURN_ADDRESS(global_tid); 3856 #endif 3857 #if USE_ITT_NOTIFY 3858 __kmp_threads[global_tid]->th.th_ident = loc; 3859 #endif 3860 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3861 #if OMPT_SUPPORT && OMPT_OPTIONAL 3862 if (ompt_enabled.enabled) { 3863 ompt_frame->enter_frame = ompt_data_none; 3864 } 3865 #endif 3866 3867 } else if (packed_reduction_method == empty_reduce_block) { 3868 3869 OMPT_REDUCTION_END; 3870 3871 // usage: if team size==1, no synchronization is required (Intel platforms only) 3872 3873 // TODO: implicit barrier: should be exposed 3874 #if OMPT_SUPPORT 3875 ompt_frame_t *ompt_frame; 3876 if (ompt_enabled.enabled) { 3877 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3878 if (ompt_frame->enter_frame.ptr == NULL) 3879 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3880 } 3881 OMPT_STORE_RETURN_ADDRESS(global_tid); 3882 #endif 3883 #if USE_ITT_NOTIFY 3884 __kmp_threads[global_tid]->th.th_ident = loc; 3885 #endif 3886 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3887 #if OMPT_SUPPORT && OMPT_OPTIONAL 3888 if (ompt_enabled.enabled) { 3889 ompt_frame->enter_frame = ompt_data_none; 3890 } 3891 #endif 3892 3893 } else if (packed_reduction_method == atomic_reduce_block) { 3894 3895 #if OMPT_SUPPORT 3896 ompt_frame_t *ompt_frame; 3897 if (ompt_enabled.enabled) { 3898 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3899 if (ompt_frame->enter_frame.ptr == NULL) 3900 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3901 } 3902 OMPT_STORE_RETURN_ADDRESS(global_tid); 3903 #endif 3904 // TODO: implicit barrier: should be exposed 3905 #if USE_ITT_NOTIFY 3906 __kmp_threads[global_tid]->th.th_ident = loc; 3907 #endif 3908 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3909 #if OMPT_SUPPORT && OMPT_OPTIONAL 3910 if (ompt_enabled.enabled) { 3911 ompt_frame->enter_frame = ompt_data_none; 3912 } 3913 #endif 3914 3915 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3916 tree_reduce_block)) { 3917 3918 // only primary thread executes here (primary releases all other workers) 3919 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3920 global_tid); 3921 3922 } else { 3923 3924 // should never reach this block 3925 KMP_ASSERT(0); // "unexpected method" 3926 } 3927 if (teams_swapped) { 3928 __kmp_restore_swapped_teams(th, team, task_state); 3929 } 3930 3931 if (__kmp_env_consistency_check) 3932 __kmp_pop_sync(global_tid, ct_reduce, loc); 3933 3934 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3935 global_tid, packed_reduction_method)); 3936 3937 return; 3938 } 3939 3940 #undef __KMP_GET_REDUCTION_METHOD 3941 #undef __KMP_SET_REDUCTION_METHOD 3942 3943 /* end of interface to fast scalable reduce routines */ 3944 3945 kmp_uint64 __kmpc_get_taskid() { 3946 3947 kmp_int32 gtid; 3948 kmp_info_t *thread; 3949 3950 gtid = __kmp_get_gtid(); 3951 if (gtid < 0) { 3952 return 0; 3953 } 3954 thread = __kmp_thread_from_gtid(gtid); 3955 return thread->th.th_current_task->td_task_id; 3956 3957 } // __kmpc_get_taskid 3958 3959 kmp_uint64 __kmpc_get_parent_taskid() { 3960 3961 kmp_int32 gtid; 3962 kmp_info_t *thread; 3963 kmp_taskdata_t *parent_task; 3964 3965 gtid = __kmp_get_gtid(); 3966 if (gtid < 0) { 3967 return 0; 3968 } 3969 thread = __kmp_thread_from_gtid(gtid); 3970 parent_task = thread->th.th_current_task->td_parent; 3971 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3972 3973 } // __kmpc_get_parent_taskid 3974 3975 /*! 3976 @ingroup WORK_SHARING 3977 @param loc source location information. 3978 @param gtid global thread number. 3979 @param num_dims number of associated doacross loops. 3980 @param dims info on loops bounds. 3981 3982 Initialize doacross loop information. 3983 Expect compiler send us inclusive bounds, 3984 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
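As an illustrative, hedged sketch (placeholder names such as <tt>loc</tt>,
<tt>gtid</tt>, <tt>lb</tt> and <tt>ub</tt>, not actual compiler output), the
example loop above with a one-dimensional cross-iteration dependence could be
lowered roughly as follows:

@code
struct kmp_dim dims[1];
dims[0].lo = 2; // inclusive lower bound
dims[0].up = 8; // inclusive upper bound
dims[0].st = 2; // stride
__kmpc_doacross_init(&loc, gtid, 1, dims);
for (kmp_int64 i = lb; i <= ub; i += 2) { // this thread's chunk of iterations
  kmp_int64 vec[1];
  vec[0] = i - 2;                 // #pragma omp ordered depend(sink: i - 2)
  __kmpc_doacross_wait(&loc, gtid, vec);
  /* ... loop body ... */
  vec[0] = i;                     // #pragma omp ordered depend(source)
  __kmpc_doacross_post(&loc, gtid, vec);
}
__kmpc_doacross_fini(&loc, gtid);
@endcode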
3985 */ 3986 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3987 const struct kmp_dim *dims) { 3988 __kmp_assert_valid_gtid(gtid); 3989 int j, idx; 3990 kmp_int64 last, trace_count; 3991 kmp_info_t *th = __kmp_threads[gtid]; 3992 kmp_team_t *team = th->th.th_team; 3993 kmp_uint32 *flags; 3994 kmp_disp_t *pr_buf = th->th.th_dispatch; 3995 dispatch_shared_info_t *sh_buf; 3996 3997 KA_TRACE( 3998 20, 3999 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4000 gtid, num_dims, !team->t.t_serialized)); 4001 KMP_DEBUG_ASSERT(dims != NULL); 4002 KMP_DEBUG_ASSERT(num_dims > 0); 4003 4004 if (team->t.t_serialized) { 4005 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4006 return; // no dependencies if team is serialized 4007 } 4008 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4009 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4010 // the next loop 4011 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4012 4013 // Save bounds info into allocated private buffer 4014 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4015 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4016 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4017 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4018 pr_buf->th_doacross_info[0] = 4019 (kmp_int64)num_dims; // first element is number of dimensions 4020 // Save also address of num_done in order to access it later without knowing 4021 // the buffer index 4022 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4023 pr_buf->th_doacross_info[2] = dims[0].lo; 4024 pr_buf->th_doacross_info[3] = dims[0].up; 4025 pr_buf->th_doacross_info[4] = dims[0].st; 4026 last = 5; 4027 for (j = 1; j < num_dims; ++j) { 4028 kmp_int64 4029 range_length; // To keep ranges of all dimensions but the first dims[0] 4030 if (dims[j].st == 1) { // most common case 4031 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4032 range_length = dims[j].up - dims[j].lo + 1; 4033 } else { 4034 if (dims[j].st > 0) { 4035 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4036 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4037 } else { // negative increment 4038 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4039 range_length = 4040 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4041 } 4042 } 4043 pr_buf->th_doacross_info[last++] = range_length; 4044 pr_buf->th_doacross_info[last++] = dims[j].lo; 4045 pr_buf->th_doacross_info[last++] = dims[j].up; 4046 pr_buf->th_doacross_info[last++] = dims[j].st; 4047 } 4048 4049 // Compute total trip count. 4050 // Start with range of dims[0] which we don't need to keep in the buffer. 
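// (with the bounds from the example in the description above, lo=2, up=8,
// st=2, this range is (8 - 2) / 2 + 1 = 4 iterations)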
4051 if (dims[0].st == 1) { // most common case 4052 trace_count = dims[0].up - dims[0].lo + 1; 4053 } else if (dims[0].st > 0) { 4054 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4055 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4056 } else { // negative increment 4057 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4058 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4059 } 4060 for (j = 1; j < num_dims; ++j) { 4061 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4062 } 4063 KMP_DEBUG_ASSERT(trace_count > 0); 4064 4065 // Check if shared buffer is not occupied by other loop (idx - 4066 // __kmp_dispatch_num_buffers) 4067 if (idx != sh_buf->doacross_buf_idx) { 4068 // Shared buffer is occupied, wait for it to be free 4069 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4070 __kmp_eq_4, NULL); 4071 } 4072 #if KMP_32_BIT_ARCH 4073 // Check if we are the first thread. After the CAS the first thread gets 0, 4074 // others get 1 if initialization is in progress, allocated pointer otherwise. 4075 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4076 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4077 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4078 #else 4079 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4080 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4081 #endif 4082 if (flags == NULL) { 4083 // we are the first thread, allocate the array of flags 4084 size_t size = 4085 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4086 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4087 KMP_MB(); 4088 sh_buf->doacross_flags = flags; 4089 } else if (flags == (kmp_uint32 *)1) { 4090 #if KMP_32_BIT_ARCH 4091 // initialization is still in progress, need to wait 4092 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4093 #else 4094 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4095 #endif 4096 KMP_YIELD(TRUE); 4097 KMP_MB(); 4098 } else { 4099 KMP_MB(); 4100 } 4101 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4102 pr_buf->th_doacross_flags = 4103 sh_buf->doacross_flags; // save private copy in order to not 4104 // touch shared buffer on each iteration 4105 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4106 } 4107 4108 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4109 __kmp_assert_valid_gtid(gtid); 4110 kmp_int64 shft; 4111 size_t num_dims, i; 4112 kmp_uint32 flag; 4113 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4114 kmp_info_t *th = __kmp_threads[gtid]; 4115 kmp_team_t *team = th->th.th_team; 4116 kmp_disp_t *pr_buf; 4117 kmp_int64 lo, up, st; 4118 4119 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4120 if (team->t.t_serialized) { 4121 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4122 return; // no dependencies if team is serialized 4123 } 4124 4125 // calculate sequential iteration number and check out-of-bounds condition 4126 pr_buf = th->th.th_dispatch; 4127 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4128 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4129 lo = pr_buf->th_doacross_info[2]; 4130 up = pr_buf->th_doacross_info[3]; 4131 st = pr_buf->th_doacross_info[4]; 4132 #if OMPT_SUPPORT && OMPT_OPTIONAL 4133 ompt_dependence_t deps[num_dims]; 4134 #endif 4135 if (st == 1) { // most common case 4136 if (vec[0] < lo || vec[0] > up) { 4137 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4138 "bounds [%lld,%lld]\n", 4139 gtid, vec[0], lo, up)); 4140 return; 4141 } 4142 iter_number = vec[0] - lo; 4143 } else if (st > 0) { 4144 if (vec[0] < lo || vec[0] > up) { 4145 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4146 "bounds [%lld,%lld]\n", 4147 gtid, vec[0], lo, up)); 4148 return; 4149 } 4150 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4151 } else { // negative increment 4152 if (vec[0] > lo || vec[0] < up) { 4153 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4154 "bounds [%lld,%lld]\n", 4155 gtid, vec[0], lo, up)); 4156 return; 4157 } 4158 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4159 } 4160 #if OMPT_SUPPORT && OMPT_OPTIONAL 4161 deps[0].variable.value = iter_number; 4162 deps[0].dependence_type = ompt_dependence_type_sink; 4163 #endif 4164 for (i = 1; i < num_dims; ++i) { 4165 kmp_int64 iter, ln; 4166 size_t j = i * 4; 4167 ln = pr_buf->th_doacross_info[j + 1]; 4168 lo = pr_buf->th_doacross_info[j + 2]; 4169 up = pr_buf->th_doacross_info[j + 3]; 4170 st = pr_buf->th_doacross_info[j + 4]; 4171 if (st == 1) { 4172 if (vec[i] < lo || vec[i] > up) { 4173 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4174 "bounds [%lld,%lld]\n", 4175 gtid, vec[i], lo, up)); 4176 return; 4177 } 4178 iter = vec[i] - lo; 4179 } else if (st > 0) { 4180 if (vec[i] < lo || vec[i] > up) { 4181 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4182 "bounds [%lld,%lld]\n", 4183 gtid, vec[i], lo, up)); 4184 return; 4185 } 4186 iter = (kmp_uint64)(vec[i] - lo) / st; 4187 } else { // st < 0 4188 if (vec[i] > lo || vec[i] < up) { 4189 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4190 "bounds [%lld,%lld]\n", 4191 gtid, vec[i], lo, up)); 4192 return; 4193 } 4194 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4195 } 4196 iter_number = iter + ln * iter_number; 4197 #if OMPT_SUPPORT && OMPT_OPTIONAL 4198 deps[i].variable.value = iter; 4199 deps[i].dependence_type = ompt_dependence_type_sink; 4200 #endif 4201 } 4202 shft = iter_number % 32; // use 32-bit granularity 4203 iter_number >>= 5; // divided by 32 4204 flag = 1 << shft; 4205 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4206 KMP_YIELD(TRUE); 4207 } 4208 KMP_MB(); 4209 #if OMPT_SUPPORT && OMPT_OPTIONAL 4210 if (ompt_enabled.ompt_callback_dependences) { 4211 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4212 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4213 } 4214 #endif 4215 KA_TRACE(20, 4216 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4217 gtid, (iter_number << 5) + shft)); 4218 } 4219 4220 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4221 __kmp_assert_valid_gtid(gtid); 4222 kmp_int64 shft; 4223 size_t num_dims, i; 4224 kmp_uint32 flag; 4225 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4226 kmp_info_t *th = __kmp_threads[gtid]; 4227 kmp_team_t *team = th->th.th_team; 4228 kmp_disp_t *pr_buf; 4229 kmp_int64 lo, st; 4230 4231 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4232 if (team->t.t_serialized) { 4233 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4234 return; // no dependencies if team is serialized 4235 } 4236 4237 // calculate sequential iteration number (same as in "wait" but no 4238 // out-of-bounds checks) 4239 pr_buf = th->th.th_dispatch; 4240 
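// Layout of th_doacross_info, as filled in by __kmpc_doacross_init():
//   [0] number of dimensions, [1] address of the shared num_done counter,
//   [2],[3],[4] lo/up/st of dims[0]; then for each dimension j >= 1:
//   [4j+1] range length, [4j+2] lo, [4j+3] up, [4j+4] st.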
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4241 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4242 lo = pr_buf->th_doacross_info[2]; 4243 st = pr_buf->th_doacross_info[4]; 4244 #if OMPT_SUPPORT && OMPT_OPTIONAL 4245 ompt_dependence_t deps[num_dims]; 4246 #endif 4247 if (st == 1) { // most common case 4248 iter_number = vec[0] - lo; 4249 } else if (st > 0) { 4250 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4251 } else { // negative increment 4252 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4253 } 4254 #if OMPT_SUPPORT && OMPT_OPTIONAL 4255 deps[0].variable.value = iter_number; 4256 deps[0].dependence_type = ompt_dependence_type_source; 4257 #endif 4258 for (i = 1; i < num_dims; ++i) { 4259 kmp_int64 iter, ln; 4260 size_t j = i * 4; 4261 ln = pr_buf->th_doacross_info[j + 1]; 4262 lo = pr_buf->th_doacross_info[j + 2]; 4263 st = pr_buf->th_doacross_info[j + 4]; 4264 if (st == 1) { 4265 iter = vec[i] - lo; 4266 } else if (st > 0) { 4267 iter = (kmp_uint64)(vec[i] - lo) / st; 4268 } else { // st < 0 4269 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4270 } 4271 iter_number = iter + ln * iter_number; 4272 #if OMPT_SUPPORT && OMPT_OPTIONAL 4273 deps[i].variable.value = iter; 4274 deps[i].dependence_type = ompt_dependence_type_source; 4275 #endif 4276 } 4277 #if OMPT_SUPPORT && OMPT_OPTIONAL 4278 if (ompt_enabled.ompt_callback_dependences) { 4279 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4280 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4281 } 4282 #endif 4283 shft = iter_number % 32; // use 32-bit granularity 4284 iter_number >>= 5; // divided by 32 4285 flag = 1 << shft; 4286 KMP_MB(); 4287 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4288 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4289 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4290 (iter_number << 5) + shft)); 4291 } 4292 4293 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4294 __kmp_assert_valid_gtid(gtid); 4295 kmp_int32 num_done; 4296 kmp_info_t *th = __kmp_threads[gtid]; 4297 kmp_team_t *team = th->th.th_team; 4298 kmp_disp_t *pr_buf = th->th.th_dispatch; 4299 4300 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4301 if (team->t.t_serialized) { 4302 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4303 return; // nothing to do 4304 } 4305 num_done = 4306 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4307 if (num_done == th->th.th_team_nproc) { 4308 // we are the last thread, need to free shared resources 4309 int idx = pr_buf->th_doacross_buf_idx - 1; 4310 dispatch_shared_info_t *sh_buf = 4311 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4312 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4313 (kmp_int64)&sh_buf->doacross_num_done); 4314 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4315 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4316 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4317 sh_buf->doacross_flags = NULL; 4318 sh_buf->doacross_num_done = 0; 4319 sh_buf->doacross_buf_idx += 4320 __kmp_dispatch_num_buffers; // free buffer for future re-use 4321 } 4322 // free private resources (need to keep buffer index forever) 4323 pr_buf->th_doacross_flags = NULL; 4324 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4325 pr_buf->th_doacross_info = NULL; 4326 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4327 } 4328 4329 /* omp_alloc/omp_calloc/omp_free only defined for 
C/C++, not for Fortran */ 4330 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4331 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4332 } 4333 4334 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4335 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); 4336 } 4337 4338 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4339 omp_allocator_handle_t free_allocator) { 4340 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4341 free_allocator); 4342 } 4343 4344 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4345 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4346 } 4347 4348 int __kmpc_get_target_offload(void) { 4349 if (!__kmp_init_serial) { 4350 __kmp_serial_initialize(); 4351 } 4352 return __kmp_target_offload; 4353 } 4354 4355 int __kmpc_pause_resource(kmp_pause_status_t level) { 4356 if (!__kmp_init_serial) { 4357 return 1; // Can't pause if runtime is not initialized 4358 } 4359 return __kmp_pause_resource(level); 4360 } 4361 4362 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4363 if (!__kmp_init_serial) 4364 __kmp_serial_initialize(); 4365 4366 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4367 4368 #if OMPT_SUPPORT 4369 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4370 ompt_callbacks.ompt_callback(ompt_callback_error)( 4371 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4372 OMPT_GET_RETURN_ADDRESS(0)); 4373 } 4374 #endif // OMPT_SUPPORT 4375 4376 char *src_loc; 4377 if (loc && loc->psource) { 4378 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4379 src_loc = 4380 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col); 4381 __kmp_str_loc_free(&str_loc); 4382 } else { 4383 src_loc = __kmp_str_format("unknown"); 4384 } 4385 4386 if (severity == severity_warning) 4387 KMP_WARNING(UserDirectedWarning, src_loc, message); 4388 else 4389 KMP_FATAL(UserDirectedError, src_loc, message); 4390 4391 __kmp_str_free(&src_loc); 4392 } 4393
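/* A minimal usage sketch for the allocator entry points defined above; user
   code calls the omp_* API directly, and omp_default_mem_alloc is one of the
   predefined allocator handles declared in omp.h:

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_alloc);
     double *tmp = (double *)omp_calloc(1024, sizeof(double),
                                        omp_default_mem_alloc);
     buf = (double *)omp_realloc(buf, 2048 * sizeof(double),
                                 omp_default_mem_alloc, omp_default_mem_alloc);
     omp_free(tmp, omp_default_mem_alloc);
     omp_free(buf, omp_default_mem_alloc);
*/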