/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes the __kmpc_end()
  // call a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).
92 93 If multiple non-OpenMP threads all enter an OpenMP construct then this 94 will be a unique thread identifier among all the threads created by 95 the OpenMP runtime (but the value cannot be defined in terms of 96 OpenMP thread ids returned by omp_get_thread_num()). 97 */ 98 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 99 kmp_int32 gtid = __kmp_entry_gtid(); 100 101 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 102 103 return gtid; 104 } 105 106 /*! 107 @ingroup THREAD_STATES 108 @param loc Source location information. 109 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 110 111 This function can be called in any context. 112 It returns the total number of threads under the control of the OpenMP runtime. 113 That is not a number that can be determined by any OpenMP standard calls, since 114 the library may be called from more than one non-OpenMP thread, and this 115 reflects the total over all such calls. Similarly the runtime maintains 116 underlying threads even when they are not active (since the cost of creating 117 and destroying OS threads is high), this call counts all such threads even if 118 they are not waiting for work. 119 */ 120 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 121 KC_TRACE(10, 122 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 123 124 return TCR_4(__kmp_all_nth); 125 } 126 127 /*! 128 @ingroup THREAD_STATES 129 @param loc Source location information. 130 @return The thread number of the calling thread in the innermost active parallel 131 construct. 132 */ 133 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 134 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 135 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 136 } 137 138 /*! 139 @ingroup THREAD_STATES 140 @param loc Source location information. 141 @return The number of threads in the innermost active parallel construct. 142 */ 143 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 144 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 145 146 return __kmp_entry_thread()->th.th_team->t.t_nproc; 147 } 148 149 /*! 150 * @ingroup DEPRECATED 151 * @param loc location description 152 * 153 * This function need not be called. It always returns TRUE. 
154 */ 155 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 156 #ifndef KMP_DEBUG 157 158 return TRUE; 159 160 #else 161 162 const char *semi2; 163 const char *semi3; 164 int line_no; 165 166 if (__kmp_par_range == 0) { 167 return TRUE; 168 } 169 semi2 = loc->psource; 170 if (semi2 == NULL) { 171 return TRUE; 172 } 173 semi2 = strchr(semi2, ';'); 174 if (semi2 == NULL) { 175 return TRUE; 176 } 177 semi2 = strchr(semi2 + 1, ';'); 178 if (semi2 == NULL) { 179 return TRUE; 180 } 181 if (__kmp_par_range_filename[0]) { 182 const char *name = semi2 - 1; 183 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 184 name--; 185 } 186 if ((*name == '/') || (*name == ';')) { 187 name++; 188 } 189 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 190 return __kmp_par_range < 0; 191 } 192 } 193 semi3 = strchr(semi2 + 1, ';'); 194 if (__kmp_par_range_routine[0]) { 195 if ((semi3 != NULL) && (semi3 > semi2) && 196 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 197 return __kmp_par_range < 0; 198 } 199 } 200 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 201 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 202 return __kmp_par_range > 0; 203 } 204 return __kmp_par_range < 0; 205 } 206 return TRUE; 207 208 #endif /* KMP_DEBUG */ 209 } 210 211 /*! 212 @ingroup THREAD_STATES 213 @param loc Source location information. 214 @return 1 if this thread is executing inside an active parallel region, zero if 215 not. 216 */ 217 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 218 return __kmp_entry_thread()->th.th_root->r.r_active; 219 } 220 221 /*! 222 @ingroup PARALLEL 223 @param loc source location information 224 @param global_tid global thread number 225 @param num_threads number of threads requested for this parallel construct 226 227 Set the number of threads to be used by the next fork spawned by this thread. 228 This call is only required if the parallel construct has a `num_threads` clause. 229 */ 230 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 231 kmp_int32 num_threads) { 232 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 233 global_tid, num_threads)); 234 __kmp_assert_valid_gtid(global_tid); 235 __kmp_push_num_threads(loc, global_tid, num_threads); 236 } 237 238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 239 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 240 /* the num_threads are automatically popped */ 241 } 242 243 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 244 kmp_int32 proc_bind) { 245 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 246 proc_bind)); 247 __kmp_assert_valid_gtid(global_tid); 248 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 249 } 250 251 /*! 252 @ingroup PARALLEL 253 @param loc source location information 254 @param argc total number of arguments in the ellipsis 255 @param microtask pointer to callback routine consisting of outlined parallel 256 construct 257 @param ... pointers to shared variables that aren't global 258 259 Do the actual fork and call the microtask in the relevant number of threads. 260 */ 261 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
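As an illustrative sketch (the exact lowering is compiler dependent), the kind
of OpenMP 5.1 directive that leads a compiler to emit this call is:
@code
#pragma omp teams num_teams(4 : 8) thread_limit(16)
@endcode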
367 This call is only required if the teams construct has a `num_teams` clause 368 or a `thread_limit` clause (or both). 369 */ 370 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, 371 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, 372 kmp_int32 num_threads) { 373 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d" 374 " num_teams_ub=%d num_threads=%d\n", 375 global_tid, num_teams_lb, num_teams_ub, num_threads)); 376 __kmp_assert_valid_gtid(global_tid); 377 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub, 378 num_threads); 379 } 380 381 /*! 382 @ingroup PARALLEL 383 @param loc source location information 384 @param argc total number of arguments in the ellipsis 385 @param microtask pointer to callback routine consisting of outlined teams 386 construct 387 @param ... pointers to shared variables that aren't global 388 389 Do the actual fork and call the microtask in the relevant number of threads. 390 */ 391 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 392 ...) { 393 int gtid = __kmp_entry_gtid(); 394 kmp_info_t *this_thr = __kmp_threads[gtid]; 395 va_list ap; 396 va_start(ap, microtask); 397 398 #if KMP_STATS_ENABLED 399 KMP_COUNT_BLOCK(OMP_TEAMS); 400 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 401 if (previous_state == stats_state_e::SERIAL_REGION) { 402 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 403 } else { 404 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 405 } 406 #endif 407 408 // remember teams entry point and nesting level 409 this_thr->th.th_teams_microtask = microtask; 410 this_thr->th.th_teams_level = 411 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 412 413 #if OMPT_SUPPORT 414 kmp_team_t *parent_team = this_thr->th.th_team; 415 int tid = __kmp_tid_from_gtid(gtid); 416 if (ompt_enabled.enabled) { 417 parent_team->t.t_implicit_task_taskdata[tid] 418 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 419 } 420 OMPT_STORE_RETURN_ADDRESS(gtid); 421 #endif 422 423 // check if __kmpc_push_num_teams called, set default number of teams 424 // otherwise 425 if (this_thr->th.th_teams_size.nteams == 0) { 426 __kmp_push_num_teams(loc, gtid, 0, 0); 427 } 428 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 429 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 430 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 431 432 __kmp_fork_call( 433 loc, gtid, fork_context_intel, argc, 434 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task 435 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); 436 __kmp_join_call(loc, gtid 437 #if OMPT_SUPPORT 438 , 439 fork_context_intel 440 #endif 441 ); 442 443 // Pop current CG root off list 444 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 445 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 446 this_thr->th.th_cg_roots = tmp->up; 447 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 448 " to node %p. 
cg_nthreads was %d\n",
                this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
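As a rough sketch (not literal compiler output; `loc` and `gtid` stand for the
location struct and global thread id the compiler already has in hand), a
conditional parallel region whose condition evaluates to false is typically
lowered so that the serialized body is bracketed by the two calls:
@code
__kmpc_serialized_parallel(&loc, gtid);
// ... body of the (serialized) parallel region ...
__kmpc_end_serialized_parallel(&loc, gtid);
@endcode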
509 */ 510 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 511 kmp_internal_control_t *top; 512 kmp_info_t *this_thr; 513 kmp_team_t *serial_team; 514 515 KC_TRACE(10, 516 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 517 518 /* skip all this code for autopar serialized loops since it results in 519 unacceptable overhead */ 520 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 521 return; 522 523 // Not autopar code 524 __kmp_assert_valid_gtid(global_tid); 525 if (!TCR_4(__kmp_init_parallel)) 526 __kmp_parallel_initialize(); 527 528 __kmp_resume_if_soft_paused(); 529 530 this_thr = __kmp_threads[global_tid]; 531 serial_team = this_thr->th.th_serial_team; 532 533 kmp_task_team_t *task_team = this_thr->th.th_task_team; 534 // we need to wait for the proxy tasks before finishing the thread 535 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 536 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 537 538 KMP_MB(); 539 KMP_DEBUG_ASSERT(serial_team); 540 KMP_ASSERT(serial_team->t.t_serialized); 541 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 542 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 543 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 544 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 545 546 #if OMPT_SUPPORT 547 if (ompt_enabled.enabled && 548 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 549 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 550 if (ompt_enabled.ompt_callback_implicit_task) { 551 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 552 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 553 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 554 } 555 556 // reset clear the task id only after unlinking the task 557 ompt_data_t *parent_task_data; 558 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 559 560 if (ompt_enabled.ompt_callback_parallel_end) { 561 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 562 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 563 ompt_parallel_invoker_program | ompt_parallel_team, 564 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 565 } 566 __ompt_lw_taskteam_unlink(this_thr); 567 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 568 } 569 #endif 570 571 /* If necessary, pop the internal control stack values and replace the team 572 * values */ 573 top = serial_team->t.t_control_stack_top; 574 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 575 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 576 serial_team->t.t_control_stack_top = top->next; 577 __kmp_free(top); 578 } 579 580 // if( serial_team -> t.t_serialized > 1 ) 581 serial_team->t.t_level--; 582 583 /* pop dispatch buffers stack */ 584 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 585 { 586 dispatch_private_info_t *disp_buffer = 587 serial_team->t.t_dispatch->th_disp_buffer; 588 serial_team->t.t_dispatch->th_disp_buffer = 589 serial_team->t.t_dispatch->th_disp_buffer->next; 590 __kmp_free(disp_buffer); 591 } 592 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 593 594 --serial_team->t.t_serialized; 595 if (serial_team->t.t_serialized == 0) { 596 597 /* return to the parallel section */ 598 599 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 600 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 601 __kmp_clear_x87_fpu_status_word(); 602 
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
685 } else { 686 #if KMP_COMPILER_ICC 687 _mm_mfence(); 688 #elif KMP_COMPILER_MSVC 689 MemoryBarrier(); 690 #else 691 __sync_synchronize(); 692 #endif // KMP_COMPILER_ICC 693 } 694 #endif // KMP_MIC 695 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ 696 KMP_ARCH_RISCV64) 697 // Nothing to see here move along 698 #elif KMP_ARCH_PPC64 699 // Nothing needed here (we have a real MB above). 700 #else 701 #error Unknown or unsupported architecture 702 #endif 703 704 #if OMPT_SUPPORT && OMPT_OPTIONAL 705 if (ompt_enabled.ompt_callback_flush) { 706 ompt_callbacks.ompt_callback(ompt_callback_flush)( 707 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 708 } 709 #endif 710 } 711 712 /* -------------------------------------------------------------------------- */ 713 /*! 714 @ingroup SYNCHRONIZATION 715 @param loc source location information 716 @param global_tid thread id. 717 718 Execute a barrier. 719 */ 720 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 721 KMP_COUNT_BLOCK(OMP_BARRIER); 722 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 723 __kmp_assert_valid_gtid(global_tid); 724 725 if (!TCR_4(__kmp_init_parallel)) 726 __kmp_parallel_initialize(); 727 728 __kmp_resume_if_soft_paused(); 729 730 if (__kmp_env_consistency_check) { 731 if (loc == 0) { 732 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 733 } 734 __kmp_check_barrier(global_tid, ct_barrier, loc); 735 } 736 737 #if OMPT_SUPPORT 738 ompt_frame_t *ompt_frame; 739 if (ompt_enabled.enabled) { 740 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 741 if (ompt_frame->enter_frame.ptr == NULL) 742 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 743 } 744 OMPT_STORE_RETURN_ADDRESS(global_tid); 745 #endif 746 __kmp_threads[global_tid]->th.th_ident = loc; 747 // TODO: explicit barrier_wait_id: 748 // this function is called when 'barrier' directive is present or 749 // implicit barrier at the end of a worksharing construct. 750 // 1) better to add a per-thread barrier counter to a thread data structure 751 // 2) set to 0 when a new team is created 752 // 4) no sync is required 753 754 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 755 #if OMPT_SUPPORT && OMPT_OPTIONAL 756 if (ompt_enabled.enabled) { 757 ompt_frame->enter_frame = ompt_data_none; 758 } 759 #endif 760 } 761 762 /* The BARRIER for a MASTER section is always explicit */ 763 /*! 764 @ingroup WORK_SHARING 765 @param loc source location information. 766 @param global_tid global thread number . 767 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
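A sketch of the usual code shape around these calls (illustrative only):
@code
if (__kmpc_master(&loc, gtid)) {
  // ... body of the master region ...
  __kmpc_end_master(&loc, gtid);
}
@endcode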
768 */ 769 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 770 int status = 0; 771 772 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 773 __kmp_assert_valid_gtid(global_tid); 774 775 if (!TCR_4(__kmp_init_parallel)) 776 __kmp_parallel_initialize(); 777 778 __kmp_resume_if_soft_paused(); 779 780 if (KMP_MASTER_GTID(global_tid)) { 781 KMP_COUNT_BLOCK(OMP_MASTER); 782 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 783 status = 1; 784 } 785 786 #if OMPT_SUPPORT && OMPT_OPTIONAL 787 if (status) { 788 if (ompt_enabled.ompt_callback_masked) { 789 kmp_info_t *this_thr = __kmp_threads[global_tid]; 790 kmp_team_t *team = this_thr->th.th_team; 791 792 int tid = __kmp_tid_from_gtid(global_tid); 793 ompt_callbacks.ompt_callback(ompt_callback_masked)( 794 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 795 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 796 OMPT_GET_RETURN_ADDRESS(0)); 797 } 798 } 799 #endif 800 801 if (__kmp_env_consistency_check) { 802 #if KMP_USE_DYNAMIC_LOCK 803 if (status) 804 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 805 else 806 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 807 #else 808 if (status) 809 __kmp_push_sync(global_tid, ct_master, loc, NULL); 810 else 811 __kmp_check_sync(global_tid, ct_master, loc, NULL); 812 #endif 813 } 814 815 return status; 816 } 817 818 /*! 819 @ingroup WORK_SHARING 820 @param loc source location information. 821 @param global_tid global thread number . 822 823 Mark the end of a <tt>master</tt> region. This should only be called by the 824 thread that executes the <tt>master</tt> region. 825 */ 826 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 827 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 828 __kmp_assert_valid_gtid(global_tid); 829 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 830 KMP_POP_PARTITIONED_TIMER(); 831 832 #if OMPT_SUPPORT && OMPT_OPTIONAL 833 kmp_info_t *this_thr = __kmp_threads[global_tid]; 834 kmp_team_t *team = this_thr->th.th_team; 835 if (ompt_enabled.ompt_callback_masked) { 836 int tid = __kmp_tid_from_gtid(global_tid); 837 ompt_callbacks.ompt_callback(ompt_callback_masked)( 838 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 839 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 840 OMPT_GET_RETURN_ADDRESS(0)); 841 } 842 #endif 843 844 if (__kmp_env_consistency_check) { 845 if (KMP_MASTER_GTID(global_tid)) 846 __kmp_pop_sync(global_tid, ct_master, loc); 847 } 848 } 849 850 /*! 851 @ingroup WORK_SHARING 852 @param loc source location information. 853 @param global_tid global thread number. 854 @param filter result of evaluating filter clause on thread global_tid, or zero 855 if no filter clause present 856 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. 
857 */ 858 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { 859 int status = 0; 860 int tid; 861 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid)); 862 __kmp_assert_valid_gtid(global_tid); 863 864 if (!TCR_4(__kmp_init_parallel)) 865 __kmp_parallel_initialize(); 866 867 __kmp_resume_if_soft_paused(); 868 869 tid = __kmp_tid_from_gtid(global_tid); 870 if (tid == filter) { 871 KMP_COUNT_BLOCK(OMP_MASKED); 872 KMP_PUSH_PARTITIONED_TIMER(OMP_masked); 873 status = 1; 874 } 875 876 #if OMPT_SUPPORT && OMPT_OPTIONAL 877 if (status) { 878 if (ompt_enabled.ompt_callback_masked) { 879 kmp_info_t *this_thr = __kmp_threads[global_tid]; 880 kmp_team_t *team = this_thr->th.th_team; 881 ompt_callbacks.ompt_callback(ompt_callback_masked)( 882 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 883 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 884 OMPT_GET_RETURN_ADDRESS(0)); 885 } 886 } 887 #endif 888 889 if (__kmp_env_consistency_check) { 890 #if KMP_USE_DYNAMIC_LOCK 891 if (status) 892 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0); 893 else 894 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0); 895 #else 896 if (status) 897 __kmp_push_sync(global_tid, ct_masked, loc, NULL); 898 else 899 __kmp_check_sync(global_tid, ct_masked, loc, NULL); 900 #endif 901 } 902 903 return status; 904 } 905 906 /*! 907 @ingroup WORK_SHARING 908 @param loc source location information. 909 @param global_tid global thread number . 910 911 Mark the end of a <tt>masked</tt> region. This should only be called by the 912 thread that executes the <tt>masked</tt> region. 913 */ 914 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { 915 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid)); 916 __kmp_assert_valid_gtid(global_tid); 917 KMP_POP_PARTITIONED_TIMER(); 918 919 #if OMPT_SUPPORT && OMPT_OPTIONAL 920 kmp_info_t *this_thr = __kmp_threads[global_tid]; 921 kmp_team_t *team = this_thr->th.th_team; 922 if (ompt_enabled.ompt_callback_masked) { 923 int tid = __kmp_tid_from_gtid(global_tid); 924 ompt_callbacks.ompt_callback(ompt_callback_masked)( 925 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 926 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 927 OMPT_GET_RETURN_ADDRESS(0)); 928 } 929 #endif 930 931 if (__kmp_env_consistency_check) { 932 __kmp_pop_sync(global_tid, ct_masked, loc); 933 } 934 } 935 936 /*! 937 @ingroup WORK_SHARING 938 @param loc source location information. 939 @param gtid global thread number. 940 941 Start execution of an <tt>ordered</tt> construct. 
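A sketch of a lowered loop body containing an <tt>ordered</tt> construct
(illustrative only):
@code
__kmpc_ordered(&loc, gtid);
// ... statements that must execute in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode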
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
1054 void *idx; 1055 kmp_indirect_lock_t **lck; 1056 lck = (kmp_indirect_lock_t **)crit; 1057 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 1058 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 1059 KMP_SET_I_LOCK_LOCATION(ilk, loc); 1060 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 1061 KA_TRACE(20, 1062 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 1063 #if USE_ITT_BUILD 1064 __kmp_itt_critical_creating(ilk->lock, loc); 1065 #endif 1066 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 1067 if (status == 0) { 1068 #if USE_ITT_BUILD 1069 __kmp_itt_critical_destroyed(ilk->lock); 1070 #endif 1071 // We don't really need to destroy the unclaimed lock here since it will be 1072 // cleaned up at program exit. 1073 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 1074 } 1075 KMP_DEBUG_ASSERT(*lck != NULL); 1076 } 1077 1078 // Fast-path acquire tas lock 1079 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 1080 { \ 1081 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1082 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1083 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1084 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1085 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 1086 kmp_uint32 spins; \ 1087 KMP_FSYNC_PREPARE(l); \ 1088 KMP_INIT_YIELD(spins); \ 1089 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1090 do { \ 1091 if (TCR_4(__kmp_nth) > \ 1092 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1093 KMP_YIELD(TRUE); \ 1094 } else { \ 1095 KMP_YIELD_SPIN(spins); \ 1096 } \ 1097 __kmp_spin_backoff(&backoff); \ 1098 } while ( \ 1099 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1100 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1101 } \ 1102 KMP_FSYNC_ACQUIRED(l); \ 1103 } 1104 1105 // Fast-path test tas lock 1106 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1107 { \ 1108 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1109 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1110 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1111 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1112 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1113 } 1114 1115 // Fast-path release tas lock 1116 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1117 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1118 1119 #if KMP_USE_FUTEX 1120 1121 #include <sys/syscall.h> 1122 #include <unistd.h> 1123 #ifndef FUTEX_WAIT 1124 #define FUTEX_WAIT 0 1125 #endif 1126 #ifndef FUTEX_WAKE 1127 #define FUTEX_WAKE 1 1128 #endif 1129 1130 // Fast-path acquire futex lock 1131 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1132 { \ 1133 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1134 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1135 KMP_MB(); \ 1136 KMP_FSYNC_PREPARE(ftx); \ 1137 kmp_int32 poll_val; \ 1138 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1139 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1140 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1141 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1142 if (!cond) { \ 1143 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1144 poll_val | \ 1145 KMP_LOCK_BUSY(1, futex))) { \ 1146 continue; \ 1147 } \ 1148 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1149 } \ 1150 kmp_int32 rc; \ 1151 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1152 NULL, NULL, 0)) != 0) { \ 1153 continue; \ 1154 } \ 1155 gtid_code |= 1; \ 1156 } \ 1157 KMP_FSYNC_ACQUIRED(ftx); \ 1158 } 1159 
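// Note on the futex fast paths above and below: a contending thread tags the
// lock word (low bit of the stripped poll value) before parking in FUTEX_WAIT,
// so the release path only needs to issue FUTEX_WAKE when it observes that bit
// in the value returned by the exchange.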
// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
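A sketch of the usual code shape around these calls, where <tt>crit</tt> stands
for a statically allocated kmp_critical_name for the (possibly named) critical
section (illustrative only):
@code
__kmpc_critical(&loc, gtid, &crit);
// ... code protected by the critical construct ...
__kmpc_end_critical(&loc, gtid, &crit);
@endcode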
1250 */ 1251 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1252 kmp_critical_name *crit) { 1253 #if KMP_USE_DYNAMIC_LOCK 1254 #if OMPT_SUPPORT && OMPT_OPTIONAL 1255 OMPT_STORE_RETURN_ADDRESS(global_tid); 1256 #endif // OMPT_SUPPORT 1257 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1258 #else 1259 KMP_COUNT_BLOCK(OMP_CRITICAL); 1260 #if OMPT_SUPPORT && OMPT_OPTIONAL 1261 ompt_state_t prev_state = ompt_state_undefined; 1262 ompt_thread_info_t ti; 1263 #endif 1264 kmp_user_lock_p lck; 1265 1266 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1267 __kmp_assert_valid_gtid(global_tid); 1268 1269 // TODO: add THR_OVHD_STATE 1270 1271 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1272 KMP_CHECK_USER_LOCK_INIT(); 1273 1274 if ((__kmp_user_lock_kind == lk_tas) && 1275 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1276 lck = (kmp_user_lock_p)crit; 1277 } 1278 #if KMP_USE_FUTEX 1279 else if ((__kmp_user_lock_kind == lk_futex) && 1280 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1281 lck = (kmp_user_lock_p)crit; 1282 } 1283 #endif 1284 else { // ticket, queuing or drdpa 1285 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1286 } 1287 1288 if (__kmp_env_consistency_check) 1289 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1290 1291 // since the critical directive binds to all threads, not just the current 1292 // team we have to check this even if we are in a serialized team. 1293 // also, even if we are the uber thread, we still have to conduct the lock, 1294 // as we have to contend with sibling threads. 1295 1296 #if USE_ITT_BUILD 1297 __kmp_itt_critical_acquiring(lck); 1298 #endif /* USE_ITT_BUILD */ 1299 #if OMPT_SUPPORT && OMPT_OPTIONAL 1300 OMPT_STORE_RETURN_ADDRESS(gtid); 1301 void *codeptr_ra = NULL; 1302 if (ompt_enabled.enabled) { 1303 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1304 /* OMPT state update */ 1305 prev_state = ti.state; 1306 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1307 ti.state = ompt_state_wait_critical; 1308 1309 /* OMPT event callback */ 1310 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1311 if (ompt_enabled.ompt_callback_mutex_acquire) { 1312 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1313 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1314 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1315 } 1316 } 1317 #endif 1318 // Value of 'crit' should be good for using as a critical_id of the critical 1319 // section directive. 
1320 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1321 1322 #if USE_ITT_BUILD 1323 __kmp_itt_critical_acquired(lck); 1324 #endif /* USE_ITT_BUILD */ 1325 #if OMPT_SUPPORT && OMPT_OPTIONAL 1326 if (ompt_enabled.enabled) { 1327 /* OMPT state update */ 1328 ti.state = prev_state; 1329 ti.wait_id = 0; 1330 1331 /* OMPT event callback */ 1332 if (ompt_enabled.ompt_callback_mutex_acquired) { 1333 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1334 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1335 } 1336 } 1337 #endif 1338 KMP_POP_PARTITIONED_TIMER(); 1339 1340 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1341 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1342 #endif // KMP_USE_DYNAMIC_LOCK 1343 } 1344 1345 #if KMP_USE_DYNAMIC_LOCK 1346 1347 // Converts the given hint to an internal lock implementation 1348 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1349 #if KMP_USE_TSX 1350 #define KMP_TSX_LOCK(seq) lockseq_##seq 1351 #else 1352 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1353 #endif 1354 1355 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1356 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1357 #else 1358 #define KMP_CPUINFO_RTM 0 1359 #endif 1360 1361 // Hints that do not require further logic 1362 if (hint & kmp_lock_hint_hle) 1363 return KMP_TSX_LOCK(hle); 1364 if (hint & kmp_lock_hint_rtm) 1365 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; 1366 if (hint & kmp_lock_hint_adaptive) 1367 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1368 1369 // Rule out conflicting hints first by returning the default lock 1370 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1371 return __kmp_user_lock_seq; 1372 if ((hint & omp_lock_hint_speculative) && 1373 (hint & omp_lock_hint_nonspeculative)) 1374 return __kmp_user_lock_seq; 1375 1376 // Do not even consider speculation when it appears to be contended 1377 if (hint & omp_lock_hint_contended) 1378 return lockseq_queuing; 1379 1380 // Uncontended lock without speculation 1381 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1382 return lockseq_tas; 1383 1384 // Use RTM lock for speculation 1385 if (hint & omp_lock_hint_speculative) 1386 return KMP_CPUINFO_RTM ? 
KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; 1387 1388 return __kmp_user_lock_seq; 1389 } 1390 1391 #if OMPT_SUPPORT && OMPT_OPTIONAL 1392 #if KMP_USE_DYNAMIC_LOCK 1393 static kmp_mutex_impl_t 1394 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1395 if (user_lock) { 1396 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1397 case 0: 1398 break; 1399 #if KMP_USE_FUTEX 1400 case locktag_futex: 1401 return kmp_mutex_impl_queuing; 1402 #endif 1403 case locktag_tas: 1404 return kmp_mutex_impl_spin; 1405 #if KMP_USE_TSX 1406 case locktag_hle: 1407 case locktag_rtm_spin: 1408 return kmp_mutex_impl_speculative; 1409 #endif 1410 default: 1411 return kmp_mutex_impl_none; 1412 } 1413 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1414 } 1415 KMP_ASSERT(ilock); 1416 switch (ilock->type) { 1417 #if KMP_USE_TSX 1418 case locktag_adaptive: 1419 case locktag_rtm_queuing: 1420 return kmp_mutex_impl_speculative; 1421 #endif 1422 case locktag_nested_tas: 1423 return kmp_mutex_impl_spin; 1424 #if KMP_USE_FUTEX 1425 case locktag_nested_futex: 1426 #endif 1427 case locktag_ticket: 1428 case locktag_queuing: 1429 case locktag_drdpa: 1430 case locktag_nested_ticket: 1431 case locktag_nested_queuing: 1432 case locktag_nested_drdpa: 1433 return kmp_mutex_impl_queuing; 1434 default: 1435 return kmp_mutex_impl_none; 1436 } 1437 } 1438 #else 1439 // For locks without dynamic binding 1440 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1441 switch (__kmp_user_lock_kind) { 1442 case lk_tas: 1443 return kmp_mutex_impl_spin; 1444 #if KMP_USE_FUTEX 1445 case lk_futex: 1446 #endif 1447 case lk_ticket: 1448 case lk_queuing: 1449 case lk_drdpa: 1450 return kmp_mutex_impl_queuing; 1451 #if KMP_USE_TSX 1452 case lk_hle: 1453 case lk_rtm_queuing: 1454 case lk_rtm_spin: 1455 case lk_adaptive: 1456 return kmp_mutex_impl_speculative; 1457 #endif 1458 default: 1459 return kmp_mutex_impl_none; 1460 } 1461 } 1462 #endif // KMP_USE_DYNAMIC_LOCK 1463 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1464 1465 /*! 1466 @ingroup WORK_SHARING 1467 @param loc source location information. 1468 @param global_tid global thread number. 1469 @param crit identity of the critical section. This could be a pointer to a lock 1470 associated with the critical section, or some other suitably unique value. 1471 @param hint the lock hint. 1472 1473 Enter code protected by a `critical` construct with a hint. The hint value is 1474 used to suggest a lock implementation. This function blocks until the executing 1475 thread can enter the critical section unless the hint suggests use of 1476 speculative execution and the hardware supports it. 1477 */ 1478 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1479 kmp_critical_name *crit, uint32_t hint) { 1480 KMP_COUNT_BLOCK(OMP_CRITICAL); 1481 kmp_user_lock_p lck; 1482 #if OMPT_SUPPORT && OMPT_OPTIONAL 1483 ompt_state_t prev_state = ompt_state_undefined; 1484 ompt_thread_info_t ti; 1485 // This is the case, if called from __kmpc_critical: 1486 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1487 if (!codeptr) 1488 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1489 #endif 1490 1491 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1492 __kmp_assert_valid_gtid(global_tid); 1493 1494 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1495 // Check if it is initialized. 
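  // (A zero word means the critical section has not been used yet. For a
  //  direct lock the tag is stored in the word itself, while an indirect lock
  //  stores a pointer to the allocated lock object; the two cases are
  //  dispatched separately below.)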
1496 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1497 if (*lk == 0) { 1498 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1499 if (KMP_IS_D_LOCK(lckseq)) { 1500 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1501 KMP_GET_D_TAG(lckseq)); 1502 } else { 1503 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1504 } 1505 } 1506 // Branch for accessing the actual lock object and set operation. This 1507 // branching is inevitable since this lock initialization does not follow the 1508 // normal dispatch path (lock table is not used). 1509 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1510 lck = (kmp_user_lock_p)lk; 1511 if (__kmp_env_consistency_check) { 1512 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1513 __kmp_map_hint_to_lock(hint)); 1514 } 1515 #if USE_ITT_BUILD 1516 __kmp_itt_critical_acquiring(lck); 1517 #endif 1518 #if OMPT_SUPPORT && OMPT_OPTIONAL 1519 if (ompt_enabled.enabled) { 1520 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1521 /* OMPT state update */ 1522 prev_state = ti.state; 1523 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1524 ti.state = ompt_state_wait_critical; 1525 1526 /* OMPT event callback */ 1527 if (ompt_enabled.ompt_callback_mutex_acquire) { 1528 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1529 ompt_mutex_critical, (unsigned int)hint, 1530 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1531 codeptr); 1532 } 1533 } 1534 #endif 1535 #if KMP_USE_INLINED_TAS 1536 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1537 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1538 } else 1539 #elif KMP_USE_INLINED_FUTEX 1540 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1541 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1542 } else 1543 #endif 1544 { 1545 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1546 } 1547 } else { 1548 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1549 lck = ilk->lock; 1550 if (__kmp_env_consistency_check) { 1551 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1552 __kmp_map_hint_to_lock(hint)); 1553 } 1554 #if USE_ITT_BUILD 1555 __kmp_itt_critical_acquiring(lck); 1556 #endif 1557 #if OMPT_SUPPORT && OMPT_OPTIONAL 1558 if (ompt_enabled.enabled) { 1559 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1560 /* OMPT state update */ 1561 prev_state = ti.state; 1562 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1563 ti.state = ompt_state_wait_critical; 1564 1565 /* OMPT event callback */ 1566 if (ompt_enabled.ompt_callback_mutex_acquire) { 1567 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1568 ompt_mutex_critical, (unsigned int)hint, 1569 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1570 codeptr); 1571 } 1572 } 1573 #endif 1574 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1575 } 1576 KMP_POP_PARTITIONED_TIMER(); 1577 1578 #if USE_ITT_BUILD 1579 __kmp_itt_critical_acquired(lck); 1580 #endif /* USE_ITT_BUILD */ 1581 #if OMPT_SUPPORT && OMPT_OPTIONAL 1582 if (ompt_enabled.enabled) { 1583 /* OMPT state update */ 1584 ti.state = prev_state; 1585 ti.wait_id = 0; 1586 1587 /* OMPT event callback */ 1588 if (ompt_enabled.ompt_callback_mutex_acquired) { 1589 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1590 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1591 } 1592 } 1593 #endif 1594 1595 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1596 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1597 } // __kmpc_critical_with_hint 1598 1599 #endif // KMP_USE_DYNAMIC_LOCK 
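// Illustrative sketch (compiler dependent, hypothetical names): a hinted
// critical construct such as
//   #pragma omp critical(name) hint(omp_sync_hint_speculative)
// is expected to be lowered to a call like
//   __kmpc_critical_with_hint(&loc, gtid, &crit_for_name, hint_value);
// with __kmpc_end_critical() at the end of the protected region.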
1600 1601 /*! 1602 @ingroup WORK_SHARING 1603 @param loc source location information. 1604 @param global_tid global thread number . 1605 @param crit identity of the critical section. This could be a pointer to a lock 1606 associated with the critical section, or some other suitably unique value. 1607 1608 Leave a critical section, releasing any lock that was held during its execution. 1609 */ 1610 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1611 kmp_critical_name *crit) { 1612 kmp_user_lock_p lck; 1613 1614 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1615 1616 #if KMP_USE_DYNAMIC_LOCK 1617 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1618 lck = (kmp_user_lock_p)crit; 1619 KMP_ASSERT(lck != NULL); 1620 if (__kmp_env_consistency_check) { 1621 __kmp_pop_sync(global_tid, ct_critical, loc); 1622 } 1623 #if USE_ITT_BUILD 1624 __kmp_itt_critical_releasing(lck); 1625 #endif 1626 #if KMP_USE_INLINED_TAS 1627 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1628 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1629 } else 1630 #elif KMP_USE_INLINED_FUTEX 1631 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1632 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1633 } else 1634 #endif 1635 { 1636 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1637 } 1638 } else { 1639 kmp_indirect_lock_t *ilk = 1640 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1641 KMP_ASSERT(ilk != NULL); 1642 lck = ilk->lock; 1643 if (__kmp_env_consistency_check) { 1644 __kmp_pop_sync(global_tid, ct_critical, loc); 1645 } 1646 #if USE_ITT_BUILD 1647 __kmp_itt_critical_releasing(lck); 1648 #endif 1649 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1650 } 1651 1652 #else // KMP_USE_DYNAMIC_LOCK 1653 1654 if ((__kmp_user_lock_kind == lk_tas) && 1655 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1656 lck = (kmp_user_lock_p)crit; 1657 } 1658 #if KMP_USE_FUTEX 1659 else if ((__kmp_user_lock_kind == lk_futex) && 1660 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1661 lck = (kmp_user_lock_p)crit; 1662 } 1663 #endif 1664 else { // ticket, queuing or drdpa 1665 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1666 } 1667 1668 KMP_ASSERT(lck != NULL); 1669 1670 if (__kmp_env_consistency_check) 1671 __kmp_pop_sync(global_tid, ct_critical, loc); 1672 1673 #if USE_ITT_BUILD 1674 __kmp_itt_critical_releasing(lck); 1675 #endif /* USE_ITT_BUILD */ 1676 // Value of 'crit' should be good for using as a critical_id of the critical 1677 // section directive. 1678 __kmp_release_user_lock_with_checks(lck, global_tid); 1679 1680 #endif // KMP_USE_DYNAMIC_LOCK 1681 1682 #if OMPT_SUPPORT && OMPT_OPTIONAL 1683 /* OMPT release event triggers after lock is released; place here to trigger 1684 * for all #if branches */ 1685 OMPT_STORE_RETURN_ADDRESS(global_tid); 1686 if (ompt_enabled.ompt_callback_mutex_released) { 1687 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1688 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1689 OMPT_LOAD_RETURN_ADDRESS(0)); 1690 } 1691 #endif 1692 1693 KMP_POP_PARTITIONED_TIMER(); 1694 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1695 } 1696 1697 /*! 1698 @ingroup SYNCHRONIZATION 1699 @param loc source location information 1700 @param global_tid thread id. 1701 @return one if the thread should execute the master block, zero otherwise 1702 1703 Start execution of a combined barrier and master. The barrier is executed inside 1704 this function. 
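A sketch of the usual code shape (illustrative only):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... code executed only by the master thread ...
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode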
1705 */ 1706 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1707 int status; 1708 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1709 __kmp_assert_valid_gtid(global_tid); 1710 1711 if (!TCR_4(__kmp_init_parallel)) 1712 __kmp_parallel_initialize(); 1713 1714 __kmp_resume_if_soft_paused(); 1715 1716 if (__kmp_env_consistency_check) 1717 __kmp_check_barrier(global_tid, ct_barrier, loc); 1718 1719 #if OMPT_SUPPORT 1720 ompt_frame_t *ompt_frame; 1721 if (ompt_enabled.enabled) { 1722 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1723 if (ompt_frame->enter_frame.ptr == NULL) 1724 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1725 } 1726 OMPT_STORE_RETURN_ADDRESS(global_tid); 1727 #endif 1728 #if USE_ITT_NOTIFY 1729 __kmp_threads[global_tid]->th.th_ident = loc; 1730 #endif 1731 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1732 #if OMPT_SUPPORT && OMPT_OPTIONAL 1733 if (ompt_enabled.enabled) { 1734 ompt_frame->enter_frame = ompt_data_none; 1735 } 1736 #endif 1737 1738 return (status != 0) ? 0 : 1; 1739 } 1740 1741 /*! 1742 @ingroup SYNCHRONIZATION 1743 @param loc source location information 1744 @param global_tid thread id. 1745 1746 Complete the execution of a combined barrier and master. This function should 1747 only be called at the completion of the <tt>master</tt> code. Other threads will 1748 still be waiting at the barrier and this call releases them. 1749 */ 1750 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1751 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1752 __kmp_assert_valid_gtid(global_tid); 1753 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1754 } 1755 1756 /*! 1757 @ingroup SYNCHRONIZATION 1758 @param loc source location information 1759 @param global_tid thread id. 1760 @return one if the thread should execute the master block, zero otherwise 1761 1762 Start execution of a combined barrier and master(nowait) construct. 1763 The barrier is executed inside this function. 1764 There is no equivalent "end" function, since the nowait variant requires no synchronization at the end of the master code: the barrier has already been executed, other threads continue immediately, and there is nothing left for an "end" call to do. 1765 */ 1766 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1767 kmp_int32 ret; 1768 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1769 __kmp_assert_valid_gtid(global_tid); 1770 1771 if (!TCR_4(__kmp_init_parallel)) 1772 __kmp_parallel_initialize(); 1773 1774 __kmp_resume_if_soft_paused(); 1775 1776 if (__kmp_env_consistency_check) { 1777 if (loc == 0) { 1778 KMP_WARNING(ConstructIdentInvalid); // TODO: clarify what this warning means to the user
1779 } 1780 __kmp_check_barrier(global_tid, ct_barrier, loc); 1781 } 1782 1783 #if OMPT_SUPPORT 1784 ompt_frame_t *ompt_frame; 1785 if (ompt_enabled.enabled) { 1786 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1787 if (ompt_frame->enter_frame.ptr == NULL) 1788 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1789 } 1790 OMPT_STORE_RETURN_ADDRESS(global_tid); 1791 #endif 1792 #if USE_ITT_NOTIFY 1793 __kmp_threads[global_tid]->th.th_ident = loc; 1794 #endif 1795 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1796 #if OMPT_SUPPORT && OMPT_OPTIONAL 1797 if (ompt_enabled.enabled) { 1798 ompt_frame->enter_frame = ompt_data_none; 1799 } 1800 #endif 1801 1802 ret = __kmpc_master(loc, global_tid); 1803 1804 if (__kmp_env_consistency_check) { 1805 /* there's no __kmpc_end_master called; so the (stats) */ 1806 /* actions of __kmpc_end_master are done here */ 1807 if (ret) { 1808 /* only one thread should do the pop since only */ 1809 /* one did the push (see __kmpc_master()) */ 1810 __kmp_pop_sync(global_tid, ct_master, loc); 1811 } 1812 } 1813 1814 return (ret); 1815 } 1816 1817 /* The BARRIER for a SINGLE process section is always explicit */ 1818 /*! 1819 @ingroup WORK_SHARING 1820 @param loc source location information 1821 @param global_tid global thread number 1822 @return One if this thread should execute the single construct, zero otherwise. 1823 1824 Test whether to execute a <tt>single</tt> construct. 1825 There are no implicit barriers in the two "single" calls, rather the compiler 1826 should introduce an explicit barrier if it is required. 1827 */ 1828 1829 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1830 __kmp_assert_valid_gtid(global_tid); 1831 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1832 1833 if (rc) { 1834 // We are going to execute the single statement, so we should count it. 1835 KMP_COUNT_BLOCK(OMP_SINGLE); 1836 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1837 } 1838 1839 #if OMPT_SUPPORT && OMPT_OPTIONAL 1840 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1841 kmp_team_t *team = this_thr->th.th_team; 1842 int tid = __kmp_tid_from_gtid(global_tid); 1843 1844 if (ompt_enabled.enabled) { 1845 if (rc) { 1846 if (ompt_enabled.ompt_callback_work) { 1847 ompt_callbacks.ompt_callback(ompt_callback_work)( 1848 ompt_work_single_executor, ompt_scope_begin, 1849 &(team->t.ompt_team_info.parallel_data), 1850 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1851 1, OMPT_GET_RETURN_ADDRESS(0)); 1852 } 1853 } else { 1854 if (ompt_enabled.ompt_callback_work) { 1855 ompt_callbacks.ompt_callback(ompt_callback_work)( 1856 ompt_work_single_other, ompt_scope_begin, 1857 &(team->t.ompt_team_info.parallel_data), 1858 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1859 1, OMPT_GET_RETURN_ADDRESS(0)); 1860 ompt_callbacks.ompt_callback(ompt_callback_work)( 1861 ompt_work_single_other, ompt_scope_end, 1862 &(team->t.ompt_team_info.parallel_data), 1863 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1864 1, OMPT_GET_RETURN_ADDRESS(0)); 1865 } 1866 } 1867 } 1868 #endif 1869 1870 return rc; 1871 } 1872 1873 /*! 1874 @ingroup WORK_SHARING 1875 @param loc source location information 1876 @param global_tid global thread number 1877 1878 Mark the end of a <tt>single</tt> construct. This function should 1879 only be called by the thread that executed the block of code protected 1880 by the `single` construct. 
1881 */ 1882 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1883 __kmp_assert_valid_gtid(global_tid); 1884 __kmp_exit_single(global_tid); 1885 KMP_POP_PARTITIONED_TIMER(); 1886 1887 #if OMPT_SUPPORT && OMPT_OPTIONAL 1888 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1889 kmp_team_t *team = this_thr->th.th_team; 1890 int tid = __kmp_tid_from_gtid(global_tid); 1891 1892 if (ompt_enabled.ompt_callback_work) { 1893 ompt_callbacks.ompt_callback(ompt_callback_work)( 1894 ompt_work_single_executor, ompt_scope_end, 1895 &(team->t.ompt_team_info.parallel_data), 1896 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1897 OMPT_GET_RETURN_ADDRESS(0)); 1898 } 1899 #endif 1900 } 1901 1902 /*! 1903 @ingroup WORK_SHARING 1904 @param loc Source location 1905 @param global_tid Global thread id 1906 1907 Mark the end of a statically scheduled loop. 1908 */ 1909 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1910 KMP_POP_PARTITIONED_TIMER(); 1911 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1912 1913 #if OMPT_SUPPORT && OMPT_OPTIONAL 1914 if (ompt_enabled.ompt_callback_work) { 1915 ompt_work_t ompt_work_type = ompt_work_loop; 1916 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1917 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1918 // Determine workshare type 1919 if (loc != NULL) { 1920 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1921 ompt_work_type = ompt_work_loop; 1922 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1923 ompt_work_type = ompt_work_sections; 1924 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1925 ompt_work_type = ompt_work_distribute; 1926 } else { 1927 // use default set above. 1928 // a warning about this case is provided in __kmpc_for_static_init 1929 } 1930 KMP_DEBUG_ASSERT(ompt_work_type); 1931 } 1932 ompt_callbacks.ompt_callback(ompt_callback_work)( 1933 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1934 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1935 } 1936 #endif 1937 if (__kmp_env_consistency_check) 1938 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1939 } 1940 1941 // User routines which take C-style arguments (call by value) 1942 // different from the Fortran equivalent routines 1943 1944 void ompc_set_num_threads(int arg) { 1945 // !!!!! TODO: check the per-task binding 1946 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1947 } 1948 1949 void ompc_set_dynamic(int flag) { 1950 kmp_info_t *thread; 1951 1952 /* For the thread-private implementation of the internal controls */ 1953 thread = __kmp_entry_thread(); 1954 1955 __kmp_save_internal_controls(thread); 1956 1957 set__dynamic(thread, flag ? true : false); 1958 } 1959 1960 void ompc_set_nested(int flag) { 1961 kmp_info_t *thread; 1962 1963 /* For the thread-private internal controls implementation */ 1964 thread = __kmp_entry_thread(); 1965 1966 __kmp_save_internal_controls(thread); 1967 1968 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1969 } 1970 1971 void ompc_set_max_active_levels(int max_active_levels) { 1972 /* TO DO */ 1973 /* we want per-task implementation of this internal control */ 1974 1975 /* For the per-thread internal controls implementation */ 1976 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1977 } 1978 1979 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1980 // !!!!! 
TODO: check the per-task binding 1981 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1982 } 1983 1984 int ompc_get_ancestor_thread_num(int level) { 1985 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1986 } 1987 1988 int ompc_get_team_size(int level) { 1989 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1990 } 1991 1992 /* OpenMP 5.0 Affinity Format API */ 1993 1994 void ompc_set_affinity_format(char const *format) { 1995 if (!__kmp_init_serial) { 1996 __kmp_serial_initialize(); 1997 } 1998 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1999 format, KMP_STRLEN(format) + 1); 2000 } 2001 2002 size_t ompc_get_affinity_format(char *buffer, size_t size) { 2003 size_t format_size; 2004 if (!__kmp_init_serial) { 2005 __kmp_serial_initialize(); 2006 } 2007 format_size = KMP_STRLEN(__kmp_affinity_format); 2008 if (buffer && size) { 2009 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2010 format_size + 1); 2011 } 2012 return format_size; 2013 } 2014 2015 void ompc_display_affinity(char const *format) { 2016 int gtid; 2017 if (!TCR_4(__kmp_init_middle)) { 2018 __kmp_middle_initialize(); 2019 } 2020 gtid = __kmp_get_gtid(); 2021 __kmp_aux_display_affinity(gtid, format); 2022 } 2023 2024 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 2025 char const *format) { 2026 int gtid; 2027 size_t num_required; 2028 kmp_str_buf_t capture_buf; 2029 if (!TCR_4(__kmp_init_middle)) { 2030 __kmp_middle_initialize(); 2031 } 2032 gtid = __kmp_get_gtid(); 2033 __kmp_str_buf_init(&capture_buf); 2034 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2035 if (buffer && buf_size) { 2036 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2037 capture_buf.used + 1); 2038 } 2039 __kmp_str_buf_free(&capture_buf); 2040 return num_required; 2041 } 2042 2043 void kmpc_set_stacksize(int arg) { 2044 // __kmp_aux_set_stacksize initializes the library if needed 2045 __kmp_aux_set_stacksize(arg); 2046 } 2047 2048 void kmpc_set_stacksize_s(size_t arg) { 2049 // __kmp_aux_set_stacksize initializes the library if needed 2050 __kmp_aux_set_stacksize(arg); 2051 } 2052 2053 void kmpc_set_blocktime(int arg) { 2054 int gtid, tid; 2055 kmp_info_t *thread; 2056 2057 gtid = __kmp_entry_gtid(); 2058 tid = __kmp_tid_from_gtid(gtid); 2059 thread = __kmp_thread_from_gtid(gtid); 2060 2061 __kmp_aux_set_blocktime(arg, thread, tid); 2062 } 2063 2064 void kmpc_set_library(int arg) { 2065 // __kmp_user_set_library initializes the library if needed 2066 __kmp_user_set_library((enum library_type)arg); 2067 } 2068 2069 void kmpc_set_defaults(char const *str) { 2070 // __kmp_aux_set_defaults initializes the library if needed 2071 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2072 } 2073 2074 void kmpc_set_disp_num_buffers(int arg) { 2075 // ignore after initialization because some teams have already 2076 // allocated dispatch buffers 2077 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2078 arg <= KMP_MAX_DISP_NUM_BUFF) { 2079 __kmp_dispatch_num_buffers = arg; 2080 } 2081 } 2082 2083 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2084 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2085 return -1; 2086 #else 2087 if (!TCR_4(__kmp_init_middle)) { 2088 __kmp_middle_initialize(); 2089 } 2090 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2091 #endif 2092 } 2093 2094 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2095 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2096 return -1; 2097 #else 
2098 if (!TCR_4(__kmp_init_middle)) { 2099 __kmp_middle_initialize(); 2100 } 2101 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2102 #endif 2103 } 2104 2105 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2106 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2107 return -1; 2108 #else 2109 if (!TCR_4(__kmp_init_middle)) { 2110 __kmp_middle_initialize(); 2111 } 2112 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2113 #endif 2114 } 2115 2116 /* -------------------------------------------------------------------------- */ 2117 /*! 2118 @ingroup THREADPRIVATE 2119 @param loc source location information 2120 @param gtid global thread number 2121 @param cpy_size size of the cpy_data buffer 2122 @param cpy_data pointer to data to be copied 2123 @param cpy_func helper function to call for copying data 2124 @param didit flag variable: 1=single thread; 0=not single thread 2125 2126 __kmpc_copyprivate implements the interface for the private data broadcast 2127 needed for the copyprivate clause associated with a single region in an 2128 OpenMP<sup>*</sup> program (both C and Fortran). 2129 All threads participating in the parallel region call this routine. 2130 One of the threads (called the single thread) should have the <tt>didit</tt> 2131 variable set to 1 and all other threads should have that variable set to 0. 2132 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2133 2134 The OpenMP specification forbids the use of nowait on the single region when a 2135 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2136 barrier internally to avoid race conditions, so the code generation for the 2137 single region should avoid generating a barrier after the call to @ref 2138 __kmpc_copyprivate. 2139 2140 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2141 The <tt>loc</tt> parameter is a pointer to source location information. 2142 2143 Internal implementation: The single thread will first copy its descriptor 2144 address (cpy_data) to a team-private location, then the other threads will each 2145 call the function pointed to by the parameter cpy_func, which carries out the 2146 copy by copying the data using the cpy_data buffer. 2147 2148 The cpy_func routine used for the copy and the contents of the data area defined 2149 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2150 to be done. For instance, the cpy_data buffer can hold the actual data to be 2151 copied or it may hold a list of pointers to the data. The cpy_func routine must 2152 interpret the cpy_data buffer appropriately. 2153 2154 The interface to cpy_func is as follows: 2155 @code 2156 void cpy_func( void *destination, void *source ) 2157 @endcode 2158 where void *destination is the cpy_data pointer for the thread being copied to 2159 and void *source is the cpy_data pointer for the thread being copied from. 
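As an illustration only (the helper below is hypothetical and not part of this
interface), a compiler-generated cpy_func for a single <tt>double</tt>
copyprivate variable could simply copy the value directly, with each thread's
cpy_data pointing at its private copy:
@code
// Hypothetical helper: cpy_data points directly at the double to broadcast.
void cpy_func(void *destination, void *source) {
  *(double *)destination = *(double *)source;
}
@endcode
With several copyprivate variables the buffer would more likely hold a list of
pointers, and cpy_func would copy each referenced item in turn.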
2160 */ 2161 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2162 void *cpy_data, void (*cpy_func)(void *, void *), 2163 kmp_int32 didit) { 2164 void **data_ptr; 2165 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2166 __kmp_assert_valid_gtid(gtid); 2167 2168 KMP_MB(); 2169 2170 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2171 2172 if (__kmp_env_consistency_check) { 2173 if (loc == 0) { 2174 KMP_WARNING(ConstructIdentInvalid); 2175 } 2176 } 2177 2178 // ToDo: Optimize the following two barriers into some kind of split barrier 2179 2180 if (didit) 2181 *data_ptr = cpy_data; 2182 2183 #if OMPT_SUPPORT 2184 ompt_frame_t *ompt_frame; 2185 if (ompt_enabled.enabled) { 2186 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2187 if (ompt_frame->enter_frame.ptr == NULL) 2188 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2189 } 2190 OMPT_STORE_RETURN_ADDRESS(gtid); 2191 #endif 2192 /* This barrier is not a barrier region boundary */ 2193 #if USE_ITT_NOTIFY 2194 __kmp_threads[gtid]->th.th_ident = loc; 2195 #endif 2196 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2197 2198 if (!didit) 2199 (*cpy_func)(cpy_data, *data_ptr); 2200 2201 // Consider next barrier a user-visible barrier for barrier region boundaries 2202 // Nesting checks are already handled by the single construct checks 2203 { 2204 #if OMPT_SUPPORT 2205 OMPT_STORE_RETURN_ADDRESS(gtid); 2206 #endif 2207 #if USE_ITT_NOTIFY 2208 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2209 // tasks can overwrite the location) 2210 #endif 2211 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2212 #if OMPT_SUPPORT && OMPT_OPTIONAL 2213 if (ompt_enabled.enabled) { 2214 ompt_frame->enter_frame = ompt_data_none; 2215 } 2216 #endif 2217 } 2218 } 2219 2220 /* -------------------------------------------------------------------------- */ 2221 2222 #define INIT_LOCK __kmp_init_user_lock_with_checks 2223 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2224 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2225 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2226 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2227 #define ACQUIRE_NESTED_LOCK_TIMED \ 2228 __kmp_acquire_nested_user_lock_with_checks_timed 2229 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2230 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2231 #define TEST_LOCK __kmp_test_user_lock_with_checks 2232 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2233 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2234 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2235 2236 // TODO: Make check abort messages use location info & pass it into 2237 // with_checks routines 2238 2239 #if KMP_USE_DYNAMIC_LOCK 2240 2241 // internal lock initializer 2242 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2243 kmp_dyna_lockseq_t seq) { 2244 if (KMP_IS_D_LOCK(seq)) { 2245 KMP_INIT_D_LOCK(lock, seq); 2246 #if USE_ITT_BUILD 2247 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2248 #endif 2249 } else { 2250 KMP_INIT_I_LOCK(lock, seq); 2251 #if USE_ITT_BUILD 2252 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2253 __kmp_itt_lock_creating(ilk->lock, loc); 2254 #endif 2255 } 2256 } 2257 2258 // internal nest lock initializer 2259 static __forceinline void 2260 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2261 kmp_dyna_lockseq_t seq) { 2262 #if KMP_USE_TSX 2263 // Don't have nested lock implementation for speculative locks 2264 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2265 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2266 seq = __kmp_user_lock_seq; 2267 #endif 2268 switch (seq) { 2269 case lockseq_tas: 2270 seq = lockseq_nested_tas; 2271 break; 2272 #if KMP_USE_FUTEX 2273 case lockseq_futex: 2274 seq = lockseq_nested_futex; 2275 break; 2276 #endif 2277 case lockseq_ticket: 2278 seq = lockseq_nested_ticket; 2279 break; 2280 case lockseq_queuing: 2281 seq = lockseq_nested_queuing; 2282 break; 2283 case lockseq_drdpa: 2284 seq = lockseq_nested_drdpa; 2285 break; 2286 default: 2287 seq = lockseq_nested_queuing; 2288 } 2289 KMP_INIT_I_LOCK(lock, seq); 2290 #if USE_ITT_BUILD 2291 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2292 __kmp_itt_lock_creating(ilk->lock, loc); 2293 #endif 2294 } 2295 2296 /* initialize the lock with a hint */ 2297 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2298 uintptr_t hint) { 2299 KMP_DEBUG_ASSERT(__kmp_init_serial); 2300 if (__kmp_env_consistency_check && user_lock == NULL) { 2301 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2302 } 2303 2304 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2305 2306 #if OMPT_SUPPORT && OMPT_OPTIONAL 2307 // This is the case, if called from omp_init_lock_with_hint: 2308 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2309 if (!codeptr) 2310 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2311 if (ompt_enabled.ompt_callback_lock_init) { 2312 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2313 ompt_mutex_lock, (omp_lock_hint_t)hint, 2314 __ompt_get_mutex_impl_type(user_lock), 2315 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2316 } 2317 #endif 2318 } 2319 2320 /* initialize the lock with a hint */ 2321 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2322 void **user_lock, uintptr_t hint) { 2323 KMP_DEBUG_ASSERT(__kmp_init_serial); 2324 if (__kmp_env_consistency_check && user_lock == NULL) { 2325 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2326 } 2327 2328 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2329 2330 #if OMPT_SUPPORT && OMPT_OPTIONAL 2331 // This is the case, if called from omp_init_lock_with_hint: 2332 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2333 if (!codeptr) 2334 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2335 if (ompt_enabled.ompt_callback_lock_init) { 2336 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2337 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2338 __ompt_get_mutex_impl_type(user_lock), 2339 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2340 } 2341 #endif 2342 } 2343 2344 #endif // KMP_USE_DYNAMIC_LOCK 2345 2346 /* initialize the lock */ 2347 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2348 #if KMP_USE_DYNAMIC_LOCK 2349 2350 KMP_DEBUG_ASSERT(__kmp_init_serial); 2351 if (__kmp_env_consistency_check && user_lock == NULL) { 2352 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2353 } 2354 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2355 2356 #if OMPT_SUPPORT && OMPT_OPTIONAL 2357 // This is the case, if called from omp_init_lock_with_hint: 2358 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2359 if (!codeptr) 2360 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2361 if (ompt_enabled.ompt_callback_lock_init) { 2362 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2363 
ompt_mutex_lock, omp_lock_hint_none, 2364 __ompt_get_mutex_impl_type(user_lock), 2365 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2366 } 2367 #endif 2368 2369 #else // KMP_USE_DYNAMIC_LOCK 2370 2371 static char const *const func = "omp_init_lock"; 2372 kmp_user_lock_p lck; 2373 KMP_DEBUG_ASSERT(__kmp_init_serial); 2374 2375 if (__kmp_env_consistency_check) { 2376 if (user_lock == NULL) { 2377 KMP_FATAL(LockIsUninitialized, func); 2378 } 2379 } 2380 2381 KMP_CHECK_USER_LOCK_INIT(); 2382 2383 if ((__kmp_user_lock_kind == lk_tas) && 2384 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2385 lck = (kmp_user_lock_p)user_lock; 2386 } 2387 #if KMP_USE_FUTEX 2388 else if ((__kmp_user_lock_kind == lk_futex) && 2389 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2390 lck = (kmp_user_lock_p)user_lock; 2391 } 2392 #endif 2393 else { 2394 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2395 } 2396 INIT_LOCK(lck); 2397 __kmp_set_user_lock_location(lck, loc); 2398 2399 #if OMPT_SUPPORT && OMPT_OPTIONAL 2400 // This is the case, if called from omp_init_lock_with_hint: 2401 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2402 if (!codeptr) 2403 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2404 if (ompt_enabled.ompt_callback_lock_init) { 2405 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2406 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2407 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2408 } 2409 #endif 2410 2411 #if USE_ITT_BUILD 2412 __kmp_itt_lock_creating(lck); 2413 #endif /* USE_ITT_BUILD */ 2414 2415 #endif // KMP_USE_DYNAMIC_LOCK 2416 } // __kmpc_init_lock 2417 2418 /* initialize the lock */ 2419 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2420 #if KMP_USE_DYNAMIC_LOCK 2421 2422 KMP_DEBUG_ASSERT(__kmp_init_serial); 2423 if (__kmp_env_consistency_check && user_lock == NULL) { 2424 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2425 } 2426 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2427 2428 #if OMPT_SUPPORT && OMPT_OPTIONAL 2429 // This is the case, if called from omp_init_lock_with_hint: 2430 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2431 if (!codeptr) 2432 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2433 if (ompt_enabled.ompt_callback_lock_init) { 2434 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2435 ompt_mutex_nest_lock, omp_lock_hint_none, 2436 __ompt_get_mutex_impl_type(user_lock), 2437 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2438 } 2439 #endif 2440 2441 #else // KMP_USE_DYNAMIC_LOCK 2442 2443 static char const *const func = "omp_init_nest_lock"; 2444 kmp_user_lock_p lck; 2445 KMP_DEBUG_ASSERT(__kmp_init_serial); 2446 2447 if (__kmp_env_consistency_check) { 2448 if (user_lock == NULL) { 2449 KMP_FATAL(LockIsUninitialized, func); 2450 } 2451 } 2452 2453 KMP_CHECK_USER_LOCK_INIT(); 2454 2455 if ((__kmp_user_lock_kind == lk_tas) && 2456 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2457 OMP_NEST_LOCK_T_SIZE)) { 2458 lck = (kmp_user_lock_p)user_lock; 2459 } 2460 #if KMP_USE_FUTEX 2461 else if ((__kmp_user_lock_kind == lk_futex) && 2462 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2463 OMP_NEST_LOCK_T_SIZE)) { 2464 lck = (kmp_user_lock_p)user_lock; 2465 } 2466 #endif 2467 else { 2468 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2469 } 2470 2471 INIT_NESTED_LOCK(lck); 2472 __kmp_set_user_lock_location(lck, loc); 2473 2474 #if OMPT_SUPPORT && OMPT_OPTIONAL 2475 // This is the case, if called from omp_init_lock_with_hint: 2476 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2477 if (!codeptr) 2478 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2479 if (ompt_enabled.ompt_callback_lock_init) { 2480 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2481 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2482 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2483 } 2484 #endif 2485 2486 #if USE_ITT_BUILD 2487 __kmp_itt_lock_creating(lck); 2488 #endif /* USE_ITT_BUILD */ 2489 2490 #endif // KMP_USE_DYNAMIC_LOCK 2491 } // __kmpc_init_nest_lock 2492 2493 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2494 #if KMP_USE_DYNAMIC_LOCK 2495 2496 #if USE_ITT_BUILD 2497 kmp_user_lock_p lck; 2498 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2499 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2500 } else { 2501 lck = (kmp_user_lock_p)user_lock; 2502 } 2503 __kmp_itt_lock_destroyed(lck); 2504 #endif 2505 #if OMPT_SUPPORT && OMPT_OPTIONAL 2506 // This is the case, if called from omp_init_lock_with_hint: 2507 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2508 if (!codeptr) 2509 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2510 if (ompt_enabled.ompt_callback_lock_destroy) { 2511 kmp_user_lock_p lck; 2512 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2513 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2514 } else { 2515 lck = (kmp_user_lock_p)user_lock; 2516 } 2517 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2518 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2519 } 2520 #endif 2521 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2522 #else 2523 kmp_user_lock_p lck; 2524 2525 if ((__kmp_user_lock_kind == lk_tas) && 2526 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2527 lck = (kmp_user_lock_p)user_lock; 2528 } 2529 #if KMP_USE_FUTEX 2530 else if ((__kmp_user_lock_kind == lk_futex) && 2531 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2532 lck = (kmp_user_lock_p)user_lock; 2533 } 2534 #endif 2535 else { 2536 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2537 } 2538 2539 #if OMPT_SUPPORT && OMPT_OPTIONAL 2540 // This is the case, if called from omp_init_lock_with_hint: 2541 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2542 if (!codeptr) 2543 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2544 if (ompt_enabled.ompt_callback_lock_destroy) { 2545 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2546 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2547 } 2548 #endif 2549 2550 #if USE_ITT_BUILD 2551 __kmp_itt_lock_destroyed(lck); 2552 #endif /* USE_ITT_BUILD */ 2553 DESTROY_LOCK(lck); 2554 2555 if ((__kmp_user_lock_kind == lk_tas) && 2556 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2557 ; 2558 } 2559 #if KMP_USE_FUTEX 2560 else if ((__kmp_user_lock_kind == lk_futex) && 2561 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2562 ; 2563 } 2564 #endif 2565 else { 2566 __kmp_user_lock_free(user_lock, gtid, lck); 2567 } 2568 #endif // KMP_USE_DYNAMIC_LOCK 2569 } // __kmpc_destroy_lock 2570 2571 /* destroy the lock */ 2572 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2573 #if KMP_USE_DYNAMIC_LOCK 2574 2575 #if USE_ITT_BUILD 2576 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2577 __kmp_itt_lock_destroyed(ilk->lock); 2578 #endif 2579 #if OMPT_SUPPORT && OMPT_OPTIONAL 2580 // This is the case, if called from omp_init_lock_with_hint: 2581 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2582 if (!codeptr) 2583 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2584 if (ompt_enabled.ompt_callback_lock_destroy) { 2585 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2586 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2587 } 2588 #endif 2589 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2590 2591 #else // KMP_USE_DYNAMIC_LOCK 2592 2593 kmp_user_lock_p lck; 2594 2595 if ((__kmp_user_lock_kind == lk_tas) && 2596 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2597 OMP_NEST_LOCK_T_SIZE)) { 2598 lck = (kmp_user_lock_p)user_lock; 2599 } 2600 #if KMP_USE_FUTEX 2601 else if ((__kmp_user_lock_kind == lk_futex) && 2602 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2603 OMP_NEST_LOCK_T_SIZE)) { 2604 lck = (kmp_user_lock_p)user_lock; 2605 } 2606 #endif 2607 else { 2608 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2609 } 2610 2611 #if OMPT_SUPPORT && OMPT_OPTIONAL 2612 // This is the case, if called from omp_init_lock_with_hint: 2613 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2614 if (!codeptr) 2615 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2616 if (ompt_enabled.ompt_callback_lock_destroy) { 2617 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2618 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2619 } 2620 #endif 2621 2622 #if USE_ITT_BUILD 2623 __kmp_itt_lock_destroyed(lck); 2624 #endif /* USE_ITT_BUILD */ 2625 2626 DESTROY_NESTED_LOCK(lck); 2627 2628 if ((__kmp_user_lock_kind == lk_tas) && 2629 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2630 OMP_NEST_LOCK_T_SIZE)) { 2631 ; 2632 } 2633 #if KMP_USE_FUTEX 2634 else if ((__kmp_user_lock_kind == lk_futex) && 2635 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2636 OMP_NEST_LOCK_T_SIZE)) { 2637 ; 2638 } 2639 #endif 2640 else { 2641 __kmp_user_lock_free(user_lock, gtid, lck); 2642 } 2643 #endif // KMP_USE_DYNAMIC_LOCK 2644 } // __kmpc_destroy_nest_lock 2645 2646 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2647 KMP_COUNT_BLOCK(OMP_set_lock); 2648 #if KMP_USE_DYNAMIC_LOCK 2649 int tag = KMP_EXTRACT_D_TAG(user_lock); 2650 #if USE_ITT_BUILD 2651 __kmp_itt_lock_acquiring( 2652 (kmp_user_lock_p) 2653 user_lock); // itt function will get to the right lock object. 
2654 #endif 2655 #if OMPT_SUPPORT && OMPT_OPTIONAL 2656 // This is the case, if called from omp_init_lock_with_hint: 2657 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2658 if (!codeptr) 2659 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2660 if (ompt_enabled.ompt_callback_mutex_acquire) { 2661 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2662 ompt_mutex_lock, omp_lock_hint_none, 2663 __ompt_get_mutex_impl_type(user_lock), 2664 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2665 } 2666 #endif 2667 #if KMP_USE_INLINED_TAS 2668 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2669 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2670 } else 2671 #elif KMP_USE_INLINED_FUTEX 2672 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2673 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2674 } else 2675 #endif 2676 { 2677 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2678 } 2679 #if USE_ITT_BUILD 2680 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2681 #endif 2682 #if OMPT_SUPPORT && OMPT_OPTIONAL 2683 if (ompt_enabled.ompt_callback_mutex_acquired) { 2684 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2685 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2686 } 2687 #endif 2688 2689 #else // KMP_USE_DYNAMIC_LOCK 2690 2691 kmp_user_lock_p lck; 2692 2693 if ((__kmp_user_lock_kind == lk_tas) && 2694 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2695 lck = (kmp_user_lock_p)user_lock; 2696 } 2697 #if KMP_USE_FUTEX 2698 else if ((__kmp_user_lock_kind == lk_futex) && 2699 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2700 lck = (kmp_user_lock_p)user_lock; 2701 } 2702 #endif 2703 else { 2704 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2705 } 2706 2707 #if USE_ITT_BUILD 2708 __kmp_itt_lock_acquiring(lck); 2709 #endif /* USE_ITT_BUILD */ 2710 #if OMPT_SUPPORT && OMPT_OPTIONAL 2711 // This is the case, if called from omp_init_lock_with_hint: 2712 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2713 if (!codeptr) 2714 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2715 if (ompt_enabled.ompt_callback_mutex_acquire) { 2716 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2717 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2718 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2719 } 2720 #endif 2721 2722 ACQUIRE_LOCK(lck, gtid); 2723 2724 #if USE_ITT_BUILD 2725 __kmp_itt_lock_acquired(lck); 2726 #endif /* USE_ITT_BUILD */ 2727 2728 #if OMPT_SUPPORT && OMPT_OPTIONAL 2729 if (ompt_enabled.ompt_callback_mutex_acquired) { 2730 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2731 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2732 } 2733 #endif 2734 2735 #endif // KMP_USE_DYNAMIC_LOCK 2736 } 2737 2738 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2739 #if KMP_USE_DYNAMIC_LOCK 2740 2741 #if USE_ITT_BUILD 2742 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2743 #endif 2744 #if OMPT_SUPPORT && OMPT_OPTIONAL 2745 // This is the case, if called from omp_init_lock_with_hint: 2746 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2747 if (!codeptr) 2748 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2749 if (ompt_enabled.enabled) { 2750 if (ompt_enabled.ompt_callback_mutex_acquire) { 2751 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2752 ompt_mutex_nest_lock, omp_lock_hint_none, 2753 __ompt_get_mutex_impl_type(user_lock), 2754 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2755 } 2756 } 2757 #endif 2758 int acquire_status = 2759 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2760 (void)acquire_status; 2761 #if USE_ITT_BUILD 2762 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2763 #endif 2764 2765 #if OMPT_SUPPORT && OMPT_OPTIONAL 2766 if (ompt_enabled.enabled) { 2767 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2768 if (ompt_enabled.ompt_callback_mutex_acquired) { 2769 // lock_first 2770 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2771 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2772 codeptr); 2773 } 2774 } else { 2775 if (ompt_enabled.ompt_callback_nest_lock) { 2776 // lock_next 2777 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2778 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2779 } 2780 } 2781 } 2782 #endif 2783 2784 #else // KMP_USE_DYNAMIC_LOCK 2785 int acquire_status; 2786 kmp_user_lock_p lck; 2787 2788 if ((__kmp_user_lock_kind == lk_tas) && 2789 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2790 OMP_NEST_LOCK_T_SIZE)) { 2791 lck = (kmp_user_lock_p)user_lock; 2792 } 2793 #if KMP_USE_FUTEX 2794 else if ((__kmp_user_lock_kind == lk_futex) && 2795 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2796 OMP_NEST_LOCK_T_SIZE)) { 2797 lck = (kmp_user_lock_p)user_lock; 2798 } 2799 #endif 2800 else { 2801 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2802 } 2803 2804 #if USE_ITT_BUILD 2805 __kmp_itt_lock_acquiring(lck); 2806 #endif /* USE_ITT_BUILD */ 2807 #if OMPT_SUPPORT && OMPT_OPTIONAL 2808 // This is the case, if called from omp_init_lock_with_hint: 2809 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2810 if (!codeptr) 2811 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2812 if (ompt_enabled.enabled) { 2813 if (ompt_enabled.ompt_callback_mutex_acquire) { 2814 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2815 ompt_mutex_nest_lock, omp_lock_hint_none, 2816 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2817 codeptr); 2818 } 2819 } 2820 #endif 2821 2822 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2823 2824 #if USE_ITT_BUILD 2825 __kmp_itt_lock_acquired(lck); 2826 #endif /* USE_ITT_BUILD */ 2827 2828 #if OMPT_SUPPORT && OMPT_OPTIONAL 2829 if (ompt_enabled.enabled) { 2830 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2831 if (ompt_enabled.ompt_callback_mutex_acquired) { 2832 // lock_first 2833 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2834 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2835 } 2836 } else { 2837 if (ompt_enabled.ompt_callback_nest_lock) { 2838 // lock_next 2839 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2840 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2841 } 2842 } 2843 } 2844 #endif 2845 2846 #endif // KMP_USE_DYNAMIC_LOCK 2847 } 2848 2849 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2850 #if KMP_USE_DYNAMIC_LOCK 2851 2852 int tag = KMP_EXTRACT_D_TAG(user_lock); 2853 #if USE_ITT_BUILD 2854 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2855 #endif 2856 #if KMP_USE_INLINED_TAS 2857 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2858 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2859 } else 2860 #elif KMP_USE_INLINED_FUTEX 2861 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2862 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2863 } else 2864 #endif 2865 { 2866 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2867 } 2868 2869 #if OMPT_SUPPORT && OMPT_OPTIONAL 2870 // This is the case, if called from omp_init_lock_with_hint: 2871 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2872 if (!codeptr) 2873 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2874 if (ompt_enabled.ompt_callback_mutex_released) { 2875 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2876 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2877 } 2878 #endif 2879 2880 #else // KMP_USE_DYNAMIC_LOCK 2881 2882 kmp_user_lock_p lck; 2883 2884 /* Can't use serial interval since not block structured */ 2885 /* release the lock */ 2886 2887 if ((__kmp_user_lock_kind == lk_tas) && 2888 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2889 #if KMP_OS_LINUX && \ 2890 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2891 // "fast" path implemented to fix customer performance issue 2892 #if USE_ITT_BUILD 2893 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2894 #endif /* USE_ITT_BUILD */ 2895 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2896 KMP_MB(); 2897 2898 #if OMPT_SUPPORT && OMPT_OPTIONAL 2899 // This is the case, if called from omp_init_lock_with_hint: 2900 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2901 if (!codeptr) 2902 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2903 if (ompt_enabled.ompt_callback_mutex_released) { 2904 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2905 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2906 } 2907 #endif 2908 2909 return; 2910 #else 2911 lck = (kmp_user_lock_p)user_lock; 2912 #endif 2913 } 2914 #if KMP_USE_FUTEX 2915 else if ((__kmp_user_lock_kind == lk_futex) && 2916 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2917 lck = (kmp_user_lock_p)user_lock; 2918 } 2919 #endif 2920 else { 2921 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2922 } 2923 2924 #if USE_ITT_BUILD 2925 __kmp_itt_lock_releasing(lck); 2926 #endif /* USE_ITT_BUILD */ 2927 2928 RELEASE_LOCK(lck, gtid); 2929 2930 #if OMPT_SUPPORT && OMPT_OPTIONAL 2931 // This is the case, if called from omp_init_lock_with_hint: 2932 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2933 if (!codeptr) 2934 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2935 if (ompt_enabled.ompt_callback_mutex_released) { 2936 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2937 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2938 } 2939 #endif 2940 2941 #endif // KMP_USE_DYNAMIC_LOCK 2942 } 2943 2944 /* release the lock */ 2945 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2946 #if KMP_USE_DYNAMIC_LOCK 2947 2948 #if USE_ITT_BUILD 2949 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2950 #endif 2951 int release_status = 2952 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2953 (void)release_status; 2954 2955 #if OMPT_SUPPORT && OMPT_OPTIONAL 2956 // This is the case, if called from omp_init_lock_with_hint: 2957 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2958 if (!codeptr) 2959 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2960 if (ompt_enabled.enabled) { 2961 if (release_status == KMP_LOCK_RELEASED) { 2962 if (ompt_enabled.ompt_callback_mutex_released) { 2963 // release_lock_last 2964 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2965 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2966 codeptr); 2967 } 2968 } else if (ompt_enabled.ompt_callback_nest_lock) { 2969 // release_lock_prev 2970 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2971 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2972 } 2973 } 2974 #endif 2975 2976 #else // KMP_USE_DYNAMIC_LOCK 2977 2978 kmp_user_lock_p lck; 
2979 2980 /* Can't use serial interval since not block structured */ 2981 2982 if ((__kmp_user_lock_kind == lk_tas) && 2983 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2984 OMP_NEST_LOCK_T_SIZE)) { 2985 #if KMP_OS_LINUX && \ 2986 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2987 // "fast" path implemented to fix customer performance issue 2988 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2989 #if USE_ITT_BUILD 2990 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2991 #endif /* USE_ITT_BUILD */ 2992 2993 #if OMPT_SUPPORT && OMPT_OPTIONAL 2994 int release_status = KMP_LOCK_STILL_HELD; 2995 #endif 2996 2997 if (--(tl->lk.depth_locked) == 0) { 2998 TCW_4(tl->lk.poll, 0); 2999 #if OMPT_SUPPORT && OMPT_OPTIONAL 3000 release_status = KMP_LOCK_RELEASED; 3001 #endif 3002 } 3003 KMP_MB(); 3004 3005 #if OMPT_SUPPORT && OMPT_OPTIONAL 3006 // This is the case, if called from omp_init_lock_with_hint: 3007 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3008 if (!codeptr) 3009 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3010 if (ompt_enabled.enabled) { 3011 if (release_status == KMP_LOCK_RELEASED) { 3012 if (ompt_enabled.ompt_callback_mutex_released) { 3013 // release_lock_last 3014 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3015 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3016 } 3017 } else if (ompt_enabled.ompt_callback_nest_lock) { 3018 // release_lock_previous 3019 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3020 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3021 } 3022 } 3023 #endif 3024 3025 return; 3026 #else 3027 lck = (kmp_user_lock_p)user_lock; 3028 #endif 3029 } 3030 #if KMP_USE_FUTEX 3031 else if ((__kmp_user_lock_kind == lk_futex) && 3032 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3033 OMP_NEST_LOCK_T_SIZE)) { 3034 lck = (kmp_user_lock_p)user_lock; 3035 } 3036 #endif 3037 else { 3038 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3039 } 3040 3041 #if USE_ITT_BUILD 3042 __kmp_itt_lock_releasing(lck); 3043 #endif /* USE_ITT_BUILD */ 3044 3045 int release_status; 3046 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3047 #if OMPT_SUPPORT && OMPT_OPTIONAL 3048 // This is the case, if called from omp_init_lock_with_hint: 3049 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3050 if (!codeptr) 3051 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3052 if (ompt_enabled.enabled) { 3053 if (release_status == KMP_LOCK_RELEASED) { 3054 if (ompt_enabled.ompt_callback_mutex_released) { 3055 // release_lock_last 3056 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3057 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3058 } 3059 } else if (ompt_enabled.ompt_callback_nest_lock) { 3060 // release_lock_previous 3061 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3062 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3063 } 3064 } 3065 #endif 3066 3067 #endif // KMP_USE_DYNAMIC_LOCK 3068 } 3069 3070 /* try to acquire the lock */ 3071 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3072 KMP_COUNT_BLOCK(OMP_test_lock); 3073 3074 #if KMP_USE_DYNAMIC_LOCK 3075 int rc; 3076 int tag = KMP_EXTRACT_D_TAG(user_lock); 3077 #if USE_ITT_BUILD 3078 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3079 #endif 3080 #if OMPT_SUPPORT && OMPT_OPTIONAL 3081 // This is the case, if called from omp_init_lock_with_hint: 3082 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3083 if (!codeptr) 3084 codeptr =
OMPT_GET_RETURN_ADDRESS(0); 3085 if (ompt_enabled.ompt_callback_mutex_acquire) { 3086 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3087 ompt_mutex_lock, omp_lock_hint_none, 3088 __ompt_get_mutex_impl_type(user_lock), 3089 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3090 } 3091 #endif 3092 #if KMP_USE_INLINED_TAS 3093 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3094 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3095 } else 3096 #elif KMP_USE_INLINED_FUTEX 3097 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3098 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3099 } else 3100 #endif 3101 { 3102 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3103 } 3104 if (rc) { 3105 #if USE_ITT_BUILD 3106 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3107 #endif 3108 #if OMPT_SUPPORT && OMPT_OPTIONAL 3109 if (ompt_enabled.ompt_callback_mutex_acquired) { 3110 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3111 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3112 } 3113 #endif 3114 return FTN_TRUE; 3115 } else { 3116 #if USE_ITT_BUILD 3117 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3118 #endif 3119 return FTN_FALSE; 3120 } 3121 3122 #else // KMP_USE_DYNAMIC_LOCK 3123 3124 kmp_user_lock_p lck; 3125 int rc; 3126 3127 if ((__kmp_user_lock_kind == lk_tas) && 3128 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3129 lck = (kmp_user_lock_p)user_lock; 3130 } 3131 #if KMP_USE_FUTEX 3132 else if ((__kmp_user_lock_kind == lk_futex) && 3133 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3134 lck = (kmp_user_lock_p)user_lock; 3135 } 3136 #endif 3137 else { 3138 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3139 } 3140 3141 #if USE_ITT_BUILD 3142 __kmp_itt_lock_acquiring(lck); 3143 #endif /* USE_ITT_BUILD */ 3144 #if OMPT_SUPPORT && OMPT_OPTIONAL 3145 // This is the case, if called from omp_init_lock_with_hint: 3146 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3147 if (!codeptr) 3148 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3149 if (ompt_enabled.ompt_callback_mutex_acquire) { 3150 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3151 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3152 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3153 } 3154 #endif 3155 3156 rc = TEST_LOCK(lck, gtid); 3157 #if USE_ITT_BUILD 3158 if (rc) { 3159 __kmp_itt_lock_acquired(lck); 3160 } else { 3161 __kmp_itt_lock_cancelled(lck); 3162 } 3163 #endif /* USE_ITT_BUILD */ 3164 #if OMPT_SUPPORT && OMPT_OPTIONAL 3165 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3166 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3167 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3168 } 3169 #endif 3170 3171 return (rc ? 
FTN_TRUE : FTN_FALSE); 3172 3173 /* Can't use serial interval since not block structured */ 3174 3175 #endif // KMP_USE_DYNAMIC_LOCK 3176 } 3177 3178 /* try to acquire the lock */ 3179 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3180 #if KMP_USE_DYNAMIC_LOCK 3181 int rc; 3182 #if USE_ITT_BUILD 3183 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3184 #endif 3185 #if OMPT_SUPPORT && OMPT_OPTIONAL 3186 // This is the case, if called from omp_init_lock_with_hint: 3187 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3188 if (!codeptr) 3189 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3190 if (ompt_enabled.ompt_callback_mutex_acquire) { 3191 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3192 ompt_mutex_nest_lock, omp_lock_hint_none, 3193 __ompt_get_mutex_impl_type(user_lock), 3194 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3195 } 3196 #endif 3197 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3198 #if USE_ITT_BUILD 3199 if (rc) { 3200 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3201 } else { 3202 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3203 } 3204 #endif 3205 #if OMPT_SUPPORT && OMPT_OPTIONAL 3206 if (ompt_enabled.enabled && rc) { 3207 if (rc == 1) { 3208 if (ompt_enabled.ompt_callback_mutex_acquired) { 3209 // lock_first 3210 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3211 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3212 codeptr); 3213 } 3214 } else { 3215 if (ompt_enabled.ompt_callback_nest_lock) { 3216 // lock_next 3217 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3218 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3219 } 3220 } 3221 } 3222 #endif 3223 return rc; 3224 3225 #else // KMP_USE_DYNAMIC_LOCK 3226 3227 kmp_user_lock_p lck; 3228 int rc; 3229 3230 if ((__kmp_user_lock_kind == lk_tas) && 3231 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3232 OMP_NEST_LOCK_T_SIZE)) { 3233 lck = (kmp_user_lock_p)user_lock; 3234 } 3235 #if KMP_USE_FUTEX 3236 else if ((__kmp_user_lock_kind == lk_futex) && 3237 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3238 OMP_NEST_LOCK_T_SIZE)) { 3239 lck = (kmp_user_lock_p)user_lock; 3240 } 3241 #endif 3242 else { 3243 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3244 } 3245 3246 #if USE_ITT_BUILD 3247 __kmp_itt_lock_acquiring(lck); 3248 #endif /* USE_ITT_BUILD */ 3249 3250 #if OMPT_SUPPORT && OMPT_OPTIONAL 3251 // This is the case, if called from omp_init_lock_with_hint: 3252 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3253 if (!codeptr) 3254 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3255 if (ompt_enabled.enabled && 3256 ompt_enabled.ompt_callback_mutex_acquire) { 3257 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3258 ompt_mutex_nest_lock, omp_lock_hint_none, 3259 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3260 codeptr); 3261 } 3262 #endif 3263 3264 rc = TEST_NESTED_LOCK(lck, gtid); 3265 #if USE_ITT_BUILD 3266 if (rc) { 3267 __kmp_itt_lock_acquired(lck); 3268 } else { 3269 __kmp_itt_lock_cancelled(lck); 3270 } 3271 #endif /* USE_ITT_BUILD */ 3272 #if OMPT_SUPPORT && OMPT_OPTIONAL 3273 if (ompt_enabled.enabled && rc) { 3274 if (rc == 1) { 3275 if (ompt_enabled.ompt_callback_mutex_acquired) { 3276 // lock_first 3277 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3278 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3279 } 3280 } else { 3281 if (ompt_enabled.ompt_callback_nest_lock) { 3282
// lock_next 3283 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3284 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3285 } 3286 } 3287 } 3288 #endif 3289 return rc; 3290 3291 /* Can't use serial interval since not block structured */ 3292 3293 #endif // KMP_USE_DYNAMIC_LOCK 3294 } 3295 3296 // Interface to fast scalable reduce methods routines 3297 3298 // keep the selected method in a thread local structure for cross-function 3299 // usage: will be used in __kmpc_end_reduce* functions; 3300 // another solution: to re-determine the method one more time in 3301 // __kmpc_end_reduce* functions (new prototype required then) 3302 // AT: which solution is better? 3303 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3304 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3305 3306 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3307 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3308 3309 // description of the packed_reduction_method variable: look at the macros in 3310 // kmp.h 3311 3312 // used in a critical section reduce block 3313 static __forceinline void 3314 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3315 kmp_critical_name *crit) { 3316 3317 // this lock was visible to a customer and to the threading profile tool as a 3318 // serial overhead span (although it's used for an internal purpose only) 3319 // why was it visible in previous implementation? 3320 // should we keep it visible in new reduce block? 3321 kmp_user_lock_p lck; 3322 3323 #if KMP_USE_DYNAMIC_LOCK 3324 3325 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3326 // Check if it is initialized. 3327 if (*lk == 0) { 3328 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3329 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3330 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3331 } else { 3332 __kmp_init_indirect_csptr(crit, loc, global_tid, 3333 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3334 } 3335 } 3336 // Branch for accessing the actual lock object and set operation. This 3337 // branching is inevitable since this lock initialization does not follow the 3338 // normal dispatch path (lock table is not used). 3339 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3340 lck = (kmp_user_lock_p)lk; 3341 KMP_DEBUG_ASSERT(lck != NULL); 3342 if (__kmp_env_consistency_check) { 3343 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3344 } 3345 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3346 } else { 3347 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3348 lck = ilk->lock; 3349 KMP_DEBUG_ASSERT(lck != NULL); 3350 if (__kmp_env_consistency_check) { 3351 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3352 } 3353 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3354 } 3355 3356 #else // KMP_USE_DYNAMIC_LOCK 3357 3358 // We know that the fast reduction code is only emitted by Intel compilers 3359 // with 32 byte critical sections. If there isn't enough space, then we 3360 // have to use a pointer.
3361 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3362 lck = (kmp_user_lock_p)crit; 3363 } else { 3364 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3365 } 3366 KMP_DEBUG_ASSERT(lck != NULL); 3367 3368 if (__kmp_env_consistency_check) 3369 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3370 3371 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3372 3373 #endif // KMP_USE_DYNAMIC_LOCK 3374 } 3375 3376 // used in a critical section reduce block 3377 static __forceinline void 3378 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3379 kmp_critical_name *crit) { 3380 3381 kmp_user_lock_p lck; 3382 3383 #if KMP_USE_DYNAMIC_LOCK 3384 3385 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3386 lck = (kmp_user_lock_p)crit; 3387 if (__kmp_env_consistency_check) 3388 __kmp_pop_sync(global_tid, ct_critical, loc); 3389 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3390 } else { 3391 kmp_indirect_lock_t *ilk = 3392 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3393 if (__kmp_env_consistency_check) 3394 __kmp_pop_sync(global_tid, ct_critical, loc); 3395 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3396 } 3397 3398 #else // KMP_USE_DYNAMIC_LOCK 3399 3400 // We know that the fast reduction code is only emitted by Intel compilers 3401 // with 32 byte critical sections. If there isn't enough space, then we have 3402 // to use a pointer. 3403 if (__kmp_base_user_lock_size > 32) { 3404 lck = *((kmp_user_lock_p *)crit); 3405 KMP_ASSERT(lck != NULL); 3406 } else { 3407 lck = (kmp_user_lock_p)crit; 3408 } 3409 3410 if (__kmp_env_consistency_check) 3411 __kmp_pop_sync(global_tid, ct_critical, loc); 3412 3413 __kmp_release_user_lock_with_checks(lck, global_tid); 3414 3415 #endif // KMP_USE_DYNAMIC_LOCK 3416 } // __kmp_end_critical_section_reduce_block 3417 3418 static __forceinline int 3419 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3420 int *task_state) { 3421 kmp_team_t *team; 3422 3423 // Check if we are inside the teams construct? 3424 if (th->th.th_teams_microtask) { 3425 *team_p = team = th->th.th_team; 3426 if (team->t.t_level == th->th.th_teams_level) { 3427 // This is reduction at teams construct. 3428 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3429 // Let's swap teams temporarily for the reduction. 3430 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3431 th->th.th_team = team->t.t_parent; 3432 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3433 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3434 *task_state = th->th.th_task_state; 3435 th->th.th_task_state = 0; 3436 3437 return 1; 3438 } 3439 } 3440 return 0; 3441 } 3442 3443 static __forceinline void 3444 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3445 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3446 th->th.th_info.ds.ds_tid = 0; 3447 th->th.th_team = team; 3448 th->th.th_team_nproc = team->t.t_nproc; 3449 th->th.th_task_team = team->t.t_task_team[task_state]; 3450 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3451 } 3452 3453 /* 2.a.i. Reduce Block without a terminating barrier */ 3454 /*! 
3455 @ingroup SYNCHRONIZATION 3456 @param loc source location information 3457 @param global_tid global thread number 3458 @param num_vars number of items (variables) to be reduced 3459 @param reduce_size size of data in bytes to be reduced 3460 @param reduce_data pointer to data to be reduced 3461 @param reduce_func callback function providing reduction operation on two 3462 operands and returning result of reduction in lhs_data 3463 @param lck pointer to the unique lock data structure 3464 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3465 threads if atomic reduction needed 3466 3467 The nowait version is used for a reduce clause with the nowait argument. 3468 */ 3469 kmp_int32 3470 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3471 size_t reduce_size, void *reduce_data, 3472 void (*reduce_func)(void *lhs_data, void *rhs_data), 3473 kmp_critical_name *lck) { 3474 3475 KMP_COUNT_BLOCK(REDUCE_nowait); 3476 int retval = 0; 3477 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3478 kmp_info_t *th; 3479 kmp_team_t *team; 3480 int teams_swapped = 0, task_state; 3481 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3482 __kmp_assert_valid_gtid(global_tid); 3483 3484 // why do we need this initialization here at all? 3485 // Reduction clause can not be used as a stand-alone directive. 3486 3487 // do not call __kmp_serial_initialize(), it will be called by 3488 // __kmp_parallel_initialize() if needed 3489 // possible detection of false-positive race by the threadchecker ??? 3490 if (!TCR_4(__kmp_init_parallel)) 3491 __kmp_parallel_initialize(); 3492 3493 __kmp_resume_if_soft_paused(); 3494 3495 // check correctness of reduce block nesting 3496 #if KMP_USE_DYNAMIC_LOCK 3497 if (__kmp_env_consistency_check) 3498 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3499 #else 3500 if (__kmp_env_consistency_check) 3501 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3502 #endif 3503 3504 th = __kmp_thread_from_gtid(global_tid); 3505 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3506 3507 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3508 // the value should be kept in a variable 3509 // the variable should be either a construct-specific or thread-specific 3510 // property, not a team specific property 3511 // (a thread can reach the next reduce block on the next construct, reduce 3512 // method may differ on the next construct) 3513 // an ident_t "loc" parameter could be used as a construct-specific property 3514 // (what if loc == 0?) 
3515 // (if both construct-specific and team-specific variables were shared,
3516 // then unnecessary extra syncs would be needed)
3517 // a thread-specific variable is better regarding two issues above (next
3518 // construct and extra syncs)
3519 // a thread-specific "th_local.reduction_method" variable is used currently
3520 // each thread executes the 'determine' and 'set' lines (no need to execute
3521 // them on one thread only; this avoids unnecessary extra syncs)
3522
3523 packed_reduction_method = __kmp_determine_reduction_method(
3524 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3525 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3526
3527 OMPT_REDUCTION_DECL(th, global_tid);
3528 if (packed_reduction_method == critical_reduce_block) {
3529
3530 OMPT_REDUCTION_BEGIN;
3531
3532 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3533 retval = 1;
3534
3535 } else if (packed_reduction_method == empty_reduce_block) {
3536
3537 OMPT_REDUCTION_BEGIN;
3538
3539 // usage: if team size == 1, no synchronization is required (Intel
3540 // platforms only)
3541 retval = 1;
3542
3543 } else if (packed_reduction_method == atomic_reduce_block) {
3544
3545 retval = 2;
3546
3547 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3548 // won't be called by the code gen)
3549 // (this is not ideal, because the checking block has been closed by
3550 // this 'pop',
3551 // but the atomic operation has not been executed yet; it will be executed
3552 // slightly later, literally on the next instruction)
3553 if (__kmp_env_consistency_check)
3554 __kmp_pop_sync(global_tid, ct_reduce, loc);
3555
3556 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3557 tree_reduce_block)) {
3558
3559 // AT: performance issue: a real barrier here
3560 // AT: (if the primary thread is slow, other threads are blocked here waiting
3561 // for the primary thread to come and release them)
3562 // AT: (this is not what a customer might expect when specifying the NOWAIT
3563 // clause; specifying NOWAIT won't improve performance, and that may
3564 // be confusing to a customer)
3565 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3566 // might go faster and be more in line with the sense of NOWAIT
3567 // AT: TO DO: run the EPCC tests and compare times
3568
3569 // this barrier should be invisible to a customer and to the threading profile
3570 // tool (it's neither a terminating barrier nor customer's code, it's
3571 // used for an internal purpose)
3572 #if OMPT_SUPPORT
3573 // JP: can this barrier potentially lead to task scheduling?
3574 // JP: as long as there is a barrier in the implementation, OMPT should and
3575 // will provide the barrier events,
3576 // so we set up the necessary frame/return addresses.
3577 ompt_frame_t *ompt_frame;
3578 if (ompt_enabled.enabled) {
3579 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3580 if (ompt_frame->enter_frame.ptr == NULL)
3581 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3582 }
3583 OMPT_STORE_RETURN_ADDRESS(global_tid);
3584 #endif
3585 #if USE_ITT_NOTIFY
3586 __kmp_threads[global_tid]->th.th_ident = loc;
3587 #endif
3588 retval =
3589 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3590 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3591 retval = (retval != 0) ?
(0) : (1); 3592 #if OMPT_SUPPORT && OMPT_OPTIONAL 3593 if (ompt_enabled.enabled) { 3594 ompt_frame->enter_frame = ompt_data_none; 3595 } 3596 #endif 3597 3598 // all other workers except primary thread should do this pop here 3599 // ( none of other workers will get to __kmpc_end_reduce_nowait() ) 3600 if (__kmp_env_consistency_check) { 3601 if (retval == 0) { 3602 __kmp_pop_sync(global_tid, ct_reduce, loc); 3603 } 3604 } 3605 3606 } else { 3607 3608 // should never reach this block 3609 KMP_ASSERT(0); // "unexpected method" 3610 } 3611 if (teams_swapped) { 3612 __kmp_restore_swapped_teams(th, team, task_state); 3613 } 3614 KA_TRACE( 3615 10, 3616 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3617 global_tid, packed_reduction_method, retval)); 3618 3619 return retval; 3620 } 3621 3622 /*! 3623 @ingroup SYNCHRONIZATION 3624 @param loc source location information 3625 @param global_tid global thread id. 3626 @param lck pointer to the unique lock data structure 3627 3628 Finish the execution of a reduce nowait. 3629 */ 3630 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3631 kmp_critical_name *lck) { 3632 3633 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3634 3635 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3636 __kmp_assert_valid_gtid(global_tid); 3637 3638 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3639 3640 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3641 3642 if (packed_reduction_method == critical_reduce_block) { 3643 3644 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3645 OMPT_REDUCTION_END; 3646 3647 } else if (packed_reduction_method == empty_reduce_block) { 3648 3649 // usage: if team size == 1, no synchronization is required ( on Intel 3650 // platforms only ) 3651 3652 OMPT_REDUCTION_END; 3653 3654 } else if (packed_reduction_method == atomic_reduce_block) { 3655 3656 // neither primary thread nor other workers should get here 3657 // (code gen does not generate this call in case 2: atomic reduce block) 3658 // actually it's better to remove this elseif at all; 3659 // after removal this value will checked by the 'else' and will assert 3660 3661 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3662 tree_reduce_block)) { 3663 3664 // only primary thread gets here 3665 // OMPT: tree reduction is annotated in the barrier code 3666 3667 } else { 3668 3669 // should never reach this block 3670 KMP_ASSERT(0); // "unexpected method" 3671 } 3672 3673 if (__kmp_env_consistency_check) 3674 __kmp_pop_sync(global_tid, ct_reduce, loc); 3675 3676 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3677 global_tid, packed_reduction_method)); 3678 3679 return; 3680 } 3681 3682 /* 2.a.ii. Reduce Block with a terminating barrier */ 3683 3684 /*! 3685 @ingroup SYNCHRONIZATION 3686 @param loc source location information 3687 @param global_tid global thread number 3688 @param num_vars number of items (variables) to be reduced 3689 @param reduce_size size of data in bytes to be reduced 3690 @param reduce_data pointer to data to be reduced 3691 @param reduce_func callback function providing reduction operation on two 3692 operands and returning result of reduction in lhs_data 3693 @param lck pointer to the unique lock data structure 3694 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3695 threads if atomic reduction needed 3696 3697 A blocking reduce that includes an implicit barrier. 
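A minimal sketch of the protocol implied by the return value (illustrative
only; the exact sequence a compiler emits around this call may differ):

    int ret = __kmpc_reduce(loc, gtid, n, size, priv_data, combiner, &crit);
    if (ret == 1) {
      // this thread combines the private copies into the shared variables
      __kmpc_end_reduce(loc, gtid, &crit);
    } else if (ret == 2) {
      // every thread updates the shared variables with atomics, then still
      // closes the region so the terminating barrier is executed
      __kmpc_end_reduce(loc, gtid, &crit);
    }
    // ret == 0: another thread performs the combination; nothing more to do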
3698 */ 3699 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3700 size_t reduce_size, void *reduce_data, 3701 void (*reduce_func)(void *lhs_data, void *rhs_data), 3702 kmp_critical_name *lck) { 3703 KMP_COUNT_BLOCK(REDUCE_wait); 3704 int retval = 0; 3705 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3706 kmp_info_t *th; 3707 kmp_team_t *team; 3708 int teams_swapped = 0, task_state; 3709 3710 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3711 __kmp_assert_valid_gtid(global_tid); 3712 3713 // why do we need this initialization here at all? 3714 // Reduction clause can not be a stand-alone directive. 3715 3716 // do not call __kmp_serial_initialize(), it will be called by 3717 // __kmp_parallel_initialize() if needed 3718 // possible detection of false-positive race by the threadchecker ??? 3719 if (!TCR_4(__kmp_init_parallel)) 3720 __kmp_parallel_initialize(); 3721 3722 __kmp_resume_if_soft_paused(); 3723 3724 // check correctness of reduce block nesting 3725 #if KMP_USE_DYNAMIC_LOCK 3726 if (__kmp_env_consistency_check) 3727 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3728 #else 3729 if (__kmp_env_consistency_check) 3730 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3731 #endif 3732 3733 th = __kmp_thread_from_gtid(global_tid); 3734 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3735 3736 packed_reduction_method = __kmp_determine_reduction_method( 3737 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3738 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3739 3740 OMPT_REDUCTION_DECL(th, global_tid); 3741 3742 if (packed_reduction_method == critical_reduce_block) { 3743 3744 OMPT_REDUCTION_BEGIN; 3745 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3746 retval = 1; 3747 3748 } else if (packed_reduction_method == empty_reduce_block) { 3749 3750 OMPT_REDUCTION_BEGIN; 3751 // usage: if team size == 1, no synchronization is required ( Intel 3752 // platforms only ) 3753 retval = 1; 3754 3755 } else if (packed_reduction_method == atomic_reduce_block) { 3756 3757 retval = 2; 3758 3759 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3760 tree_reduce_block)) { 3761 3762 // case tree_reduce_block: 3763 // this barrier should be visible to a customer and to the threading profile 3764 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3765 #if OMPT_SUPPORT 3766 ompt_frame_t *ompt_frame; 3767 if (ompt_enabled.enabled) { 3768 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3769 if (ompt_frame->enter_frame.ptr == NULL) 3770 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3771 } 3772 OMPT_STORE_RETURN_ADDRESS(global_tid); 3773 #endif 3774 #if USE_ITT_NOTIFY 3775 __kmp_threads[global_tid]->th.th_ident = 3776 loc; // needed for correct notification of frames 3777 #endif 3778 retval = 3779 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3780 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3781 retval = (retval != 0) ? 
(0) : (1); 3782 #if OMPT_SUPPORT && OMPT_OPTIONAL 3783 if (ompt_enabled.enabled) { 3784 ompt_frame->enter_frame = ompt_data_none; 3785 } 3786 #endif 3787 3788 // all other workers except primary thread should do this pop here 3789 // (none of other workers except primary will enter __kmpc_end_reduce()) 3790 if (__kmp_env_consistency_check) { 3791 if (retval == 0) { // 0: all other workers; 1: primary thread 3792 __kmp_pop_sync(global_tid, ct_reduce, loc); 3793 } 3794 } 3795 3796 } else { 3797 3798 // should never reach this block 3799 KMP_ASSERT(0); // "unexpected method" 3800 } 3801 if (teams_swapped) { 3802 __kmp_restore_swapped_teams(th, team, task_state); 3803 } 3804 3805 KA_TRACE(10, 3806 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3807 global_tid, packed_reduction_method, retval)); 3808 return retval; 3809 } 3810 3811 /*! 3812 @ingroup SYNCHRONIZATION 3813 @param loc source location information 3814 @param global_tid global thread id. 3815 @param lck pointer to the unique lock data structure 3816 3817 Finish the execution of a blocking reduce. 3818 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3819 start function. 3820 */ 3821 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3822 kmp_critical_name *lck) { 3823 3824 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3825 kmp_info_t *th; 3826 kmp_team_t *team; 3827 int teams_swapped = 0, task_state; 3828 3829 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3830 __kmp_assert_valid_gtid(global_tid); 3831 3832 th = __kmp_thread_from_gtid(global_tid); 3833 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3834 3835 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3836 3837 // this barrier should be visible to a customer and to the threading profile 3838 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3839 OMPT_REDUCTION_DECL(th, global_tid); 3840 3841 if (packed_reduction_method == critical_reduce_block) { 3842 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3843 3844 OMPT_REDUCTION_END; 3845 3846 // TODO: implicit barrier: should be exposed 3847 #if OMPT_SUPPORT 3848 ompt_frame_t *ompt_frame; 3849 if (ompt_enabled.enabled) { 3850 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3851 if (ompt_frame->enter_frame.ptr == NULL) 3852 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3853 } 3854 OMPT_STORE_RETURN_ADDRESS(global_tid); 3855 #endif 3856 #if USE_ITT_NOTIFY 3857 __kmp_threads[global_tid]->th.th_ident = loc; 3858 #endif 3859 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3860 #if OMPT_SUPPORT && OMPT_OPTIONAL 3861 if (ompt_enabled.enabled) { 3862 ompt_frame->enter_frame = ompt_data_none; 3863 } 3864 #endif 3865 3866 } else if (packed_reduction_method == empty_reduce_block) { 3867 3868 OMPT_REDUCTION_END; 3869 3870 // usage: if team size==1, no synchronization is required (Intel platforms only) 3871 3872 // TODO: implicit barrier: should be exposed 3873 #if OMPT_SUPPORT 3874 ompt_frame_t *ompt_frame; 3875 if (ompt_enabled.enabled) { 3876 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3877 if (ompt_frame->enter_frame.ptr == NULL) 3878 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3879 } 3880 OMPT_STORE_RETURN_ADDRESS(global_tid); 3881 #endif 3882 #if USE_ITT_NOTIFY 3883 __kmp_threads[global_tid]->th.th_ident = loc; 3884 #endif 3885 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3886 #if OMPT_SUPPORT && OMPT_OPTIONAL 3887 if (ompt_enabled.enabled) { 3888 ompt_frame->enter_frame = ompt_data_none; 3889 } 3890 #endif 3891 3892 } else if (packed_reduction_method == atomic_reduce_block) { 3893 3894 #if OMPT_SUPPORT 3895 ompt_frame_t *ompt_frame; 3896 if (ompt_enabled.enabled) { 3897 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3898 if (ompt_frame->enter_frame.ptr == NULL) 3899 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3900 } 3901 OMPT_STORE_RETURN_ADDRESS(global_tid); 3902 #endif 3903 // TODO: implicit barrier: should be exposed 3904 #if USE_ITT_NOTIFY 3905 __kmp_threads[global_tid]->th.th_ident = loc; 3906 #endif 3907 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3908 #if OMPT_SUPPORT && OMPT_OPTIONAL 3909 if (ompt_enabled.enabled) { 3910 ompt_frame->enter_frame = ompt_data_none; 3911 } 3912 #endif 3913 3914 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3915 tree_reduce_block)) { 3916 3917 // only primary thread executes here (primary releases all other workers) 3918 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3919 global_tid); 3920 3921 } else { 3922 3923 // should never reach this block 3924 KMP_ASSERT(0); // "unexpected method" 3925 } 3926 if (teams_swapped) { 3927 __kmp_restore_swapped_teams(th, team, task_state); 3928 } 3929 3930 if (__kmp_env_consistency_check) 3931 __kmp_pop_sync(global_tid, ct_reduce, loc); 3932 3933 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3934 global_tid, packed_reduction_method)); 3935 3936 return; 3937 } 3938 3939 #undef __KMP_GET_REDUCTION_METHOD 3940 #undef __KMP_SET_REDUCTION_METHOD 3941 3942 /* end of interface to fast scalable reduce routines */ 3943 3944 kmp_uint64 __kmpc_get_taskid() { 3945 3946 kmp_int32 gtid; 3947 kmp_info_t *thread; 3948 3949 gtid = __kmp_get_gtid(); 3950 if (gtid < 0) { 3951 return 0; 3952 } 3953 thread = __kmp_thread_from_gtid(gtid); 3954 return thread->th.th_current_task->td_task_id; 3955 3956 } // __kmpc_get_taskid 3957 3958 kmp_uint64 __kmpc_get_parent_taskid() { 3959 3960 kmp_int32 gtid; 3961 kmp_info_t *thread; 3962 kmp_taskdata_t *parent_task; 3963 3964 gtid = __kmp_get_gtid(); 3965 if (gtid < 0) { 3966 return 0; 3967 } 3968 thread = __kmp_thread_from_gtid(gtid); 3969 parent_task = thread->th.th_current_task->td_parent; 3970 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3971 3972 } // __kmpc_get_parent_taskid 3973 3974 /*! 3975 @ingroup WORK_SHARING 3976 @param loc source location information. 3977 @param gtid global thread number. 3978 @param num_dims number of associated doacross loops. 3979 @param dims info on loops bounds. 3980 3981 Initialize doacross loop information. 3982 Expect compiler send us inclusive bounds, 3983 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
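For that example loop a compiler could fill the single dimension roughly as
follows (a sketch; the field order follows the lo/up/st usage below):

    struct kmp_dim d = {.lo = 2, .up = 8, .st = 2}; // inclusive bounds
    __kmpc_doacross_init(loc, gtid, 1, &d);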
3984 */ 3985 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3986 const struct kmp_dim *dims) { 3987 __kmp_assert_valid_gtid(gtid); 3988 int j, idx; 3989 kmp_int64 last, trace_count; 3990 kmp_info_t *th = __kmp_threads[gtid]; 3991 kmp_team_t *team = th->th.th_team; 3992 kmp_uint32 *flags; 3993 kmp_disp_t *pr_buf = th->th.th_dispatch; 3994 dispatch_shared_info_t *sh_buf; 3995 3996 KA_TRACE( 3997 20, 3998 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3999 gtid, num_dims, !team->t.t_serialized)); 4000 KMP_DEBUG_ASSERT(dims != NULL); 4001 KMP_DEBUG_ASSERT(num_dims > 0); 4002 4003 if (team->t.t_serialized) { 4004 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4005 return; // no dependencies if team is serialized 4006 } 4007 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4008 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4009 // the next loop 4010 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4011 4012 // Save bounds info into allocated private buffer 4013 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4014 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4015 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4016 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4017 pr_buf->th_doacross_info[0] = 4018 (kmp_int64)num_dims; // first element is number of dimensions 4019 // Save also address of num_done in order to access it later without knowing 4020 // the buffer index 4021 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4022 pr_buf->th_doacross_info[2] = dims[0].lo; 4023 pr_buf->th_doacross_info[3] = dims[0].up; 4024 pr_buf->th_doacross_info[4] = dims[0].st; 4025 last = 5; 4026 for (j = 1; j < num_dims; ++j) { 4027 kmp_int64 4028 range_length; // To keep ranges of all dimensions but the first dims[0] 4029 if (dims[j].st == 1) { // most common case 4030 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4031 range_length = dims[j].up - dims[j].lo + 1; 4032 } else { 4033 if (dims[j].st > 0) { 4034 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4035 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4036 } else { // negative increment 4037 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4038 range_length = 4039 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4040 } 4041 } 4042 pr_buf->th_doacross_info[last++] = range_length; 4043 pr_buf->th_doacross_info[last++] = dims[j].lo; 4044 pr_buf->th_doacross_info[last++] = dims[j].up; 4045 pr_buf->th_doacross_info[last++] = dims[j].st; 4046 } 4047 4048 // Compute total trip count. 4049 // Start with range of dims[0] which we don't need to keep in the buffer. 
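// Worked example of the computation below (illustration only, no extra logic):
// for dims[0] = {.lo = 2, .up = 8, .st = 2} the first range is
// (8 - 2) / 2 + 1 = 4 iterations {2, 4, 6, 8}; with a second dimension whose
// kept range_length is 3, trace_count becomes 4 * 3 = 12, i.e. one flag bit
// per iteration of the collapsed loop nest.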
4050 if (dims[0].st == 1) { // most common case 4051 trace_count = dims[0].up - dims[0].lo + 1; 4052 } else if (dims[0].st > 0) { 4053 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4054 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4055 } else { // negative increment 4056 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4057 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4058 } 4059 for (j = 1; j < num_dims; ++j) { 4060 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4061 } 4062 KMP_DEBUG_ASSERT(trace_count > 0); 4063 4064 // Check if shared buffer is not occupied by other loop (idx - 4065 // __kmp_dispatch_num_buffers) 4066 if (idx != sh_buf->doacross_buf_idx) { 4067 // Shared buffer is occupied, wait for it to be free 4068 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4069 __kmp_eq_4, NULL); 4070 } 4071 #if KMP_32_BIT_ARCH 4072 // Check if we are the first thread. After the CAS the first thread gets 0, 4073 // others get 1 if initialization is in progress, allocated pointer otherwise. 4074 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4075 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4076 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4077 #else 4078 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4079 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4080 #endif 4081 if (flags == NULL) { 4082 // we are the first thread, allocate the array of flags 4083 size_t size = 4084 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4085 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4086 KMP_MB(); 4087 sh_buf->doacross_flags = flags; 4088 } else if (flags == (kmp_uint32 *)1) { 4089 #if KMP_32_BIT_ARCH 4090 // initialization is still in progress, need to wait 4091 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4092 #else 4093 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4094 #endif 4095 KMP_YIELD(TRUE); 4096 KMP_MB(); 4097 } else { 4098 KMP_MB(); 4099 } 4100 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4101 pr_buf->th_doacross_flags = 4102 sh_buf->doacross_flags; // save private copy in order to not 4103 // touch shared buffer on each iteration 4104 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4105 } 4106 4107 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4108 __kmp_assert_valid_gtid(gtid); 4109 kmp_int64 shft; 4110 size_t num_dims, i; 4111 kmp_uint32 flag; 4112 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4113 kmp_info_t *th = __kmp_threads[gtid]; 4114 kmp_team_t *team = th->th.th_team; 4115 kmp_disp_t *pr_buf; 4116 kmp_int64 lo, up, st; 4117 4118 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4119 if (team->t.t_serialized) { 4120 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4121 return; // no dependencies if team is serialized 4122 } 4123 4124 // calculate sequential iteration number and check out-of-bounds condition 4125 pr_buf = th->th.th_dispatch; 4126 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4127 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4128 lo = pr_buf->th_doacross_info[2]; 4129 up = pr_buf->th_doacross_info[3]; 4130 st = pr_buf->th_doacross_info[4]; 4131 #if OMPT_SUPPORT && OMPT_OPTIONAL 4132 ompt_dependence_t deps[num_dims]; 4133 #endif 4134 if (st == 1) { // most common case 4135 if (vec[0] < lo || vec[0] > up) { 4136 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4137 "bounds [%lld,%lld]\n", 4138 gtid, vec[0], lo, up)); 4139 return; 4140 } 4141 iter_number = vec[0] - lo; 4142 } else if (st > 0) { 4143 if (vec[0] < lo || vec[0] > up) { 4144 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4145 "bounds [%lld,%lld]\n", 4146 gtid, vec[0], lo, up)); 4147 return; 4148 } 4149 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4150 } else { // negative increment 4151 if (vec[0] > lo || vec[0] < up) { 4152 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4153 "bounds [%lld,%lld]\n", 4154 gtid, vec[0], lo, up)); 4155 return; 4156 } 4157 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4158 } 4159 #if OMPT_SUPPORT && OMPT_OPTIONAL 4160 deps[0].variable.value = iter_number; 4161 deps[0].dependence_type = ompt_dependence_type_sink; 4162 #endif 4163 for (i = 1; i < num_dims; ++i) { 4164 kmp_int64 iter, ln; 4165 size_t j = i * 4; 4166 ln = pr_buf->th_doacross_info[j + 1]; 4167 lo = pr_buf->th_doacross_info[j + 2]; 4168 up = pr_buf->th_doacross_info[j + 3]; 4169 st = pr_buf->th_doacross_info[j + 4]; 4170 if (st == 1) { 4171 if (vec[i] < lo || vec[i] > up) { 4172 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4173 "bounds [%lld,%lld]\n", 4174 gtid, vec[i], lo, up)); 4175 return; 4176 } 4177 iter = vec[i] - lo; 4178 } else if (st > 0) { 4179 if (vec[i] < lo || vec[i] > up) { 4180 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4181 "bounds [%lld,%lld]\n", 4182 gtid, vec[i], lo, up)); 4183 return; 4184 } 4185 iter = (kmp_uint64)(vec[i] - lo) / st; 4186 } else { // st < 0 4187 if (vec[i] > lo || vec[i] < up) { 4188 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4189 "bounds [%lld,%lld]\n", 4190 gtid, vec[i], lo, up)); 4191 return; 4192 } 4193 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4194 } 4195 iter_number = iter + ln * iter_number; 4196 #if OMPT_SUPPORT && OMPT_OPTIONAL 4197 deps[i].variable.value = iter; 4198 deps[i].dependence_type = ompt_dependence_type_sink; 4199 #endif 4200 } 4201 shft = iter_number % 32; // use 32-bit granularity 4202 iter_number >>= 5; // divided by 32 4203 flag = 1 << shft; 4204 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4205 KMP_YIELD(TRUE); 4206 } 4207 KMP_MB(); 4208 #if OMPT_SUPPORT && OMPT_OPTIONAL 4209 if (ompt_enabled.ompt_callback_dependences) { 4210 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4211 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4212 } 4213 #endif 4214 KA_TRACE(20, 4215 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4216 gtid, (iter_number << 5) + shft)); 4217 } 4218 4219 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4220 __kmp_assert_valid_gtid(gtid); 4221 kmp_int64 shft; 4222 size_t num_dims, i; 4223 kmp_uint32 flag; 4224 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4225 kmp_info_t *th = __kmp_threads[gtid]; 4226 kmp_team_t *team = th->th.th_team; 4227 kmp_disp_t *pr_buf; 4228 kmp_int64 lo, st; 4229 4230 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4231 if (team->t.t_serialized) { 4232 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4233 return; // no dependencies if team is serialized 4234 } 4235 4236 // calculate sequential iteration number (same as in "wait" but no 4237 // out-of-bounds checks) 4238 pr_buf = th->th.th_dispatch; 4239 
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4240 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4241 lo = pr_buf->th_doacross_info[2]; 4242 st = pr_buf->th_doacross_info[4]; 4243 #if OMPT_SUPPORT && OMPT_OPTIONAL 4244 ompt_dependence_t deps[num_dims]; 4245 #endif 4246 if (st == 1) { // most common case 4247 iter_number = vec[0] - lo; 4248 } else if (st > 0) { 4249 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4250 } else { // negative increment 4251 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4252 } 4253 #if OMPT_SUPPORT && OMPT_OPTIONAL 4254 deps[0].variable.value = iter_number; 4255 deps[0].dependence_type = ompt_dependence_type_source; 4256 #endif 4257 for (i = 1; i < num_dims; ++i) { 4258 kmp_int64 iter, ln; 4259 size_t j = i * 4; 4260 ln = pr_buf->th_doacross_info[j + 1]; 4261 lo = pr_buf->th_doacross_info[j + 2]; 4262 st = pr_buf->th_doacross_info[j + 4]; 4263 if (st == 1) { 4264 iter = vec[i] - lo; 4265 } else if (st > 0) { 4266 iter = (kmp_uint64)(vec[i] - lo) / st; 4267 } else { // st < 0 4268 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4269 } 4270 iter_number = iter + ln * iter_number; 4271 #if OMPT_SUPPORT && OMPT_OPTIONAL 4272 deps[i].variable.value = iter; 4273 deps[i].dependence_type = ompt_dependence_type_source; 4274 #endif 4275 } 4276 #if OMPT_SUPPORT && OMPT_OPTIONAL 4277 if (ompt_enabled.ompt_callback_dependences) { 4278 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4279 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4280 } 4281 #endif 4282 shft = iter_number % 32; // use 32-bit granularity 4283 iter_number >>= 5; // divided by 32 4284 flag = 1 << shft; 4285 KMP_MB(); 4286 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4287 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4288 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4289 (iter_number << 5) + shft)); 4290 } 4291 4292 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4293 __kmp_assert_valid_gtid(gtid); 4294 kmp_int32 num_done; 4295 kmp_info_t *th = __kmp_threads[gtid]; 4296 kmp_team_t *team = th->th.th_team; 4297 kmp_disp_t *pr_buf = th->th.th_dispatch; 4298 4299 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4300 if (team->t.t_serialized) { 4301 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4302 return; // nothing to do 4303 } 4304 num_done = 4305 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4306 if (num_done == th->th.th_team_nproc) { 4307 // we are the last thread, need to free shared resources 4308 int idx = pr_buf->th_doacross_buf_idx - 1; 4309 dispatch_shared_info_t *sh_buf = 4310 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4311 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4312 (kmp_int64)&sh_buf->doacross_num_done); 4313 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4314 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4315 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4316 sh_buf->doacross_flags = NULL; 4317 sh_buf->doacross_num_done = 0; 4318 sh_buf->doacross_buf_idx += 4319 __kmp_dispatch_num_buffers; // free buffer for future re-use 4320 } 4321 // free private resources (need to keep buffer index forever) 4322 pr_buf->th_doacross_flags = NULL; 4323 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4324 pr_buf->th_doacross_info = NULL; 4325 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4326 } 4327 4328 /* omp_alloc/omp_calloc/omp_free only defined for 
C/C++, not for Fortran */ 4329 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4330 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4331 } 4332 4333 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4334 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); 4335 } 4336 4337 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4338 omp_allocator_handle_t free_allocator) { 4339 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4340 free_allocator); 4341 } 4342 4343 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4344 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4345 } 4346 4347 int __kmpc_get_target_offload(void) { 4348 if (!__kmp_init_serial) { 4349 __kmp_serial_initialize(); 4350 } 4351 return __kmp_target_offload; 4352 } 4353 4354 int __kmpc_pause_resource(kmp_pause_status_t level) { 4355 if (!__kmp_init_serial) { 4356 return 1; // Can't pause if runtime is not initialized 4357 } 4358 return __kmp_pause_resource(level); 4359 } 4360 4361 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4362 if (!__kmp_init_serial) 4363 __kmp_serial_initialize(); 4364 4365 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4366 4367 #if OMPT_SUPPORT 4368 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4369 ompt_callbacks.ompt_callback(ompt_callback_error)( 4370 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4371 OMPT_GET_RETURN_ADDRESS(0)); 4372 } 4373 #endif // OMPT_SUPPORT 4374 4375 char *src_loc; 4376 if (loc && loc->psource) { 4377 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4378 src_loc = 4379 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col); 4380 __kmp_str_loc_free(&str_loc); 4381 } else { 4382 src_loc = __kmp_str_format("unknown"); 4383 } 4384 4385 if (severity == severity_warning) 4386 KMP_WARNING(UserDirectedWarning, src_loc, message); 4387 else 4388 KMP_FATAL(UserDirectedError, src_loc, message); 4389 4390 __kmp_str_free(&src_loc); 4391 } 4392
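// A small usage sketch for the allocator entry points defined above (user
// code; the choice of omp_default_mem_alloc is illustrative):
//
//   #include <omp.h>
//   void demo(void) {
//     double *buf = (double *)omp_alloc(1024 * sizeof(double),
//                                       omp_default_mem_alloc);
//     if (buf != NULL) {
//       buf[0] = 42.0;
//       omp_free(buf, omp_default_mem_alloc);
//     }
//   }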