/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
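 *
 * In release builds the result is trivially TRUE. In debug builds (see the
 * KMP_DEBUG branch below) the answer also honours the __kmp_par_range
 * filtering of parallel regions by file name, routine name and line-number
 * range parsed from loc->psource, and may therefore be FALSE for regions
 * excluded by that filter.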
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p."
                 " cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
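
A hedged sketch of the pairing a compiler typically emits for a parallel region
whose <tt>if</tt> clause evaluated to false (illustrative only; the exact
lowering is compiler dependent):
@code
__kmpc_serialized_parallel(&loc, gtid);
// ... body of the (serialized) parallel region, run by the encountering thread
__kmpc_end_serialized_parallel(&loc, gtid);
@endcode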
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset (clear) the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
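      // Restore the x87 FPU control word and MXCSR saved in the serialized
      // team (guarded above by __kmp_inherit_fp_control and
      // t_fp_control_saved).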
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
  // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be
  // used. We shouldn't need it, though, since the ABI rules require that
  // * If the compiler generates NGO stores it also generates the fence
  // * If users hand-code NGO stores they should insert the fence
  // therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
  // Nothing to see here move along
#elif KMP_ARCH_PPC64
  // Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
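
A hedged sketch of the guard a compiler might emit around the block
(illustrative only):
@code
if (__kmpc_master(&loc, gtid)) {
  // ... code for the master block ...
  __kmpc_end_master(&loc, gtid);
}
@endcode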
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param filter result of evaluating filter clause on thread global_tid, or zero
if no filter clause present
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
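
A hedged sketch for a <tt>masked</tt> construct with a <tt>filter</tt> clause
(illustrative only; <tt>expr_value</tt> stands for the already evaluated filter
expression):
@code
if (__kmpc_masked(&loc, gtid, expr_value)) {
  // ... code for the masked block ...
  __kmpc_end_masked(&loc, gtid);
}
@endcode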
*/
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
  int status = 0;
  int tid;
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);
  if (tid == filter) {
    KMP_COUNT_BLOCK(OMP_MASKED);
    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>masked</tt> region. This should only be called by the
thread that executes the <tt>masked</tt> region.
*/
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
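
A hedged sketch of the calls that bracket the body of an <tt>ordered</tt>
region inside an ordered loop (illustrative only):
@code
__kmpc_ordered(&loc, gtid);
// ... body executed in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode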
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
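  // The lock is installed into *crit with the compare-and-store below; if
  // another thread wins that race, the unclaimed lock is simply left for
  // cleanup at shutdown and the winner's lock is used instead.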
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      kmp_uint32 spins; \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      do { \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD(TRUE); \
        } else { \
          KMP_YIELD_SPIN(spins); \
        } \
        __kmp_spin_backoff(&backoff); \
      } while ( \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    } \
    KMP_FSYNC_ACQUIRED(l); \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!cond) { \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | \
                                             KMP_LOCK_BUSY(1, futex))) { \
          continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      } \
      kmp_int32 rc; \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
        continue; \
      } \
      gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
  }

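// A note on these fast-path macros: when KMP_USE_INLINED_TAS /
// KMP_USE_INLINED_FUTEX are enabled, the critical-section entry points below
// (__kmpc_critical_with_hint / __kmpc_end_critical) expand the acquire and
// release variants inline for the common TAS/futex lock kinds instead of
// dispatching through KMP_D_LOCK_FUNC.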
// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember the allocated locks in a table in order to free them in
    // __kmp_cleanup().
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
    // __kmp_itt_critical_creating() should be called *before* the first usage
    // of the underlying lock. It is the only place where we can guarantee it.
    // There is a chance the lock will be destroyed with no usage, but that is
    // not a problem, because this is not a real event seen by the user; it
    // merely sets a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
      // Let ITT know the lock is destroyed and the same memory location may be
      // reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
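
A hedged sketch of a named critical section as a compiler might emit it
(illustrative only; the statically allocated, zero-initialized
<tt>kmp_critical_name</tt> object caches the lock used for this name):
@code
static kmp_critical_name lock_for_name; // zero-initialized
__kmpc_critical(&loc, gtid, &lock_for_name);
// ... protected code ...
__kmpc_end_critical(&loc, gtid, &lock_for_name);
@endcode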
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ?
                           KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
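  // Lazy initialization: a zero value in *crit means the critical name has not
  // been claimed yet; depending on the lock sequence chosen from the hint, the
  // code below installs either a direct lock tag or a pointer to an indirect
  // lock (see __kmp_map_hint_to_lock and __kmp_init_indirect_csptr).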
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(lockseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lockseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
  if (locktag) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
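
A hedged sketch of the expected pairing (illustrative only):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... code executed only by the primary thread ...
  __kmpc_end_barrier_master(&loc, gtid); // releases the threads still waiting
}
@endcode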
1711 */ 1712 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1713 int status;
1714 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1715 __kmp_assert_valid_gtid(global_tid); 1716
1717 if (!TCR_4(__kmp_init_parallel)) 1718 __kmp_parallel_initialize(); 1719
1720 __kmp_resume_if_soft_paused(); 1721
1722 if (__kmp_env_consistency_check) 1723 __kmp_check_barrier(global_tid, ct_barrier, loc); 1724
1725 #if OMPT_SUPPORT 1726 ompt_frame_t *ompt_frame; 1727 if (ompt_enabled.enabled) {
1728 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1729 if (ompt_frame->enter_frame.ptr == NULL) 1730 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1731 }
1732 OMPT_STORE_RETURN_ADDRESS(global_tid); 1733 #endif
1734 #if USE_ITT_NOTIFY 1735 __kmp_threads[global_tid]->th.th_ident = loc; 1736 #endif
1737 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1738 #if OMPT_SUPPORT && OMPT_OPTIONAL 1739 if (ompt_enabled.enabled) { 1740 ompt_frame->enter_frame = ompt_data_none; 1741 } 1742 #endif 1743
1744 return (status != 0) ? 0 : 1; 1745 } 1746
1747 /*! 1748 @ingroup SYNCHRONIZATION 1749 @param loc source location information 1750 @param global_tid thread id. 1751
1752 Complete the execution of a combined barrier and master. This function should
1753 only be called at the completion of the <tt>master</tt> code. Other threads will
1754 still be waiting at the barrier and this call releases them. 1755 */
1756 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1757 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1758 __kmp_assert_valid_gtid(global_tid);
1759 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1760 } 1761
1762 /*! 1763 @ingroup SYNCHRONIZATION 1764 @param loc source location information 1765 @param global_tid thread id.
1766 @return one if the thread should execute the master block, zero otherwise 1767
1768 Start execution of a combined barrier and master(nowait) construct.
1769 The barrier is executed inside this function.
1770 There is no equivalent "end" function, since the barrier executed here is not a split barrier: no threads are left waiting when the master block finishes, so there is nothing for an "end" call to release. 1771 */
1772 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1773 kmp_int32 ret;
1774 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1775 __kmp_assert_valid_gtid(global_tid); 1776
1777 if (!TCR_4(__kmp_init_parallel)) 1778 __kmp_parallel_initialize(); 1779
1780 __kmp_resume_if_soft_paused(); 1781
1782 if (__kmp_env_consistency_check) { 1783 if (loc == 0) {
1784 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
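// Presumably: a null ident means the compiler passed no source location, so
// the consistency check cannot point at the offending user construct and this
// generic warning is the best that can be reported.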
1785 } 1786 __kmp_check_barrier(global_tid, ct_barrier, loc); 1787 } 1788 1789 #if OMPT_SUPPORT 1790 ompt_frame_t *ompt_frame; 1791 if (ompt_enabled.enabled) { 1792 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1793 if (ompt_frame->enter_frame.ptr == NULL) 1794 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1795 } 1796 OMPT_STORE_RETURN_ADDRESS(global_tid); 1797 #endif 1798 #if USE_ITT_NOTIFY 1799 __kmp_threads[global_tid]->th.th_ident = loc; 1800 #endif 1801 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1802 #if OMPT_SUPPORT && OMPT_OPTIONAL 1803 if (ompt_enabled.enabled) { 1804 ompt_frame->enter_frame = ompt_data_none; 1805 } 1806 #endif 1807 1808 ret = __kmpc_master(loc, global_tid); 1809 1810 if (__kmp_env_consistency_check) { 1811 /* there's no __kmpc_end_master called; so the (stats) */ 1812 /* actions of __kmpc_end_master are done here */ 1813 if (ret) { 1814 /* only one thread should do the pop since only */ 1815 /* one did the push (see __kmpc_master()) */ 1816 __kmp_pop_sync(global_tid, ct_master, loc); 1817 } 1818 } 1819 1820 return (ret); 1821 } 1822 1823 /* The BARRIER for a SINGLE process section is always explicit */ 1824 /*! 1825 @ingroup WORK_SHARING 1826 @param loc source location information 1827 @param global_tid global thread number 1828 @return One if this thread should execute the single construct, zero otherwise. 1829 1830 Test whether to execute a <tt>single</tt> construct. 1831 There are no implicit barriers in the two "single" calls, rather the compiler 1832 should introduce an explicit barrier if it is required. 1833 */ 1834 1835 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1836 __kmp_assert_valid_gtid(global_tid); 1837 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1838 1839 if (rc) { 1840 // We are going to execute the single statement, so we should count it. 1841 KMP_COUNT_BLOCK(OMP_SINGLE); 1842 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1843 } 1844 1845 #if OMPT_SUPPORT && OMPT_OPTIONAL 1846 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1847 kmp_team_t *team = this_thr->th.th_team; 1848 int tid = __kmp_tid_from_gtid(global_tid); 1849 1850 if (ompt_enabled.enabled) { 1851 if (rc) { 1852 if (ompt_enabled.ompt_callback_work) { 1853 ompt_callbacks.ompt_callback(ompt_callback_work)( 1854 ompt_work_single_executor, ompt_scope_begin, 1855 &(team->t.ompt_team_info.parallel_data), 1856 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1857 1, OMPT_GET_RETURN_ADDRESS(0)); 1858 } 1859 } else { 1860 if (ompt_enabled.ompt_callback_work) { 1861 ompt_callbacks.ompt_callback(ompt_callback_work)( 1862 ompt_work_single_other, ompt_scope_begin, 1863 &(team->t.ompt_team_info.parallel_data), 1864 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1865 1, OMPT_GET_RETURN_ADDRESS(0)); 1866 ompt_callbacks.ompt_callback(ompt_callback_work)( 1867 ompt_work_single_other, ompt_scope_end, 1868 &(team->t.ompt_team_info.parallel_data), 1869 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1870 1, OMPT_GET_RETURN_ADDRESS(0)); 1871 } 1872 } 1873 } 1874 #endif 1875 1876 return rc; 1877 } 1878 1879 /*! 1880 @ingroup WORK_SHARING 1881 @param loc source location information 1882 @param global_tid global thread number 1883 1884 Mark the end of a <tt>single</tt> construct. This function should 1885 only be called by the thread that executed the block of code protected 1886 by the `single` construct. 
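
A hedged sketch (placeholder names, not the exact code generation) of a
<tt>single</tt> lowering; the trailing barrier is only emitted when the
construct has no <tt>nowait</tt> clause:
@code
if (__kmpc_single(&loc, gtid)) {
  body();
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // omitted for single ... nowait
@endcode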
1887 */ 1888 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1889 __kmp_assert_valid_gtid(global_tid); 1890 __kmp_exit_single(global_tid); 1891 KMP_POP_PARTITIONED_TIMER(); 1892 1893 #if OMPT_SUPPORT && OMPT_OPTIONAL 1894 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1895 kmp_team_t *team = this_thr->th.th_team; 1896 int tid = __kmp_tid_from_gtid(global_tid); 1897 1898 if (ompt_enabled.ompt_callback_work) { 1899 ompt_callbacks.ompt_callback(ompt_callback_work)( 1900 ompt_work_single_executor, ompt_scope_end, 1901 &(team->t.ompt_team_info.parallel_data), 1902 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1903 OMPT_GET_RETURN_ADDRESS(0)); 1904 } 1905 #endif 1906 } 1907 1908 /*! 1909 @ingroup WORK_SHARING 1910 @param loc Source location 1911 @param global_tid Global thread id 1912 1913 Mark the end of a statically scheduled loop. 1914 */ 1915 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1916 KMP_POP_PARTITIONED_TIMER(); 1917 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1918 1919 #if OMPT_SUPPORT && OMPT_OPTIONAL 1920 if (ompt_enabled.ompt_callback_work) { 1921 ompt_work_t ompt_work_type = ompt_work_loop; 1922 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1923 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1924 // Determine workshare type 1925 if (loc != NULL) { 1926 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1927 ompt_work_type = ompt_work_loop; 1928 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1929 ompt_work_type = ompt_work_sections; 1930 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1931 ompt_work_type = ompt_work_distribute; 1932 } else { 1933 // use default set above. 1934 // a warning about this case is provided in __kmpc_for_static_init 1935 } 1936 KMP_DEBUG_ASSERT(ompt_work_type); 1937 } 1938 ompt_callbacks.ompt_callback(ompt_callback_work)( 1939 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1940 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1941 } 1942 #endif 1943 if (__kmp_env_consistency_check) 1944 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1945 } 1946 1947 // User routines which take C-style arguments (call by value) 1948 // different from the Fortran equivalent routines 1949 1950 void ompc_set_num_threads(int arg) { 1951 // !!!!! TODO: check the per-task binding 1952 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1953 } 1954 1955 void ompc_set_dynamic(int flag) { 1956 kmp_info_t *thread; 1957 1958 /* For the thread-private implementation of the internal controls */ 1959 thread = __kmp_entry_thread(); 1960 1961 __kmp_save_internal_controls(thread); 1962 1963 set__dynamic(thread, flag ? true : false); 1964 } 1965 1966 void ompc_set_nested(int flag) { 1967 kmp_info_t *thread; 1968 1969 /* For the thread-private internal controls implementation */ 1970 thread = __kmp_entry_thread(); 1971 1972 __kmp_save_internal_controls(thread); 1973 1974 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1975 } 1976 1977 void ompc_set_max_active_levels(int max_active_levels) { 1978 /* TO DO */ 1979 /* we want per-task implementation of this internal control */ 1980 1981 /* For the per-thread internal controls implementation */ 1982 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1983 } 1984 1985 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1986 // !!!!! 
TODO: check the per-task binding 1987 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1988 } 1989 1990 int ompc_get_ancestor_thread_num(int level) { 1991 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1992 } 1993 1994 int ompc_get_team_size(int level) { 1995 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1996 } 1997 1998 /* OpenMP 5.0 Affinity Format API */ 1999 2000 void ompc_set_affinity_format(char const *format) { 2001 if (!__kmp_init_serial) { 2002 __kmp_serial_initialize(); 2003 } 2004 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2005 format, KMP_STRLEN(format) + 1); 2006 } 2007 2008 size_t ompc_get_affinity_format(char *buffer, size_t size) { 2009 size_t format_size; 2010 if (!__kmp_init_serial) { 2011 __kmp_serial_initialize(); 2012 } 2013 format_size = KMP_STRLEN(__kmp_affinity_format); 2014 if (buffer && size) { 2015 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2016 format_size + 1); 2017 } 2018 return format_size; 2019 } 2020 2021 void ompc_display_affinity(char const *format) { 2022 int gtid; 2023 if (!TCR_4(__kmp_init_middle)) { 2024 __kmp_middle_initialize(); 2025 } 2026 gtid = __kmp_get_gtid(); 2027 __kmp_aux_display_affinity(gtid, format); 2028 } 2029 2030 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 2031 char const *format) { 2032 int gtid; 2033 size_t num_required; 2034 kmp_str_buf_t capture_buf; 2035 if (!TCR_4(__kmp_init_middle)) { 2036 __kmp_middle_initialize(); 2037 } 2038 gtid = __kmp_get_gtid(); 2039 __kmp_str_buf_init(&capture_buf); 2040 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2041 if (buffer && buf_size) { 2042 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2043 capture_buf.used + 1); 2044 } 2045 __kmp_str_buf_free(&capture_buf); 2046 return num_required; 2047 } 2048 2049 void kmpc_set_stacksize(int arg) { 2050 // __kmp_aux_set_stacksize initializes the library if needed 2051 __kmp_aux_set_stacksize(arg); 2052 } 2053 2054 void kmpc_set_stacksize_s(size_t arg) { 2055 // __kmp_aux_set_stacksize initializes the library if needed 2056 __kmp_aux_set_stacksize(arg); 2057 } 2058 2059 void kmpc_set_blocktime(int arg) { 2060 int gtid, tid; 2061 kmp_info_t *thread; 2062 2063 gtid = __kmp_entry_gtid(); 2064 tid = __kmp_tid_from_gtid(gtid); 2065 thread = __kmp_thread_from_gtid(gtid); 2066 2067 __kmp_aux_set_blocktime(arg, thread, tid); 2068 } 2069 2070 void kmpc_set_library(int arg) { 2071 // __kmp_user_set_library initializes the library if needed 2072 __kmp_user_set_library((enum library_type)arg); 2073 } 2074 2075 void kmpc_set_defaults(char const *str) { 2076 // __kmp_aux_set_defaults initializes the library if needed 2077 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2078 } 2079 2080 void kmpc_set_disp_num_buffers(int arg) { 2081 // ignore after initialization because some teams have already 2082 // allocated dispatch buffers 2083 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2084 arg <= KMP_MAX_DISP_NUM_BUFF) { 2085 __kmp_dispatch_num_buffers = arg; 2086 } 2087 } 2088 2089 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2090 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2091 return -1; 2092 #else 2093 if (!TCR_4(__kmp_init_middle)) { 2094 __kmp_middle_initialize(); 2095 } 2096 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2097 #endif 2098 } 2099 2100 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2101 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2102 return -1; 2103 #else 
2104 if (!TCR_4(__kmp_init_middle)) { 2105 __kmp_middle_initialize(); 2106 } 2107 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2108 #endif 2109 } 2110 2111 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2112 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2113 return -1; 2114 #else 2115 if (!TCR_4(__kmp_init_middle)) { 2116 __kmp_middle_initialize(); 2117 } 2118 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2119 #endif 2120 } 2121 2122 /* -------------------------------------------------------------------------- */ 2123 /*! 2124 @ingroup THREADPRIVATE 2125 @param loc source location information 2126 @param gtid global thread number 2127 @param cpy_size size of the cpy_data buffer 2128 @param cpy_data pointer to data to be copied 2129 @param cpy_func helper function to call for copying data 2130 @param didit flag variable: 1=single thread; 0=not single thread 2131 2132 __kmpc_copyprivate implements the interface for the private data broadcast 2133 needed for the copyprivate clause associated with a single region in an 2134 OpenMP<sup>*</sup> program (both C and Fortran). 2135 All threads participating in the parallel region call this routine. 2136 One of the threads (called the single thread) should have the <tt>didit</tt> 2137 variable set to 1 and all other threads should have that variable set to 0. 2138 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2139 2140 The OpenMP specification forbids the use of nowait on the single region when a 2141 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2142 barrier internally to avoid race conditions, so the code generation for the 2143 single region should avoid generating a barrier after the call to @ref 2144 __kmpc_copyprivate. 2145 2146 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2147 The <tt>loc</tt> parameter is a pointer to source location information. 2148 2149 Internal implementation: The single thread will first copy its descriptor 2150 address (cpy_data) to a team-private location, then the other threads will each 2151 call the function pointed to by the parameter cpy_func, which carries out the 2152 copy by copying the data using the cpy_data buffer. 2153 2154 The cpy_func routine used for the copy and the contents of the data area defined 2155 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2156 to be done. For instance, the cpy_data buffer can hold the actual data to be 2157 copied or it may hold a list of pointers to the data. The cpy_func routine must 2158 interpret the cpy_data buffer appropriately. 2159 2160 The interface to cpy_func is as follows: 2161 @code 2162 void cpy_func( void *destination, void *source ) 2163 @endcode 2164 where void *destination is the cpy_data pointer for the thread being copied to 2165 and void *source is the cpy_data pointer for the thread being copied from. 
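
As a hedged, hypothetical example, broadcasting one integer can pass the
address of each thread's private variable as cpy_data together with a trivial
copy routine; after the call every thread's variable holds the value written by
the single thread:
@code
static void copy_int(void *destination, void *source) {
  // destination/source are the receiving/broadcasting threads' cpy_data pointers
  *(int *)destination = *(int *)source;
}
@endcode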
2166 */ 2167 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2168 void *cpy_data, void (*cpy_func)(void *, void *), 2169 kmp_int32 didit) { 2170 void **data_ptr; 2171 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2172 __kmp_assert_valid_gtid(gtid); 2173 2174 KMP_MB(); 2175 2176 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2177 2178 if (__kmp_env_consistency_check) { 2179 if (loc == 0) { 2180 KMP_WARNING(ConstructIdentInvalid); 2181 } 2182 } 2183 2184 // ToDo: Optimize the following two barriers into some kind of split barrier 2185 2186 if (didit) 2187 *data_ptr = cpy_data; 2188 2189 #if OMPT_SUPPORT 2190 ompt_frame_t *ompt_frame; 2191 if (ompt_enabled.enabled) { 2192 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2193 if (ompt_frame->enter_frame.ptr == NULL) 2194 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2195 } 2196 OMPT_STORE_RETURN_ADDRESS(gtid); 2197 #endif 2198 /* This barrier is not a barrier region boundary */ 2199 #if USE_ITT_NOTIFY 2200 __kmp_threads[gtid]->th.th_ident = loc; 2201 #endif 2202 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2203 2204 if (!didit) 2205 (*cpy_func)(cpy_data, *data_ptr); 2206 2207 // Consider next barrier a user-visible barrier for barrier region boundaries 2208 // Nesting checks are already handled by the single construct checks 2209 { 2210 #if OMPT_SUPPORT 2211 OMPT_STORE_RETURN_ADDRESS(gtid); 2212 #endif 2213 #if USE_ITT_NOTIFY 2214 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2215 // tasks can overwrite the location) 2216 #endif 2217 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2218 #if OMPT_SUPPORT && OMPT_OPTIONAL 2219 if (ompt_enabled.enabled) { 2220 ompt_frame->enter_frame = ompt_data_none; 2221 } 2222 #endif 2223 } 2224 } 2225 2226 /* -------------------------------------------------------------------------- */ 2227 2228 #define INIT_LOCK __kmp_init_user_lock_with_checks 2229 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2230 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2231 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2232 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2233 #define ACQUIRE_NESTED_LOCK_TIMED \ 2234 __kmp_acquire_nested_user_lock_with_checks_timed 2235 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2236 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2237 #define TEST_LOCK __kmp_test_user_lock_with_checks 2238 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2239 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2240 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2241 2242 // TODO: Make check abort messages use location info & pass it into 2243 // with_checks routines 2244 2245 #if KMP_USE_DYNAMIC_LOCK 2246 2247 // internal lock initializer 2248 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2249 kmp_dyna_lockseq_t seq) { 2250 if (KMP_IS_D_LOCK(seq)) { 2251 KMP_INIT_D_LOCK(lock, seq); 2252 #if USE_ITT_BUILD 2253 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2254 #endif 2255 } else { 2256 KMP_INIT_I_LOCK(lock, seq); 2257 #if USE_ITT_BUILD 2258 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2259 __kmp_itt_lock_creating(ilk->lock, loc); 2260 #endif 2261 } 2262 } 2263 2264 // internal nest lock initializer 2265 static __forceinline void 2266 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2267 kmp_dyna_lockseq_t seq) { 2268 #if KMP_USE_TSX 2269 // Don't have nested lock implementation for speculative locks 2270 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2271 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2272 seq = __kmp_user_lock_seq; 2273 #endif 2274 switch (seq) { 2275 case lockseq_tas: 2276 seq = lockseq_nested_tas; 2277 break; 2278 #if KMP_USE_FUTEX 2279 case lockseq_futex: 2280 seq = lockseq_nested_futex; 2281 break; 2282 #endif 2283 case lockseq_ticket: 2284 seq = lockseq_nested_ticket; 2285 break; 2286 case lockseq_queuing: 2287 seq = lockseq_nested_queuing; 2288 break; 2289 case lockseq_drdpa: 2290 seq = lockseq_nested_drdpa; 2291 break; 2292 default: 2293 seq = lockseq_nested_queuing; 2294 } 2295 KMP_INIT_I_LOCK(lock, seq); 2296 #if USE_ITT_BUILD 2297 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2298 __kmp_itt_lock_creating(ilk->lock, loc); 2299 #endif 2300 } 2301 2302 /* initialize the lock with a hint */ 2303 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2304 uintptr_t hint) { 2305 KMP_DEBUG_ASSERT(__kmp_init_serial); 2306 if (__kmp_env_consistency_check && user_lock == NULL) { 2307 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2308 } 2309 2310 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2311 2312 #if OMPT_SUPPORT && OMPT_OPTIONAL 2313 // This is the case, if called from omp_init_lock_with_hint: 2314 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2315 if (!codeptr) 2316 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2317 if (ompt_enabled.ompt_callback_lock_init) { 2318 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2319 ompt_mutex_lock, (omp_lock_hint_t)hint, 2320 __ompt_get_mutex_impl_type(user_lock), 2321 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2322 } 2323 #endif 2324 } 2325 2326 /* initialize the lock with a hint */ 2327 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2328 void **user_lock, uintptr_t hint) { 2329 KMP_DEBUG_ASSERT(__kmp_init_serial); 2330 if (__kmp_env_consistency_check && user_lock == NULL) { 2331 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2332 } 2333 2334 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2335 2336 #if OMPT_SUPPORT && OMPT_OPTIONAL 2337 // This is the case, if called from omp_init_lock_with_hint: 2338 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2339 if (!codeptr) 2340 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2341 if (ompt_enabled.ompt_callback_lock_init) { 2342 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2343 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2344 __ompt_get_mutex_impl_type(user_lock), 2345 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2346 } 2347 #endif 2348 } 2349 2350 #endif // KMP_USE_DYNAMIC_LOCK 2351 2352 /* initialize the lock */ 2353 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2354 #if KMP_USE_DYNAMIC_LOCK 2355 2356 KMP_DEBUG_ASSERT(__kmp_init_serial); 2357 if (__kmp_env_consistency_check && user_lock == NULL) { 2358 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2359 } 2360 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2361 2362 #if OMPT_SUPPORT && OMPT_OPTIONAL 2363 // This is the case, if called from omp_init_lock_with_hint: 2364 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2365 if (!codeptr) 2366 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2367 if (ompt_enabled.ompt_callback_lock_init) { 2368 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2369 
ompt_mutex_lock, omp_lock_hint_none, 2370 __ompt_get_mutex_impl_type(user_lock), 2371 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2372 } 2373 #endif 2374 2375 #else // KMP_USE_DYNAMIC_LOCK 2376 2377 static char const *const func = "omp_init_lock"; 2378 kmp_user_lock_p lck; 2379 KMP_DEBUG_ASSERT(__kmp_init_serial); 2380 2381 if (__kmp_env_consistency_check) { 2382 if (user_lock == NULL) { 2383 KMP_FATAL(LockIsUninitialized, func); 2384 } 2385 } 2386 2387 KMP_CHECK_USER_LOCK_INIT(); 2388 2389 if ((__kmp_user_lock_kind == lk_tas) && 2390 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2391 lck = (kmp_user_lock_p)user_lock; 2392 } 2393 #if KMP_USE_FUTEX 2394 else if ((__kmp_user_lock_kind == lk_futex) && 2395 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2396 lck = (kmp_user_lock_p)user_lock; 2397 } 2398 #endif 2399 else { 2400 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2401 } 2402 INIT_LOCK(lck); 2403 __kmp_set_user_lock_location(lck, loc); 2404 2405 #if OMPT_SUPPORT && OMPT_OPTIONAL 2406 // This is the case, if called from omp_init_lock_with_hint: 2407 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2408 if (!codeptr) 2409 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2410 if (ompt_enabled.ompt_callback_lock_init) { 2411 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2412 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2413 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2414 } 2415 #endif 2416 2417 #if USE_ITT_BUILD 2418 __kmp_itt_lock_creating(lck); 2419 #endif /* USE_ITT_BUILD */ 2420 2421 #endif // KMP_USE_DYNAMIC_LOCK 2422 } // __kmpc_init_lock 2423 2424 /* initialize the lock */ 2425 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2426 #if KMP_USE_DYNAMIC_LOCK 2427 2428 KMP_DEBUG_ASSERT(__kmp_init_serial); 2429 if (__kmp_env_consistency_check && user_lock == NULL) { 2430 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2431 } 2432 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2433 2434 #if OMPT_SUPPORT && OMPT_OPTIONAL 2435 // This is the case, if called from omp_init_lock_with_hint: 2436 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2437 if (!codeptr) 2438 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2439 if (ompt_enabled.ompt_callback_lock_init) { 2440 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2441 ompt_mutex_nest_lock, omp_lock_hint_none, 2442 __ompt_get_mutex_impl_type(user_lock), 2443 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2444 } 2445 #endif 2446 2447 #else // KMP_USE_DYNAMIC_LOCK 2448 2449 static char const *const func = "omp_init_nest_lock"; 2450 kmp_user_lock_p lck; 2451 KMP_DEBUG_ASSERT(__kmp_init_serial); 2452 2453 if (__kmp_env_consistency_check) { 2454 if (user_lock == NULL) { 2455 KMP_FATAL(LockIsUninitialized, func); 2456 } 2457 } 2458 2459 KMP_CHECK_USER_LOCK_INIT(); 2460 2461 if ((__kmp_user_lock_kind == lk_tas) && 2462 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2463 OMP_NEST_LOCK_T_SIZE)) { 2464 lck = (kmp_user_lock_p)user_lock; 2465 } 2466 #if KMP_USE_FUTEX 2467 else if ((__kmp_user_lock_kind == lk_futex) && 2468 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2469 OMP_NEST_LOCK_T_SIZE)) { 2470 lck = (kmp_user_lock_p)user_lock; 2471 } 2472 #endif 2473 else { 2474 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2475 } 2476 2477 INIT_NESTED_LOCK(lck); 2478 __kmp_set_user_lock_location(lck, loc); 2479 2480 #if OMPT_SUPPORT && OMPT_OPTIONAL 2481 // This is the case, if called from omp_init_lock_with_hint: 2482 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2483 if (!codeptr) 2484 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2485 if (ompt_enabled.ompt_callback_lock_init) { 2486 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2487 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2488 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2489 } 2490 #endif 2491 2492 #if USE_ITT_BUILD 2493 __kmp_itt_lock_creating(lck); 2494 #endif /* USE_ITT_BUILD */ 2495 2496 #endif // KMP_USE_DYNAMIC_LOCK 2497 } // __kmpc_init_nest_lock 2498 2499 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2500 #if KMP_USE_DYNAMIC_LOCK 2501 2502 #if USE_ITT_BUILD 2503 kmp_user_lock_p lck; 2504 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2505 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2506 } else { 2507 lck = (kmp_user_lock_p)user_lock; 2508 } 2509 __kmp_itt_lock_destroyed(lck); 2510 #endif 2511 #if OMPT_SUPPORT && OMPT_OPTIONAL 2512 // This is the case, if called from omp_init_lock_with_hint: 2513 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2514 if (!codeptr) 2515 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2516 if (ompt_enabled.ompt_callback_lock_destroy) { 2517 kmp_user_lock_p lck; 2518 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2519 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2520 } else { 2521 lck = (kmp_user_lock_p)user_lock; 2522 } 2523 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2524 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2525 } 2526 #endif 2527 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2528 #else 2529 kmp_user_lock_p lck; 2530 2531 if ((__kmp_user_lock_kind == lk_tas) && 2532 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2533 lck = (kmp_user_lock_p)user_lock; 2534 } 2535 #if KMP_USE_FUTEX 2536 else if ((__kmp_user_lock_kind == lk_futex) && 2537 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2538 lck = (kmp_user_lock_p)user_lock; 2539 } 2540 #endif 2541 else { 2542 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2543 } 2544 2545 #if OMPT_SUPPORT && OMPT_OPTIONAL 2546 // This is the case, if called from omp_init_lock_with_hint: 2547 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2548 if (!codeptr) 2549 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2550 if (ompt_enabled.ompt_callback_lock_destroy) { 2551 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2552 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2553 } 2554 #endif 2555 2556 #if USE_ITT_BUILD 2557 __kmp_itt_lock_destroyed(lck); 2558 #endif /* USE_ITT_BUILD */ 2559 DESTROY_LOCK(lck); 2560 2561 if ((__kmp_user_lock_kind == lk_tas) && 2562 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2563 ; 2564 } 2565 #if KMP_USE_FUTEX 2566 else if ((__kmp_user_lock_kind == lk_futex) && 2567 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2568 ; 2569 } 2570 #endif 2571 else { 2572 __kmp_user_lock_free(user_lock, gtid, lck); 2573 } 2574 #endif // KMP_USE_DYNAMIC_LOCK 2575 } // __kmpc_destroy_lock 2576 2577 /* destroy the lock */ 2578 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2579 #if KMP_USE_DYNAMIC_LOCK 2580 2581 #if USE_ITT_BUILD 2582 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2583 __kmp_itt_lock_destroyed(ilk->lock); 2584 #endif 2585 #if OMPT_SUPPORT && OMPT_OPTIONAL 2586 // This is the case, if called from omp_init_lock_with_hint: 2587 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2588 if (!codeptr) 2589 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2590 if (ompt_enabled.ompt_callback_lock_destroy) { 2591 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2592 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2593 } 2594 #endif 2595 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2596 2597 #else // KMP_USE_DYNAMIC_LOCK 2598 2599 kmp_user_lock_p lck; 2600 2601 if ((__kmp_user_lock_kind == lk_tas) && 2602 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2603 OMP_NEST_LOCK_T_SIZE)) { 2604 lck = (kmp_user_lock_p)user_lock; 2605 } 2606 #if KMP_USE_FUTEX 2607 else if ((__kmp_user_lock_kind == lk_futex) && 2608 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2609 OMP_NEST_LOCK_T_SIZE)) { 2610 lck = (kmp_user_lock_p)user_lock; 2611 } 2612 #endif 2613 else { 2614 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2615 } 2616 2617 #if OMPT_SUPPORT && OMPT_OPTIONAL 2618 // This is the case, if called from omp_init_lock_with_hint: 2619 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2620 if (!codeptr) 2621 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2622 if (ompt_enabled.ompt_callback_lock_destroy) { 2623 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2624 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2625 } 2626 #endif 2627 2628 #if USE_ITT_BUILD 2629 __kmp_itt_lock_destroyed(lck); 2630 #endif /* USE_ITT_BUILD */ 2631 2632 DESTROY_NESTED_LOCK(lck); 2633 2634 if ((__kmp_user_lock_kind == lk_tas) && 2635 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2636 OMP_NEST_LOCK_T_SIZE)) { 2637 ; 2638 } 2639 #if KMP_USE_FUTEX 2640 else if ((__kmp_user_lock_kind == lk_futex) && 2641 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2642 OMP_NEST_LOCK_T_SIZE)) { 2643 ; 2644 } 2645 #endif 2646 else { 2647 __kmp_user_lock_free(user_lock, gtid, lck); 2648 } 2649 #endif // KMP_USE_DYNAMIC_LOCK 2650 } // __kmpc_destroy_nest_lock 2651 2652 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2653 KMP_COUNT_BLOCK(OMP_set_lock); 2654 #if KMP_USE_DYNAMIC_LOCK 2655 int tag = KMP_EXTRACT_D_TAG(user_lock); 2656 #if USE_ITT_BUILD 2657 __kmp_itt_lock_acquiring( 2658 (kmp_user_lock_p) 2659 user_lock); // itt function will get to the right lock object. 
2660 #endif 2661 #if OMPT_SUPPORT && OMPT_OPTIONAL 2662 // This is the case, if called from omp_init_lock_with_hint: 2663 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2664 if (!codeptr) 2665 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2666 if (ompt_enabled.ompt_callback_mutex_acquire) { 2667 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2668 ompt_mutex_lock, omp_lock_hint_none, 2669 __ompt_get_mutex_impl_type(user_lock), 2670 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2671 } 2672 #endif 2673 #if KMP_USE_INLINED_TAS 2674 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2675 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2676 } else 2677 #elif KMP_USE_INLINED_FUTEX 2678 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2679 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2680 } else 2681 #endif 2682 { 2683 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2684 } 2685 #if USE_ITT_BUILD 2686 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2687 #endif 2688 #if OMPT_SUPPORT && OMPT_OPTIONAL 2689 if (ompt_enabled.ompt_callback_mutex_acquired) { 2690 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2691 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2692 } 2693 #endif 2694 2695 #else // KMP_USE_DYNAMIC_LOCK 2696 2697 kmp_user_lock_p lck; 2698 2699 if ((__kmp_user_lock_kind == lk_tas) && 2700 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2701 lck = (kmp_user_lock_p)user_lock; 2702 } 2703 #if KMP_USE_FUTEX 2704 else if ((__kmp_user_lock_kind == lk_futex) && 2705 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2706 lck = (kmp_user_lock_p)user_lock; 2707 } 2708 #endif 2709 else { 2710 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2711 } 2712 2713 #if USE_ITT_BUILD 2714 __kmp_itt_lock_acquiring(lck); 2715 #endif /* USE_ITT_BUILD */ 2716 #if OMPT_SUPPORT && OMPT_OPTIONAL 2717 // This is the case, if called from omp_init_lock_with_hint: 2718 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2719 if (!codeptr) 2720 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2721 if (ompt_enabled.ompt_callback_mutex_acquire) { 2722 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2723 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2724 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2725 } 2726 #endif 2727 2728 ACQUIRE_LOCK(lck, gtid); 2729 2730 #if USE_ITT_BUILD 2731 __kmp_itt_lock_acquired(lck); 2732 #endif /* USE_ITT_BUILD */ 2733 2734 #if OMPT_SUPPORT && OMPT_OPTIONAL 2735 if (ompt_enabled.ompt_callback_mutex_acquired) { 2736 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2737 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2738 } 2739 #endif 2740 2741 #endif // KMP_USE_DYNAMIC_LOCK 2742 } 2743 2744 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2745 #if KMP_USE_DYNAMIC_LOCK 2746 2747 #if USE_ITT_BUILD 2748 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2749 #endif 2750 #if OMPT_SUPPORT && OMPT_OPTIONAL 2751 // This is the case, if called from omp_init_lock_with_hint: 2752 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2753 if (!codeptr) 2754 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2755 if (ompt_enabled.enabled) { 2756 if (ompt_enabled.ompt_callback_mutex_acquire) { 2757 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2758 ompt_mutex_nest_lock, omp_lock_hint_none, 2759 __ompt_get_mutex_impl_type(user_lock), 2760 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2761 } 2762 } 2763 #endif 2764 int acquire_status = 2765 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2766 (void)acquire_status; 2767 #if USE_ITT_BUILD 2768 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2769 #endif 2770 2771 #if OMPT_SUPPORT && OMPT_OPTIONAL 2772 if (ompt_enabled.enabled) { 2773 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2774 if (ompt_enabled.ompt_callback_mutex_acquired) { 2775 // lock_first 2776 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2777 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2778 codeptr); 2779 } 2780 } else { 2781 if (ompt_enabled.ompt_callback_nest_lock) { 2782 // lock_next 2783 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2784 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2785 } 2786 } 2787 } 2788 #endif 2789 2790 #else // KMP_USE_DYNAMIC_LOCK 2791 int acquire_status; 2792 kmp_user_lock_p lck; 2793 2794 if ((__kmp_user_lock_kind == lk_tas) && 2795 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2796 OMP_NEST_LOCK_T_SIZE)) { 2797 lck = (kmp_user_lock_p)user_lock; 2798 } 2799 #if KMP_USE_FUTEX 2800 else if ((__kmp_user_lock_kind == lk_futex) && 2801 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2802 OMP_NEST_LOCK_T_SIZE)) { 2803 lck = (kmp_user_lock_p)user_lock; 2804 } 2805 #endif 2806 else { 2807 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2808 } 2809 2810 #if USE_ITT_BUILD 2811 __kmp_itt_lock_acquiring(lck); 2812 #endif /* USE_ITT_BUILD */ 2813 #if OMPT_SUPPORT && OMPT_OPTIONAL 2814 // This is the case, if called from omp_init_lock_with_hint: 2815 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2816 if (!codeptr) 2817 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2818 if (ompt_enabled.enabled) { 2819 if (ompt_enabled.ompt_callback_mutex_acquire) { 2820 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2821 ompt_mutex_nest_lock, omp_lock_hint_none, 2822 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2823 codeptr); 2824 } 2825 } 2826 #endif 2827 2828 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2829 2830 #if USE_ITT_BUILD 2831 __kmp_itt_lock_acquired(lck); 2832 #endif /* USE_ITT_BUILD */ 2833 2834 #if OMPT_SUPPORT && OMPT_OPTIONAL 2835 if (ompt_enabled.enabled) { 2836 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2837 if (ompt_enabled.ompt_callback_mutex_acquired) { 2838 // lock_first 2839 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2840 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2841 } 2842 } else { 2843 if (ompt_enabled.ompt_callback_nest_lock) { 2844 // lock_next 2845 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2846 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2847 } 2848 } 2849 } 2850 #endif 2851 2852 #endif // KMP_USE_DYNAMIC_LOCK 2853 } 2854 2855 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2856 #if KMP_USE_DYNAMIC_LOCK 2857 2858 int tag = KMP_EXTRACT_D_TAG(user_lock); 2859 #if USE_ITT_BUILD 2860 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2861 #endif 2862 #if KMP_USE_INLINED_TAS 2863 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2864 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2865 } else 2866 #elif KMP_USE_INLINED_FUTEX 2867 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2868 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2869 } else 2870 #endif 2871 { 2872 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2873 } 2874 2875 #if OMPT_SUPPORT && OMPT_OPTIONAL 2876 // This is the case, if called from omp_init_lock_with_hint: 2877 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2878 if (!codeptr) 2879 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2880 if (ompt_enabled.ompt_callback_mutex_released) { 2881 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2882 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2883 } 2884 #endif 2885 2886 #else // KMP_USE_DYNAMIC_LOCK 2887 2888 kmp_user_lock_p lck; 2889 2890 /* Can't use serial interval since not block structured */ 2891 /* release the lock */ 2892 2893 if ((__kmp_user_lock_kind == lk_tas) && 2894 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2895 #if KMP_OS_LINUX && \ 2896 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2897 // "fast" path implemented to fix customer performance issue 2898 #if USE_ITT_BUILD 2899 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2900 #endif /* USE_ITT_BUILD */ 2901 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2902 KMP_MB(); 2903 2904 #if OMPT_SUPPORT && OMPT_OPTIONAL 2905 // This is the case, if called from omp_init_lock_with_hint: 2906 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2907 if (!codeptr) 2908 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2909 if (ompt_enabled.ompt_callback_mutex_released) { 2910 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2911 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2912 } 2913 #endif 2914 2915 return; 2916 #else 2917 lck = (kmp_user_lock_p)user_lock; 2918 #endif 2919 } 2920 #if KMP_USE_FUTEX 2921 else if ((__kmp_user_lock_kind == lk_futex) && 2922 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2923 lck = (kmp_user_lock_p)user_lock; 2924 } 2925 #endif 2926 else { 2927 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2928 } 2929 2930 #if USE_ITT_BUILD 2931 __kmp_itt_lock_releasing(lck); 2932 #endif /* USE_ITT_BUILD */ 2933 2934 RELEASE_LOCK(lck, gtid); 2935 2936 #if OMPT_SUPPORT && OMPT_OPTIONAL 2937 // This is the case, if called from omp_init_lock_with_hint: 2938 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2939 if (!codeptr) 2940 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2941 if (ompt_enabled.ompt_callback_mutex_released) { 2942 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2943 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2944 } 2945 #endif 2946 2947 #endif // KMP_USE_DYNAMIC_LOCK 2948 } 2949 2950 /* release the lock */ 2951 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2952 #if KMP_USE_DYNAMIC_LOCK 2953 2954 #if USE_ITT_BUILD 2955 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2956 #endif 2957 int release_status = 2958 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2959 (void)release_status; 2960 2961 #if OMPT_SUPPORT && OMPT_OPTIONAL 2962 // This is the case, if called from omp_init_lock_with_hint: 2963 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2964 if (!codeptr) 2965 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2966 if (ompt_enabled.enabled) { 2967 if (release_status == KMP_LOCK_RELEASED) { 2968 if (ompt_enabled.ompt_callback_mutex_released) { 2969 // release_lock_last 2970 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2971 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2972 codeptr); 2973 } 2974 } else if (ompt_enabled.ompt_callback_nest_lock) { 2975 // release_lock_prev 2976 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2977 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2978 } 2979 } 2980 #endif 2981 2982 #else // KMP_USE_DYNAMIC_LOCK 2983 2984 kmp_user_lock_p lck; 
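// The branches below resolve the lock object: small TAS/futex locks are stored
// directly in the user's lock variable, while any other lock kind is found via
// __kmp_lookup_user_lock().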
2985 2986 /* Can't use serial interval since not block structured */ 2987
2988 if ((__kmp_user_lock_kind == lk_tas) &&
2989 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2990 OMP_NEST_LOCK_T_SIZE)) {
2991 #if KMP_OS_LINUX && \ 2992 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2993 // "fast" path implemented to fix customer performance issue
2994 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2995 #if USE_ITT_BUILD 2996 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2997 #endif /* USE_ITT_BUILD */ 2998
2999 #if OMPT_SUPPORT && OMPT_OPTIONAL 3000 int release_status = KMP_LOCK_STILL_HELD; 3001 #endif 3002
3003 if (--(tl->lk.depth_locked) == 0) { 3004 TCW_4(tl->lk.poll, 0);
3005 #if OMPT_SUPPORT && OMPT_OPTIONAL 3006 release_status = KMP_LOCK_RELEASED; 3007 #endif 3008 }
3009 KMP_MB(); 3010
3011 #if OMPT_SUPPORT && OMPT_OPTIONAL 3012 // This is the case, if called from omp_init_lock_with_hint:
3013 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3014 if (!codeptr) 3015 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3016 if (ompt_enabled.enabled) { 3017 if (release_status == KMP_LOCK_RELEASED) {
3018 if (ompt_enabled.ompt_callback_mutex_released) { 3019 // release_lock_last
3020 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3021 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3022 }
3023 } else if (ompt_enabled.ompt_callback_nest_lock) { 3024 // release_lock_previous
3025 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3026 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3027 } 3028 } 3029 #endif 3030
3031 return; 3032 #else 3033 lck = (kmp_user_lock_p)user_lock; 3034 #endif 3035 }
3036 #if KMP_USE_FUTEX 3037 else if ((__kmp_user_lock_kind == lk_futex) &&
3038 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3039 OMP_NEST_LOCK_T_SIZE)) {
3040 lck = (kmp_user_lock_p)user_lock; 3041 } 3042 #endif
3043 else { 3044 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3045 } 3046
3047 #if USE_ITT_BUILD 3048 __kmp_itt_lock_releasing(lck); 3049 #endif /* USE_ITT_BUILD */ 3050
3051 int release_status; 3052 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3053 #if OMPT_SUPPORT && OMPT_OPTIONAL 3054 // This is the case, if called from omp_init_lock_with_hint:
3055 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3056 if (!codeptr) 3057 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3058 if (ompt_enabled.enabled) { 3059 if (release_status == KMP_LOCK_RELEASED) {
3060 if (ompt_enabled.ompt_callback_mutex_released) { 3061 // release_lock_last
3062 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3063 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3064 }
3065 } else if (ompt_enabled.ompt_callback_nest_lock) { 3066 // release_lock_previous
3067 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3068 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3069 } 3070 } 3071 #endif 3072
3073 #endif // KMP_USE_DYNAMIC_LOCK 3074 } 3075
3076 /* try to acquire the lock */ 3077 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3078 KMP_COUNT_BLOCK(OMP_test_lock); 3079
3080 #if KMP_USE_DYNAMIC_LOCK 3081 int rc; 3082 int tag = KMP_EXTRACT_D_TAG(user_lock);
3083 #if USE_ITT_BUILD 3084 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3085 #endif
3086 #if OMPT_SUPPORT && OMPT_OPTIONAL 3087 // This is the case, if called from omp_init_lock_with_hint:
3088 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3089 if (!codeptr) 3090 codeptr =
OMPT_GET_RETURN_ADDRESS(0); 3091 if (ompt_enabled.ompt_callback_mutex_acquire) { 3092 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3093 ompt_mutex_lock, omp_lock_hint_none, 3094 __ompt_get_mutex_impl_type(user_lock), 3095 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3096 } 3097 #endif 3098 #if KMP_USE_INLINED_TAS 3099 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3100 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3101 } else 3102 #elif KMP_USE_INLINED_FUTEX 3103 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3104 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3105 } else 3106 #endif 3107 { 3108 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3109 } 3110 if (rc) { 3111 #if USE_ITT_BUILD 3112 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3113 #endif 3114 #if OMPT_SUPPORT && OMPT_OPTIONAL 3115 if (ompt_enabled.ompt_callback_mutex_acquired) { 3116 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3117 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3118 } 3119 #endif 3120 return FTN_TRUE; 3121 } else { 3122 #if USE_ITT_BUILD 3123 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3124 #endif 3125 return FTN_FALSE; 3126 } 3127 3128 #else // KMP_USE_DYNAMIC_LOCK 3129 3130 kmp_user_lock_p lck; 3131 int rc; 3132 3133 if ((__kmp_user_lock_kind == lk_tas) && 3134 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3135 lck = (kmp_user_lock_p)user_lock; 3136 } 3137 #if KMP_USE_FUTEX 3138 else if ((__kmp_user_lock_kind == lk_futex) && 3139 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3140 lck = (kmp_user_lock_p)user_lock; 3141 } 3142 #endif 3143 else { 3144 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3145 } 3146 3147 #if USE_ITT_BUILD 3148 __kmp_itt_lock_acquiring(lck); 3149 #endif /* USE_ITT_BUILD */ 3150 #if OMPT_SUPPORT && OMPT_OPTIONAL 3151 // This is the case, if called from omp_init_lock_with_hint: 3152 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3153 if (!codeptr) 3154 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3155 if (ompt_enabled.ompt_callback_mutex_acquire) { 3156 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3157 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3158 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3159 } 3160 #endif 3161 3162 rc = TEST_LOCK(lck, gtid); 3163 #if USE_ITT_BUILD 3164 if (rc) { 3165 __kmp_itt_lock_acquired(lck); 3166 } else { 3167 __kmp_itt_lock_cancelled(lck); 3168 } 3169 #endif /* USE_ITT_BUILD */ 3170 #if OMPT_SUPPORT && OMPT_OPTIONAL 3171 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3172 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3173 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3174 } 3175 #endif 3176 3177 return (rc ? 
FTN_TRUE : FTN_FALSE); 3178 3179 /* Can't use serial interval since not block structured */ 3180
3181 #endif // KMP_USE_DYNAMIC_LOCK 3182 } 3183
3184 /* try to acquire the lock */ 3185 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3186 #if KMP_USE_DYNAMIC_LOCK 3187 int rc;
3188 #if USE_ITT_BUILD 3189 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3190 #endif
3191 #if OMPT_SUPPORT && OMPT_OPTIONAL 3192 // This is the case, if called from omp_init_lock_with_hint:
3193 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3194 if (!codeptr) 3195 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3196 if (ompt_enabled.ompt_callback_mutex_acquire) {
3197 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3198 ompt_mutex_nest_lock, omp_lock_hint_none, 3199 __ompt_get_mutex_impl_type(user_lock),
3200 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3201 } 3202 #endif
3203 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3204 #if USE_ITT_BUILD 3205 if (rc) { 3206 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3207 } else { 3208 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3209 } 3210 #endif
3211 #if OMPT_SUPPORT && OMPT_OPTIONAL 3212 if (ompt_enabled.enabled && rc) { 3213 if (rc == 1) {
3214 if (ompt_enabled.ompt_callback_mutex_acquired) { 3215 // lock_first
3216 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3217 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3218 codeptr); 3219 }
3220 } else { 3221 if (ompt_enabled.ompt_callback_nest_lock) { 3222 // lock_next
3223 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3224 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3225 } 3226 } 3227 } 3228 #endif
3229 return rc; 3230
3231 #else // KMP_USE_DYNAMIC_LOCK 3232
3233 kmp_user_lock_p lck; 3234 int rc; 3235
3236 if ((__kmp_user_lock_kind == lk_tas) &&
3237 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3238 OMP_NEST_LOCK_T_SIZE)) {
3239 lck = (kmp_user_lock_p)user_lock; 3240 }
3241 #if KMP_USE_FUTEX 3242 else if ((__kmp_user_lock_kind == lk_futex) &&
3243 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3244 OMP_NEST_LOCK_T_SIZE)) {
3245 lck = (kmp_user_lock_p)user_lock; 3246 } 3247 #endif
3248 else { 3249 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3250 } 3251
3252 #if USE_ITT_BUILD 3253 __kmp_itt_lock_acquiring(lck); 3254 #endif /* USE_ITT_BUILD */ 3255
3256 #if OMPT_SUPPORT && OMPT_OPTIONAL 3257 // This is the case, if called from omp_init_lock_with_hint:
3258 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3259 if (!codeptr) 3260 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3261 if (ompt_enabled.enabled && 3262 ompt_enabled.ompt_callback_mutex_acquire) {
3263 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3264 ompt_mutex_nest_lock, omp_lock_hint_none,
3265 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3266 codeptr); 3267 } 3268 #endif 3269
3270 rc = TEST_NESTED_LOCK(lck, gtid);
3271 #if USE_ITT_BUILD 3272 if (rc) { 3273 __kmp_itt_lock_acquired(lck);
3274 } else { 3275 __kmp_itt_lock_cancelled(lck); 3276 } 3277 #endif /* USE_ITT_BUILD */
3278 #if OMPT_SUPPORT && OMPT_OPTIONAL 3279 if (ompt_enabled.enabled && rc) { 3280 if (rc == 1) {
3281 if (ompt_enabled.ompt_callback_mutex_acquired) { 3282 // lock_first
3283 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3284 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3285 }
3286 } else { 3287 if (ompt_enabled.ompt_callback_nest_lock) { 3288
// lock_next 3289 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3290 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3291 } 3292 } 3293 } 3294 #endif
3295 return rc; 3296
3297 /* Can't use serial interval since not block structured */ 3298
3299 #endif // KMP_USE_DYNAMIC_LOCK 3300 } 3301
3302 // Interface to fast scalable reduce methods routines 3303
3304 // keep the selected method in a thread local structure for cross-function
3305 // usage: will be used in __kmpc_end_reduce* functions;
3306 // another solution: to re-determine the method one more time in
3307 // __kmpc_end_reduce* functions (new prototype required then)
3308 // AT: which solution is better? 3309 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3310 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3311
3312 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3313 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3314
3315 // description of the packed_reduction_method variable: look at the macros in 3316 // kmp.h 3317
3318 // used in a critical section reduce block 3319 static __forceinline void
3320 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3321 kmp_critical_name *crit) { 3322
3323 // this lock was visible to a customer and to the threading profile tool as a
3324 // serial overhead span (although it's used for an internal purpose only)
3325 // why was it visible in previous implementation?
3326 // should we keep it visible in new reduce block? 3327 kmp_user_lock_p lck; 3328
3329 #if KMP_USE_DYNAMIC_LOCK 3330
3331 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3332 // Check if it is initialized. 3333 if (*lk == 0) {
3334 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3335 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3336 KMP_GET_D_TAG(__kmp_user_lock_seq));
3337 } else { 3338 __kmp_init_indirect_csptr(crit, loc, global_tid, 3339 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3340 } 3341 }
3342 // Branch for accessing the actual lock object and set operation. This
3343 // branching is inevitable since this lock initialization does not follow the
3344 // normal dispatch path (lock table is not used). 3345 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3346 lck = (kmp_user_lock_p)lk; 3347 KMP_DEBUG_ASSERT(lck != NULL);
3348 if (__kmp_env_consistency_check) {
3349 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3350 }
3351 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3352 } else {
3353 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3354 lck = ilk->lock;
3355 KMP_DEBUG_ASSERT(lck != NULL); 3356 if (__kmp_env_consistency_check) {
3357 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3358 }
3359 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3360 } 3361
3362 #else // KMP_USE_DYNAMIC_LOCK 3363
3364 // We know that the fast reduction code is only emitted by Intel compilers
3365 // with 32 byte critical sections. If there isn't enough space, then we
3366 // have to use a pointer.
3367 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3368 lck = (kmp_user_lock_p)crit; 3369 } else { 3370 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3371 } 3372 KMP_DEBUG_ASSERT(lck != NULL); 3373 3374 if (__kmp_env_consistency_check) 3375 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3376 3377 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3378 3379 #endif // KMP_USE_DYNAMIC_LOCK 3380 } 3381 3382 // used in a critical section reduce block 3383 static __forceinline void 3384 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3385 kmp_critical_name *crit) { 3386 3387 kmp_user_lock_p lck; 3388 3389 #if KMP_USE_DYNAMIC_LOCK 3390 3391 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3392 lck = (kmp_user_lock_p)crit; 3393 if (__kmp_env_consistency_check) 3394 __kmp_pop_sync(global_tid, ct_critical, loc); 3395 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3396 } else { 3397 kmp_indirect_lock_t *ilk = 3398 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3399 if (__kmp_env_consistency_check) 3400 __kmp_pop_sync(global_tid, ct_critical, loc); 3401 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3402 } 3403 3404 #else // KMP_USE_DYNAMIC_LOCK 3405 3406 // We know that the fast reduction code is only emitted by Intel compilers 3407 // with 32 byte critical sections. If there isn't enough space, then we have 3408 // to use a pointer. 3409 if (__kmp_base_user_lock_size > 32) { 3410 lck = *((kmp_user_lock_p *)crit); 3411 KMP_ASSERT(lck != NULL); 3412 } else { 3413 lck = (kmp_user_lock_p)crit; 3414 } 3415 3416 if (__kmp_env_consistency_check) 3417 __kmp_pop_sync(global_tid, ct_critical, loc); 3418 3419 __kmp_release_user_lock_with_checks(lck, global_tid); 3420 3421 #endif // KMP_USE_DYNAMIC_LOCK 3422 } // __kmp_end_critical_section_reduce_block 3423 3424 static __forceinline int 3425 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3426 int *task_state) { 3427 kmp_team_t *team; 3428 3429 // Check if we are inside the teams construct? 3430 if (th->th.th_teams_microtask) { 3431 *team_p = team = th->th.th_team; 3432 if (team->t.t_level == th->th.th_teams_level) { 3433 // This is reduction at teams construct. 3434 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3435 // Let's swap teams temporarily for the reduction. 3436 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3437 th->th.th_team = team->t.t_parent; 3438 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3439 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3440 *task_state = th->th.th_task_state; 3441 th->th.th_task_state = 0; 3442 3443 return 1; 3444 } 3445 } 3446 return 0; 3447 } 3448 3449 static __forceinline void 3450 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3451 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3452 th->th.th_info.ds.ds_tid = 0; 3453 th->th.th_team = team; 3454 th->th.th_team_nproc = team->t.t_nproc; 3455 th->th.th_task_team = team->t.t_task_team[task_state]; 3456 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3457 } 3458 3459 /* 2.a.i. Reduce Block without a terminating barrier */ 3460 /*! 
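As a hedged sketch only (x_priv, x_shared, reduce_func and crit_name are
placeholder names, not the exact code generation), generated code typically
drives the nowait reduce entry points like this:
@code
switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(x_priv), &x_priv,
                             reduce_func, &crit_name)) {
case 1: // this thread folds the (possibly tree-combined) data into the result
  x_shared += x_priv;
  __kmpc_end_reduce_nowait(&loc, gtid, &crit_name);
  break;
case 2: // every thread contributes its private copy with an atomic update
  break;
default: // 0: nothing left to do on this thread
  break;
}
@endcode
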
3461 @ingroup SYNCHRONIZATION 3462 @param loc source location information 3463 @param global_tid global thread number 3464 @param num_vars number of items (variables) to be reduced 3465 @param reduce_size size of data in bytes to be reduced 3466 @param reduce_data pointer to data to be reduced 3467 @param reduce_func callback function providing reduction operation on two 3468 operands and returning result of reduction in lhs_data 3469 @param lck pointer to the unique lock data structure 3470 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3471 threads if atomic reduction needed 3472 3473 The nowait version is used for a reduce clause with the nowait argument. 3474 */ 3475 kmp_int32 3476 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3477 size_t reduce_size, void *reduce_data, 3478 void (*reduce_func)(void *lhs_data, void *rhs_data), 3479 kmp_critical_name *lck) { 3480 3481 KMP_COUNT_BLOCK(REDUCE_nowait); 3482 int retval = 0; 3483 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3484 kmp_info_t *th; 3485 kmp_team_t *team; 3486 int teams_swapped = 0, task_state; 3487 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3488 __kmp_assert_valid_gtid(global_tid); 3489 3490 // why do we need this initialization here at all? 3491 // Reduction clause can not be used as a stand-alone directive. 3492 3493 // do not call __kmp_serial_initialize(), it will be called by 3494 // __kmp_parallel_initialize() if needed 3495 // possible detection of false-positive race by the threadchecker ??? 3496 if (!TCR_4(__kmp_init_parallel)) 3497 __kmp_parallel_initialize(); 3498 3499 __kmp_resume_if_soft_paused(); 3500 3501 // check correctness of reduce block nesting 3502 #if KMP_USE_DYNAMIC_LOCK 3503 if (__kmp_env_consistency_check) 3504 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3505 #else 3506 if (__kmp_env_consistency_check) 3507 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3508 #endif 3509 3510 th = __kmp_thread_from_gtid(global_tid); 3511 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3512 3513 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3514 // the value should be kept in a variable 3515 // the variable should be either a construct-specific or thread-specific 3516 // property, not a team specific property 3517 // (a thread can reach the next reduce block on the next construct, reduce 3518 // method may differ on the next construct) 3519 // an ident_t "loc" parameter could be used as a construct-specific property 3520 // (what if loc == 0?) 
3521 // (if both construct-specific and team-specific variables were shared, 3522 // then unnecessary extra syncs would be needed) 3523 // a thread-specific variable is better regarding two issues above (next 3524 // construct and extra syncs) 3525 // a thread-specific "th_local.reduction_method" variable is used currently 3526 // each thread executes 'determine' and 'set' lines (no need to execute by one 3527 // thread, to avoid unnecessary extra syncs) 3528 3529 packed_reduction_method = __kmp_determine_reduction_method( 3530 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3531 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3532 3533 OMPT_REDUCTION_DECL(th, global_tid); 3534 if (packed_reduction_method == critical_reduce_block) { 3535 3536 OMPT_REDUCTION_BEGIN; 3537 3538 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3539 retval = 1; 3540 3541 } else if (packed_reduction_method == empty_reduce_block) { 3542 3543 OMPT_REDUCTION_BEGIN; 3544 3545 // usage: if team size == 1, no synchronization is required (Intel 3546 // platforms only) 3547 retval = 1; 3548 3549 } else if (packed_reduction_method == atomic_reduce_block) { 3550 3551 retval = 2; 3552 3553 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3554 // won't be called by the code gen) 3555 // (it's not quite good, because the checking block has been closed by 3556 // this 'pop', 3557 // but the atomic operation has not been executed yet, will be executed 3558 // slightly later, literally on next instruction) 3559 if (__kmp_env_consistency_check) 3560 __kmp_pop_sync(global_tid, ct_reduce, loc); 3561 3562 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3563 tree_reduce_block)) { 3564 3565 // AT: performance issue: a real barrier here 3566 // AT: (if primary thread is slow, other threads are blocked here waiting for 3567 // the primary thread to come and release them) 3568 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3569 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3570 // be confusing to a customer) 3571 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3572 // might go faster and be more in line with sense of NOWAIT 3573 // AT: TO DO: do epcc test and compare times 3574 3575 // this barrier should be invisible to a customer and to the threading profile 3576 // tool (it's neither a terminating barrier nor customer's code, it's 3577 // used for an internal purpose) 3578 #if OMPT_SUPPORT 3579 // JP: can this barrier potentially lead to task scheduling? 3580 // JP: as long as there is a barrier in the implementation, OMPT should and 3581 // will provide the barrier events 3582 // so we set up the necessary frame/return addresses. 3583 ompt_frame_t *ompt_frame; 3584 if (ompt_enabled.enabled) { 3585 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3586 if (ompt_frame->enter_frame.ptr == NULL) 3587 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3588 } 3589 OMPT_STORE_RETURN_ADDRESS(global_tid); 3590 #endif 3591 #if USE_ITT_NOTIFY 3592 __kmp_threads[global_tid]->th.th_ident = loc; 3593 #endif 3594 retval = 3595 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3596 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3597 retval = (retval != 0) ?
(0) : (1); 3598 #if OMPT_SUPPORT && OMPT_OPTIONAL 3599 if (ompt_enabled.enabled) { 3600 ompt_frame->enter_frame = ompt_data_none; 3601 } 3602 #endif 3603 3604 // all other workers except primary thread should do this pop here 3605 // (none of the other workers will get to __kmpc_end_reduce_nowait()) 3606 if (__kmp_env_consistency_check) { 3607 if (retval == 0) { 3608 __kmp_pop_sync(global_tid, ct_reduce, loc); 3609 } 3610 } 3611 3612 } else { 3613 3614 // should never reach this block 3615 KMP_ASSERT(0); // "unexpected method" 3616 } 3617 if (teams_swapped) { 3618 __kmp_restore_swapped_teams(th, team, task_state); 3619 } 3620 KA_TRACE( 3621 10, 3622 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3623 global_tid, packed_reduction_method, retval)); 3624 3625 return retval; 3626 } 3627 3628 /*! 3629 @ingroup SYNCHRONIZATION 3630 @param loc source location information 3631 @param global_tid global thread id. 3632 @param lck pointer to the unique lock data structure 3633 3634 Finish the execution of a reduce nowait. 3635 */ 3636 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3637 kmp_critical_name *lck) { 3638 3639 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3640 3641 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3642 __kmp_assert_valid_gtid(global_tid); 3643 3644 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3645 3646 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3647 3648 if (packed_reduction_method == critical_reduce_block) { 3649 3650 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3651 OMPT_REDUCTION_END; 3652 3653 } else if (packed_reduction_method == empty_reduce_block) { 3654 3655 // usage: if team size == 1, no synchronization is required (on Intel 3656 // platforms only) 3657 3658 OMPT_REDUCTION_END; 3659 3660 } else if (packed_reduction_method == atomic_reduce_block) { 3661 3662 // neither primary thread nor other workers should get here 3663 // (code gen does not generate this call in case 2: atomic reduce block) 3664 // actually it would be better to remove this elseif entirely; 3665 // after removal this value will be checked by the 'else' and will assert 3666 3667 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3668 tree_reduce_block)) { 3669 3670 // only primary thread gets here 3671 // OMPT: tree reduction is annotated in the barrier code 3672 3673 } else { 3674 3675 // should never reach this block 3676 KMP_ASSERT(0); // "unexpected method" 3677 } 3678 3679 if (__kmp_env_consistency_check) 3680 __kmp_pop_sync(global_tid, ct_reduce, loc); 3681 3682 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3683 global_tid, packed_reduction_method)); 3684 3685 return; 3686 } 3687 3688 /* 2.a.ii. Reduce Block with a terminating barrier */ 3689 3690 /*! 3691 @ingroup SYNCHRONIZATION 3692 @param loc source location information 3693 @param global_tid global thread number 3694 @param num_vars number of items (variables) to be reduced 3695 @param reduce_size size of data in bytes to be reduced 3696 @param reduce_data pointer to data to be reduced 3697 @param reduce_func callback function providing reduction operation on two 3698 operands and returning result of reduction in lhs_data 3699 @param lck pointer to the unique lock data structure 3700 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3701 threads if atomic reduction needed 3702 3703 A blocking reduce that includes an implicit barrier.
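The <tt>lck</tt> pointer passed here must also be passed to the matching __kmpc_end_reduce() call.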
3704 */ 3705 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3706 size_t reduce_size, void *reduce_data, 3707 void (*reduce_func)(void *lhs_data, void *rhs_data), 3708 kmp_critical_name *lck) { 3709 KMP_COUNT_BLOCK(REDUCE_wait); 3710 int retval = 0; 3711 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3712 kmp_info_t *th; 3713 kmp_team_t *team; 3714 int teams_swapped = 0, task_state; 3715 3716 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3717 __kmp_assert_valid_gtid(global_tid); 3718 3719 // why do we need this initialization here at all? 3720 // Reduction clause can not be a stand-alone directive. 3721 3722 // do not call __kmp_serial_initialize(), it will be called by 3723 // __kmp_parallel_initialize() if needed 3724 // possible detection of false-positive race by the threadchecker ??? 3725 if (!TCR_4(__kmp_init_parallel)) 3726 __kmp_parallel_initialize(); 3727 3728 __kmp_resume_if_soft_paused(); 3729 3730 // check correctness of reduce block nesting 3731 #if KMP_USE_DYNAMIC_LOCK 3732 if (__kmp_env_consistency_check) 3733 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3734 #else 3735 if (__kmp_env_consistency_check) 3736 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3737 #endif 3738 3739 th = __kmp_thread_from_gtid(global_tid); 3740 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3741 3742 packed_reduction_method = __kmp_determine_reduction_method( 3743 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3744 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3745 3746 OMPT_REDUCTION_DECL(th, global_tid); 3747 3748 if (packed_reduction_method == critical_reduce_block) { 3749 3750 OMPT_REDUCTION_BEGIN; 3751 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3752 retval = 1; 3753 3754 } else if (packed_reduction_method == empty_reduce_block) { 3755 3756 OMPT_REDUCTION_BEGIN; 3757 // usage: if team size == 1, no synchronization is required ( Intel 3758 // platforms only ) 3759 retval = 1; 3760 3761 } else if (packed_reduction_method == atomic_reduce_block) { 3762 3763 retval = 2; 3764 3765 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3766 tree_reduce_block)) { 3767 3768 // case tree_reduce_block: 3769 // this barrier should be visible to a customer and to the threading profile 3770 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3771 #if OMPT_SUPPORT 3772 ompt_frame_t *ompt_frame; 3773 if (ompt_enabled.enabled) { 3774 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3775 if (ompt_frame->enter_frame.ptr == NULL) 3776 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3777 } 3778 OMPT_STORE_RETURN_ADDRESS(global_tid); 3779 #endif 3780 #if USE_ITT_NOTIFY 3781 __kmp_threads[global_tid]->th.th_ident = 3782 loc; // needed for correct notification of frames 3783 #endif 3784 retval = 3785 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3786 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3787 retval = (retval != 0) ? 
(0) : (1); 3788 #if OMPT_SUPPORT && OMPT_OPTIONAL 3789 if (ompt_enabled.enabled) { 3790 ompt_frame->enter_frame = ompt_data_none; 3791 } 3792 #endif 3793 3794 // all other workers except primary thread should do this pop here 3795 // (none of other workers except primary will enter __kmpc_end_reduce()) 3796 if (__kmp_env_consistency_check) { 3797 if (retval == 0) { // 0: all other workers; 1: primary thread 3798 __kmp_pop_sync(global_tid, ct_reduce, loc); 3799 } 3800 } 3801 3802 } else { 3803 3804 // should never reach this block 3805 KMP_ASSERT(0); // "unexpected method" 3806 } 3807 if (teams_swapped) { 3808 __kmp_restore_swapped_teams(th, team, task_state); 3809 } 3810 3811 KA_TRACE(10, 3812 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3813 global_tid, packed_reduction_method, retval)); 3814 return retval; 3815 } 3816 3817 /*! 3818 @ingroup SYNCHRONIZATION 3819 @param loc source location information 3820 @param global_tid global thread id. 3821 @param lck pointer to the unique lock data structure 3822 3823 Finish the execution of a blocking reduce. 3824 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3825 start function. 3826 */ 3827 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3828 kmp_critical_name *lck) { 3829 3830 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3831 kmp_info_t *th; 3832 kmp_team_t *team; 3833 int teams_swapped = 0, task_state; 3834 3835 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3836 __kmp_assert_valid_gtid(global_tid); 3837 3838 th = __kmp_thread_from_gtid(global_tid); 3839 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3840 3841 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3842 3843 // this barrier should be visible to a customer and to the threading profile 3844 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3845 OMPT_REDUCTION_DECL(th, global_tid); 3846 3847 if (packed_reduction_method == critical_reduce_block) { 3848 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3849 3850 OMPT_REDUCTION_END; 3851 3852 // TODO: implicit barrier: should be exposed 3853 #if OMPT_SUPPORT 3854 ompt_frame_t *ompt_frame; 3855 if (ompt_enabled.enabled) { 3856 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3857 if (ompt_frame->enter_frame.ptr == NULL) 3858 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3859 } 3860 OMPT_STORE_RETURN_ADDRESS(global_tid); 3861 #endif 3862 #if USE_ITT_NOTIFY 3863 __kmp_threads[global_tid]->th.th_ident = loc; 3864 #endif 3865 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3866 #if OMPT_SUPPORT && OMPT_OPTIONAL 3867 if (ompt_enabled.enabled) { 3868 ompt_frame->enter_frame = ompt_data_none; 3869 } 3870 #endif 3871 3872 } else if (packed_reduction_method == empty_reduce_block) { 3873 3874 OMPT_REDUCTION_END; 3875 3876 // usage: if team size==1, no synchronization is required (Intel platforms only) 3877 3878 // TODO: implicit barrier: should be exposed 3879 #if OMPT_SUPPORT 3880 ompt_frame_t *ompt_frame; 3881 if (ompt_enabled.enabled) { 3882 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3883 if (ompt_frame->enter_frame.ptr == NULL) 3884 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3885 } 3886 OMPT_STORE_RETURN_ADDRESS(global_tid); 3887 #endif 3888 #if USE_ITT_NOTIFY 3889 __kmp_threads[global_tid]->th.th_ident = loc; 3890 #endif 3891 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3892 #if OMPT_SUPPORT && OMPT_OPTIONAL 3893 if (ompt_enabled.enabled) { 3894 ompt_frame->enter_frame = ompt_data_none; 3895 } 3896 #endif 3897 3898 } else if (packed_reduction_method == atomic_reduce_block) { 3899 3900 #if OMPT_SUPPORT 3901 ompt_frame_t *ompt_frame; 3902 if (ompt_enabled.enabled) { 3903 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3904 if (ompt_frame->enter_frame.ptr == NULL) 3905 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3906 } 3907 OMPT_STORE_RETURN_ADDRESS(global_tid); 3908 #endif 3909 // TODO: implicit barrier: should be exposed 3910 #if USE_ITT_NOTIFY 3911 __kmp_threads[global_tid]->th.th_ident = loc; 3912 #endif 3913 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3914 #if OMPT_SUPPORT && OMPT_OPTIONAL 3915 if (ompt_enabled.enabled) { 3916 ompt_frame->enter_frame = ompt_data_none; 3917 } 3918 #endif 3919 3920 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3921 tree_reduce_block)) { 3922 3923 // only primary thread executes here (primary releases all other workers) 3924 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3925 global_tid); 3926 3927 } else { 3928 3929 // should never reach this block 3930 KMP_ASSERT(0); // "unexpected method" 3931 } 3932 if (teams_swapped) { 3933 __kmp_restore_swapped_teams(th, team, task_state); 3934 } 3935 3936 if (__kmp_env_consistency_check) 3937 __kmp_pop_sync(global_tid, ct_reduce, loc); 3938 3939 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3940 global_tid, packed_reduction_method)); 3941 3942 return; 3943 } 3944 3945 #undef __KMP_GET_REDUCTION_METHOD 3946 #undef __KMP_SET_REDUCTION_METHOD 3947 3948 /* end of interface to fast scalable reduce routines */ 3949 3950 kmp_uint64 __kmpc_get_taskid() { 3951 3952 kmp_int32 gtid; 3953 kmp_info_t *thread; 3954 3955 gtid = __kmp_get_gtid(); 3956 if (gtid < 0) { 3957 return 0; 3958 } 3959 thread = __kmp_thread_from_gtid(gtid); 3960 return thread->th.th_current_task->td_task_id; 3961 3962 } // __kmpc_get_taskid 3963 3964 kmp_uint64 __kmpc_get_parent_taskid() { 3965 3966 kmp_int32 gtid; 3967 kmp_info_t *thread; 3968 kmp_taskdata_t *parent_task; 3969 3970 gtid = __kmp_get_gtid(); 3971 if (gtid < 0) { 3972 return 0; 3973 } 3974 thread = __kmp_thread_from_gtid(gtid); 3975 parent_task = thread->th.th_current_task->td_parent; 3976 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3977 3978 } // __kmpc_get_parent_taskid 3979 3980 /*! 3981 @ingroup WORK_SHARING 3982 @param loc source location information. 3983 @param gtid global thread number. 3984 @param num_dims number of associated doacross loops. 3985 @param dims info on loops bounds. 3986 3987 Initialize doacross loop information. 3988 Expect compiler send us inclusive bounds, 3989 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
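Each entry of <tt>dims</tt> describes one associated loop of the doacross nest and carries its inclusive lower bound (lo), inclusive upper bound (up) and stride (st); <tt>num_dims</tt> entries are expected in total.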
3990 */ 3991 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3992 const struct kmp_dim *dims) { 3993 __kmp_assert_valid_gtid(gtid); 3994 int j, idx; 3995 kmp_int64 last, trace_count; 3996 kmp_info_t *th = __kmp_threads[gtid]; 3997 kmp_team_t *team = th->th.th_team; 3998 kmp_uint32 *flags; 3999 kmp_disp_t *pr_buf = th->th.th_dispatch; 4000 dispatch_shared_info_t *sh_buf; 4001 4002 KA_TRACE( 4003 20, 4004 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4005 gtid, num_dims, !team->t.t_serialized)); 4006 KMP_DEBUG_ASSERT(dims != NULL); 4007 KMP_DEBUG_ASSERT(num_dims > 0); 4008 4009 if (team->t.t_serialized) { 4010 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4011 return; // no dependencies if team is serialized 4012 } 4013 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4014 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4015 // the next loop 4016 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4017 4018 // Save bounds info into allocated private buffer 4019 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4020 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4021 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4022 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4023 pr_buf->th_doacross_info[0] = 4024 (kmp_int64)num_dims; // first element is number of dimensions 4025 // Save also address of num_done in order to access it later without knowing 4026 // the buffer index 4027 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4028 pr_buf->th_doacross_info[2] = dims[0].lo; 4029 pr_buf->th_doacross_info[3] = dims[0].up; 4030 pr_buf->th_doacross_info[4] = dims[0].st; 4031 last = 5; 4032 for (j = 1; j < num_dims; ++j) { 4033 kmp_int64 4034 range_length; // To keep ranges of all dimensions but the first dims[0] 4035 if (dims[j].st == 1) { // most common case 4036 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4037 range_length = dims[j].up - dims[j].lo + 1; 4038 } else { 4039 if (dims[j].st > 0) { 4040 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4041 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4042 } else { // negative increment 4043 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4044 range_length = 4045 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4046 } 4047 } 4048 pr_buf->th_doacross_info[last++] = range_length; 4049 pr_buf->th_doacross_info[last++] = dims[j].lo; 4050 pr_buf->th_doacross_info[last++] = dims[j].up; 4051 pr_buf->th_doacross_info[last++] = dims[j].st; 4052 } 4053 4054 // Compute total trip count. 4055 // Start with range of dims[0] which we don't need to keep in the buffer. 
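// At this point th_doacross_info is laid out as: [0] = num_dims,
// [1] = &sh_buf->doacross_num_done, [2..4] = lo/up/st of dims[0], and for each
// j >= 1: [4*j+1] = range_length, [4*j+2] = lo, [4*j+3] = up, [4*j+4] = st of
// dims[j]. The trip-count loop below and __kmpc_doacross_wait()/post() rely on
// this layout.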
4056 if (dims[0].st == 1) { // most common case 4057 trace_count = dims[0].up - dims[0].lo + 1; 4058 } else if (dims[0].st > 0) { 4059 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4060 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4061 } else { // negative increment 4062 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4063 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4064 } 4065 for (j = 1; j < num_dims; ++j) { 4066 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4067 } 4068 KMP_DEBUG_ASSERT(trace_count > 0); 4069 4070 // Check if shared buffer is not occupied by other loop (idx - 4071 // __kmp_dispatch_num_buffers) 4072 if (idx != sh_buf->doacross_buf_idx) { 4073 // Shared buffer is occupied, wait for it to be free 4074 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4075 __kmp_eq_4, NULL); 4076 } 4077 #if KMP_32_BIT_ARCH 4078 // Check if we are the first thread. After the CAS the first thread gets 0, 4079 // others get 1 if initialization is in progress, allocated pointer otherwise. 4080 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4081 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4082 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4083 #else 4084 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4085 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4086 #endif 4087 if (flags == NULL) { 4088 // we are the first thread, allocate the array of flags 4089 size_t size = 4090 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4091 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4092 KMP_MB(); 4093 sh_buf->doacross_flags = flags; 4094 } else if (flags == (kmp_uint32 *)1) { 4095 #if KMP_32_BIT_ARCH 4096 // initialization is still in progress, need to wait 4097 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4098 #else 4099 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4100 #endif 4101 KMP_YIELD(TRUE); 4102 KMP_MB(); 4103 } else { 4104 KMP_MB(); 4105 } 4106 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4107 pr_buf->th_doacross_flags = 4108 sh_buf->doacross_flags; // save private copy in order to not 4109 // touch shared buffer on each iteration 4110 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4111 } 4112 4113 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4114 __kmp_assert_valid_gtid(gtid); 4115 kmp_int64 shft; 4116 size_t num_dims, i; 4117 kmp_uint32 flag; 4118 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4119 kmp_info_t *th = __kmp_threads[gtid]; 4120 kmp_team_t *team = th->th.th_team; 4121 kmp_disp_t *pr_buf; 4122 kmp_int64 lo, up, st; 4123 4124 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4125 if (team->t.t_serialized) { 4126 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4127 return; // no dependencies if team is serialized 4128 } 4129 4130 // calculate sequential iteration number and check out-of-bounds condition 4131 pr_buf = th->th.th_dispatch; 4132 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4133 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4134 lo = pr_buf->th_doacross_info[2]; 4135 up = pr_buf->th_doacross_info[3]; 4136 st = pr_buf->th_doacross_info[4]; 4137 #if OMPT_SUPPORT && OMPT_OPTIONAL 4138 ompt_dependence_t deps[num_dims]; 4139 #endif 4140 if (st == 1) { // most common case 4141 if (vec[0] < lo || vec[0] > up) { 4142 
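// A sink iteration outside the iteration space can never be executed, so the
// dependence is vacuously satisfied; just trace and return without waiting.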
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4143 "bounds [%lld,%lld]\n", 4144 gtid, vec[0], lo, up)); 4145 return; 4146 } 4147 iter_number = vec[0] - lo; 4148 } else if (st > 0) { 4149 if (vec[0] < lo || vec[0] > up) { 4150 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4151 "bounds [%lld,%lld]\n", 4152 gtid, vec[0], lo, up)); 4153 return; 4154 } 4155 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4156 } else { // negative increment 4157 if (vec[0] > lo || vec[0] < up) { 4158 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4159 "bounds [%lld,%lld]\n", 4160 gtid, vec[0], lo, up)); 4161 return; 4162 } 4163 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4164 } 4165 #if OMPT_SUPPORT && OMPT_OPTIONAL 4166 deps[0].variable.value = iter_number; 4167 deps[0].dependence_type = ompt_dependence_type_sink; 4168 #endif 4169 for (i = 1; i < num_dims; ++i) { 4170 kmp_int64 iter, ln; 4171 size_t j = i * 4; 4172 ln = pr_buf->th_doacross_info[j + 1]; 4173 lo = pr_buf->th_doacross_info[j + 2]; 4174 up = pr_buf->th_doacross_info[j + 3]; 4175 st = pr_buf->th_doacross_info[j + 4]; 4176 if (st == 1) { 4177 if (vec[i] < lo || vec[i] > up) { 4178 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4179 "bounds [%lld,%lld]\n", 4180 gtid, vec[i], lo, up)); 4181 return; 4182 } 4183 iter = vec[i] - lo; 4184 } else if (st > 0) { 4185 if (vec[i] < lo || vec[i] > up) { 4186 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4187 "bounds [%lld,%lld]\n", 4188 gtid, vec[i], lo, up)); 4189 return; 4190 } 4191 iter = (kmp_uint64)(vec[i] - lo) / st; 4192 } else { // st < 0 4193 if (vec[i] > lo || vec[i] < up) { 4194 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4195 "bounds [%lld,%lld]\n", 4196 gtid, vec[i], lo, up)); 4197 return; 4198 } 4199 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4200 } 4201 iter_number = iter + ln * iter_number; 4202 #if OMPT_SUPPORT && OMPT_OPTIONAL 4203 deps[i].variable.value = iter; 4204 deps[i].dependence_type = ompt_dependence_type_sink; 4205 #endif 4206 } 4207 shft = iter_number % 32; // use 32-bit granularity 4208 iter_number >>= 5; // divided by 32 4209 flag = 1 << shft; 4210 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4211 KMP_YIELD(TRUE); 4212 } 4213 KMP_MB(); 4214 #if OMPT_SUPPORT && OMPT_OPTIONAL 4215 if (ompt_enabled.ompt_callback_dependences) { 4216 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4217 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4218 } 4219 #endif 4220 KA_TRACE(20, 4221 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4222 gtid, (iter_number << 5) + shft)); 4223 } 4224 4225 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4226 __kmp_assert_valid_gtid(gtid); 4227 kmp_int64 shft; 4228 size_t num_dims, i; 4229 kmp_uint32 flag; 4230 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4231 kmp_info_t *th = __kmp_threads[gtid]; 4232 kmp_team_t *team = th->th.th_team; 4233 kmp_disp_t *pr_buf; 4234 kmp_int64 lo, st; 4235 4236 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4237 if (team->t.t_serialized) { 4238 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4239 return; // no dependencies if team is serialized 4240 } 4241 4242 // calculate sequential iteration number (same as in "wait" but no 4243 // out-of-bounds checks) 4244 pr_buf = th->th.th_dispatch; 4245 
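// th_doacross_info was allocated and filled by __kmpc_doacross_init() for the
// current loop nest; see the layout description there.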
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4246 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4247 lo = pr_buf->th_doacross_info[2]; 4248 st = pr_buf->th_doacross_info[4]; 4249 #if OMPT_SUPPORT && OMPT_OPTIONAL 4250 ompt_dependence_t deps[num_dims]; 4251 #endif 4252 if (st == 1) { // most common case 4253 iter_number = vec[0] - lo; 4254 } else if (st > 0) { 4255 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4256 } else { // negative increment 4257 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4258 } 4259 #if OMPT_SUPPORT && OMPT_OPTIONAL 4260 deps[0].variable.value = iter_number; 4261 deps[0].dependence_type = ompt_dependence_type_source; 4262 #endif 4263 for (i = 1; i < num_dims; ++i) { 4264 kmp_int64 iter, ln; 4265 size_t j = i * 4; 4266 ln = pr_buf->th_doacross_info[j + 1]; 4267 lo = pr_buf->th_doacross_info[j + 2]; 4268 st = pr_buf->th_doacross_info[j + 4]; 4269 if (st == 1) { 4270 iter = vec[i] - lo; 4271 } else if (st > 0) { 4272 iter = (kmp_uint64)(vec[i] - lo) / st; 4273 } else { // st < 0 4274 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4275 } 4276 iter_number = iter + ln * iter_number; 4277 #if OMPT_SUPPORT && OMPT_OPTIONAL 4278 deps[i].variable.value = iter; 4279 deps[i].dependence_type = ompt_dependence_type_source; 4280 #endif 4281 } 4282 #if OMPT_SUPPORT && OMPT_OPTIONAL 4283 if (ompt_enabled.ompt_callback_dependences) { 4284 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4285 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4286 } 4287 #endif 4288 shft = iter_number % 32; // use 32-bit granularity 4289 iter_number >>= 5; // divided by 32 4290 flag = 1 << shft; 4291 KMP_MB(); 4292 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4293 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4294 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4295 (iter_number << 5) + shft)); 4296 } 4297 4298 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4299 __kmp_assert_valid_gtid(gtid); 4300 kmp_int32 num_done; 4301 kmp_info_t *th = __kmp_threads[gtid]; 4302 kmp_team_t *team = th->th.th_team; 4303 kmp_disp_t *pr_buf = th->th.th_dispatch; 4304 4305 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4306 if (team->t.t_serialized) { 4307 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4308 return; // nothing to do 4309 } 4310 num_done = 4311 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4312 if (num_done == th->th.th_team_nproc) { 4313 // we are the last thread, need to free shared resources 4314 int idx = pr_buf->th_doacross_buf_idx - 1; 4315 dispatch_shared_info_t *sh_buf = 4316 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4317 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4318 (kmp_int64)&sh_buf->doacross_num_done); 4319 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4320 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4321 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4322 sh_buf->doacross_flags = NULL; 4323 sh_buf->doacross_num_done = 0; 4324 sh_buf->doacross_buf_idx += 4325 __kmp_dispatch_num_buffers; // free buffer for future re-use 4326 } 4327 // free private resources (need to keep buffer index forever) 4328 pr_buf->th_doacross_flags = NULL; 4329 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4330 pr_buf->th_doacross_info = NULL; 4331 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4332 } 4333 4334 /* omp_alloc/omp_calloc/omp_free only defined for 
C/C++, not for Fortran */ 4335 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4336 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4337 } 4338 4339 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4340 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); 4341 } 4342 4343 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4344 omp_allocator_handle_t free_allocator) { 4345 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4346 free_allocator); 4347 } 4348 4349 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4350 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4351 } 4352 4353 int __kmpc_get_target_offload(void) { 4354 if (!__kmp_init_serial) { 4355 __kmp_serial_initialize(); 4356 } 4357 return __kmp_target_offload; 4358 } 4359 4360 int __kmpc_pause_resource(kmp_pause_status_t level) { 4361 if (!__kmp_init_serial) { 4362 return 1; // Can't pause if runtime is not initialized 4363 } 4364 return __kmp_pause_resource(level); 4365 } 4366 4367 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4368 if (!__kmp_init_serial) 4369 __kmp_serial_initialize(); 4370 4371 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4372 4373 #if OMPT_SUPPORT 4374 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4375 ompt_callbacks.ompt_callback(ompt_callback_error)( 4376 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4377 OMPT_GET_RETURN_ADDRESS(0)); 4378 } 4379 #endif // OMPT_SUPPORT 4380 4381 char *src_loc; 4382 if (loc && loc->psource) { 4383 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4384 src_loc = 4385 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col); 4386 __kmp_str_loc_free(&str_loc); 4387 } else { 4388 src_loc = __kmp_str_format("unknown"); 4389 } 4390 4391 if (severity == severity_warning) 4392 KMP_WARNING(UserDirectedWarning, src_loc, message); 4393 else 4394 KMP_FATAL(UserDirectedError, src_loc, message); 4395 4396 __kmp_str_free(&src_loc); 4397 } 4398
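/*
  Illustrative sketch, not part of the runtime and not code emitted by this
  file: roughly how a compiler might lower
      #pragma omp parallel for reduction(+ : sum)
  onto the reduce entry points above. The names priv_sum, shared_sum, red_func
  and reduction_crit are hypothetical placeholders, and loc/gtid are assumed to
  be set up by the surrounding outlined code; only the
  __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait() calls and the meaning of
  their return value (1 = combine and call the end routine, 2 = combine with
  atomics, 0 = nothing left to do) come from the functions above.

  static kmp_critical_name reduction_crit; // zero-initialized name/lock buffer

  static void red_func(void *lhs, void *rhs) { // combines two partial results
    *(int *)lhs += *(int *)rhs;
  }

  // ... inside the outlined parallel region, after the worksharing loop has
  // accumulated this thread's partial result into priv_sum ...
  switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(int), &priv_sum, red_func,
                               &reduction_crit)) {
  case 1: // combine into the shared variable, then close the reduce block
    shared_sum += priv_sum;
    __kmpc_end_reduce_nowait(loc, gtid, &reduction_crit);
    break;
  case 2: // atomic method: every thread adds its part atomically;
          // code gen does not call __kmpc_end_reduce_nowait() in this case
    // (atomic add of priv_sum into shared_sum goes here)
    break;
  default: // 0: other workers of a tree reduction have nothing left to do
    break;
  }
*/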