1 /* 2 * kmp_csupport.cpp -- kfront linkage support for OpenMP. 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 #define __KMP_IMP 14 #include "omp.h" /* extern "C" declarations of user-visible routines */ 15 #include "kmp.h" 16 #include "kmp_error.h" 17 #include "kmp_i18n.h" 18 #include "kmp_itt.h" 19 #include "kmp_lock.h" 20 #include "kmp_stats.h" 21 #include "ompt-specific.h" 22 23 #define MAX_MESSAGE 512 24 25 // flags will be used in future, e.g. to implement openmp_strict library 26 // restrictions 27 28 /*! 29 * @ingroup STARTUP_SHUTDOWN 30 * @param loc in source location information 31 * @param flags in for future use (currently ignored) 32 * 33 * Initialize the runtime library. This call is optional; if it is not made then 34 * it will be implicitly called by attempts to use other library functions. 35 */ 36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) { 37 // By default __kmpc_begin() is no-op. 38 char *env; 39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL && 40 __kmp_str_match_true(env)) { 41 __kmp_middle_initialize(); 42 __kmp_assign_root_init_mask(); 43 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n")); 44 } else if (__kmp_ignore_mppbeg() == FALSE) { 45 // By default __kmp_ignore_mppbeg() returns TRUE. 46 __kmp_internal_begin(); 47 KC_TRACE(10, ("__kmpc_begin: called\n")); 48 } 49 } 50 51 /*! 52 * @ingroup STARTUP_SHUTDOWN 53 * @param loc source location information 54 * 55 * Shutdown the runtime library. This is also optional, and even if called will 56 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to 57 * zero. 58 */ 59 void __kmpc_end(ident_t *loc) { 60 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() 61 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND 62 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() 63 // returns FALSE and __kmpc_end() will unregister this root (it can cause 64 // library shut down). 65 if (__kmp_ignore_mppend() == FALSE) { 66 KC_TRACE(10, ("__kmpc_end: called\n")); 67 KA_TRACE(30, ("__kmpc_end\n")); 68 69 __kmp_internal_end_thread(-1); 70 } 71 #if KMP_OS_WINDOWS && OMPT_SUPPORT 72 // Normal exit process on Windows does not allow worker threads of the final 73 // parallel region to finish reporting their events, so shutting down the 74 // library here fixes the issue at least for the cases where __kmpc_end() is 75 // placed properly. 76 if (ompt_enabled.enabled) 77 __kmp_internal_end_library(__kmp_gtid_get_specific()); 78 #endif 79 } 80 81 /*! 82 @ingroup THREAD_STATES 83 @param loc Source location information. 84 @return The global thread index of the active thread. 85 86 This function can be called in any context. 87 88 If the runtime has ony been entered at the outermost level from a 89 single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is 90 that which would be returned by omp_get_thread_num() in the outermost 91 active parallel construct. (Or zero if there is no active parallel 92 construct, since the primary thread is necessarily thread zero). 93 94 If multiple non-OpenMP threads all enter an OpenMP construct then this 95 will be a unique thread identifier among all the threads created by 96 the OpenMP runtime (but the value cannot be defined in terms of 97 OpenMP thread ids returned by omp_get_thread_num()). 98 */ 99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 100 kmp_int32 gtid = __kmp_entry_gtid(); 101 102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 103 104 return gtid; 105 } 106 107 /*! 108 @ingroup THREAD_STATES 109 @param loc Source location information. 110 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 111 112 This function can be called in any context. 113 It returns the total number of threads under the control of the OpenMP runtime. 114 That is not a number that can be determined by any OpenMP standard calls, since 115 the library may be called from more than one non-OpenMP thread, and this 116 reflects the total over all such calls. Similarly the runtime maintains 117 underlying threads even when they are not active (since the cost of creating 118 and destroying OS threads is high), this call counts all such threads even if 119 they are not waiting for work. 120 */ 121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 122 KC_TRACE(10, 123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 124 125 return TCR_4(__kmp_all_nth); 126 } 127 128 /*! 129 @ingroup THREAD_STATES 130 @param loc Source location information. 131 @return The thread number of the calling thread in the innermost active parallel 132 construct. 133 */ 134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 136 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 137 } 138 139 /*! 140 @ingroup THREAD_STATES 141 @param loc Source location information. 142 @return The number of threads in the innermost active parallel construct. 143 */ 144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 146 147 return __kmp_entry_thread()->th.th_team->t.t_nproc; 148 } 149 150 /*! 151 * @ingroup DEPRECATED 152 * @param loc location description 153 * 154 * This function need not be called. It always returns TRUE. 155 */ 156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 157 #ifndef KMP_DEBUG 158 159 return TRUE; 160 161 #else 162 163 const char *semi2; 164 const char *semi3; 165 int line_no; 166 167 if (__kmp_par_range == 0) { 168 return TRUE; 169 } 170 semi2 = loc->psource; 171 if (semi2 == NULL) { 172 return TRUE; 173 } 174 semi2 = strchr(semi2, ';'); 175 if (semi2 == NULL) { 176 return TRUE; 177 } 178 semi2 = strchr(semi2 + 1, ';'); 179 if (semi2 == NULL) { 180 return TRUE; 181 } 182 if (__kmp_par_range_filename[0]) { 183 const char *name = semi2 - 1; 184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 185 name--; 186 } 187 if ((*name == '/') || (*name == ';')) { 188 name++; 189 } 190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 191 return __kmp_par_range < 0; 192 } 193 } 194 semi3 = strchr(semi2 + 1, ';'); 195 if (__kmp_par_range_routine[0]) { 196 if ((semi3 != NULL) && (semi3 > semi2) && 197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 198 return __kmp_par_range < 0; 199 } 200 } 201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 203 return __kmp_par_range > 0; 204 } 205 return __kmp_par_range < 0; 206 } 207 return TRUE; 208 209 #endif /* KMP_DEBUG */ 210 } 211 212 /*! 213 @ingroup THREAD_STATES 214 @param loc Source location information. 215 @return 1 if this thread is executing inside an active parallel region, zero if 216 not. 217 */ 218 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 219 return __kmp_entry_thread()->th.th_root->r.r_active; 220 } 221 222 /*! 223 @ingroup PARALLEL 224 @param loc source location information 225 @param global_tid global thread number 226 @param num_threads number of threads requested for this parallel construct 227 228 Set the number of threads to be used by the next fork spawned by this thread. 229 This call is only required if the parallel construct has a `num_threads` clause. 230 */ 231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 232 kmp_int32 num_threads) { 233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 234 global_tid, num_threads)); 235 __kmp_assert_valid_gtid(global_tid); 236 __kmp_push_num_threads(loc, global_tid, num_threads); 237 } 238 239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 241 /* the num_threads are automatically popped */ 242 } 243 244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 245 kmp_int32 proc_bind) { 246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 247 proc_bind)); 248 __kmp_assert_valid_gtid(global_tid); 249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 250 } 251 252 /*! 253 @ingroup PARALLEL 254 @param loc source location information 255 @param argc total number of arguments in the ellipsis 256 @param microtask pointer to callback routine consisting of outlined parallel 257 construct 258 @param ... pointers to shared variables that aren't global 259 260 Do the actual fork and call the microtask in the relevant number of threads. 261 */ 262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { 263 int gtid = __kmp_entry_gtid(); 264 265 #if (KMP_STATS_ENABLED) 266 // If we were in a serial region, then stop the serial timer, record 267 // the event, and start parallel region timer 268 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 269 if (previous_state == stats_state_e::SERIAL_REGION) { 270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 271 } else { 272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 273 } 274 int inParallel = __kmpc_in_parallel(loc); 275 if (inParallel) { 276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 277 } else { 278 KMP_COUNT_BLOCK(OMP_PARALLEL); 279 } 280 #endif 281 282 // maybe to save thr_state is enough here 283 { 284 va_list ap; 285 va_start(ap, microtask); 286 287 #if OMPT_SUPPORT 288 ompt_frame_t *ompt_frame; 289 if (ompt_enabled.enabled) { 290 kmp_info_t *master_th = __kmp_threads[gtid]; 291 kmp_team_t *parent_team = master_th->th.th_team; 292 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 293 if (lwt) 294 ompt_frame = &(lwt->ompt_task_info.frame); 295 else { 296 int tid = __kmp_tid_from_gtid(gtid); 297 ompt_frame = &( 298 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 299 } 300 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 301 } 302 OMPT_STORE_RETURN_ADDRESS(gtid); 303 #endif 304 305 #if INCLUDE_SSC_MARKS 306 SSC_MARK_FORKING(); 307 #endif 308 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 309 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 310 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 311 kmp_va_addr_of(ap)); 312 #if INCLUDE_SSC_MARKS 313 SSC_MARK_JOINING(); 314 #endif 315 __kmp_join_call(loc, gtid 316 #if OMPT_SUPPORT 317 , 318 fork_context_intel 319 #endif 320 ); 321 322 va_end(ap); 323 } 324 325 #if KMP_STATS_ENABLED 326 if (previous_state == stats_state_e::SERIAL_REGION) { 327 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 328 KMP_SET_THREAD_STATE(previous_state); 329 } else { 330 KMP_POP_PARTITIONED_TIMER(); 331 } 332 #endif // KMP_STATS_ENABLED 333 } 334 335 /*! 336 @ingroup PARALLEL 337 @param loc source location information 338 @param global_tid global thread number 339 @param num_teams number of teams requested for the teams construct 340 @param num_threads number of threads per team requested for the teams construct 341 342 Set the number of teams to be used by the teams construct. 343 This call is only required if the teams construct has a `num_teams` clause 344 or a `thread_limit` clause (or both). 345 */ 346 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 347 kmp_int32 num_teams, kmp_int32 num_threads) { 348 KA_TRACE(20, 349 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 350 global_tid, num_teams, num_threads)); 351 __kmp_assert_valid_gtid(global_tid); 352 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 353 } 354 355 /*! 356 @ingroup PARALLEL 357 @param loc source location information 358 @param global_tid global thread number 359 @param num_teams_lo lower bound on number of teams requested for the teams 360 construct 361 @param num_teams_up upper bound on number of teams requested for the teams 362 construct 363 @param num_threads number of threads per team requested for the teams construct 364 365 Set the number of teams to be used by the teams construct. The number of initial 366 teams cretaed will be greater than or equal to the lower bound and less than or 367 equal to the upper bound. 368 This call is only required if the teams construct has a `num_teams` clause 369 or a `thread_limit` clause (or both). 370 */ 371 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, 372 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, 373 kmp_int32 num_threads) { 374 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d" 375 " num_teams_ub=%d num_threads=%d\n", 376 global_tid, num_teams_lb, num_teams_ub, num_threads)); 377 __kmp_assert_valid_gtid(global_tid); 378 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub, 379 num_threads); 380 } 381 382 /*! 383 @ingroup PARALLEL 384 @param loc source location information 385 @param argc total number of arguments in the ellipsis 386 @param microtask pointer to callback routine consisting of outlined teams 387 construct 388 @param ... pointers to shared variables that aren't global 389 390 Do the actual fork and call the microtask in the relevant number of threads. 391 */ 392 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 393 ...) { 394 int gtid = __kmp_entry_gtid(); 395 kmp_info_t *this_thr = __kmp_threads[gtid]; 396 va_list ap; 397 va_start(ap, microtask); 398 399 #if KMP_STATS_ENABLED 400 KMP_COUNT_BLOCK(OMP_TEAMS); 401 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 402 if (previous_state == stats_state_e::SERIAL_REGION) { 403 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 404 } else { 405 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 406 } 407 #endif 408 409 // remember teams entry point and nesting level 410 this_thr->th.th_teams_microtask = microtask; 411 this_thr->th.th_teams_level = 412 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 413 414 #if OMPT_SUPPORT 415 kmp_team_t *parent_team = this_thr->th.th_team; 416 int tid = __kmp_tid_from_gtid(gtid); 417 if (ompt_enabled.enabled) { 418 parent_team->t.t_implicit_task_taskdata[tid] 419 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 420 } 421 OMPT_STORE_RETURN_ADDRESS(gtid); 422 #endif 423 424 // check if __kmpc_push_num_teams called, set default number of teams 425 // otherwise 426 if (this_thr->th.th_teams_size.nteams == 0) { 427 __kmp_push_num_teams(loc, gtid, 0, 0); 428 } 429 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 430 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 431 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 432 433 __kmp_fork_call( 434 loc, gtid, fork_context_intel, argc, 435 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task 436 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); 437 __kmp_join_call(loc, gtid 438 #if OMPT_SUPPORT 439 , 440 fork_context_intel 441 #endif 442 ); 443 444 // Pop current CG root off list 445 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 446 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 447 this_thr->th.th_cg_roots = tmp->up; 448 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 449 " to node %p. cg_nthreads was %d\n", 450 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); 451 KMP_DEBUG_ASSERT(tmp->cg_nthreads); 452 int i = tmp->cg_nthreads--; 453 if (i == 1) { // check is we are the last thread in CG (not always the case) 454 __kmp_free(tmp); 455 } 456 // Restore current task's thread_limit from CG root 457 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 458 this_thr->th.th_current_task->td_icvs.thread_limit = 459 this_thr->th.th_cg_roots->cg_thread_limit; 460 461 this_thr->th.th_teams_microtask = NULL; 462 this_thr->th.th_teams_level = 0; 463 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; 464 va_end(ap); 465 #if KMP_STATS_ENABLED 466 if (previous_state == stats_state_e::SERIAL_REGION) { 467 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 468 KMP_SET_THREAD_STATE(previous_state); 469 } else { 470 KMP_POP_PARTITIONED_TIMER(); 471 } 472 #endif // KMP_STATS_ENABLED 473 } 474 475 // I don't think this function should ever have been exported. 476 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated 477 // openmp code ever called it, but it's been exported from the RTL for so 478 // long that I'm afraid to remove the definition. 479 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } 480 481 /*! 482 @ingroup PARALLEL 483 @param loc source location information 484 @param global_tid global thread number 485 486 Enter a serialized parallel construct. This interface is used to handle a 487 conditional parallel region, like this, 488 @code 489 #pragma omp parallel if (condition) 490 @endcode 491 when the condition is false. 492 */ 493 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 494 // The implementation is now in kmp_runtime.cpp so that it can share static 495 // functions with kmp_fork_call since the tasks to be done are similar in 496 // each case. 497 __kmp_assert_valid_gtid(global_tid); 498 #if OMPT_SUPPORT 499 OMPT_STORE_RETURN_ADDRESS(global_tid); 500 #endif 501 __kmp_serialized_parallel(loc, global_tid); 502 } 503 504 /*! 505 @ingroup PARALLEL 506 @param loc source location information 507 @param global_tid global thread number 508 509 Leave a serialized parallel construct. 510 */ 511 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 512 kmp_internal_control_t *top; 513 kmp_info_t *this_thr; 514 kmp_team_t *serial_team; 515 516 KC_TRACE(10, 517 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 518 519 /* skip all this code for autopar serialized loops since it results in 520 unacceptable overhead */ 521 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 522 return; 523 524 // Not autopar code 525 __kmp_assert_valid_gtid(global_tid); 526 if (!TCR_4(__kmp_init_parallel)) 527 __kmp_parallel_initialize(); 528 529 __kmp_resume_if_soft_paused(); 530 531 this_thr = __kmp_threads[global_tid]; 532 serial_team = this_thr->th.th_serial_team; 533 534 kmp_task_team_t *task_team = this_thr->th.th_task_team; 535 // we need to wait for the proxy tasks before finishing the thread 536 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 537 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 538 539 KMP_MB(); 540 KMP_DEBUG_ASSERT(serial_team); 541 KMP_ASSERT(serial_team->t.t_serialized); 542 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 543 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 544 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 545 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 546 547 #if OMPT_SUPPORT 548 if (ompt_enabled.enabled && 549 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 550 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 551 if (ompt_enabled.ompt_callback_implicit_task) { 552 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 553 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 554 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 555 } 556 557 // reset clear the task id only after unlinking the task 558 ompt_data_t *parent_task_data; 559 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 560 561 if (ompt_enabled.ompt_callback_parallel_end) { 562 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 563 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 564 ompt_parallel_invoker_program | ompt_parallel_team, 565 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 566 } 567 __ompt_lw_taskteam_unlink(this_thr); 568 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 569 } 570 #endif 571 572 /* If necessary, pop the internal control stack values and replace the team 573 * values */ 574 top = serial_team->t.t_control_stack_top; 575 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 576 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 577 serial_team->t.t_control_stack_top = top->next; 578 __kmp_free(top); 579 } 580 581 /* pop dispatch buffers stack */ 582 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 583 { 584 dispatch_private_info_t *disp_buffer = 585 serial_team->t.t_dispatch->th_disp_buffer; 586 serial_team->t.t_dispatch->th_disp_buffer = 587 serial_team->t.t_dispatch->th_disp_buffer->next; 588 __kmp_free(disp_buffer); 589 } 590 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 591 592 --serial_team->t.t_serialized; 593 if (serial_team->t.t_serialized == 0) { 594 595 /* return to the parallel section */ 596 597 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 598 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 599 __kmp_clear_x87_fpu_status_word(); 600 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 601 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 602 } 603 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 604 605 __kmp_pop_current_task_from_thread(this_thr); 606 #if OMPD_SUPPORT 607 if (ompd_state & OMPD_ENABLE_BP) 608 ompd_bp_parallel_end(); 609 #endif 610 611 this_thr->th.th_team = serial_team->t.t_parent; 612 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 613 614 /* restore values cached in the thread */ 615 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 616 this_thr->th.th_team_master = 617 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 618 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 619 620 /* TODO the below shouldn't need to be adjusted for serialized teams */ 621 this_thr->th.th_dispatch = 622 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 623 624 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 625 this_thr->th.th_current_task->td_flags.executing = 1; 626 627 if (__kmp_tasking_mode != tskm_immediate_exec) { 628 // Copy the task team from the new child / old parent team to the thread. 629 this_thr->th.th_task_team = 630 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 631 KA_TRACE(20, 632 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 633 "team %p\n", 634 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 635 } 636 } else { 637 if (__kmp_tasking_mode != tskm_immediate_exec) { 638 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 639 "depth of serial team %p to %d\n", 640 global_tid, serial_team, serial_team->t.t_serialized)); 641 } 642 } 643 644 serial_team->t.t_level--; 645 if (__kmp_env_consistency_check) 646 __kmp_pop_parallel(global_tid, NULL); 647 #if OMPT_SUPPORT 648 if (ompt_enabled.enabled) 649 this_thr->th.ompt_thread_info.state = 650 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 651 : ompt_state_work_parallel); 652 #endif 653 } 654 655 /*! 656 @ingroup SYNCHRONIZATION 657 @param loc source location information. 658 659 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 660 depending on the memory ordering convention obeyed by the compiler 661 even that may not be necessary). 662 */ 663 void __kmpc_flush(ident_t *loc) { 664 KC_TRACE(10, ("__kmpc_flush: called\n")); 665 666 /* need explicit __mf() here since use volatile instead in library */ 667 KMP_MB(); /* Flush all pending memory write invalidates. */ 668 669 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 670 #if KMP_MIC 671 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 672 // We shouldn't need it, though, since the ABI rules require that 673 // * If the compiler generates NGO stores it also generates the fence 674 // * If users hand-code NGO stores they should insert the fence 675 // therefore no incomplete unordered stores should be visible. 676 #else 677 // C74404 678 // This is to address non-temporal store instructions (sfence needed). 679 // The clflush instruction is addressed either (mfence needed). 680 // Probably the non-temporal load monvtdqa instruction should also be 681 // addressed. 682 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2. 683 if (!__kmp_cpuinfo.initialized) { 684 __kmp_query_cpuid(&__kmp_cpuinfo); 685 } 686 if (!__kmp_cpuinfo.flags.sse2) { 687 // CPU cannot execute SSE2 instructions. 688 } else { 689 #if KMP_COMPILER_ICC 690 _mm_mfence(); 691 #elif KMP_COMPILER_MSVC 692 MemoryBarrier(); 693 #else 694 __sync_synchronize(); 695 #endif // KMP_COMPILER_ICC 696 } 697 #endif // KMP_MIC 698 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ 699 KMP_ARCH_RISCV64) 700 // Nothing to see here move along 701 #elif KMP_ARCH_PPC64 702 // Nothing needed here (we have a real MB above). 703 #else 704 #error Unknown or unsupported architecture 705 #endif 706 707 #if OMPT_SUPPORT && OMPT_OPTIONAL 708 if (ompt_enabled.ompt_callback_flush) { 709 ompt_callbacks.ompt_callback(ompt_callback_flush)( 710 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 711 } 712 #endif 713 } 714 715 /* -------------------------------------------------------------------------- */ 716 /*! 717 @ingroup SYNCHRONIZATION 718 @param loc source location information 719 @param global_tid thread id. 720 721 Execute a barrier. 722 */ 723 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 724 KMP_COUNT_BLOCK(OMP_BARRIER); 725 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 726 __kmp_assert_valid_gtid(global_tid); 727 728 if (!TCR_4(__kmp_init_parallel)) 729 __kmp_parallel_initialize(); 730 731 __kmp_resume_if_soft_paused(); 732 733 if (__kmp_env_consistency_check) { 734 if (loc == 0) { 735 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 736 } 737 __kmp_check_barrier(global_tid, ct_barrier, loc); 738 } 739 740 #if OMPT_SUPPORT 741 ompt_frame_t *ompt_frame; 742 if (ompt_enabled.enabled) { 743 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 744 if (ompt_frame->enter_frame.ptr == NULL) 745 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 746 } 747 OMPT_STORE_RETURN_ADDRESS(global_tid); 748 #endif 749 __kmp_threads[global_tid]->th.th_ident = loc; 750 // TODO: explicit barrier_wait_id: 751 // this function is called when 'barrier' directive is present or 752 // implicit barrier at the end of a worksharing construct. 753 // 1) better to add a per-thread barrier counter to a thread data structure 754 // 2) set to 0 when a new team is created 755 // 4) no sync is required 756 757 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 758 #if OMPT_SUPPORT && OMPT_OPTIONAL 759 if (ompt_enabled.enabled) { 760 ompt_frame->enter_frame = ompt_data_none; 761 } 762 #endif 763 } 764 765 /* The BARRIER for a MASTER section is always explicit */ 766 /*! 767 @ingroup WORK_SHARING 768 @param loc source location information. 769 @param global_tid global thread number . 770 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 771 */ 772 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 773 int status = 0; 774 775 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 776 __kmp_assert_valid_gtid(global_tid); 777 778 if (!TCR_4(__kmp_init_parallel)) 779 __kmp_parallel_initialize(); 780 781 __kmp_resume_if_soft_paused(); 782 783 if (KMP_MASTER_GTID(global_tid)) { 784 KMP_COUNT_BLOCK(OMP_MASTER); 785 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 786 status = 1; 787 } 788 789 #if OMPT_SUPPORT && OMPT_OPTIONAL 790 if (status) { 791 if (ompt_enabled.ompt_callback_masked) { 792 kmp_info_t *this_thr = __kmp_threads[global_tid]; 793 kmp_team_t *team = this_thr->th.th_team; 794 795 int tid = __kmp_tid_from_gtid(global_tid); 796 ompt_callbacks.ompt_callback(ompt_callback_masked)( 797 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 798 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 799 OMPT_GET_RETURN_ADDRESS(0)); 800 } 801 } 802 #endif 803 804 if (__kmp_env_consistency_check) { 805 #if KMP_USE_DYNAMIC_LOCK 806 if (status) 807 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 808 else 809 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 810 #else 811 if (status) 812 __kmp_push_sync(global_tid, ct_master, loc, NULL); 813 else 814 __kmp_check_sync(global_tid, ct_master, loc, NULL); 815 #endif 816 } 817 818 return status; 819 } 820 821 /*! 822 @ingroup WORK_SHARING 823 @param loc source location information. 824 @param global_tid global thread number . 825 826 Mark the end of a <tt>master</tt> region. This should only be called by the 827 thread that executes the <tt>master</tt> region. 828 */ 829 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 830 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 831 __kmp_assert_valid_gtid(global_tid); 832 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 833 KMP_POP_PARTITIONED_TIMER(); 834 835 #if OMPT_SUPPORT && OMPT_OPTIONAL 836 kmp_info_t *this_thr = __kmp_threads[global_tid]; 837 kmp_team_t *team = this_thr->th.th_team; 838 if (ompt_enabled.ompt_callback_masked) { 839 int tid = __kmp_tid_from_gtid(global_tid); 840 ompt_callbacks.ompt_callback(ompt_callback_masked)( 841 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 842 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 843 OMPT_GET_RETURN_ADDRESS(0)); 844 } 845 #endif 846 847 if (__kmp_env_consistency_check) { 848 if (KMP_MASTER_GTID(global_tid)) 849 __kmp_pop_sync(global_tid, ct_master, loc); 850 } 851 } 852 853 /*! 854 @ingroup WORK_SHARING 855 @param loc source location information. 856 @param global_tid global thread number. 857 @param filter result of evaluating filter clause on thread global_tid, or zero 858 if no filter clause present 859 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. 860 */ 861 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { 862 int status = 0; 863 int tid; 864 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid)); 865 __kmp_assert_valid_gtid(global_tid); 866 867 if (!TCR_4(__kmp_init_parallel)) 868 __kmp_parallel_initialize(); 869 870 __kmp_resume_if_soft_paused(); 871 872 tid = __kmp_tid_from_gtid(global_tid); 873 if (tid == filter) { 874 KMP_COUNT_BLOCK(OMP_MASKED); 875 KMP_PUSH_PARTITIONED_TIMER(OMP_masked); 876 status = 1; 877 } 878 879 #if OMPT_SUPPORT && OMPT_OPTIONAL 880 if (status) { 881 if (ompt_enabled.ompt_callback_masked) { 882 kmp_info_t *this_thr = __kmp_threads[global_tid]; 883 kmp_team_t *team = this_thr->th.th_team; 884 ompt_callbacks.ompt_callback(ompt_callback_masked)( 885 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 886 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 887 OMPT_GET_RETURN_ADDRESS(0)); 888 } 889 } 890 #endif 891 892 if (__kmp_env_consistency_check) { 893 #if KMP_USE_DYNAMIC_LOCK 894 if (status) 895 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0); 896 else 897 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0); 898 #else 899 if (status) 900 __kmp_push_sync(global_tid, ct_masked, loc, NULL); 901 else 902 __kmp_check_sync(global_tid, ct_masked, loc, NULL); 903 #endif 904 } 905 906 return status; 907 } 908 909 /*! 910 @ingroup WORK_SHARING 911 @param loc source location information. 912 @param global_tid global thread number . 913 914 Mark the end of a <tt>masked</tt> region. This should only be called by the 915 thread that executes the <tt>masked</tt> region. 916 */ 917 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { 918 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid)); 919 __kmp_assert_valid_gtid(global_tid); 920 KMP_POP_PARTITIONED_TIMER(); 921 922 #if OMPT_SUPPORT && OMPT_OPTIONAL 923 kmp_info_t *this_thr = __kmp_threads[global_tid]; 924 kmp_team_t *team = this_thr->th.th_team; 925 if (ompt_enabled.ompt_callback_masked) { 926 int tid = __kmp_tid_from_gtid(global_tid); 927 ompt_callbacks.ompt_callback(ompt_callback_masked)( 928 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 929 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 930 OMPT_GET_RETURN_ADDRESS(0)); 931 } 932 #endif 933 934 if (__kmp_env_consistency_check) { 935 __kmp_pop_sync(global_tid, ct_masked, loc); 936 } 937 } 938 939 /*! 940 @ingroup WORK_SHARING 941 @param loc source location information. 942 @param gtid global thread number. 943 944 Start execution of an <tt>ordered</tt> construct. 945 */ 946 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { 947 int cid = 0; 948 kmp_info_t *th; 949 KMP_DEBUG_ASSERT(__kmp_init_serial); 950 951 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid)); 952 __kmp_assert_valid_gtid(gtid); 953 954 if (!TCR_4(__kmp_init_parallel)) 955 __kmp_parallel_initialize(); 956 957 __kmp_resume_if_soft_paused(); 958 959 #if USE_ITT_BUILD 960 __kmp_itt_ordered_prep(gtid); 961 // TODO: ordered_wait_id 962 #endif /* USE_ITT_BUILD */ 963 964 th = __kmp_threads[gtid]; 965 966 #if OMPT_SUPPORT && OMPT_OPTIONAL 967 kmp_team_t *team; 968 ompt_wait_id_t lck; 969 void *codeptr_ra; 970 OMPT_STORE_RETURN_ADDRESS(gtid); 971 if (ompt_enabled.enabled) { 972 team = __kmp_team_from_gtid(gtid); 973 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value; 974 /* OMPT state update */ 975 th->th.ompt_thread_info.wait_id = lck; 976 th->th.ompt_thread_info.state = ompt_state_wait_ordered; 977 978 /* OMPT event callback */ 979 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 980 if (ompt_enabled.ompt_callback_mutex_acquire) { 981 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 982 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck, 983 codeptr_ra); 984 } 985 } 986 #endif 987 988 if (th->th.th_dispatch->th_deo_fcn != 0) 989 (*th->th.th_dispatch->th_deo_fcn)(>id, &cid, loc); 990 else 991 __kmp_parallel_deo(>id, &cid, loc); 992 993 #if OMPT_SUPPORT && OMPT_OPTIONAL 994 if (ompt_enabled.enabled) { 995 /* OMPT state update */ 996 th->th.ompt_thread_info.state = ompt_state_work_parallel; 997 th->th.ompt_thread_info.wait_id = 0; 998 999 /* OMPT event callback */ 1000 if (ompt_enabled.ompt_callback_mutex_acquired) { 1001 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1002 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1003 } 1004 } 1005 #endif 1006 1007 #if USE_ITT_BUILD 1008 __kmp_itt_ordered_start(gtid); 1009 #endif /* USE_ITT_BUILD */ 1010 } 1011 1012 /*! 1013 @ingroup WORK_SHARING 1014 @param loc source location information. 1015 @param gtid global thread number. 1016 1017 End execution of an <tt>ordered</tt> construct. 1018 */ 1019 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { 1020 int cid = 0; 1021 kmp_info_t *th; 1022 1023 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid)); 1024 __kmp_assert_valid_gtid(gtid); 1025 1026 #if USE_ITT_BUILD 1027 __kmp_itt_ordered_end(gtid); 1028 // TODO: ordered_wait_id 1029 #endif /* USE_ITT_BUILD */ 1030 1031 th = __kmp_threads[gtid]; 1032 1033 if (th->th.th_dispatch->th_dxo_fcn != 0) 1034 (*th->th.th_dispatch->th_dxo_fcn)(>id, &cid, loc); 1035 else 1036 __kmp_parallel_dxo(>id, &cid, loc); 1037 1038 #if OMPT_SUPPORT && OMPT_OPTIONAL 1039 OMPT_STORE_RETURN_ADDRESS(gtid); 1040 if (ompt_enabled.ompt_callback_mutex_released) { 1041 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1042 ompt_mutex_ordered, 1043 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid) 1044 ->t.t_ordered.dt.t_value, 1045 OMPT_LOAD_RETURN_ADDRESS(gtid)); 1046 } 1047 #endif 1048 } 1049 1050 #if KMP_USE_DYNAMIC_LOCK 1051 1052 static __forceinline void 1053 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc, 1054 kmp_int32 gtid, kmp_indirect_locktag_t tag) { 1055 // Pointer to the allocated indirect lock is written to crit, while indexing 1056 // is ignored. 1057 void *idx; 1058 kmp_indirect_lock_t **lck; 1059 lck = (kmp_indirect_lock_t **)crit; 1060 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 1061 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 1062 KMP_SET_I_LOCK_LOCATION(ilk, loc); 1063 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 1064 KA_TRACE(20, 1065 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 1066 #if USE_ITT_BUILD 1067 __kmp_itt_critical_creating(ilk->lock, loc); 1068 #endif 1069 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 1070 if (status == 0) { 1071 #if USE_ITT_BUILD 1072 __kmp_itt_critical_destroyed(ilk->lock); 1073 #endif 1074 // We don't really need to destroy the unclaimed lock here since it will be 1075 // cleaned up at program exit. 1076 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 1077 } 1078 KMP_DEBUG_ASSERT(*lck != NULL); 1079 } 1080 1081 // Fast-path acquire tas lock 1082 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 1083 { \ 1084 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1085 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1086 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1087 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1088 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 1089 kmp_uint32 spins; \ 1090 KMP_FSYNC_PREPARE(l); \ 1091 KMP_INIT_YIELD(spins); \ 1092 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1093 do { \ 1094 if (TCR_4(__kmp_nth) > \ 1095 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1096 KMP_YIELD(TRUE); \ 1097 } else { \ 1098 KMP_YIELD_SPIN(spins); \ 1099 } \ 1100 __kmp_spin_backoff(&backoff); \ 1101 } while ( \ 1102 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1103 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1104 } \ 1105 KMP_FSYNC_ACQUIRED(l); \ 1106 } 1107 1108 // Fast-path test tas lock 1109 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1110 { \ 1111 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1112 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1113 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1114 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1115 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1116 } 1117 1118 // Fast-path release tas lock 1119 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1120 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1121 1122 #if KMP_USE_FUTEX 1123 1124 #include <sys/syscall.h> 1125 #include <unistd.h> 1126 #ifndef FUTEX_WAIT 1127 #define FUTEX_WAIT 0 1128 #endif 1129 #ifndef FUTEX_WAKE 1130 #define FUTEX_WAKE 1 1131 #endif 1132 1133 // Fast-path acquire futex lock 1134 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1135 { \ 1136 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1137 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1138 KMP_MB(); \ 1139 KMP_FSYNC_PREPARE(ftx); \ 1140 kmp_int32 poll_val; \ 1141 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1142 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1143 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1144 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1145 if (!cond) { \ 1146 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1147 poll_val | \ 1148 KMP_LOCK_BUSY(1, futex))) { \ 1149 continue; \ 1150 } \ 1151 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1152 } \ 1153 kmp_int32 rc; \ 1154 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1155 NULL, NULL, 0)) != 0) { \ 1156 continue; \ 1157 } \ 1158 gtid_code |= 1; \ 1159 } \ 1160 KMP_FSYNC_ACQUIRED(ftx); \ 1161 } 1162 1163 // Fast-path test futex lock 1164 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1165 { \ 1166 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1167 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1168 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1169 KMP_FSYNC_ACQUIRED(ftx); \ 1170 rc = TRUE; \ 1171 } else { \ 1172 rc = FALSE; \ 1173 } \ 1174 } 1175 1176 // Fast-path release futex lock 1177 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1178 { \ 1179 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1180 KMP_MB(); \ 1181 KMP_FSYNC_RELEASING(ftx); \ 1182 kmp_int32 poll_val = \ 1183 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1184 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1185 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1186 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1187 } \ 1188 KMP_MB(); \ 1189 KMP_YIELD_OVERSUB(); \ 1190 } 1191 1192 #endif // KMP_USE_FUTEX 1193 1194 #else // KMP_USE_DYNAMIC_LOCK 1195 1196 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1197 ident_t const *loc, 1198 kmp_int32 gtid) { 1199 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1200 1201 // Because of the double-check, the following load doesn't need to be volatile 1202 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1203 1204 if (lck == NULL) { 1205 void *idx; 1206 1207 // Allocate & initialize the lock. 1208 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1209 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1210 __kmp_init_user_lock_with_checks(lck); 1211 __kmp_set_user_lock_location(lck, loc); 1212 #if USE_ITT_BUILD 1213 __kmp_itt_critical_creating(lck); 1214 // __kmp_itt_critical_creating() should be called *before* the first usage 1215 // of underlying lock. It is the only place where we can guarantee it. There 1216 // are chances the lock will destroyed with no usage, but it is not a 1217 // problem, because this is not real event seen by user but rather setting 1218 // name for object (lock). See more details in kmp_itt.h. 1219 #endif /* USE_ITT_BUILD */ 1220 1221 // Use a cmpxchg instruction to slam the start of the critical section with 1222 // the lock pointer. If another thread beat us to it, deallocate the lock, 1223 // and use the lock that the other thread allocated. 1224 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1225 1226 if (status == 0) { 1227 // Deallocate the lock and reload the value. 1228 #if USE_ITT_BUILD 1229 __kmp_itt_critical_destroyed(lck); 1230 // Let ITT know the lock is destroyed and the same memory location may be reused 1231 // for another purpose. 1232 #endif /* USE_ITT_BUILD */ 1233 __kmp_destroy_user_lock_with_checks(lck); 1234 __kmp_user_lock_free(&idx, gtid, lck); 1235 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1236 KMP_DEBUG_ASSERT(lck != NULL); 1237 } 1238 } 1239 return lck; 1240 } 1241 1242 #endif // KMP_USE_DYNAMIC_LOCK 1243 1244 /*! 1245 @ingroup WORK_SHARING 1246 @param loc source location information. 1247 @param global_tid global thread number. 1248 @param crit identity of the critical section. This could be a pointer to a lock 1249 associated with the critical section, or some other suitably unique value. 1250 1251 Enter code protected by a `critical` construct. 1252 This function blocks until the executing thread can enter the critical section. 1253 */ 1254 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1255 kmp_critical_name *crit) { 1256 #if KMP_USE_DYNAMIC_LOCK 1257 #if OMPT_SUPPORT && OMPT_OPTIONAL 1258 OMPT_STORE_RETURN_ADDRESS(global_tid); 1259 #endif // OMPT_SUPPORT 1260 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1261 #else 1262 KMP_COUNT_BLOCK(OMP_CRITICAL); 1263 #if OMPT_SUPPORT && OMPT_OPTIONAL 1264 ompt_state_t prev_state = ompt_state_undefined; 1265 ompt_thread_info_t ti; 1266 #endif 1267 kmp_user_lock_p lck; 1268 1269 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1270 __kmp_assert_valid_gtid(global_tid); 1271 1272 // TODO: add THR_OVHD_STATE 1273 1274 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1275 KMP_CHECK_USER_LOCK_INIT(); 1276 1277 if ((__kmp_user_lock_kind == lk_tas) && 1278 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1279 lck = (kmp_user_lock_p)crit; 1280 } 1281 #if KMP_USE_FUTEX 1282 else if ((__kmp_user_lock_kind == lk_futex) && 1283 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1284 lck = (kmp_user_lock_p)crit; 1285 } 1286 #endif 1287 else { // ticket, queuing or drdpa 1288 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1289 } 1290 1291 if (__kmp_env_consistency_check) 1292 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1293 1294 // since the critical directive binds to all threads, not just the current 1295 // team we have to check this even if we are in a serialized team. 1296 // also, even if we are the uber thread, we still have to conduct the lock, 1297 // as we have to contend with sibling threads. 1298 1299 #if USE_ITT_BUILD 1300 __kmp_itt_critical_acquiring(lck); 1301 #endif /* USE_ITT_BUILD */ 1302 #if OMPT_SUPPORT && OMPT_OPTIONAL 1303 OMPT_STORE_RETURN_ADDRESS(gtid); 1304 void *codeptr_ra = NULL; 1305 if (ompt_enabled.enabled) { 1306 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1307 /* OMPT state update */ 1308 prev_state = ti.state; 1309 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1310 ti.state = ompt_state_wait_critical; 1311 1312 /* OMPT event callback */ 1313 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1314 if (ompt_enabled.ompt_callback_mutex_acquire) { 1315 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1316 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1317 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1318 } 1319 } 1320 #endif 1321 // Value of 'crit' should be good for using as a critical_id of the critical 1322 // section directive. 1323 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1324 1325 #if USE_ITT_BUILD 1326 __kmp_itt_critical_acquired(lck); 1327 #endif /* USE_ITT_BUILD */ 1328 #if OMPT_SUPPORT && OMPT_OPTIONAL 1329 if (ompt_enabled.enabled) { 1330 /* OMPT state update */ 1331 ti.state = prev_state; 1332 ti.wait_id = 0; 1333 1334 /* OMPT event callback */ 1335 if (ompt_enabled.ompt_callback_mutex_acquired) { 1336 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1337 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1338 } 1339 } 1340 #endif 1341 KMP_POP_PARTITIONED_TIMER(); 1342 1343 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1344 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1345 #endif // KMP_USE_DYNAMIC_LOCK 1346 } 1347 1348 #if KMP_USE_DYNAMIC_LOCK 1349 1350 // Converts the given hint to an internal lock implementation 1351 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1352 #if KMP_USE_TSX 1353 #define KMP_TSX_LOCK(seq) lockseq_##seq 1354 #else 1355 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1356 #endif 1357 1358 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1359 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm) 1360 #else 1361 #define KMP_CPUINFO_RTM 0 1362 #endif 1363 1364 // Hints that do not require further logic 1365 if (hint & kmp_lock_hint_hle) 1366 return KMP_TSX_LOCK(hle); 1367 if (hint & kmp_lock_hint_rtm) 1368 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; 1369 if (hint & kmp_lock_hint_adaptive) 1370 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1371 1372 // Rule out conflicting hints first by returning the default lock 1373 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1374 return __kmp_user_lock_seq; 1375 if ((hint & omp_lock_hint_speculative) && 1376 (hint & omp_lock_hint_nonspeculative)) 1377 return __kmp_user_lock_seq; 1378 1379 // Do not even consider speculation when it appears to be contended 1380 if (hint & omp_lock_hint_contended) 1381 return lockseq_queuing; 1382 1383 // Uncontended lock without speculation 1384 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1385 return lockseq_tas; 1386 1387 // Use RTM lock for speculation 1388 if (hint & omp_lock_hint_speculative) 1389 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; 1390 1391 return __kmp_user_lock_seq; 1392 } 1393 1394 #if OMPT_SUPPORT && OMPT_OPTIONAL 1395 #if KMP_USE_DYNAMIC_LOCK 1396 static kmp_mutex_impl_t 1397 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1398 if (user_lock) { 1399 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1400 case 0: 1401 break; 1402 #if KMP_USE_FUTEX 1403 case locktag_futex: 1404 return kmp_mutex_impl_queuing; 1405 #endif 1406 case locktag_tas: 1407 return kmp_mutex_impl_spin; 1408 #if KMP_USE_TSX 1409 case locktag_hle: 1410 case locktag_rtm_spin: 1411 return kmp_mutex_impl_speculative; 1412 #endif 1413 default: 1414 return kmp_mutex_impl_none; 1415 } 1416 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1417 } 1418 KMP_ASSERT(ilock); 1419 switch (ilock->type) { 1420 #if KMP_USE_TSX 1421 case locktag_adaptive: 1422 case locktag_rtm_queuing: 1423 return kmp_mutex_impl_speculative; 1424 #endif 1425 case locktag_nested_tas: 1426 return kmp_mutex_impl_spin; 1427 #if KMP_USE_FUTEX 1428 case locktag_nested_futex: 1429 #endif 1430 case locktag_ticket: 1431 case locktag_queuing: 1432 case locktag_drdpa: 1433 case locktag_nested_ticket: 1434 case locktag_nested_queuing: 1435 case locktag_nested_drdpa: 1436 return kmp_mutex_impl_queuing; 1437 default: 1438 return kmp_mutex_impl_none; 1439 } 1440 } 1441 #else 1442 // For locks without dynamic binding 1443 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1444 switch (__kmp_user_lock_kind) { 1445 case lk_tas: 1446 return kmp_mutex_impl_spin; 1447 #if KMP_USE_FUTEX 1448 case lk_futex: 1449 #endif 1450 case lk_ticket: 1451 case lk_queuing: 1452 case lk_drdpa: 1453 return kmp_mutex_impl_queuing; 1454 #if KMP_USE_TSX 1455 case lk_hle: 1456 case lk_rtm_queuing: 1457 case lk_rtm_spin: 1458 case lk_adaptive: 1459 return kmp_mutex_impl_speculative; 1460 #endif 1461 default: 1462 return kmp_mutex_impl_none; 1463 } 1464 } 1465 #endif // KMP_USE_DYNAMIC_LOCK 1466 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1467 1468 /*! 1469 @ingroup WORK_SHARING 1470 @param loc source location information. 1471 @param global_tid global thread number. 1472 @param crit identity of the critical section. This could be a pointer to a lock 1473 associated with the critical section, or some other suitably unique value. 1474 @param hint the lock hint. 1475 1476 Enter code protected by a `critical` construct with a hint. The hint value is 1477 used to suggest a lock implementation. This function blocks until the executing 1478 thread can enter the critical section unless the hint suggests use of 1479 speculative execution and the hardware supports it. 1480 */ 1481 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1482 kmp_critical_name *crit, uint32_t hint) { 1483 KMP_COUNT_BLOCK(OMP_CRITICAL); 1484 kmp_user_lock_p lck; 1485 #if OMPT_SUPPORT && OMPT_OPTIONAL 1486 ompt_state_t prev_state = ompt_state_undefined; 1487 ompt_thread_info_t ti; 1488 // This is the case, if called from __kmpc_critical: 1489 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1490 if (!codeptr) 1491 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1492 #endif 1493 1494 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1495 __kmp_assert_valid_gtid(global_tid); 1496 1497 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1498 // Check if it is initialized. 1499 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1500 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); 1501 if (*lk == 0) { 1502 if (KMP_IS_D_LOCK(lockseq)) { 1503 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1504 KMP_GET_D_TAG(lockseq)); 1505 } else { 1506 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)); 1507 } 1508 } 1509 // Branch for accessing the actual lock object and set operation. This 1510 // branching is inevitable since this lock initialization does not follow the 1511 // normal dispatch path (lock table is not used). 1512 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1513 lck = (kmp_user_lock_p)lk; 1514 if (__kmp_env_consistency_check) { 1515 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1516 __kmp_map_hint_to_lock(hint)); 1517 } 1518 #if USE_ITT_BUILD 1519 __kmp_itt_critical_acquiring(lck); 1520 #endif 1521 #if OMPT_SUPPORT && OMPT_OPTIONAL 1522 if (ompt_enabled.enabled) { 1523 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1524 /* OMPT state update */ 1525 prev_state = ti.state; 1526 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1527 ti.state = ompt_state_wait_critical; 1528 1529 /* OMPT event callback */ 1530 if (ompt_enabled.ompt_callback_mutex_acquire) { 1531 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1532 ompt_mutex_critical, (unsigned int)hint, 1533 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1534 codeptr); 1535 } 1536 } 1537 #endif 1538 #if KMP_USE_INLINED_TAS 1539 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) { 1540 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1541 } else 1542 #elif KMP_USE_INLINED_FUTEX 1543 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) { 1544 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1545 } else 1546 #endif 1547 { 1548 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1549 } 1550 } else { 1551 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1552 lck = ilk->lock; 1553 if (__kmp_env_consistency_check) { 1554 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1555 __kmp_map_hint_to_lock(hint)); 1556 } 1557 #if USE_ITT_BUILD 1558 __kmp_itt_critical_acquiring(lck); 1559 #endif 1560 #if OMPT_SUPPORT && OMPT_OPTIONAL 1561 if (ompt_enabled.enabled) { 1562 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1563 /* OMPT state update */ 1564 prev_state = ti.state; 1565 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1566 ti.state = ompt_state_wait_critical; 1567 1568 /* OMPT event callback */ 1569 if (ompt_enabled.ompt_callback_mutex_acquire) { 1570 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1571 ompt_mutex_critical, (unsigned int)hint, 1572 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1573 codeptr); 1574 } 1575 } 1576 #endif 1577 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1578 } 1579 KMP_POP_PARTITIONED_TIMER(); 1580 1581 #if USE_ITT_BUILD 1582 __kmp_itt_critical_acquired(lck); 1583 #endif /* USE_ITT_BUILD */ 1584 #if OMPT_SUPPORT && OMPT_OPTIONAL 1585 if (ompt_enabled.enabled) { 1586 /* OMPT state update */ 1587 ti.state = prev_state; 1588 ti.wait_id = 0; 1589 1590 /* OMPT event callback */ 1591 if (ompt_enabled.ompt_callback_mutex_acquired) { 1592 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1593 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1594 } 1595 } 1596 #endif 1597 1598 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1599 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1600 } // __kmpc_critical_with_hint 1601 1602 #endif // KMP_USE_DYNAMIC_LOCK 1603 1604 /*! 1605 @ingroup WORK_SHARING 1606 @param loc source location information. 1607 @param global_tid global thread number . 1608 @param crit identity of the critical section. This could be a pointer to a lock 1609 associated with the critical section, or some other suitably unique value. 1610 1611 Leave a critical section, releasing any lock that was held during its execution. 1612 */ 1613 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1614 kmp_critical_name *crit) { 1615 kmp_user_lock_p lck; 1616 1617 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1618 1619 #if KMP_USE_DYNAMIC_LOCK 1620 int locktag = KMP_EXTRACT_D_TAG(crit); 1621 if (locktag) { 1622 lck = (kmp_user_lock_p)crit; 1623 KMP_ASSERT(lck != NULL); 1624 if (__kmp_env_consistency_check) { 1625 __kmp_pop_sync(global_tid, ct_critical, loc); 1626 } 1627 #if USE_ITT_BUILD 1628 __kmp_itt_critical_releasing(lck); 1629 #endif 1630 #if KMP_USE_INLINED_TAS 1631 if (locktag == locktag_tas && !__kmp_env_consistency_check) { 1632 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1633 } else 1634 #elif KMP_USE_INLINED_FUTEX 1635 if (locktag == locktag_futex && !__kmp_env_consistency_check) { 1636 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1637 } else 1638 #endif 1639 { 1640 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1641 } 1642 } else { 1643 kmp_indirect_lock_t *ilk = 1644 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1645 KMP_ASSERT(ilk != NULL); 1646 lck = ilk->lock; 1647 if (__kmp_env_consistency_check) { 1648 __kmp_pop_sync(global_tid, ct_critical, loc); 1649 } 1650 #if USE_ITT_BUILD 1651 __kmp_itt_critical_releasing(lck); 1652 #endif 1653 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1654 } 1655 1656 #else // KMP_USE_DYNAMIC_LOCK 1657 1658 if ((__kmp_user_lock_kind == lk_tas) && 1659 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1660 lck = (kmp_user_lock_p)crit; 1661 } 1662 #if KMP_USE_FUTEX 1663 else if ((__kmp_user_lock_kind == lk_futex) && 1664 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1665 lck = (kmp_user_lock_p)crit; 1666 } 1667 #endif 1668 else { // ticket, queuing or drdpa 1669 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1670 } 1671 1672 KMP_ASSERT(lck != NULL); 1673 1674 if (__kmp_env_consistency_check) 1675 __kmp_pop_sync(global_tid, ct_critical, loc); 1676 1677 #if USE_ITT_BUILD 1678 __kmp_itt_critical_releasing(lck); 1679 #endif /* USE_ITT_BUILD */ 1680 // Value of 'crit' should be good for using as a critical_id of the critical 1681 // section directive. 1682 __kmp_release_user_lock_with_checks(lck, global_tid); 1683 1684 #endif // KMP_USE_DYNAMIC_LOCK 1685 1686 #if OMPT_SUPPORT && OMPT_OPTIONAL 1687 /* OMPT release event triggers after lock is released; place here to trigger 1688 * for all #if branches */ 1689 OMPT_STORE_RETURN_ADDRESS(global_tid); 1690 if (ompt_enabled.ompt_callback_mutex_released) { 1691 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1692 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1693 OMPT_LOAD_RETURN_ADDRESS(0)); 1694 } 1695 #endif 1696 1697 KMP_POP_PARTITIONED_TIMER(); 1698 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1699 } 1700 1701 /*! 1702 @ingroup SYNCHRONIZATION 1703 @param loc source location information 1704 @param global_tid thread id. 1705 @return one if the thread should execute the master block, zero otherwise 1706 1707 Start execution of a combined barrier and master. The barrier is executed inside 1708 this function. 1709 */ 1710 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1711 int status; 1712 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1713 __kmp_assert_valid_gtid(global_tid); 1714 1715 if (!TCR_4(__kmp_init_parallel)) 1716 __kmp_parallel_initialize(); 1717 1718 __kmp_resume_if_soft_paused(); 1719 1720 if (__kmp_env_consistency_check) 1721 __kmp_check_barrier(global_tid, ct_barrier, loc); 1722 1723 #if OMPT_SUPPORT 1724 ompt_frame_t *ompt_frame; 1725 if (ompt_enabled.enabled) { 1726 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1727 if (ompt_frame->enter_frame.ptr == NULL) 1728 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1729 } 1730 OMPT_STORE_RETURN_ADDRESS(global_tid); 1731 #endif 1732 #if USE_ITT_NOTIFY 1733 __kmp_threads[global_tid]->th.th_ident = loc; 1734 #endif 1735 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1736 #if OMPT_SUPPORT && OMPT_OPTIONAL 1737 if (ompt_enabled.enabled) { 1738 ompt_frame->enter_frame = ompt_data_none; 1739 } 1740 #endif 1741 1742 return (status != 0) ? 0 : 1; 1743 } 1744 1745 /*! 1746 @ingroup SYNCHRONIZATION 1747 @param loc source location information 1748 @param global_tid thread id. 1749 1750 Complete the execution of a combined barrier and master. This function should 1751 only be called at the completion of the <tt>master</tt> code. Other threads will 1752 still be waiting at the barrier and this call releases them. 1753 */ 1754 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1755 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1756 __kmp_assert_valid_gtid(global_tid); 1757 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1758 } 1759 1760 /*! 1761 @ingroup SYNCHRONIZATION 1762 @param loc source location information 1763 @param global_tid thread id. 1764 @return one if the thread should execute the master block, zero otherwise 1765 1766 Start execution of a combined barrier and master(nowait) construct. 1767 The barrier is executed inside this function. 1768 There is no equivalent "end" function, since the 1769 */ 1770 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1771 kmp_int32 ret; 1772 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1773 __kmp_assert_valid_gtid(global_tid); 1774 1775 if (!TCR_4(__kmp_init_parallel)) 1776 __kmp_parallel_initialize(); 1777 1778 __kmp_resume_if_soft_paused(); 1779 1780 if (__kmp_env_consistency_check) { 1781 if (loc == 0) { 1782 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 1783 } 1784 __kmp_check_barrier(global_tid, ct_barrier, loc); 1785 } 1786 1787 #if OMPT_SUPPORT 1788 ompt_frame_t *ompt_frame; 1789 if (ompt_enabled.enabled) { 1790 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1791 if (ompt_frame->enter_frame.ptr == NULL) 1792 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1793 } 1794 OMPT_STORE_RETURN_ADDRESS(global_tid); 1795 #endif 1796 #if USE_ITT_NOTIFY 1797 __kmp_threads[global_tid]->th.th_ident = loc; 1798 #endif 1799 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1800 #if OMPT_SUPPORT && OMPT_OPTIONAL 1801 if (ompt_enabled.enabled) { 1802 ompt_frame->enter_frame = ompt_data_none; 1803 } 1804 #endif 1805 1806 ret = __kmpc_master(loc, global_tid); 1807 1808 if (__kmp_env_consistency_check) { 1809 /* there's no __kmpc_end_master called; so the (stats) */ 1810 /* actions of __kmpc_end_master are done here */ 1811 if (ret) { 1812 /* only one thread should do the pop since only */ 1813 /* one did the push (see __kmpc_master()) */ 1814 __kmp_pop_sync(global_tid, ct_master, loc); 1815 } 1816 } 1817 1818 return (ret); 1819 } 1820 1821 /* The BARRIER for a SINGLE process section is always explicit */ 1822 /*! 1823 @ingroup WORK_SHARING 1824 @param loc source location information 1825 @param global_tid global thread number 1826 @return One if this thread should execute the single construct, zero otherwise. 1827 1828 Test whether to execute a <tt>single</tt> construct. 1829 There are no implicit barriers in the two "single" calls, rather the compiler 1830 should introduce an explicit barrier if it is required. 1831 */ 1832 1833 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1834 __kmp_assert_valid_gtid(global_tid); 1835 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1836 1837 if (rc) { 1838 // We are going to execute the single statement, so we should count it. 1839 KMP_COUNT_BLOCK(OMP_SINGLE); 1840 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1841 } 1842 1843 #if OMPT_SUPPORT && OMPT_OPTIONAL 1844 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1845 kmp_team_t *team = this_thr->th.th_team; 1846 int tid = __kmp_tid_from_gtid(global_tid); 1847 1848 if (ompt_enabled.enabled) { 1849 if (rc) { 1850 if (ompt_enabled.ompt_callback_work) { 1851 ompt_callbacks.ompt_callback(ompt_callback_work)( 1852 ompt_work_single_executor, ompt_scope_begin, 1853 &(team->t.ompt_team_info.parallel_data), 1854 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1855 1, OMPT_GET_RETURN_ADDRESS(0)); 1856 } 1857 } else { 1858 if (ompt_enabled.ompt_callback_work) { 1859 ompt_callbacks.ompt_callback(ompt_callback_work)( 1860 ompt_work_single_other, ompt_scope_begin, 1861 &(team->t.ompt_team_info.parallel_data), 1862 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1863 1, OMPT_GET_RETURN_ADDRESS(0)); 1864 ompt_callbacks.ompt_callback(ompt_callback_work)( 1865 ompt_work_single_other, ompt_scope_end, 1866 &(team->t.ompt_team_info.parallel_data), 1867 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1868 1, OMPT_GET_RETURN_ADDRESS(0)); 1869 } 1870 } 1871 } 1872 #endif 1873 1874 return rc; 1875 } 1876 1877 /*! 1878 @ingroup WORK_SHARING 1879 @param loc source location information 1880 @param global_tid global thread number 1881 1882 Mark the end of a <tt>single</tt> construct. This function should 1883 only be called by the thread that executed the block of code protected 1884 by the `single` construct. 1885 */ 1886 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1887 __kmp_assert_valid_gtid(global_tid); 1888 __kmp_exit_single(global_tid); 1889 KMP_POP_PARTITIONED_TIMER(); 1890 1891 #if OMPT_SUPPORT && OMPT_OPTIONAL 1892 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1893 kmp_team_t *team = this_thr->th.th_team; 1894 int tid = __kmp_tid_from_gtid(global_tid); 1895 1896 if (ompt_enabled.ompt_callback_work) { 1897 ompt_callbacks.ompt_callback(ompt_callback_work)( 1898 ompt_work_single_executor, ompt_scope_end, 1899 &(team->t.ompt_team_info.parallel_data), 1900 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1901 OMPT_GET_RETURN_ADDRESS(0)); 1902 } 1903 #endif 1904 } 1905 1906 /*! 1907 @ingroup WORK_SHARING 1908 @param loc Source location 1909 @param global_tid Global thread id 1910 1911 Mark the end of a statically scheduled loop. 1912 */ 1913 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1914 KMP_POP_PARTITIONED_TIMER(); 1915 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1916 1917 #if OMPT_SUPPORT && OMPT_OPTIONAL 1918 if (ompt_enabled.ompt_callback_work) { 1919 ompt_work_t ompt_work_type = ompt_work_loop; 1920 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1921 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1922 // Determine workshare type 1923 if (loc != NULL) { 1924 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1925 ompt_work_type = ompt_work_loop; 1926 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1927 ompt_work_type = ompt_work_sections; 1928 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1929 ompt_work_type = ompt_work_distribute; 1930 } else { 1931 // use default set above. 1932 // a warning about this case is provided in __kmpc_for_static_init 1933 } 1934 KMP_DEBUG_ASSERT(ompt_work_type); 1935 } 1936 ompt_callbacks.ompt_callback(ompt_callback_work)( 1937 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1938 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1939 } 1940 #endif 1941 if (__kmp_env_consistency_check) 1942 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1943 } 1944 1945 // User routines which take C-style arguments (call by value) 1946 // different from the Fortran equivalent routines 1947 1948 void ompc_set_num_threads(int arg) { 1949 // !!!!! TODO: check the per-task binding 1950 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1951 } 1952 1953 void ompc_set_dynamic(int flag) { 1954 kmp_info_t *thread; 1955 1956 /* For the thread-private implementation of the internal controls */ 1957 thread = __kmp_entry_thread(); 1958 1959 __kmp_save_internal_controls(thread); 1960 1961 set__dynamic(thread, flag ? true : false); 1962 } 1963 1964 void ompc_set_nested(int flag) { 1965 kmp_info_t *thread; 1966 1967 /* For the thread-private internal controls implementation */ 1968 thread = __kmp_entry_thread(); 1969 1970 __kmp_save_internal_controls(thread); 1971 1972 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1973 } 1974 1975 void ompc_set_max_active_levels(int max_active_levels) { 1976 /* TO DO */ 1977 /* we want per-task implementation of this internal control */ 1978 1979 /* For the per-thread internal controls implementation */ 1980 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1981 } 1982 1983 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1984 // !!!!! TODO: check the per-task binding 1985 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1986 } 1987 1988 int ompc_get_ancestor_thread_num(int level) { 1989 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1990 } 1991 1992 int ompc_get_team_size(int level) { 1993 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1994 } 1995 1996 /* OpenMP 5.0 Affinity Format API */ 1997 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 1998 if (!__kmp_init_serial) { 1999 __kmp_serial_initialize(); 2000 } 2001 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2002 format, KMP_STRLEN(format) + 1); 2003 } 2004 2005 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2006 size_t format_size; 2007 if (!__kmp_init_serial) { 2008 __kmp_serial_initialize(); 2009 } 2010 format_size = KMP_STRLEN(__kmp_affinity_format); 2011 if (buffer && size) { 2012 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2013 format_size + 1); 2014 } 2015 return format_size; 2016 } 2017 2018 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2019 int gtid; 2020 if (!TCR_4(__kmp_init_middle)) { 2021 __kmp_middle_initialize(); 2022 } 2023 __kmp_assign_root_init_mask(); 2024 gtid = __kmp_get_gtid(); 2025 __kmp_aux_display_affinity(gtid, format); 2026 } 2027 2028 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2029 char const *format) { 2030 int gtid; 2031 size_t num_required; 2032 kmp_str_buf_t capture_buf; 2033 if (!TCR_4(__kmp_init_middle)) { 2034 __kmp_middle_initialize(); 2035 } 2036 __kmp_assign_root_init_mask(); 2037 gtid = __kmp_get_gtid(); 2038 __kmp_str_buf_init(&capture_buf); 2039 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2040 if (buffer && buf_size) { 2041 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2042 capture_buf.used + 1); 2043 } 2044 __kmp_str_buf_free(&capture_buf); 2045 return num_required; 2046 } 2047 2048 void kmpc_set_stacksize(int arg) { 2049 // __kmp_aux_set_stacksize initializes the library if needed 2050 __kmp_aux_set_stacksize(arg); 2051 } 2052 2053 void kmpc_set_stacksize_s(size_t arg) { 2054 // __kmp_aux_set_stacksize initializes the library if needed 2055 __kmp_aux_set_stacksize(arg); 2056 } 2057 2058 void kmpc_set_blocktime(int arg) { 2059 int gtid, tid; 2060 kmp_info_t *thread; 2061 2062 gtid = __kmp_entry_gtid(); 2063 tid = __kmp_tid_from_gtid(gtid); 2064 thread = __kmp_thread_from_gtid(gtid); 2065 2066 __kmp_aux_set_blocktime(arg, thread, tid); 2067 } 2068 2069 void kmpc_set_library(int arg) { 2070 // __kmp_user_set_library initializes the library if needed 2071 __kmp_user_set_library((enum library_type)arg); 2072 } 2073 2074 void kmpc_set_defaults(char const *str) { 2075 // __kmp_aux_set_defaults initializes the library if needed 2076 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2077 } 2078 2079 void kmpc_set_disp_num_buffers(int arg) { 2080 // ignore after initialization because some teams have already 2081 // allocated dispatch buffers 2082 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2083 arg <= KMP_MAX_DISP_NUM_BUFF) { 2084 __kmp_dispatch_num_buffers = arg; 2085 } 2086 } 2087 2088 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2089 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2090 return -1; 2091 #else 2092 if (!TCR_4(__kmp_init_middle)) { 2093 __kmp_middle_initialize(); 2094 } 2095 __kmp_assign_root_init_mask(); 2096 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2097 #endif 2098 } 2099 2100 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2101 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2102 return -1; 2103 #else 2104 if (!TCR_4(__kmp_init_middle)) { 2105 __kmp_middle_initialize(); 2106 } 2107 __kmp_assign_root_init_mask(); 2108 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2109 #endif 2110 } 2111 2112 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2113 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2114 return -1; 2115 #else 2116 if (!TCR_4(__kmp_init_middle)) { 2117 __kmp_middle_initialize(); 2118 } 2119 __kmp_assign_root_init_mask(); 2120 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2121 #endif 2122 } 2123 2124 /* -------------------------------------------------------------------------- */ 2125 /*! 2126 @ingroup THREADPRIVATE 2127 @param loc source location information 2128 @param gtid global thread number 2129 @param cpy_size size of the cpy_data buffer 2130 @param cpy_data pointer to data to be copied 2131 @param cpy_func helper function to call for copying data 2132 @param didit flag variable: 1=single thread; 0=not single thread 2133 2134 __kmpc_copyprivate implements the interface for the private data broadcast 2135 needed for the copyprivate clause associated with a single region in an 2136 OpenMP<sup>*</sup> program (both C and Fortran). 2137 All threads participating in the parallel region call this routine. 2138 One of the threads (called the single thread) should have the <tt>didit</tt> 2139 variable set to 1 and all other threads should have that variable set to 0. 2140 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2141 2142 The OpenMP specification forbids the use of nowait on the single region when a 2143 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2144 barrier internally to avoid race conditions, so the code generation for the 2145 single region should avoid generating a barrier after the call to @ref 2146 __kmpc_copyprivate. 2147 2148 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2149 The <tt>loc</tt> parameter is a pointer to source location information. 2150 2151 Internal implementation: The single thread will first copy its descriptor 2152 address (cpy_data) to a team-private location, then the other threads will each 2153 call the function pointed to by the parameter cpy_func, which carries out the 2154 copy by copying the data using the cpy_data buffer. 2155 2156 The cpy_func routine used for the copy and the contents of the data area defined 2157 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2158 to be done. For instance, the cpy_data buffer can hold the actual data to be 2159 copied or it may hold a list of pointers to the data. The cpy_func routine must 2160 interpret the cpy_data buffer appropriately. 2161 2162 The interface to cpy_func is as follows: 2163 @code 2164 void cpy_func( void *destination, void *source ) 2165 @endcode 2166 where void *destination is the cpy_data pointer for the thread being copied to 2167 and void *source is the cpy_data pointer for the thread being copied from. 2168 */ 2169 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2170 void *cpy_data, void (*cpy_func)(void *, void *), 2171 kmp_int32 didit) { 2172 void **data_ptr; 2173 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2174 __kmp_assert_valid_gtid(gtid); 2175 2176 KMP_MB(); 2177 2178 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2179 2180 if (__kmp_env_consistency_check) { 2181 if (loc == 0) { 2182 KMP_WARNING(ConstructIdentInvalid); 2183 } 2184 } 2185 2186 // ToDo: Optimize the following two barriers into some kind of split barrier 2187 2188 if (didit) 2189 *data_ptr = cpy_data; 2190 2191 #if OMPT_SUPPORT 2192 ompt_frame_t *ompt_frame; 2193 if (ompt_enabled.enabled) { 2194 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2195 if (ompt_frame->enter_frame.ptr == NULL) 2196 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2197 } 2198 OMPT_STORE_RETURN_ADDRESS(gtid); 2199 #endif 2200 /* This barrier is not a barrier region boundary */ 2201 #if USE_ITT_NOTIFY 2202 __kmp_threads[gtid]->th.th_ident = loc; 2203 #endif 2204 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2205 2206 if (!didit) 2207 (*cpy_func)(cpy_data, *data_ptr); 2208 2209 // Consider next barrier a user-visible barrier for barrier region boundaries 2210 // Nesting checks are already handled by the single construct checks 2211 { 2212 #if OMPT_SUPPORT 2213 OMPT_STORE_RETURN_ADDRESS(gtid); 2214 #endif 2215 #if USE_ITT_NOTIFY 2216 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2217 // tasks can overwrite the location) 2218 #endif 2219 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2220 #if OMPT_SUPPORT && OMPT_OPTIONAL 2221 if (ompt_enabled.enabled) { 2222 ompt_frame->enter_frame = ompt_data_none; 2223 } 2224 #endif 2225 } 2226 } 2227 2228 /* -------------------------------------------------------------------------- */ 2229 2230 #define INIT_LOCK __kmp_init_user_lock_with_checks 2231 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2232 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2233 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2234 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2235 #define ACQUIRE_NESTED_LOCK_TIMED \ 2236 __kmp_acquire_nested_user_lock_with_checks_timed 2237 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2238 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2239 #define TEST_LOCK __kmp_test_user_lock_with_checks 2240 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2241 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2242 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2243 2244 // TODO: Make check abort messages use location info & pass it into 2245 // with_checks routines 2246 2247 #if KMP_USE_DYNAMIC_LOCK 2248 2249 // internal lock initializer 2250 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2251 kmp_dyna_lockseq_t seq) { 2252 if (KMP_IS_D_LOCK(seq)) { 2253 KMP_INIT_D_LOCK(lock, seq); 2254 #if USE_ITT_BUILD 2255 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2256 #endif 2257 } else { 2258 KMP_INIT_I_LOCK(lock, seq); 2259 #if USE_ITT_BUILD 2260 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2261 __kmp_itt_lock_creating(ilk->lock, loc); 2262 #endif 2263 } 2264 } 2265 2266 // internal nest lock initializer 2267 static __forceinline void 2268 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2269 kmp_dyna_lockseq_t seq) { 2270 #if KMP_USE_TSX 2271 // Don't have nested lock implementation for speculative locks 2272 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2273 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2274 seq = __kmp_user_lock_seq; 2275 #endif 2276 switch (seq) { 2277 case lockseq_tas: 2278 seq = lockseq_nested_tas; 2279 break; 2280 #if KMP_USE_FUTEX 2281 case lockseq_futex: 2282 seq = lockseq_nested_futex; 2283 break; 2284 #endif 2285 case lockseq_ticket: 2286 seq = lockseq_nested_ticket; 2287 break; 2288 case lockseq_queuing: 2289 seq = lockseq_nested_queuing; 2290 break; 2291 case lockseq_drdpa: 2292 seq = lockseq_nested_drdpa; 2293 break; 2294 default: 2295 seq = lockseq_nested_queuing; 2296 } 2297 KMP_INIT_I_LOCK(lock, seq); 2298 #if USE_ITT_BUILD 2299 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2300 __kmp_itt_lock_creating(ilk->lock, loc); 2301 #endif 2302 } 2303 2304 /* initialize the lock with a hint */ 2305 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2306 uintptr_t hint) { 2307 KMP_DEBUG_ASSERT(__kmp_init_serial); 2308 if (__kmp_env_consistency_check && user_lock == NULL) { 2309 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2310 } 2311 2312 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2313 2314 #if OMPT_SUPPORT && OMPT_OPTIONAL 2315 // This is the case, if called from omp_init_lock_with_hint: 2316 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2317 if (!codeptr) 2318 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2319 if (ompt_enabled.ompt_callback_lock_init) { 2320 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2321 ompt_mutex_lock, (omp_lock_hint_t)hint, 2322 __ompt_get_mutex_impl_type(user_lock), 2323 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2324 } 2325 #endif 2326 } 2327 2328 /* initialize the lock with a hint */ 2329 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2330 void **user_lock, uintptr_t hint) { 2331 KMP_DEBUG_ASSERT(__kmp_init_serial); 2332 if (__kmp_env_consistency_check && user_lock == NULL) { 2333 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2334 } 2335 2336 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2337 2338 #if OMPT_SUPPORT && OMPT_OPTIONAL 2339 // This is the case, if called from omp_init_lock_with_hint: 2340 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2341 if (!codeptr) 2342 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2343 if (ompt_enabled.ompt_callback_lock_init) { 2344 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2345 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2346 __ompt_get_mutex_impl_type(user_lock), 2347 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2348 } 2349 #endif 2350 } 2351 2352 #endif // KMP_USE_DYNAMIC_LOCK 2353 2354 /* initialize the lock */ 2355 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2356 #if KMP_USE_DYNAMIC_LOCK 2357 2358 KMP_DEBUG_ASSERT(__kmp_init_serial); 2359 if (__kmp_env_consistency_check && user_lock == NULL) { 2360 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2361 } 2362 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2363 2364 #if OMPT_SUPPORT && OMPT_OPTIONAL 2365 // This is the case, if called from omp_init_lock_with_hint: 2366 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2367 if (!codeptr) 2368 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2369 if (ompt_enabled.ompt_callback_lock_init) { 2370 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2371 ompt_mutex_lock, omp_lock_hint_none, 2372 __ompt_get_mutex_impl_type(user_lock), 2373 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2374 } 2375 #endif 2376 2377 #else // KMP_USE_DYNAMIC_LOCK 2378 2379 static char const *const func = "omp_init_lock"; 2380 kmp_user_lock_p lck; 2381 KMP_DEBUG_ASSERT(__kmp_init_serial); 2382 2383 if (__kmp_env_consistency_check) { 2384 if (user_lock == NULL) { 2385 KMP_FATAL(LockIsUninitialized, func); 2386 } 2387 } 2388 2389 KMP_CHECK_USER_LOCK_INIT(); 2390 2391 if ((__kmp_user_lock_kind == lk_tas) && 2392 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2393 lck = (kmp_user_lock_p)user_lock; 2394 } 2395 #if KMP_USE_FUTEX 2396 else if ((__kmp_user_lock_kind == lk_futex) && 2397 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2398 lck = (kmp_user_lock_p)user_lock; 2399 } 2400 #endif 2401 else { 2402 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2403 } 2404 INIT_LOCK(lck); 2405 __kmp_set_user_lock_location(lck, loc); 2406 2407 #if OMPT_SUPPORT && OMPT_OPTIONAL 2408 // This is the case, if called from omp_init_lock_with_hint: 2409 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2410 if (!codeptr) 2411 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2412 if (ompt_enabled.ompt_callback_lock_init) { 2413 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2414 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2415 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2416 } 2417 #endif 2418 2419 #if USE_ITT_BUILD 2420 __kmp_itt_lock_creating(lck); 2421 #endif /* USE_ITT_BUILD */ 2422 2423 #endif // KMP_USE_DYNAMIC_LOCK 2424 } // __kmpc_init_lock 2425 2426 /* initialize the lock */ 2427 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2428 #if KMP_USE_DYNAMIC_LOCK 2429 2430 KMP_DEBUG_ASSERT(__kmp_init_serial); 2431 if (__kmp_env_consistency_check && user_lock == NULL) { 2432 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2433 } 2434 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2435 2436 #if OMPT_SUPPORT && OMPT_OPTIONAL 2437 // This is the case, if called from omp_init_lock_with_hint: 2438 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2439 if (!codeptr) 2440 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2441 if (ompt_enabled.ompt_callback_lock_init) { 2442 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2443 ompt_mutex_nest_lock, omp_lock_hint_none, 2444 __ompt_get_mutex_impl_type(user_lock), 2445 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2446 } 2447 #endif 2448 2449 #else // KMP_USE_DYNAMIC_LOCK 2450 2451 static char const *const func = "omp_init_nest_lock"; 2452 kmp_user_lock_p lck; 2453 KMP_DEBUG_ASSERT(__kmp_init_serial); 2454 2455 if (__kmp_env_consistency_check) { 2456 if (user_lock == NULL) { 2457 KMP_FATAL(LockIsUninitialized, func); 2458 } 2459 } 2460 2461 KMP_CHECK_USER_LOCK_INIT(); 2462 2463 if ((__kmp_user_lock_kind == lk_tas) && 2464 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2465 OMP_NEST_LOCK_T_SIZE)) { 2466 lck = (kmp_user_lock_p)user_lock; 2467 } 2468 #if KMP_USE_FUTEX 2469 else if ((__kmp_user_lock_kind == lk_futex) && 2470 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2471 OMP_NEST_LOCK_T_SIZE)) { 2472 lck = (kmp_user_lock_p)user_lock; 2473 } 2474 #endif 2475 else { 2476 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2477 } 2478 2479 INIT_NESTED_LOCK(lck); 2480 __kmp_set_user_lock_location(lck, loc); 2481 2482 #if OMPT_SUPPORT && OMPT_OPTIONAL 2483 // This is the case, if called from omp_init_lock_with_hint: 2484 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2485 if (!codeptr) 2486 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2487 if (ompt_enabled.ompt_callback_lock_init) { 2488 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2489 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2490 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2491 } 2492 #endif 2493 2494 #if USE_ITT_BUILD 2495 __kmp_itt_lock_creating(lck); 2496 #endif /* USE_ITT_BUILD */ 2497 2498 #endif // KMP_USE_DYNAMIC_LOCK 2499 } // __kmpc_init_nest_lock 2500 2501 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2502 #if KMP_USE_DYNAMIC_LOCK 2503 2504 #if USE_ITT_BUILD 2505 kmp_user_lock_p lck; 2506 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2507 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2508 } else { 2509 lck = (kmp_user_lock_p)user_lock; 2510 } 2511 __kmp_itt_lock_destroyed(lck); 2512 #endif 2513 #if OMPT_SUPPORT && OMPT_OPTIONAL 2514 // This is the case, if called from omp_init_lock_with_hint: 2515 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2516 if (!codeptr) 2517 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2518 if (ompt_enabled.ompt_callback_lock_destroy) { 2519 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2520 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2521 } 2522 #endif 2523 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2524 #else 2525 kmp_user_lock_p lck; 2526 2527 if ((__kmp_user_lock_kind == lk_tas) && 2528 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2529 lck = (kmp_user_lock_p)user_lock; 2530 } 2531 #if KMP_USE_FUTEX 2532 else if ((__kmp_user_lock_kind == lk_futex) && 2533 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2534 lck = (kmp_user_lock_p)user_lock; 2535 } 2536 #endif 2537 else { 2538 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2539 } 2540 2541 #if OMPT_SUPPORT && OMPT_OPTIONAL 2542 // This is the case, if called from omp_init_lock_with_hint: 2543 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2544 if (!codeptr) 2545 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2546 if (ompt_enabled.ompt_callback_lock_destroy) { 2547 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2548 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2549 } 2550 #endif 2551 2552 #if USE_ITT_BUILD 2553 __kmp_itt_lock_destroyed(lck); 2554 #endif /* USE_ITT_BUILD */ 2555 DESTROY_LOCK(lck); 2556 2557 if ((__kmp_user_lock_kind == lk_tas) && 2558 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2559 ; 2560 } 2561 #if KMP_USE_FUTEX 2562 else if ((__kmp_user_lock_kind == lk_futex) && 2563 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2564 ; 2565 } 2566 #endif 2567 else { 2568 __kmp_user_lock_free(user_lock, gtid, lck); 2569 } 2570 #endif // KMP_USE_DYNAMIC_LOCK 2571 } // __kmpc_destroy_lock 2572 2573 /* destroy the lock */ 2574 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2575 #if KMP_USE_DYNAMIC_LOCK 2576 2577 #if USE_ITT_BUILD 2578 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2579 __kmp_itt_lock_destroyed(ilk->lock); 2580 #endif 2581 #if OMPT_SUPPORT && OMPT_OPTIONAL 2582 // This is the case, if called from omp_init_lock_with_hint: 2583 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2584 if (!codeptr) 2585 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2586 if (ompt_enabled.ompt_callback_lock_destroy) { 2587 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2588 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2589 } 2590 #endif 2591 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2592 2593 #else // KMP_USE_DYNAMIC_LOCK 2594 2595 kmp_user_lock_p lck; 2596 2597 if ((__kmp_user_lock_kind == lk_tas) && 2598 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2599 OMP_NEST_LOCK_T_SIZE)) { 2600 lck = (kmp_user_lock_p)user_lock; 2601 } 2602 #if KMP_USE_FUTEX 2603 else if ((__kmp_user_lock_kind == lk_futex) && 2604 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2605 OMP_NEST_LOCK_T_SIZE)) { 2606 lck = (kmp_user_lock_p)user_lock; 2607 } 2608 #endif 2609 else { 2610 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2611 } 2612 2613 #if OMPT_SUPPORT && OMPT_OPTIONAL 2614 // This is the case, if called from omp_init_lock_with_hint: 2615 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2616 if (!codeptr) 2617 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2618 if (ompt_enabled.ompt_callback_lock_destroy) { 2619 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2620 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2621 } 2622 #endif 2623 2624 #if USE_ITT_BUILD 2625 __kmp_itt_lock_destroyed(lck); 2626 #endif /* USE_ITT_BUILD */ 2627 2628 DESTROY_NESTED_LOCK(lck); 2629 2630 if ((__kmp_user_lock_kind == lk_tas) && 2631 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2632 OMP_NEST_LOCK_T_SIZE)) { 2633 ; 2634 } 2635 #if KMP_USE_FUTEX 2636 else if ((__kmp_user_lock_kind == lk_futex) && 2637 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2638 OMP_NEST_LOCK_T_SIZE)) { 2639 ; 2640 } 2641 #endif 2642 else { 2643 __kmp_user_lock_free(user_lock, gtid, lck); 2644 } 2645 #endif // KMP_USE_DYNAMIC_LOCK 2646 } // __kmpc_destroy_nest_lock 2647 2648 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2649 KMP_COUNT_BLOCK(OMP_set_lock); 2650 #if KMP_USE_DYNAMIC_LOCK 2651 int tag = KMP_EXTRACT_D_TAG(user_lock); 2652 #if USE_ITT_BUILD 2653 __kmp_itt_lock_acquiring( 2654 (kmp_user_lock_p) 2655 user_lock); // itt function will get to the right lock object. 2656 #endif 2657 #if OMPT_SUPPORT && OMPT_OPTIONAL 2658 // This is the case, if called from omp_init_lock_with_hint: 2659 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2660 if (!codeptr) 2661 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2662 if (ompt_enabled.ompt_callback_mutex_acquire) { 2663 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2664 ompt_mutex_lock, omp_lock_hint_none, 2665 __ompt_get_mutex_impl_type(user_lock), 2666 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2667 } 2668 #endif 2669 #if KMP_USE_INLINED_TAS 2670 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2671 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2672 } else 2673 #elif KMP_USE_INLINED_FUTEX 2674 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2675 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2676 } else 2677 #endif 2678 { 2679 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2680 } 2681 #if USE_ITT_BUILD 2682 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2683 #endif 2684 #if OMPT_SUPPORT && OMPT_OPTIONAL 2685 if (ompt_enabled.ompt_callback_mutex_acquired) { 2686 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2687 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2688 } 2689 #endif 2690 2691 #else // KMP_USE_DYNAMIC_LOCK 2692 2693 kmp_user_lock_p lck; 2694 2695 if ((__kmp_user_lock_kind == lk_tas) && 2696 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2697 lck = (kmp_user_lock_p)user_lock; 2698 } 2699 #if KMP_USE_FUTEX 2700 else if ((__kmp_user_lock_kind == lk_futex) && 2701 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2702 lck = (kmp_user_lock_p)user_lock; 2703 } 2704 #endif 2705 else { 2706 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2707 } 2708 2709 #if USE_ITT_BUILD 2710 __kmp_itt_lock_acquiring(lck); 2711 #endif /* USE_ITT_BUILD */ 2712 #if OMPT_SUPPORT && OMPT_OPTIONAL 2713 // This is the case, if called from omp_init_lock_with_hint: 2714 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2715 if (!codeptr) 2716 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2717 if (ompt_enabled.ompt_callback_mutex_acquire) { 2718 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2719 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2720 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2721 } 2722 #endif 2723 2724 ACQUIRE_LOCK(lck, gtid); 2725 2726 #if USE_ITT_BUILD 2727 __kmp_itt_lock_acquired(lck); 2728 #endif /* USE_ITT_BUILD */ 2729 2730 #if OMPT_SUPPORT && OMPT_OPTIONAL 2731 if (ompt_enabled.ompt_callback_mutex_acquired) { 2732 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2733 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2734 } 2735 #endif 2736 2737 #endif // KMP_USE_DYNAMIC_LOCK 2738 } 2739 2740 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2741 #if KMP_USE_DYNAMIC_LOCK 2742 2743 #if USE_ITT_BUILD 2744 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2745 #endif 2746 #if OMPT_SUPPORT && OMPT_OPTIONAL 2747 // This is the case, if called from omp_init_lock_with_hint: 2748 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2749 if (!codeptr) 2750 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2751 if (ompt_enabled.enabled) { 2752 if (ompt_enabled.ompt_callback_mutex_acquire) { 2753 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2754 ompt_mutex_nest_lock, omp_lock_hint_none, 2755 __ompt_get_mutex_impl_type(user_lock), 2756 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2757 } 2758 } 2759 #endif 2760 int acquire_status = 2761 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2762 (void)acquire_status; 2763 #if USE_ITT_BUILD 2764 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2765 #endif 2766 2767 #if OMPT_SUPPORT && OMPT_OPTIONAL 2768 if (ompt_enabled.enabled) { 2769 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2770 if (ompt_enabled.ompt_callback_mutex_acquired) { 2771 // lock_first 2772 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2773 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2774 codeptr); 2775 } 2776 } else { 2777 if (ompt_enabled.ompt_callback_nest_lock) { 2778 // lock_next 2779 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2780 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2781 } 2782 } 2783 } 2784 #endif 2785 2786 #else // KMP_USE_DYNAMIC_LOCK 2787 int acquire_status; 2788 kmp_user_lock_p lck; 2789 2790 if ((__kmp_user_lock_kind == lk_tas) && 2791 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2792 OMP_NEST_LOCK_T_SIZE)) { 2793 lck = (kmp_user_lock_p)user_lock; 2794 } 2795 #if KMP_USE_FUTEX 2796 else if ((__kmp_user_lock_kind == lk_futex) && 2797 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2798 OMP_NEST_LOCK_T_SIZE)) { 2799 lck = (kmp_user_lock_p)user_lock; 2800 } 2801 #endif 2802 else { 2803 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2804 } 2805 2806 #if USE_ITT_BUILD 2807 __kmp_itt_lock_acquiring(lck); 2808 #endif /* USE_ITT_BUILD */ 2809 #if OMPT_SUPPORT && OMPT_OPTIONAL 2810 // This is the case, if called from omp_init_lock_with_hint: 2811 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2812 if (!codeptr) 2813 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2814 if (ompt_enabled.enabled) { 2815 if (ompt_enabled.ompt_callback_mutex_acquire) { 2816 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2817 ompt_mutex_nest_lock, omp_lock_hint_none, 2818 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2819 codeptr); 2820 } 2821 } 2822 #endif 2823 2824 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2825 2826 #if USE_ITT_BUILD 2827 __kmp_itt_lock_acquired(lck); 2828 #endif /* USE_ITT_BUILD */ 2829 2830 #if OMPT_SUPPORT && OMPT_OPTIONAL 2831 if (ompt_enabled.enabled) { 2832 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2833 if (ompt_enabled.ompt_callback_mutex_acquired) { 2834 // lock_first 2835 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2836 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2837 } 2838 } else { 2839 if (ompt_enabled.ompt_callback_nest_lock) { 2840 // lock_next 2841 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2842 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2843 } 2844 } 2845 } 2846 #endif 2847 2848 #endif // KMP_USE_DYNAMIC_LOCK 2849 } 2850 2851 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2852 #if KMP_USE_DYNAMIC_LOCK 2853 2854 int tag = KMP_EXTRACT_D_TAG(user_lock); 2855 #if USE_ITT_BUILD 2856 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2857 #endif 2858 #if KMP_USE_INLINED_TAS 2859 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2860 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2861 } else 2862 #elif KMP_USE_INLINED_FUTEX 2863 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2864 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2865 } else 2866 #endif 2867 { 2868 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2869 } 2870 2871 #if OMPT_SUPPORT && OMPT_OPTIONAL 2872 // This is the case, if called from omp_init_lock_with_hint: 2873 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2874 if (!codeptr) 2875 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2876 if (ompt_enabled.ompt_callback_mutex_released) { 2877 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2878 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2879 } 2880 #endif 2881 2882 #else // KMP_USE_DYNAMIC_LOCK 2883 2884 kmp_user_lock_p lck; 2885 2886 /* Can't use serial interval since not block structured */ 2887 /* release the lock */ 2888 2889 if ((__kmp_user_lock_kind == lk_tas) && 2890 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2891 #if KMP_OS_LINUX && \ 2892 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2893 // "fast" path implemented to fix customer performance issue 2894 #if USE_ITT_BUILD 2895 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2896 #endif /* USE_ITT_BUILD */ 2897 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2898 KMP_MB(); 2899 2900 #if OMPT_SUPPORT && OMPT_OPTIONAL 2901 // This is the case, if called from omp_init_lock_with_hint: 2902 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2903 if (!codeptr) 2904 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2905 if (ompt_enabled.ompt_callback_mutex_released) { 2906 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2907 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2908 } 2909 #endif 2910 2911 return; 2912 #else 2913 lck = (kmp_user_lock_p)user_lock; 2914 #endif 2915 } 2916 #if KMP_USE_FUTEX 2917 else if ((__kmp_user_lock_kind == lk_futex) && 2918 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2919 lck = (kmp_user_lock_p)user_lock; 2920 } 2921 #endif 2922 else { 2923 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2924 } 2925 2926 #if USE_ITT_BUILD 2927 __kmp_itt_lock_releasing(lck); 2928 #endif /* USE_ITT_BUILD */ 2929 2930 RELEASE_LOCK(lck, gtid); 2931 2932 #if OMPT_SUPPORT && OMPT_OPTIONAL 2933 // This is the case, if called from omp_init_lock_with_hint: 2934 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2935 if (!codeptr) 2936 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2937 if (ompt_enabled.ompt_callback_mutex_released) { 2938 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2939 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2940 } 2941 #endif 2942 2943 #endif // KMP_USE_DYNAMIC_LOCK 2944 } 2945 2946 /* release the lock */ 2947 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2948 #if KMP_USE_DYNAMIC_LOCK 2949 2950 #if USE_ITT_BUILD 2951 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2952 #endif 2953 int release_status = 2954 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2955 (void)release_status; 2956 2957 #if OMPT_SUPPORT && OMPT_OPTIONAL 2958 // This is the case, if called from omp_init_lock_with_hint: 2959 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2960 if (!codeptr) 2961 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2962 if (ompt_enabled.enabled) { 2963 if (release_status == KMP_LOCK_RELEASED) { 2964 if (ompt_enabled.ompt_callback_mutex_released) { 2965 // release_lock_last 2966 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2967 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2968 codeptr); 2969 } 2970 } else if (ompt_enabled.ompt_callback_nest_lock) { 2971 // release_lock_prev 2972 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2973 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2974 } 2975 } 2976 #endif 2977 2978 #else // KMP_USE_DYNAMIC_LOCK 2979 2980 kmp_user_lock_p lck; 2981 2982 /* Can't use serial interval since not block structured */ 2983 2984 if ((__kmp_user_lock_kind == lk_tas) && 2985 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2986 OMP_NEST_LOCK_T_SIZE)) { 2987 #if KMP_OS_LINUX && \ 2988 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2989 // "fast" path implemented to fix customer performance issue 2990 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2991 #if USE_ITT_BUILD 2992 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2993 #endif /* USE_ITT_BUILD */ 2994 2995 #if OMPT_SUPPORT && OMPT_OPTIONAL 2996 int release_status = KMP_LOCK_STILL_HELD; 2997 #endif 2998 2999 if (--(tl->lk.depth_locked) == 0) { 3000 TCW_4(tl->lk.poll, 0); 3001 #if OMPT_SUPPORT && OMPT_OPTIONAL 3002 release_status = KMP_LOCK_RELEASED; 3003 #endif 3004 } 3005 KMP_MB(); 3006 3007 #if OMPT_SUPPORT && OMPT_OPTIONAL 3008 // This is the case, if called from omp_init_lock_with_hint: 3009 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3010 if (!codeptr) 3011 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3012 if (ompt_enabled.enabled) { 3013 if (release_status == KMP_LOCK_RELEASED) { 3014 if (ompt_enabled.ompt_callback_mutex_released) { 3015 // release_lock_last 3016 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3017 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3018 } 3019 } else if (ompt_enabled.ompt_callback_nest_lock) { 3020 // release_lock_previous 3021 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3022 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3023 } 3024 } 3025 #endif 3026 3027 return; 3028 #else 3029 lck = (kmp_user_lock_p)user_lock; 3030 #endif 3031 } 3032 #if KMP_USE_FUTEX 3033 else if ((__kmp_user_lock_kind == lk_futex) && 3034 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3035 OMP_NEST_LOCK_T_SIZE)) { 3036 lck = (kmp_user_lock_p)user_lock; 3037 } 3038 #endif 3039 else { 3040 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3041 } 3042 3043 #if USE_ITT_BUILD 3044 __kmp_itt_lock_releasing(lck); 3045 #endif /* USE_ITT_BUILD */ 3046 3047 int release_status; 3048 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3049 #if OMPT_SUPPORT && OMPT_OPTIONAL 3050 // This is the case, if called from omp_init_lock_with_hint: 3051 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3052 if (!codeptr) 3053 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3054 if (ompt_enabled.enabled) { 3055 if (release_status == KMP_LOCK_RELEASED) { 3056 if (ompt_enabled.ompt_callback_mutex_released) { 3057 // release_lock_last 3058 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3059 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3060 } 3061 } else if (ompt_enabled.ompt_callback_nest_lock) { 3062 // release_lock_previous 3063 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3064 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3065 } 3066 } 3067 #endif 3068 3069 #endif // KMP_USE_DYNAMIC_LOCK 3070 } 3071 3072 /* try to acquire the lock */ 3073 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3074 KMP_COUNT_BLOCK(OMP_test_lock); 3075 3076 #if KMP_USE_DYNAMIC_LOCK 3077 int rc; 3078 int tag = KMP_EXTRACT_D_TAG(user_lock); 3079 #if USE_ITT_BUILD 3080 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3081 #endif 3082 #if OMPT_SUPPORT && OMPT_OPTIONAL 3083 // This is the case, if called from omp_init_lock_with_hint: 3084 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3085 if (!codeptr) 3086 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3087 if (ompt_enabled.ompt_callback_mutex_acquire) { 3088 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3089 ompt_mutex_lock, omp_lock_hint_none, 3090 __ompt_get_mutex_impl_type(user_lock), 3091 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3092 } 3093 #endif 3094 #if KMP_USE_INLINED_TAS 3095 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3096 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3097 } else 3098 #elif KMP_USE_INLINED_FUTEX 3099 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3100 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3101 } else 3102 #endif 3103 { 3104 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3105 } 3106 if (rc) { 3107 #if USE_ITT_BUILD 3108 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3109 #endif 3110 #if OMPT_SUPPORT && OMPT_OPTIONAL 3111 if (ompt_enabled.ompt_callback_mutex_acquired) { 3112 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3113 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3114 } 3115 #endif 3116 return FTN_TRUE; 3117 } else { 3118 #if USE_ITT_BUILD 3119 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3120 #endif 3121 return FTN_FALSE; 3122 } 3123 3124 #else // KMP_USE_DYNAMIC_LOCK 3125 3126 kmp_user_lock_p lck; 3127 int rc; 3128 3129 if ((__kmp_user_lock_kind == lk_tas) && 3130 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3131 lck = (kmp_user_lock_p)user_lock; 3132 } 3133 #if KMP_USE_FUTEX 3134 else if ((__kmp_user_lock_kind == lk_futex) && 3135 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3136 lck = (kmp_user_lock_p)user_lock; 3137 } 3138 #endif 3139 else { 3140 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3141 } 3142 3143 #if USE_ITT_BUILD 3144 __kmp_itt_lock_acquiring(lck); 3145 #endif /* USE_ITT_BUILD */ 3146 #if OMPT_SUPPORT && OMPT_OPTIONAL 3147 // This is the case, if called from omp_init_lock_with_hint: 3148 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3149 if (!codeptr) 3150 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3151 if (ompt_enabled.ompt_callback_mutex_acquire) { 3152 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3153 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3154 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3155 } 3156 #endif 3157 3158 rc = TEST_LOCK(lck, gtid); 3159 #if USE_ITT_BUILD 3160 if (rc) { 3161 __kmp_itt_lock_acquired(lck); 3162 } else { 3163 __kmp_itt_lock_cancelled(lck); 3164 } 3165 #endif /* USE_ITT_BUILD */ 3166 #if OMPT_SUPPORT && OMPT_OPTIONAL 3167 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3168 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3169 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3170 } 3171 #endif 3172 3173 return (rc ? FTN_TRUE : FTN_FALSE); 3174 3175 /* Can't use serial interval since not block structured */ 3176 3177 #endif // KMP_USE_DYNAMIC_LOCK 3178 } 3179 3180 /* try to acquire the lock */ 3181 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3182 #if KMP_USE_DYNAMIC_LOCK 3183 int rc; 3184 #if USE_ITT_BUILD 3185 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3186 #endif 3187 #if OMPT_SUPPORT && OMPT_OPTIONAL 3188 // This is the case, if called from omp_init_lock_with_hint: 3189 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3190 if (!codeptr) 3191 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3192 if (ompt_enabled.ompt_callback_mutex_acquire) { 3193 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3194 ompt_mutex_nest_lock, omp_lock_hint_none, 3195 __ompt_get_mutex_impl_type(user_lock), 3196 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3197 } 3198 #endif 3199 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3200 #if USE_ITT_BUILD 3201 if (rc) { 3202 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3203 } else { 3204 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3205 } 3206 #endif 3207 #if OMPT_SUPPORT && OMPT_OPTIONAL 3208 if (ompt_enabled.enabled && rc) { 3209 if (rc == 1) { 3210 if (ompt_enabled.ompt_callback_mutex_acquired) { 3211 // lock_first 3212 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3213 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3214 codeptr); 3215 } 3216 } else { 3217 if (ompt_enabled.ompt_callback_nest_lock) { 3218 // lock_next 3219 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3220 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3221 } 3222 } 3223 } 3224 #endif 3225 return rc; 3226 3227 #else // KMP_USE_DYNAMIC_LOCK 3228 3229 kmp_user_lock_p lck; 3230 int rc; 3231 3232 if ((__kmp_user_lock_kind == lk_tas) && 3233 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3234 OMP_NEST_LOCK_T_SIZE)) { 3235 lck = (kmp_user_lock_p)user_lock; 3236 } 3237 #if KMP_USE_FUTEX 3238 else if ((__kmp_user_lock_kind == lk_futex) && 3239 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3240 OMP_NEST_LOCK_T_SIZE)) { 3241 lck = (kmp_user_lock_p)user_lock; 3242 } 3243 #endif 3244 else { 3245 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3246 } 3247 3248 #if USE_ITT_BUILD 3249 __kmp_itt_lock_acquiring(lck); 3250 #endif /* USE_ITT_BUILD */ 3251 3252 #if OMPT_SUPPORT && OMPT_OPTIONAL 3253 // This is the case, if called from omp_init_lock_with_hint: 3254 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3255 if (!codeptr) 3256 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3257 if (ompt_enabled.enabled) && 3258 ompt_enabled.ompt_callback_mutex_acquire) { 3259 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3260 ompt_mutex_nest_lock, omp_lock_hint_none, 3261 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3262 codeptr); 3263 } 3264 #endif 3265 3266 rc = TEST_NESTED_LOCK(lck, gtid); 3267 #if USE_ITT_BUILD 3268 if (rc) { 3269 __kmp_itt_lock_acquired(lck); 3270 } else { 3271 __kmp_itt_lock_cancelled(lck); 3272 } 3273 #endif /* USE_ITT_BUILD */ 3274 #if OMPT_SUPPORT && OMPT_OPTIONAL 3275 if (ompt_enabled.enabled && rc) { 3276 if (rc == 1) { 3277 if (ompt_enabled.ompt_callback_mutex_acquired) { 3278 // lock_first 3279 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3280 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3281 } 3282 } else { 3283 if (ompt_enabled.ompt_callback_nest_lock) { 3284 // lock_next 3285 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3286 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3287 } 3288 } 3289 } 3290 #endif 3291 return rc; 3292 3293 /* Can't use serial interval since not block structured */ 3294 3295 #endif // KMP_USE_DYNAMIC_LOCK 3296 } 3297 3298 // Interface to fast scalable reduce methods routines 3299 3300 // keep the selected method in a thread local structure for cross-function 3301 // usage: will be used in __kmpc_end_reduce* functions; 3302 // another solution: to re-determine the method one more time in 3303 // __kmpc_end_reduce* functions (new prototype required then) 3304 // AT: which solution is better? 3305 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3306 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3307 3308 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3309 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3310 3311 // description of the packed_reduction_method variable: look at the macros in 3312 // kmp.h 3313 3314 // used in a critical section reduce block 3315 static __forceinline void 3316 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3317 kmp_critical_name *crit) { 3318 3319 // this lock was visible to a customer and to the threading profile tool as a 3320 // serial overhead span (although it's used for an internal purpose only) 3321 // why was it visible in previous implementation? 3322 // should we keep it visible in new reduce block? 3323 kmp_user_lock_p lck; 3324 3325 #if KMP_USE_DYNAMIC_LOCK 3326 3327 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3328 // Check if it is initialized. 3329 if (*lk == 0) { 3330 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3331 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3332 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3333 } else { 3334 __kmp_init_indirect_csptr(crit, loc, global_tid, 3335 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3336 } 3337 } 3338 // Branch for accessing the actual lock object and set operation. This 3339 // branching is inevitable since this lock initialization does not follow the 3340 // normal dispatch path (lock table is not used). 3341 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3342 lck = (kmp_user_lock_p)lk; 3343 KMP_DEBUG_ASSERT(lck != NULL); 3344 if (__kmp_env_consistency_check) { 3345 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3346 } 3347 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3348 } else { 3349 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3350 lck = ilk->lock; 3351 KMP_DEBUG_ASSERT(lck != NULL); 3352 if (__kmp_env_consistency_check) { 3353 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3354 } 3355 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3356 } 3357 3358 #else // KMP_USE_DYNAMIC_LOCK 3359 3360 // We know that the fast reduction code is only emitted by Intel compilers 3361 // with 32 byte critical sections. If there isn't enough space, then we 3362 // have to use a pointer. 3363 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3364 lck = (kmp_user_lock_p)crit; 3365 } else { 3366 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3367 } 3368 KMP_DEBUG_ASSERT(lck != NULL); 3369 3370 if (__kmp_env_consistency_check) 3371 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3372 3373 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3374 3375 #endif // KMP_USE_DYNAMIC_LOCK 3376 } 3377 3378 // used in a critical section reduce block 3379 static __forceinline void 3380 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3381 kmp_critical_name *crit) { 3382 3383 kmp_user_lock_p lck; 3384 3385 #if KMP_USE_DYNAMIC_LOCK 3386 3387 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3388 lck = (kmp_user_lock_p)crit; 3389 if (__kmp_env_consistency_check) 3390 __kmp_pop_sync(global_tid, ct_critical, loc); 3391 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3392 } else { 3393 kmp_indirect_lock_t *ilk = 3394 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3395 if (__kmp_env_consistency_check) 3396 __kmp_pop_sync(global_tid, ct_critical, loc); 3397 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3398 } 3399 3400 #else // KMP_USE_DYNAMIC_LOCK 3401 3402 // We know that the fast reduction code is only emitted by Intel compilers 3403 // with 32 byte critical sections. If there isn't enough space, then we have 3404 // to use a pointer. 3405 if (__kmp_base_user_lock_size > 32) { 3406 lck = *((kmp_user_lock_p *)crit); 3407 KMP_ASSERT(lck != NULL); 3408 } else { 3409 lck = (kmp_user_lock_p)crit; 3410 } 3411 3412 if (__kmp_env_consistency_check) 3413 __kmp_pop_sync(global_tid, ct_critical, loc); 3414 3415 __kmp_release_user_lock_with_checks(lck, global_tid); 3416 3417 #endif // KMP_USE_DYNAMIC_LOCK 3418 } // __kmp_end_critical_section_reduce_block 3419 3420 static __forceinline int 3421 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3422 int *task_state) { 3423 kmp_team_t *team; 3424 3425 // Check if we are inside the teams construct? 3426 if (th->th.th_teams_microtask) { 3427 *team_p = team = th->th.th_team; 3428 if (team->t.t_level == th->th.th_teams_level) { 3429 // This is reduction at teams construct. 3430 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3431 // Let's swap teams temporarily for the reduction. 3432 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3433 th->th.th_team = team->t.t_parent; 3434 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3435 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3436 *task_state = th->th.th_task_state; 3437 th->th.th_task_state = 0; 3438 3439 return 1; 3440 } 3441 } 3442 return 0; 3443 } 3444 3445 static __forceinline void 3446 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3447 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3448 th->th.th_info.ds.ds_tid = 0; 3449 th->th.th_team = team; 3450 th->th.th_team_nproc = team->t.t_nproc; 3451 th->th.th_task_team = team->t.t_task_team[task_state]; 3452 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3453 } 3454 3455 /* 2.a.i. Reduce Block without a terminating barrier */ 3456 /*! 3457 @ingroup SYNCHRONIZATION 3458 @param loc source location information 3459 @param global_tid global thread number 3460 @param num_vars number of items (variables) to be reduced 3461 @param reduce_size size of data in bytes to be reduced 3462 @param reduce_data pointer to data to be reduced 3463 @param reduce_func callback function providing reduction operation on two 3464 operands and returning result of reduction in lhs_data 3465 @param lck pointer to the unique lock data structure 3466 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3467 threads if atomic reduction needed 3468 3469 The nowait version is used for a reduce clause with the nowait argument. 3470 */ 3471 kmp_int32 3472 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3473 size_t reduce_size, void *reduce_data, 3474 void (*reduce_func)(void *lhs_data, void *rhs_data), 3475 kmp_critical_name *lck) { 3476 3477 KMP_COUNT_BLOCK(REDUCE_nowait); 3478 int retval = 0; 3479 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3480 kmp_info_t *th; 3481 kmp_team_t *team; 3482 int teams_swapped = 0, task_state; 3483 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3484 __kmp_assert_valid_gtid(global_tid); 3485 3486 // why do we need this initialization here at all? 3487 // Reduction clause can not be used as a stand-alone directive. 3488 3489 // do not call __kmp_serial_initialize(), it will be called by 3490 // __kmp_parallel_initialize() if needed 3491 // possible detection of false-positive race by the threadchecker ??? 3492 if (!TCR_4(__kmp_init_parallel)) 3493 __kmp_parallel_initialize(); 3494 3495 __kmp_resume_if_soft_paused(); 3496 3497 // check correctness of reduce block nesting 3498 #if KMP_USE_DYNAMIC_LOCK 3499 if (__kmp_env_consistency_check) 3500 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3501 #else 3502 if (__kmp_env_consistency_check) 3503 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3504 #endif 3505 3506 th = __kmp_thread_from_gtid(global_tid); 3507 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3508 3509 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3510 // the value should be kept in a variable 3511 // the variable should be either a construct-specific or thread-specific 3512 // property, not a team specific property 3513 // (a thread can reach the next reduce block on the next construct, reduce 3514 // method may differ on the next construct) 3515 // an ident_t "loc" parameter could be used as a construct-specific property 3516 // (what if loc == 0?) 3517 // (if both construct-specific and team-specific variables were shared, 3518 // then unness extra syncs should be needed) 3519 // a thread-specific variable is better regarding two issues above (next 3520 // construct and extra syncs) 3521 // a thread-specific "th_local.reduction_method" variable is used currently 3522 // each thread executes 'determine' and 'set' lines (no need to execute by one 3523 // thread, to avoid unness extra syncs) 3524 3525 packed_reduction_method = __kmp_determine_reduction_method( 3526 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3527 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3528 3529 OMPT_REDUCTION_DECL(th, global_tid); 3530 if (packed_reduction_method == critical_reduce_block) { 3531 3532 OMPT_REDUCTION_BEGIN; 3533 3534 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3535 retval = 1; 3536 3537 } else if (packed_reduction_method == empty_reduce_block) { 3538 3539 OMPT_REDUCTION_BEGIN; 3540 3541 // usage: if team size == 1, no synchronization is required ( Intel 3542 // platforms only ) 3543 retval = 1; 3544 3545 } else if (packed_reduction_method == atomic_reduce_block) { 3546 3547 retval = 2; 3548 3549 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3550 // won't be called by the code gen) 3551 // (it's not quite good, because the checking block has been closed by 3552 // this 'pop', 3553 // but atomic operation has not been executed yet, will be executed 3554 // slightly later, literally on next instruction) 3555 if (__kmp_env_consistency_check) 3556 __kmp_pop_sync(global_tid, ct_reduce, loc); 3557 3558 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3559 tree_reduce_block)) { 3560 3561 // AT: performance issue: a real barrier here 3562 // AT: (if primary thread is slow, other threads are blocked here waiting for 3563 // the primary thread to come and release them) 3564 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3565 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3566 // be confusing to a customer) 3567 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3568 // might go faster and be more in line with sense of NOWAIT 3569 // AT: TO DO: do epcc test and compare times 3570 3571 // this barrier should be invisible to a customer and to the threading profile 3572 // tool (it's neither a terminating barrier nor customer's code, it's 3573 // used for an internal purpose) 3574 #if OMPT_SUPPORT 3575 // JP: can this barrier potentially leed to task scheduling? 3576 // JP: as long as there is a barrier in the implementation, OMPT should and 3577 // will provide the barrier events 3578 // so we set-up the necessary frame/return addresses. 3579 ompt_frame_t *ompt_frame; 3580 if (ompt_enabled.enabled) { 3581 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3582 if (ompt_frame->enter_frame.ptr == NULL) 3583 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3584 } 3585 OMPT_STORE_RETURN_ADDRESS(global_tid); 3586 #endif 3587 #if USE_ITT_NOTIFY 3588 __kmp_threads[global_tid]->th.th_ident = loc; 3589 #endif 3590 retval = 3591 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3592 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3593 retval = (retval != 0) ? (0) : (1); 3594 #if OMPT_SUPPORT && OMPT_OPTIONAL 3595 if (ompt_enabled.enabled) { 3596 ompt_frame->enter_frame = ompt_data_none; 3597 } 3598 #endif 3599 3600 // all other workers except primary thread should do this pop here 3601 // ( none of other workers will get to __kmpc_end_reduce_nowait() ) 3602 if (__kmp_env_consistency_check) { 3603 if (retval == 0) { 3604 __kmp_pop_sync(global_tid, ct_reduce, loc); 3605 } 3606 } 3607 3608 } else { 3609 3610 // should never reach this block 3611 KMP_ASSERT(0); // "unexpected method" 3612 } 3613 if (teams_swapped) { 3614 __kmp_restore_swapped_teams(th, team, task_state); 3615 } 3616 KA_TRACE( 3617 10, 3618 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3619 global_tid, packed_reduction_method, retval)); 3620 3621 return retval; 3622 } 3623 3624 /*! 3625 @ingroup SYNCHRONIZATION 3626 @param loc source location information 3627 @param global_tid global thread id. 3628 @param lck pointer to the unique lock data structure 3629 3630 Finish the execution of a reduce nowait. 3631 */ 3632 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3633 kmp_critical_name *lck) { 3634 3635 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3636 3637 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3638 __kmp_assert_valid_gtid(global_tid); 3639 3640 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3641 3642 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3643 3644 if (packed_reduction_method == critical_reduce_block) { 3645 3646 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3647 OMPT_REDUCTION_END; 3648 3649 } else if (packed_reduction_method == empty_reduce_block) { 3650 3651 // usage: if team size == 1, no synchronization is required ( on Intel 3652 // platforms only ) 3653 3654 OMPT_REDUCTION_END; 3655 3656 } else if (packed_reduction_method == atomic_reduce_block) { 3657 3658 // neither primary thread nor other workers should get here 3659 // (code gen does not generate this call in case 2: atomic reduce block) 3660 // actually it's better to remove this elseif at all; 3661 // after removal this value will checked by the 'else' and will assert 3662 3663 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3664 tree_reduce_block)) { 3665 3666 // only primary thread gets here 3667 // OMPT: tree reduction is annotated in the barrier code 3668 3669 } else { 3670 3671 // should never reach this block 3672 KMP_ASSERT(0); // "unexpected method" 3673 } 3674 3675 if (__kmp_env_consistency_check) 3676 __kmp_pop_sync(global_tid, ct_reduce, loc); 3677 3678 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3679 global_tid, packed_reduction_method)); 3680 3681 return; 3682 } 3683 3684 /* 2.a.ii. Reduce Block with a terminating barrier */ 3685 3686 /*! 3687 @ingroup SYNCHRONIZATION 3688 @param loc source location information 3689 @param global_tid global thread number 3690 @param num_vars number of items (variables) to be reduced 3691 @param reduce_size size of data in bytes to be reduced 3692 @param reduce_data pointer to data to be reduced 3693 @param reduce_func callback function providing reduction operation on two 3694 operands and returning result of reduction in lhs_data 3695 @param lck pointer to the unique lock data structure 3696 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3697 threads if atomic reduction needed 3698 3699 A blocking reduce that includes an implicit barrier. 3700 */ 3701 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3702 size_t reduce_size, void *reduce_data, 3703 void (*reduce_func)(void *lhs_data, void *rhs_data), 3704 kmp_critical_name *lck) { 3705 KMP_COUNT_BLOCK(REDUCE_wait); 3706 int retval = 0; 3707 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3708 kmp_info_t *th; 3709 kmp_team_t *team; 3710 int teams_swapped = 0, task_state; 3711 3712 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3713 __kmp_assert_valid_gtid(global_tid); 3714 3715 // why do we need this initialization here at all? 3716 // Reduction clause can not be a stand-alone directive. 3717 3718 // do not call __kmp_serial_initialize(), it will be called by 3719 // __kmp_parallel_initialize() if needed 3720 // possible detection of false-positive race by the threadchecker ??? 3721 if (!TCR_4(__kmp_init_parallel)) 3722 __kmp_parallel_initialize(); 3723 3724 __kmp_resume_if_soft_paused(); 3725 3726 // check correctness of reduce block nesting 3727 #if KMP_USE_DYNAMIC_LOCK 3728 if (__kmp_env_consistency_check) 3729 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3730 #else 3731 if (__kmp_env_consistency_check) 3732 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3733 #endif 3734 3735 th = __kmp_thread_from_gtid(global_tid); 3736 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3737 3738 packed_reduction_method = __kmp_determine_reduction_method( 3739 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3740 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3741 3742 OMPT_REDUCTION_DECL(th, global_tid); 3743 3744 if (packed_reduction_method == critical_reduce_block) { 3745 3746 OMPT_REDUCTION_BEGIN; 3747 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3748 retval = 1; 3749 3750 } else if (packed_reduction_method == empty_reduce_block) { 3751 3752 OMPT_REDUCTION_BEGIN; 3753 // usage: if team size == 1, no synchronization is required ( Intel 3754 // platforms only ) 3755 retval = 1; 3756 3757 } else if (packed_reduction_method == atomic_reduce_block) { 3758 3759 retval = 2; 3760 3761 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3762 tree_reduce_block)) { 3763 3764 // case tree_reduce_block: 3765 // this barrier should be visible to a customer and to the threading profile 3766 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3767 #if OMPT_SUPPORT 3768 ompt_frame_t *ompt_frame; 3769 if (ompt_enabled.enabled) { 3770 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3771 if (ompt_frame->enter_frame.ptr == NULL) 3772 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3773 } 3774 OMPT_STORE_RETURN_ADDRESS(global_tid); 3775 #endif 3776 #if USE_ITT_NOTIFY 3777 __kmp_threads[global_tid]->th.th_ident = 3778 loc; // needed for correct notification of frames 3779 #endif 3780 retval = 3781 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3782 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3783 retval = (retval != 0) ? (0) : (1); 3784 #if OMPT_SUPPORT && OMPT_OPTIONAL 3785 if (ompt_enabled.enabled) { 3786 ompt_frame->enter_frame = ompt_data_none; 3787 } 3788 #endif 3789 3790 // all other workers except primary thread should do this pop here 3791 // (none of other workers except primary will enter __kmpc_end_reduce()) 3792 if (__kmp_env_consistency_check) { 3793 if (retval == 0) { // 0: all other workers; 1: primary thread 3794 __kmp_pop_sync(global_tid, ct_reduce, loc); 3795 } 3796 } 3797 3798 } else { 3799 3800 // should never reach this block 3801 KMP_ASSERT(0); // "unexpected method" 3802 } 3803 if (teams_swapped) { 3804 __kmp_restore_swapped_teams(th, team, task_state); 3805 } 3806 3807 KA_TRACE(10, 3808 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3809 global_tid, packed_reduction_method, retval)); 3810 return retval; 3811 } 3812 3813 /*! 3814 @ingroup SYNCHRONIZATION 3815 @param loc source location information 3816 @param global_tid global thread id. 3817 @param lck pointer to the unique lock data structure 3818 3819 Finish the execution of a blocking reduce. 3820 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3821 start function. 3822 */ 3823 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3824 kmp_critical_name *lck) { 3825 3826 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3827 kmp_info_t *th; 3828 kmp_team_t *team; 3829 int teams_swapped = 0, task_state; 3830 3831 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3832 __kmp_assert_valid_gtid(global_tid); 3833 3834 th = __kmp_thread_from_gtid(global_tid); 3835 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3836 3837 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3838 3839 // this barrier should be visible to a customer and to the threading profile 3840 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3841 OMPT_REDUCTION_DECL(th, global_tid); 3842 3843 if (packed_reduction_method == critical_reduce_block) { 3844 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3845 3846 OMPT_REDUCTION_END; 3847 3848 // TODO: implicit barrier: should be exposed 3849 #if OMPT_SUPPORT 3850 ompt_frame_t *ompt_frame; 3851 if (ompt_enabled.enabled) { 3852 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3853 if (ompt_frame->enter_frame.ptr == NULL) 3854 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3855 } 3856 OMPT_STORE_RETURN_ADDRESS(global_tid); 3857 #endif 3858 #if USE_ITT_NOTIFY 3859 __kmp_threads[global_tid]->th.th_ident = loc; 3860 #endif 3861 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3862 #if OMPT_SUPPORT && OMPT_OPTIONAL 3863 if (ompt_enabled.enabled) { 3864 ompt_frame->enter_frame = ompt_data_none; 3865 } 3866 #endif 3867 3868 } else if (packed_reduction_method == empty_reduce_block) { 3869 3870 OMPT_REDUCTION_END; 3871 3872 // usage: if team size==1, no synchronization is required (Intel platforms only) 3873 3874 // TODO: implicit barrier: should be exposed 3875 #if OMPT_SUPPORT 3876 ompt_frame_t *ompt_frame; 3877 if (ompt_enabled.enabled) { 3878 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3879 if (ompt_frame->enter_frame.ptr == NULL) 3880 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3881 } 3882 OMPT_STORE_RETURN_ADDRESS(global_tid); 3883 #endif 3884 #if USE_ITT_NOTIFY 3885 __kmp_threads[global_tid]->th.th_ident = loc; 3886 #endif 3887 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3888 #if OMPT_SUPPORT && OMPT_OPTIONAL 3889 if (ompt_enabled.enabled) { 3890 ompt_frame->enter_frame = ompt_data_none; 3891 } 3892 #endif 3893 3894 } else if (packed_reduction_method == atomic_reduce_block) { 3895 3896 #if OMPT_SUPPORT 3897 ompt_frame_t *ompt_frame; 3898 if (ompt_enabled.enabled) { 3899 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3900 if (ompt_frame->enter_frame.ptr == NULL) 3901 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3902 } 3903 OMPT_STORE_RETURN_ADDRESS(global_tid); 3904 #endif 3905 // TODO: implicit barrier: should be exposed 3906 #if USE_ITT_NOTIFY 3907 __kmp_threads[global_tid]->th.th_ident = loc; 3908 #endif 3909 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3910 #if OMPT_SUPPORT && OMPT_OPTIONAL 3911 if (ompt_enabled.enabled) { 3912 ompt_frame->enter_frame = ompt_data_none; 3913 } 3914 #endif 3915 3916 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3917 tree_reduce_block)) { 3918 3919 // only primary thread executes here (primary releases all other workers) 3920 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3921 global_tid); 3922 3923 } else { 3924 3925 // should never reach this block 3926 KMP_ASSERT(0); // "unexpected method" 3927 } 3928 if (teams_swapped) { 3929 __kmp_restore_swapped_teams(th, team, task_state); 3930 } 3931 3932 if (__kmp_env_consistency_check) 3933 __kmp_pop_sync(global_tid, ct_reduce, loc); 3934 3935 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3936 global_tid, packed_reduction_method)); 3937 3938 return; 3939 } 3940 3941 #undef __KMP_GET_REDUCTION_METHOD 3942 #undef __KMP_SET_REDUCTION_METHOD 3943 3944 /* end of interface to fast scalable reduce routines */ 3945 3946 kmp_uint64 __kmpc_get_taskid() { 3947 3948 kmp_int32 gtid; 3949 kmp_info_t *thread; 3950 3951 gtid = __kmp_get_gtid(); 3952 if (gtid < 0) { 3953 return 0; 3954 } 3955 thread = __kmp_thread_from_gtid(gtid); 3956 return thread->th.th_current_task->td_task_id; 3957 3958 } // __kmpc_get_taskid 3959 3960 kmp_uint64 __kmpc_get_parent_taskid() { 3961 3962 kmp_int32 gtid; 3963 kmp_info_t *thread; 3964 kmp_taskdata_t *parent_task; 3965 3966 gtid = __kmp_get_gtid(); 3967 if (gtid < 0) { 3968 return 0; 3969 } 3970 thread = __kmp_thread_from_gtid(gtid); 3971 parent_task = thread->th.th_current_task->td_parent; 3972 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3973 3974 } // __kmpc_get_parent_taskid 3975 3976 /*! 3977 @ingroup WORK_SHARING 3978 @param loc source location information. 3979 @param gtid global thread number. 3980 @param num_dims number of associated doacross loops. 3981 @param dims info on loops bounds. 3982 3983 Initialize doacross loop information. 3984 Expect compiler send us inclusive bounds, 3985 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 3986 */ 3987 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3988 const struct kmp_dim *dims) { 3989 __kmp_assert_valid_gtid(gtid); 3990 int j, idx; 3991 kmp_int64 last, trace_count; 3992 kmp_info_t *th = __kmp_threads[gtid]; 3993 kmp_team_t *team = th->th.th_team; 3994 kmp_uint32 *flags; 3995 kmp_disp_t *pr_buf = th->th.th_dispatch; 3996 dispatch_shared_info_t *sh_buf; 3997 3998 KA_TRACE( 3999 20, 4000 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4001 gtid, num_dims, !team->t.t_serialized)); 4002 KMP_DEBUG_ASSERT(dims != NULL); 4003 KMP_DEBUG_ASSERT(num_dims > 0); 4004 4005 if (team->t.t_serialized) { 4006 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4007 return; // no dependencies if team is serialized 4008 } 4009 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4010 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4011 // the next loop 4012 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4013 4014 // Save bounds info into allocated private buffer 4015 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4016 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4017 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4018 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4019 pr_buf->th_doacross_info[0] = 4020 (kmp_int64)num_dims; // first element is number of dimensions 4021 // Save also address of num_done in order to access it later without knowing 4022 // the buffer index 4023 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4024 pr_buf->th_doacross_info[2] = dims[0].lo; 4025 pr_buf->th_doacross_info[3] = dims[0].up; 4026 pr_buf->th_doacross_info[4] = dims[0].st; 4027 last = 5; 4028 for (j = 1; j < num_dims; ++j) { 4029 kmp_int64 4030 range_length; // To keep ranges of all dimensions but the first dims[0] 4031 if (dims[j].st == 1) { // most common case 4032 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4033 range_length = dims[j].up - dims[j].lo + 1; 4034 } else { 4035 if (dims[j].st > 0) { 4036 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4037 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4038 } else { // negative increment 4039 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4040 range_length = 4041 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4042 } 4043 } 4044 pr_buf->th_doacross_info[last++] = range_length; 4045 pr_buf->th_doacross_info[last++] = dims[j].lo; 4046 pr_buf->th_doacross_info[last++] = dims[j].up; 4047 pr_buf->th_doacross_info[last++] = dims[j].st; 4048 } 4049 4050 // Compute total trip count. 4051 // Start with range of dims[0] which we don't need to keep in the buffer. 4052 if (dims[0].st == 1) { // most common case 4053 trace_count = dims[0].up - dims[0].lo + 1; 4054 } else if (dims[0].st > 0) { 4055 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4056 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4057 } else { // negative increment 4058 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4059 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4060 } 4061 for (j = 1; j < num_dims; ++j) { 4062 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4063 } 4064 KMP_DEBUG_ASSERT(trace_count > 0); 4065 4066 // Check if shared buffer is not occupied by other loop (idx - 4067 // __kmp_dispatch_num_buffers) 4068 if (idx != sh_buf->doacross_buf_idx) { 4069 // Shared buffer is occupied, wait for it to be free 4070 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4071 __kmp_eq_4, NULL); 4072 } 4073 #if KMP_32_BIT_ARCH 4074 // Check if we are the first thread. After the CAS the first thread gets 0, 4075 // others get 1 if initialization is in progress, allocated pointer otherwise. 4076 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4077 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4078 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4079 #else 4080 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4081 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4082 #endif 4083 if (flags == NULL) { 4084 // we are the first thread, allocate the array of flags 4085 size_t size = 4086 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4087 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4088 KMP_MB(); 4089 sh_buf->doacross_flags = flags; 4090 } else if (flags == (kmp_uint32 *)1) { 4091 #if KMP_32_BIT_ARCH 4092 // initialization is still in progress, need to wait 4093 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4094 #else 4095 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4096 #endif 4097 KMP_YIELD(TRUE); 4098 KMP_MB(); 4099 } else { 4100 KMP_MB(); 4101 } 4102 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4103 pr_buf->th_doacross_flags = 4104 sh_buf->doacross_flags; // save private copy in order to not 4105 // touch shared buffer on each iteration 4106 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4107 } 4108 4109 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4110 __kmp_assert_valid_gtid(gtid); 4111 kmp_int64 shft; 4112 size_t num_dims, i; 4113 kmp_uint32 flag; 4114 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4115 kmp_info_t *th = __kmp_threads[gtid]; 4116 kmp_team_t *team = th->th.th_team; 4117 kmp_disp_t *pr_buf; 4118 kmp_int64 lo, up, st; 4119 4120 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4121 if (team->t.t_serialized) { 4122 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4123 return; // no dependencies if team is serialized 4124 } 4125 4126 // calculate sequential iteration number and check out-of-bounds condition 4127 pr_buf = th->th.th_dispatch; 4128 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4129 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4130 lo = pr_buf->th_doacross_info[2]; 4131 up = pr_buf->th_doacross_info[3]; 4132 st = pr_buf->th_doacross_info[4]; 4133 #if OMPT_SUPPORT && OMPT_OPTIONAL 4134 ompt_dependence_t deps[num_dims]; 4135 #endif 4136 if (st == 1) { // most common case 4137 if (vec[0] < lo || vec[0] > up) { 4138 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4139 "bounds [%lld,%lld]\n", 4140 gtid, vec[0], lo, up)); 4141 return; 4142 } 4143 iter_number = vec[0] - lo; 4144 } else if (st > 0) { 4145 if (vec[0] < lo || vec[0] > up) { 4146 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4147 "bounds [%lld,%lld]\n", 4148 gtid, vec[0], lo, up)); 4149 return; 4150 } 4151 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4152 } else { // negative increment 4153 if (vec[0] > lo || vec[0] < up) { 4154 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4155 "bounds [%lld,%lld]\n", 4156 gtid, vec[0], lo, up)); 4157 return; 4158 } 4159 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4160 } 4161 #if OMPT_SUPPORT && OMPT_OPTIONAL 4162 deps[0].variable.value = iter_number; 4163 deps[0].dependence_type = ompt_dependence_type_sink; 4164 #endif 4165 for (i = 1; i < num_dims; ++i) { 4166 kmp_int64 iter, ln; 4167 size_t j = i * 4; 4168 ln = pr_buf->th_doacross_info[j + 1]; 4169 lo = pr_buf->th_doacross_info[j + 2]; 4170 up = pr_buf->th_doacross_info[j + 3]; 4171 st = pr_buf->th_doacross_info[j + 4]; 4172 if (st == 1) { 4173 if (vec[i] < lo || vec[i] > up) { 4174 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4175 "bounds [%lld,%lld]\n", 4176 gtid, vec[i], lo, up)); 4177 return; 4178 } 4179 iter = vec[i] - lo; 4180 } else if (st > 0) { 4181 if (vec[i] < lo || vec[i] > up) { 4182 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4183 "bounds [%lld,%lld]\n", 4184 gtid, vec[i], lo, up)); 4185 return; 4186 } 4187 iter = (kmp_uint64)(vec[i] - lo) / st; 4188 } else { // st < 0 4189 if (vec[i] > lo || vec[i] < up) { 4190 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4191 "bounds [%lld,%lld]\n", 4192 gtid, vec[i], lo, up)); 4193 return; 4194 } 4195 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4196 } 4197 iter_number = iter + ln * iter_number; 4198 #if OMPT_SUPPORT && OMPT_OPTIONAL 4199 deps[i].variable.value = iter; 4200 deps[i].dependence_type = ompt_dependence_type_sink; 4201 #endif 4202 } 4203 shft = iter_number % 32; // use 32-bit granularity 4204 iter_number >>= 5; // divided by 32 4205 flag = 1 << shft; 4206 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4207 KMP_YIELD(TRUE); 4208 } 4209 KMP_MB(); 4210 #if OMPT_SUPPORT && OMPT_OPTIONAL 4211 if (ompt_enabled.ompt_callback_dependences) { 4212 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4213 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4214 } 4215 #endif 4216 KA_TRACE(20, 4217 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4218 gtid, (iter_number << 5) + shft)); 4219 } 4220 4221 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4222 __kmp_assert_valid_gtid(gtid); 4223 kmp_int64 shft; 4224 size_t num_dims, i; 4225 kmp_uint32 flag; 4226 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4227 kmp_info_t *th = __kmp_threads[gtid]; 4228 kmp_team_t *team = th->th.th_team; 4229 kmp_disp_t *pr_buf; 4230 kmp_int64 lo, st; 4231 4232 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4233 if (team->t.t_serialized) { 4234 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4235 return; // no dependencies if team is serialized 4236 } 4237 4238 // calculate sequential iteration number (same as in "wait" but no 4239 // out-of-bounds checks) 4240 pr_buf = th->th.th_dispatch; 4241 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4242 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4243 lo = pr_buf->th_doacross_info[2]; 4244 st = pr_buf->th_doacross_info[4]; 4245 #if OMPT_SUPPORT && OMPT_OPTIONAL 4246 ompt_dependence_t deps[num_dims]; 4247 #endif 4248 if (st == 1) { // most common case 4249 iter_number = vec[0] - lo; 4250 } else if (st > 0) { 4251 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4252 } else { // negative increment 4253 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4254 } 4255 #if OMPT_SUPPORT && OMPT_OPTIONAL 4256 deps[0].variable.value = iter_number; 4257 deps[0].dependence_type = ompt_dependence_type_source; 4258 #endif 4259 for (i = 1; i < num_dims; ++i) { 4260 kmp_int64 iter, ln; 4261 size_t j = i * 4; 4262 ln = pr_buf->th_doacross_info[j + 1]; 4263 lo = pr_buf->th_doacross_info[j + 2]; 4264 st = pr_buf->th_doacross_info[j + 4]; 4265 if (st == 1) { 4266 iter = vec[i] - lo; 4267 } else if (st > 0) { 4268 iter = (kmp_uint64)(vec[i] - lo) / st; 4269 } else { // st < 0 4270 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4271 } 4272 iter_number = iter + ln * iter_number; 4273 #if OMPT_SUPPORT && OMPT_OPTIONAL 4274 deps[i].variable.value = iter; 4275 deps[i].dependence_type = ompt_dependence_type_source; 4276 #endif 4277 } 4278 #if OMPT_SUPPORT && OMPT_OPTIONAL 4279 if (ompt_enabled.ompt_callback_dependences) { 4280 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4281 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4282 } 4283 #endif 4284 shft = iter_number % 32; // use 32-bit granularity 4285 iter_number >>= 5; // divided by 32 4286 flag = 1 << shft; 4287 KMP_MB(); 4288 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4289 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4290 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4291 (iter_number << 5) + shft)); 4292 } 4293 4294 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4295 __kmp_assert_valid_gtid(gtid); 4296 kmp_int32 num_done; 4297 kmp_info_t *th = __kmp_threads[gtid]; 4298 kmp_team_t *team = th->th.th_team; 4299 kmp_disp_t *pr_buf = th->th.th_dispatch; 4300 4301 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4302 if (team->t.t_serialized) { 4303 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4304 return; // nothing to do 4305 } 4306 num_done = 4307 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4308 if (num_done == th->th.th_team_nproc) { 4309 // we are the last thread, need to free shared resources 4310 int idx = pr_buf->th_doacross_buf_idx - 1; 4311 dispatch_shared_info_t *sh_buf = 4312 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4313 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4314 (kmp_int64)&sh_buf->doacross_num_done); 4315 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4316 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4317 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4318 sh_buf->doacross_flags = NULL; 4319 sh_buf->doacross_num_done = 0; 4320 sh_buf->doacross_buf_idx += 4321 __kmp_dispatch_num_buffers; // free buffer for future re-use 4322 } 4323 // free private resources (need to keep buffer index forever) 4324 pr_buf->th_doacross_flags = NULL; 4325 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4326 pr_buf->th_doacross_info = NULL; 4327 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4328 } 4329 4330 /* OpenMP 5.1 Memory Management routines */ 4331 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4332 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator); 4333 } 4334 4335 void *omp_aligned_alloc(size_t align, size_t size, 4336 omp_allocator_handle_t allocator) { 4337 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator); 4338 } 4339 4340 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4341 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator); 4342 } 4343 4344 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 4345 omp_allocator_handle_t allocator) { 4346 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator); 4347 } 4348 4349 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4350 omp_allocator_handle_t free_allocator) { 4351 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4352 free_allocator); 4353 } 4354 4355 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4356 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4357 } 4358 /* end of OpenMP 5.1 Memory Management routines */ 4359 4360 int __kmpc_get_target_offload(void) { 4361 if (!__kmp_init_serial) { 4362 __kmp_serial_initialize(); 4363 } 4364 return __kmp_target_offload; 4365 } 4366 4367 int __kmpc_pause_resource(kmp_pause_status_t level) { 4368 if (!__kmp_init_serial) { 4369 return 1; // Can't pause if runtime is not initialized 4370 } 4371 return __kmp_pause_resource(level); 4372 } 4373 4374 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4375 if (!__kmp_init_serial) 4376 __kmp_serial_initialize(); 4377 4378 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4379 4380 #if OMPT_SUPPORT 4381 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4382 ompt_callbacks.ompt_callback(ompt_callback_error)( 4383 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4384 OMPT_GET_RETURN_ADDRESS(0)); 4385 } 4386 #endif // OMPT_SUPPORT 4387 4388 char *src_loc; 4389 if (loc && loc->psource) { 4390 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4391 src_loc = 4392 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col); 4393 __kmp_str_loc_free(&str_loc); 4394 } else { 4395 src_loc = __kmp_str_format("unknown"); 4396 } 4397 4398 if (severity == severity_warning) 4399 KMP_WARNING(UserDirectedWarning, src_loc, message); 4400 else 4401 KMP_FATAL(UserDirectedError, src_loc, message); 4402 4403 __kmp_str_free(&src_loc); 4404 } 4405 4406 // Mark begin of scope directive. 4407 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4408 // reserved is for extension of scope directive and not used. 4409 #if OMPT_SUPPORT && OMPT_OPTIONAL 4410 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4411 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4412 int tid = __kmp_tid_from_gtid(gtid); 4413 ompt_callbacks.ompt_callback(ompt_callback_work)( 4414 ompt_work_scope, ompt_scope_begin, 4415 &(team->t.ompt_team_info.parallel_data), 4416 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4417 OMPT_GET_RETURN_ADDRESS(0)); 4418 } 4419 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4420 } 4421 4422 // Mark end of scope directive 4423 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4424 // reserved is for extension of scope directive and not used. 4425 #if OMPT_SUPPORT && OMPT_OPTIONAL 4426 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4427 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4428 int tid = __kmp_tid_from_gtid(gtid); 4429 ompt_callbacks.ompt_callback(ompt_callback_work)( 4430 ompt_work_scope, ompt_scope_end, 4431 &(team->t.ompt_team_info.parallel_data), 4432 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4433 OMPT_GET_RETURN_ADDRESS(0)); 4434 } 4435 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4436 } 4437 4438 #ifdef KMP_USE_VERSION_SYMBOLS 4439 // For GOMP compatibility there are two versions of each omp_* API. 4440 // One is the plain C symbol and one is the Fortran symbol with an appended 4441 // underscore. When we implement a specific ompc_* version of an omp_* 4442 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4443 // instead of the Fortran versions in kmp_ftn_entry.h 4444 extern "C" { 4445 // Have to undef these from omp.h so they aren't translated into 4446 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4447 #ifdef omp_set_affinity_format 4448 #undef omp_set_affinity_format 4449 #endif 4450 #ifdef omp_get_affinity_format 4451 #undef omp_get_affinity_format 4452 #endif 4453 #ifdef omp_display_affinity 4454 #undef omp_display_affinity 4455 #endif 4456 #ifdef omp_capture_affinity 4457 #undef omp_capture_affinity 4458 #endif 4459 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4460 "OMP_5.0"); 4461 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4462 "OMP_5.0"); 4463 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4464 "OMP_5.0"); 4465 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4466 "OMP_5.0"); 4467 } // extern "C" 4468 #endif 4469