/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).
93 94 If multiple non-OpenMP threads all enter an OpenMP construct then this 95 will be a unique thread identifier among all the threads created by 96 the OpenMP runtime (but the value cannot be defined in terms of 97 OpenMP thread ids returned by omp_get_thread_num()). 98 */ 99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 100 kmp_int32 gtid = __kmp_entry_gtid(); 101 102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 103 104 return gtid; 105 } 106 107 /*! 108 @ingroup THREAD_STATES 109 @param loc Source location information. 110 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 111 112 This function can be called in any context. 113 It returns the total number of threads under the control of the OpenMP runtime. 114 That is not a number that can be determined by any OpenMP standard calls, since 115 the library may be called from more than one non-OpenMP thread, and this 116 reflects the total over all such calls. Similarly the runtime maintains 117 underlying threads even when they are not active (since the cost of creating 118 and destroying OS threads is high), this call counts all such threads even if 119 they are not waiting for work. 120 */ 121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 122 KC_TRACE(10, 123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 124 125 return TCR_4(__kmp_all_nth); 126 } 127 128 /*! 129 @ingroup THREAD_STATES 130 @param loc Source location information. 131 @return The thread number of the calling thread in the innermost active parallel 132 construct. 133 */ 134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 136 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 137 } 138 139 /*! 140 @ingroup THREAD_STATES 141 @param loc Source location information. 142 @return The number of threads in the innermost active parallel construct. 143 */ 144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 146 147 return __kmp_entry_thread()->th.th_team->t.t_nproc; 148 } 149 150 /*! 151 * @ingroup DEPRECATED 152 * @param loc location description 153 * 154 * This function need not be called. It always returns TRUE. 
155 */ 156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 157 #ifndef KMP_DEBUG 158 159 return TRUE; 160 161 #else 162 163 const char *semi2; 164 const char *semi3; 165 int line_no; 166 167 if (__kmp_par_range == 0) { 168 return TRUE; 169 } 170 semi2 = loc->psource; 171 if (semi2 == NULL) { 172 return TRUE; 173 } 174 semi2 = strchr(semi2, ';'); 175 if (semi2 == NULL) { 176 return TRUE; 177 } 178 semi2 = strchr(semi2 + 1, ';'); 179 if (semi2 == NULL) { 180 return TRUE; 181 } 182 if (__kmp_par_range_filename[0]) { 183 const char *name = semi2 - 1; 184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 185 name--; 186 } 187 if ((*name == '/') || (*name == ';')) { 188 name++; 189 } 190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 191 return __kmp_par_range < 0; 192 } 193 } 194 semi3 = strchr(semi2 + 1, ';'); 195 if (__kmp_par_range_routine[0]) { 196 if ((semi3 != NULL) && (semi3 > semi2) && 197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 198 return __kmp_par_range < 0; 199 } 200 } 201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 203 return __kmp_par_range > 0; 204 } 205 return __kmp_par_range < 0; 206 } 207 return TRUE; 208 209 #endif /* KMP_DEBUG */ 210 } 211 212 /*! 213 @ingroup THREAD_STATES 214 @param loc Source location information. 215 @return 1 if this thread is executing inside an active parallel region, zero if 216 not. 217 */ 218 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 219 return __kmp_entry_thread()->th.th_root->r.r_active; 220 } 221 222 /*! 223 @ingroup PARALLEL 224 @param loc source location information 225 @param global_tid global thread number 226 @param num_threads number of threads requested for this parallel construct 227 228 Set the number of threads to be used by the next fork spawned by this thread. 229 This call is only required if the parallel construct has a `num_threads` clause. 230 */ 231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 232 kmp_int32 num_threads) { 233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 234 global_tid, num_threads)); 235 __kmp_assert_valid_gtid(global_tid); 236 __kmp_push_num_threads(loc, global_tid, num_threads); 237 } 238 239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 241 /* the num_threads are automatically popped */ 242 } 243 244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 245 kmp_int32 proc_bind) { 246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 247 proc_bind)); 248 __kmp_assert_valid_gtid(global_tid); 249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 250 } 251 252 /*! 253 @ingroup PARALLEL 254 @param loc source location information 255 @param argc total number of arguments in the ellipsis 256 @param microtask pointer to callback routine consisting of outlined parallel 257 construct 258 @param ... pointers to shared variables that aren't global 259 260 Do the actual fork and call the microtask in the relevant number of threads. 261 */ 262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
368 This call is only required if the teams construct has a `num_teams` clause 369 or a `thread_limit` clause (or both). 370 */ 371 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, 372 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, 373 kmp_int32 num_threads) { 374 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d" 375 " num_teams_ub=%d num_threads=%d\n", 376 global_tid, num_teams_lb, num_teams_ub, num_threads)); 377 __kmp_assert_valid_gtid(global_tid); 378 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub, 379 num_threads); 380 } 381 382 /*! 383 @ingroup PARALLEL 384 @param loc source location information 385 @param argc total number of arguments in the ellipsis 386 @param microtask pointer to callback routine consisting of outlined teams 387 construct 388 @param ... pointers to shared variables that aren't global 389 390 Do the actual fork and call the microtask in the relevant number of threads. 391 */ 392 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 393 ...) { 394 int gtid = __kmp_entry_gtid(); 395 kmp_info_t *this_thr = __kmp_threads[gtid]; 396 va_list ap; 397 va_start(ap, microtask); 398 399 #if KMP_STATS_ENABLED 400 KMP_COUNT_BLOCK(OMP_TEAMS); 401 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 402 if (previous_state == stats_state_e::SERIAL_REGION) { 403 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 404 } else { 405 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 406 } 407 #endif 408 409 // remember teams entry point and nesting level 410 this_thr->th.th_teams_microtask = microtask; 411 this_thr->th.th_teams_level = 412 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 413 414 #if OMPT_SUPPORT 415 kmp_team_t *parent_team = this_thr->th.th_team; 416 int tid = __kmp_tid_from_gtid(gtid); 417 if (ompt_enabled.enabled) { 418 parent_team->t.t_implicit_task_taskdata[tid] 419 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 420 } 421 OMPT_STORE_RETURN_ADDRESS(gtid); 422 #endif 423 424 // check if __kmpc_push_num_teams called, set default number of teams 425 // otherwise 426 if (this_thr->th.th_teams_size.nteams == 0) { 427 __kmp_push_num_teams(loc, gtid, 0, 0); 428 } 429 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 430 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 431 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 432 433 __kmp_fork_call( 434 loc, gtid, fork_context_intel, argc, 435 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task 436 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); 437 __kmp_join_call(loc, gtid 438 #if OMPT_SUPPORT 439 , 440 fork_context_intel 441 #endif 442 ); 443 444 // Pop current CG root off list 445 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 446 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 447 this_thr->th.th_cg_roots = tmp->up; 448 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 449 " to node %p. 
cg_nthreads was %d\n",
                this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
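
For reference, a conditional parallel region might be lowered roughly as
follows when the condition is false. This is an illustrative sketch only; the
outlined function and its shared arguments are assumptions, not names this
interface prescribes:
@code
// #pragma omp parallel if (cond)
if (cond) {
  // outlined_fn, a, b: hypothetical placeholders for the outlined body and
  // its shared variables
  __kmpc_fork_call(&loc, 2, (kmpc_micro)&outlined_fn, &a, &b);
} else {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_serialized_parallel(&loc, gtid);
  // ... the region body runs on the encountering thread only ...
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode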
510 */ 511 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 512 kmp_internal_control_t *top; 513 kmp_info_t *this_thr; 514 kmp_team_t *serial_team; 515 516 KC_TRACE(10, 517 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 518 519 /* skip all this code for autopar serialized loops since it results in 520 unacceptable overhead */ 521 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 522 return; 523 524 // Not autopar code 525 __kmp_assert_valid_gtid(global_tid); 526 if (!TCR_4(__kmp_init_parallel)) 527 __kmp_parallel_initialize(); 528 529 __kmp_resume_if_soft_paused(); 530 531 this_thr = __kmp_threads[global_tid]; 532 serial_team = this_thr->th.th_serial_team; 533 534 kmp_task_team_t *task_team = this_thr->th.th_task_team; 535 // we need to wait for the proxy tasks before finishing the thread 536 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 537 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 538 539 KMP_MB(); 540 KMP_DEBUG_ASSERT(serial_team); 541 KMP_ASSERT(serial_team->t.t_serialized); 542 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 543 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 544 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 545 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 546 547 #if OMPT_SUPPORT 548 if (ompt_enabled.enabled && 549 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 550 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 551 if (ompt_enabled.ompt_callback_implicit_task) { 552 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 553 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 554 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 555 } 556 557 // reset clear the task id only after unlinking the task 558 ompt_data_t *parent_task_data; 559 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 560 561 if (ompt_enabled.ompt_callback_parallel_end) { 562 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 563 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 564 ompt_parallel_invoker_program | ompt_parallel_team, 565 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 566 } 567 __ompt_lw_taskteam_unlink(this_thr); 568 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 569 } 570 #endif 571 572 /* If necessary, pop the internal control stack values and replace the team 573 * values */ 574 top = serial_team->t.t_control_stack_top; 575 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 576 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 577 serial_team->t.t_control_stack_top = top->next; 578 __kmp_free(top); 579 } 580 581 // if( serial_team -> t.t_serialized > 1 ) 582 serial_team->t.t_level--; 583 584 /* pop dispatch buffers stack */ 585 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 586 { 587 dispatch_private_info_t *disp_buffer = 588 serial_team->t.t_dispatch->th_disp_buffer; 589 serial_team->t.t_dispatch->th_disp_buffer = 590 serial_team->t.t_dispatch->th_disp_buffer->next; 591 __kmp_free(disp_buffer); 592 } 593 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 594 595 --serial_team->t.t_serialized; 596 if (serial_team->t.t_serialized == 0) { 597 598 /* return to the parallel section */ 599 600 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 601 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 602 __kmp_clear_x87_fpu_status_word(); 603 
__kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
  // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be
  // used. We shouldn't need it, though, since the ABI rules require that
  // * If the compiler generates NGO stores it also generates the fence
  // * If users hand-code NGO stores they should insert the fence
  // therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is addressed as well (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
691 } else { 692 #if KMP_COMPILER_ICC 693 _mm_mfence(); 694 #elif KMP_COMPILER_MSVC 695 MemoryBarrier(); 696 #else 697 __sync_synchronize(); 698 #endif // KMP_COMPILER_ICC 699 } 700 #endif // KMP_MIC 701 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ 702 KMP_ARCH_RISCV64) 703 // Nothing to see here move along 704 #elif KMP_ARCH_PPC64 705 // Nothing needed here (we have a real MB above). 706 #else 707 #error Unknown or unsupported architecture 708 #endif 709 710 #if OMPT_SUPPORT && OMPT_OPTIONAL 711 if (ompt_enabled.ompt_callback_flush) { 712 ompt_callbacks.ompt_callback(ompt_callback_flush)( 713 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 714 } 715 #endif 716 } 717 718 /* -------------------------------------------------------------------------- */ 719 /*! 720 @ingroup SYNCHRONIZATION 721 @param loc source location information 722 @param global_tid thread id. 723 724 Execute a barrier. 725 */ 726 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 727 KMP_COUNT_BLOCK(OMP_BARRIER); 728 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 729 __kmp_assert_valid_gtid(global_tid); 730 731 if (!TCR_4(__kmp_init_parallel)) 732 __kmp_parallel_initialize(); 733 734 __kmp_resume_if_soft_paused(); 735 736 if (__kmp_env_consistency_check) { 737 if (loc == 0) { 738 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 739 } 740 __kmp_check_barrier(global_tid, ct_barrier, loc); 741 } 742 743 #if OMPT_SUPPORT 744 ompt_frame_t *ompt_frame; 745 if (ompt_enabled.enabled) { 746 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 747 if (ompt_frame->enter_frame.ptr == NULL) 748 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 749 } 750 OMPT_STORE_RETURN_ADDRESS(global_tid); 751 #endif 752 __kmp_threads[global_tid]->th.th_ident = loc; 753 // TODO: explicit barrier_wait_id: 754 // this function is called when 'barrier' directive is present or 755 // implicit barrier at the end of a worksharing construct. 756 // 1) better to add a per-thread barrier counter to a thread data structure 757 // 2) set to 0 when a new team is created 758 // 4) no sync is required 759 760 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 761 #if OMPT_SUPPORT && OMPT_OPTIONAL 762 if (ompt_enabled.enabled) { 763 ompt_frame->enter_frame = ompt_data_none; 764 } 765 #endif 766 } 767 768 /* The BARRIER for a MASTER section is always explicit */ 769 /*! 770 @ingroup WORK_SHARING 771 @param loc source location information. 772 @param global_tid global thread number . 773 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
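
A typical pairing, sketched here for illustration only (the enclosing parallel
region and the master-only body are assumed and not shown):
@code
// #pragma omp master
if (__kmpc_master(&loc, gtid)) {
  // ... code executed by the primary thread only ...
  __kmpc_end_master(&loc, gtid);
}
@endcode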
774 */ 775 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 776 int status = 0; 777 778 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 779 __kmp_assert_valid_gtid(global_tid); 780 781 if (!TCR_4(__kmp_init_parallel)) 782 __kmp_parallel_initialize(); 783 784 __kmp_resume_if_soft_paused(); 785 786 if (KMP_MASTER_GTID(global_tid)) { 787 KMP_COUNT_BLOCK(OMP_MASTER); 788 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 789 status = 1; 790 } 791 792 #if OMPT_SUPPORT && OMPT_OPTIONAL 793 if (status) { 794 if (ompt_enabled.ompt_callback_masked) { 795 kmp_info_t *this_thr = __kmp_threads[global_tid]; 796 kmp_team_t *team = this_thr->th.th_team; 797 798 int tid = __kmp_tid_from_gtid(global_tid); 799 ompt_callbacks.ompt_callback(ompt_callback_masked)( 800 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 801 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 802 OMPT_GET_RETURN_ADDRESS(0)); 803 } 804 } 805 #endif 806 807 if (__kmp_env_consistency_check) { 808 #if KMP_USE_DYNAMIC_LOCK 809 if (status) 810 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 811 else 812 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 813 #else 814 if (status) 815 __kmp_push_sync(global_tid, ct_master, loc, NULL); 816 else 817 __kmp_check_sync(global_tid, ct_master, loc, NULL); 818 #endif 819 } 820 821 return status; 822 } 823 824 /*! 825 @ingroup WORK_SHARING 826 @param loc source location information. 827 @param global_tid global thread number . 828 829 Mark the end of a <tt>master</tt> region. This should only be called by the 830 thread that executes the <tt>master</tt> region. 831 */ 832 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 833 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 834 __kmp_assert_valid_gtid(global_tid); 835 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 836 KMP_POP_PARTITIONED_TIMER(); 837 838 #if OMPT_SUPPORT && OMPT_OPTIONAL 839 kmp_info_t *this_thr = __kmp_threads[global_tid]; 840 kmp_team_t *team = this_thr->th.th_team; 841 if (ompt_enabled.ompt_callback_masked) { 842 int tid = __kmp_tid_from_gtid(global_tid); 843 ompt_callbacks.ompt_callback(ompt_callback_masked)( 844 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 845 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 846 OMPT_GET_RETURN_ADDRESS(0)); 847 } 848 #endif 849 850 if (__kmp_env_consistency_check) { 851 if (KMP_MASTER_GTID(global_tid)) 852 __kmp_pop_sync(global_tid, ct_master, loc); 853 } 854 } 855 856 /*! 857 @ingroup WORK_SHARING 858 @param loc source location information. 859 @param global_tid global thread number. 860 @param filter result of evaluating filter clause on thread global_tid, or zero 861 if no filter clause present 862 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise. 
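
A typical pairing, sketched for illustration only (the filter expression and
the surrounding parallel region are assumptions, not shown):
@code
// #pragma omp masked filter(expr)
kmp_int32 filter_result = expr; // expr: placeholder, evaluated by every thread
if (__kmpc_masked(&loc, gtid, filter_result)) {
  // ... code executed only by the thread selected by the filter ...
  __kmpc_end_masked(&loc, gtid);
}
@endcode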
863 */ 864 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) { 865 int status = 0; 866 int tid; 867 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid)); 868 __kmp_assert_valid_gtid(global_tid); 869 870 if (!TCR_4(__kmp_init_parallel)) 871 __kmp_parallel_initialize(); 872 873 __kmp_resume_if_soft_paused(); 874 875 tid = __kmp_tid_from_gtid(global_tid); 876 if (tid == filter) { 877 KMP_COUNT_BLOCK(OMP_MASKED); 878 KMP_PUSH_PARTITIONED_TIMER(OMP_masked); 879 status = 1; 880 } 881 882 #if OMPT_SUPPORT && OMPT_OPTIONAL 883 if (status) { 884 if (ompt_enabled.ompt_callback_masked) { 885 kmp_info_t *this_thr = __kmp_threads[global_tid]; 886 kmp_team_t *team = this_thr->th.th_team; 887 ompt_callbacks.ompt_callback(ompt_callback_masked)( 888 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 889 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 890 OMPT_GET_RETURN_ADDRESS(0)); 891 } 892 } 893 #endif 894 895 if (__kmp_env_consistency_check) { 896 #if KMP_USE_DYNAMIC_LOCK 897 if (status) 898 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0); 899 else 900 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0); 901 #else 902 if (status) 903 __kmp_push_sync(global_tid, ct_masked, loc, NULL); 904 else 905 __kmp_check_sync(global_tid, ct_masked, loc, NULL); 906 #endif 907 } 908 909 return status; 910 } 911 912 /*! 913 @ingroup WORK_SHARING 914 @param loc source location information. 915 @param global_tid global thread number . 916 917 Mark the end of a <tt>masked</tt> region. This should only be called by the 918 thread that executes the <tt>masked</tt> region. 919 */ 920 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) { 921 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid)); 922 __kmp_assert_valid_gtid(global_tid); 923 KMP_POP_PARTITIONED_TIMER(); 924 925 #if OMPT_SUPPORT && OMPT_OPTIONAL 926 kmp_info_t *this_thr = __kmp_threads[global_tid]; 927 kmp_team_t *team = this_thr->th.th_team; 928 if (ompt_enabled.ompt_callback_masked) { 929 int tid = __kmp_tid_from_gtid(global_tid); 930 ompt_callbacks.ompt_callback(ompt_callback_masked)( 931 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 932 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 933 OMPT_GET_RETURN_ADDRESS(0)); 934 } 935 #endif 936 937 if (__kmp_env_consistency_check) { 938 __kmp_pop_sync(global_tid, ct_masked, loc); 939 } 940 } 941 942 /*! 943 @ingroup WORK_SHARING 944 @param loc source location information. 945 @param gtid global thread number. 946 947 Start execution of an <tt>ordered</tt> construct. 
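
For illustration only (the enclosing worksharing loop and its dispatch calls
are assumed and omitted), the construct is bracketed by this call and
__kmpc_end_ordered inside the outlined loop body:
@code
// body of: #pragma omp for ordered
__kmpc_ordered(&loc, gtid);
// ... statements in the ordered block, executed in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode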
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
1060 void *idx; 1061 kmp_indirect_lock_t **lck; 1062 lck = (kmp_indirect_lock_t **)crit; 1063 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 1064 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 1065 KMP_SET_I_LOCK_LOCATION(ilk, loc); 1066 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 1067 KA_TRACE(20, 1068 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 1069 #if USE_ITT_BUILD 1070 __kmp_itt_critical_creating(ilk->lock, loc); 1071 #endif 1072 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 1073 if (status == 0) { 1074 #if USE_ITT_BUILD 1075 __kmp_itt_critical_destroyed(ilk->lock); 1076 #endif 1077 // We don't really need to destroy the unclaimed lock here since it will be 1078 // cleaned up at program exit. 1079 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 1080 } 1081 KMP_DEBUG_ASSERT(*lck != NULL); 1082 } 1083 1084 // Fast-path acquire tas lock 1085 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 1086 { \ 1087 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1088 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1089 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1090 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1091 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 1092 kmp_uint32 spins; \ 1093 KMP_FSYNC_PREPARE(l); \ 1094 KMP_INIT_YIELD(spins); \ 1095 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1096 do { \ 1097 if (TCR_4(__kmp_nth) > \ 1098 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1099 KMP_YIELD(TRUE); \ 1100 } else { \ 1101 KMP_YIELD_SPIN(spins); \ 1102 } \ 1103 __kmp_spin_backoff(&backoff); \ 1104 } while ( \ 1105 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1106 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1107 } \ 1108 KMP_FSYNC_ACQUIRED(l); \ 1109 } 1110 1111 // Fast-path test tas lock 1112 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1113 { \ 1114 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1115 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1116 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1117 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1118 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1119 } 1120 1121 // Fast-path release tas lock 1122 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1123 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1124 1125 #if KMP_USE_FUTEX 1126 1127 #include <sys/syscall.h> 1128 #include <unistd.h> 1129 #ifndef FUTEX_WAIT 1130 #define FUTEX_WAIT 0 1131 #endif 1132 #ifndef FUTEX_WAKE 1133 #define FUTEX_WAKE 1 1134 #endif 1135 1136 // Fast-path acquire futex lock 1137 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1138 { \ 1139 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1140 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1141 KMP_MB(); \ 1142 KMP_FSYNC_PREPARE(ftx); \ 1143 kmp_int32 poll_val; \ 1144 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1145 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1146 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1147 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1148 if (!cond) { \ 1149 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1150 poll_val | \ 1151 KMP_LOCK_BUSY(1, futex))) { \ 1152 continue; \ 1153 } \ 1154 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1155 } \ 1156 kmp_int32 rc; \ 1157 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1158 NULL, NULL, 0)) != 0) { \ 1159 continue; \ 1160 } \ 1161 gtid_code |= 1; \ 1162 } \ 1163 KMP_FSYNC_ACQUIRED(ftx); \ 1164 } 1165 
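// Note on the futex fast path above and below: the owner writes
// KMP_LOCK_BUSY((gtid + 1) << 1, futex) into lk.poll, so the low bit of the
// stripped poll value is free to serve as a "waiters present" flag. A thread
// that loses the initial compare-and-store ORs in KMP_LOCK_BUSY(1, futex)
// before sleeping in FUTEX_WAIT, and KMP_RELEASE_FUTEX_LOCK therefore only
// pays for a FUTEX_WAKE syscall when KMP_LOCK_STRIP(poll_val) & 1 indicates
// that some thread may actually be sleeping on the lock.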
1166 // Fast-path test futex lock 1167 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1168 { \ 1169 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1170 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1171 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1172 KMP_FSYNC_ACQUIRED(ftx); \ 1173 rc = TRUE; \ 1174 } else { \ 1175 rc = FALSE; \ 1176 } \ 1177 } 1178 1179 // Fast-path release futex lock 1180 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1181 { \ 1182 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1183 KMP_MB(); \ 1184 KMP_FSYNC_RELEASING(ftx); \ 1185 kmp_int32 poll_val = \ 1186 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1187 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1188 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1189 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1190 } \ 1191 KMP_MB(); \ 1192 KMP_YIELD_OVERSUB(); \ 1193 } 1194 1195 #endif // KMP_USE_FUTEX 1196 1197 #else // KMP_USE_DYNAMIC_LOCK 1198 1199 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1200 ident_t const *loc, 1201 kmp_int32 gtid) { 1202 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1203 1204 // Because of the double-check, the following load doesn't need to be volatile 1205 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1206 1207 if (lck == NULL) { 1208 void *idx; 1209 1210 // Allocate & initialize the lock. 1211 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1212 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1213 __kmp_init_user_lock_with_checks(lck); 1214 __kmp_set_user_lock_location(lck, loc); 1215 #if USE_ITT_BUILD 1216 __kmp_itt_critical_creating(lck); 1217 // __kmp_itt_critical_creating() should be called *before* the first usage 1218 // of underlying lock. It is the only place where we can guarantee it. There 1219 // are chances the lock will destroyed with no usage, but it is not a 1220 // problem, because this is not real event seen by user but rather setting 1221 // name for object (lock). See more details in kmp_itt.h. 1222 #endif /* USE_ITT_BUILD */ 1223 1224 // Use a cmpxchg instruction to slam the start of the critical section with 1225 // the lock pointer. If another thread beat us to it, deallocate the lock, 1226 // and use the lock that the other thread allocated. 1227 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1228 1229 if (status == 0) { 1230 // Deallocate the lock and reload the value. 1231 #if USE_ITT_BUILD 1232 __kmp_itt_critical_destroyed(lck); 1233 // Let ITT know the lock is destroyed and the same memory location may be reused 1234 // for another purpose. 1235 #endif /* USE_ITT_BUILD */ 1236 __kmp_destroy_user_lock_with_checks(lck); 1237 __kmp_user_lock_free(&idx, gtid, lck); 1238 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1239 KMP_DEBUG_ASSERT(lck != NULL); 1240 } 1241 } 1242 return lck; 1243 } 1244 1245 #endif // KMP_USE_DYNAMIC_LOCK 1246 1247 /*! 1248 @ingroup WORK_SHARING 1249 @param loc source location information. 1250 @param global_tid global thread number. 1251 @param crit identity of the critical section. This could be a pointer to a lock 1252 associated with the critical section, or some other suitably unique value. 1253 1254 Enter code protected by a `critical` construct. 1255 This function blocks until the executing thread can enter the critical section. 
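
An illustrative sketch only (crit_name is a hypothetical placeholder for the
zero-initialized kmp_critical_name the compiler associates with this critical
name):
@code
// #pragma omp critical (name)
static kmp_critical_name crit_name = {0}; // crit_name: placeholder
__kmpc_critical(&loc, gtid, &crit_name);
// ... protected region ...
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode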
1256 */ 1257 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1258 kmp_critical_name *crit) { 1259 #if KMP_USE_DYNAMIC_LOCK 1260 #if OMPT_SUPPORT && OMPT_OPTIONAL 1261 OMPT_STORE_RETURN_ADDRESS(global_tid); 1262 #endif // OMPT_SUPPORT 1263 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1264 #else 1265 KMP_COUNT_BLOCK(OMP_CRITICAL); 1266 #if OMPT_SUPPORT && OMPT_OPTIONAL 1267 ompt_state_t prev_state = ompt_state_undefined; 1268 ompt_thread_info_t ti; 1269 #endif 1270 kmp_user_lock_p lck; 1271 1272 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1273 __kmp_assert_valid_gtid(global_tid); 1274 1275 // TODO: add THR_OVHD_STATE 1276 1277 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1278 KMP_CHECK_USER_LOCK_INIT(); 1279 1280 if ((__kmp_user_lock_kind == lk_tas) && 1281 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1282 lck = (kmp_user_lock_p)crit; 1283 } 1284 #if KMP_USE_FUTEX 1285 else if ((__kmp_user_lock_kind == lk_futex) && 1286 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1287 lck = (kmp_user_lock_p)crit; 1288 } 1289 #endif 1290 else { // ticket, queuing or drdpa 1291 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1292 } 1293 1294 if (__kmp_env_consistency_check) 1295 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1296 1297 // since the critical directive binds to all threads, not just the current 1298 // team we have to check this even if we are in a serialized team. 1299 // also, even if we are the uber thread, we still have to conduct the lock, 1300 // as we have to contend with sibling threads. 1301 1302 #if USE_ITT_BUILD 1303 __kmp_itt_critical_acquiring(lck); 1304 #endif /* USE_ITT_BUILD */ 1305 #if OMPT_SUPPORT && OMPT_OPTIONAL 1306 OMPT_STORE_RETURN_ADDRESS(gtid); 1307 void *codeptr_ra = NULL; 1308 if (ompt_enabled.enabled) { 1309 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1310 /* OMPT state update */ 1311 prev_state = ti.state; 1312 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1313 ti.state = ompt_state_wait_critical; 1314 1315 /* OMPT event callback */ 1316 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1317 if (ompt_enabled.ompt_callback_mutex_acquire) { 1318 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1319 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1320 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1321 } 1322 } 1323 #endif 1324 // Value of 'crit' should be good for using as a critical_id of the critical 1325 // section directive. 
1326 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1327 1328 #if USE_ITT_BUILD 1329 __kmp_itt_critical_acquired(lck); 1330 #endif /* USE_ITT_BUILD */ 1331 #if OMPT_SUPPORT && OMPT_OPTIONAL 1332 if (ompt_enabled.enabled) { 1333 /* OMPT state update */ 1334 ti.state = prev_state; 1335 ti.wait_id = 0; 1336 1337 /* OMPT event callback */ 1338 if (ompt_enabled.ompt_callback_mutex_acquired) { 1339 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1340 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1341 } 1342 } 1343 #endif 1344 KMP_POP_PARTITIONED_TIMER(); 1345 1346 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1347 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1348 #endif // KMP_USE_DYNAMIC_LOCK 1349 } 1350 1351 #if KMP_USE_DYNAMIC_LOCK 1352 1353 // Converts the given hint to an internal lock implementation 1354 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1355 #if KMP_USE_TSX 1356 #define KMP_TSX_LOCK(seq) lockseq_##seq 1357 #else 1358 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1359 #endif 1360 1361 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1362 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1363 #else 1364 #define KMP_CPUINFO_RTM 0 1365 #endif 1366 1367 // Hints that do not require further logic 1368 if (hint & kmp_lock_hint_hle) 1369 return KMP_TSX_LOCK(hle); 1370 if (hint & kmp_lock_hint_rtm) 1371 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; 1372 if (hint & kmp_lock_hint_adaptive) 1373 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1374 1375 // Rule out conflicting hints first by returning the default lock 1376 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1377 return __kmp_user_lock_seq; 1378 if ((hint & omp_lock_hint_speculative) && 1379 (hint & omp_lock_hint_nonspeculative)) 1380 return __kmp_user_lock_seq; 1381 1382 // Do not even consider speculation when it appears to be contended 1383 if (hint & omp_lock_hint_contended) 1384 return lockseq_queuing; 1385 1386 // Uncontended lock without speculation 1387 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1388 return lockseq_tas; 1389 1390 // Use RTM lock for speculation 1391 if (hint & omp_lock_hint_speculative) 1392 return KMP_CPUINFO_RTM ? 
KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; 1393 1394 return __kmp_user_lock_seq; 1395 } 1396 1397 #if OMPT_SUPPORT && OMPT_OPTIONAL 1398 #if KMP_USE_DYNAMIC_LOCK 1399 static kmp_mutex_impl_t 1400 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1401 if (user_lock) { 1402 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1403 case 0: 1404 break; 1405 #if KMP_USE_FUTEX 1406 case locktag_futex: 1407 return kmp_mutex_impl_queuing; 1408 #endif 1409 case locktag_tas: 1410 return kmp_mutex_impl_spin; 1411 #if KMP_USE_TSX 1412 case locktag_hle: 1413 case locktag_rtm_spin: 1414 return kmp_mutex_impl_speculative; 1415 #endif 1416 default: 1417 return kmp_mutex_impl_none; 1418 } 1419 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1420 } 1421 KMP_ASSERT(ilock); 1422 switch (ilock->type) { 1423 #if KMP_USE_TSX 1424 case locktag_adaptive: 1425 case locktag_rtm_queuing: 1426 return kmp_mutex_impl_speculative; 1427 #endif 1428 case locktag_nested_tas: 1429 return kmp_mutex_impl_spin; 1430 #if KMP_USE_FUTEX 1431 case locktag_nested_futex: 1432 #endif 1433 case locktag_ticket: 1434 case locktag_queuing: 1435 case locktag_drdpa: 1436 case locktag_nested_ticket: 1437 case locktag_nested_queuing: 1438 case locktag_nested_drdpa: 1439 return kmp_mutex_impl_queuing; 1440 default: 1441 return kmp_mutex_impl_none; 1442 } 1443 } 1444 #else 1445 // For locks without dynamic binding 1446 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1447 switch (__kmp_user_lock_kind) { 1448 case lk_tas: 1449 return kmp_mutex_impl_spin; 1450 #if KMP_USE_FUTEX 1451 case lk_futex: 1452 #endif 1453 case lk_ticket: 1454 case lk_queuing: 1455 case lk_drdpa: 1456 return kmp_mutex_impl_queuing; 1457 #if KMP_USE_TSX 1458 case lk_hle: 1459 case lk_rtm_queuing: 1460 case lk_rtm_spin: 1461 case lk_adaptive: 1462 return kmp_mutex_impl_speculative; 1463 #endif 1464 default: 1465 return kmp_mutex_impl_none; 1466 } 1467 } 1468 #endif // KMP_USE_DYNAMIC_LOCK 1469 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1470 1471 /*! 1472 @ingroup WORK_SHARING 1473 @param loc source location information. 1474 @param global_tid global thread number. 1475 @param crit identity of the critical section. This could be a pointer to a lock 1476 associated with the critical section, or some other suitably unique value. 1477 @param hint the lock hint. 1478 1479 Enter code protected by a `critical` construct with a hint. The hint value is 1480 used to suggest a lock implementation. This function blocks until the executing 1481 thread can enter the critical section unless the hint suggests use of 1482 speculative execution and the hardware supports it. 1483 */ 1484 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1485 kmp_critical_name *crit, uint32_t hint) { 1486 KMP_COUNT_BLOCK(OMP_CRITICAL); 1487 kmp_user_lock_p lck; 1488 #if OMPT_SUPPORT && OMPT_OPTIONAL 1489 ompt_state_t prev_state = ompt_state_undefined; 1490 ompt_thread_info_t ti; 1491 // This is the case, if called from __kmpc_critical: 1492 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1493 if (!codeptr) 1494 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1495 #endif 1496 1497 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1498 __kmp_assert_valid_gtid(global_tid); 1499 1500 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1501 // Check if it is initialized. 
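// The word pointed to by crit either carries a direct-lock tag (in which case
// the word itself is the lock) or a pointer to a separately allocated indirect
// lock object; the KMP_EXTRACT_D_TAG test below distinguishes the two layouts.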
1502 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1503 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint); 1504 if (*lk == 0) { 1505 if (KMP_IS_D_LOCK(lockseq)) { 1506 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1507 KMP_GET_D_TAG(lockseq)); 1508 } else { 1509 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq)); 1510 } 1511 } 1512 // Branch for accessing the actual lock object and set operation. This 1513 // branching is inevitable since this lock initialization does not follow the 1514 // normal dispatch path (lock table is not used). 1515 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1516 lck = (kmp_user_lock_p)lk; 1517 if (__kmp_env_consistency_check) { 1518 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1519 __kmp_map_hint_to_lock(hint)); 1520 } 1521 #if USE_ITT_BUILD 1522 __kmp_itt_critical_acquiring(lck); 1523 #endif 1524 #if OMPT_SUPPORT && OMPT_OPTIONAL 1525 if (ompt_enabled.enabled) { 1526 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1527 /* OMPT state update */ 1528 prev_state = ti.state; 1529 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1530 ti.state = ompt_state_wait_critical; 1531 1532 /* OMPT event callback */ 1533 if (ompt_enabled.ompt_callback_mutex_acquire) { 1534 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1535 ompt_mutex_critical, (unsigned int)hint, 1536 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1537 codeptr); 1538 } 1539 } 1540 #endif 1541 #if KMP_USE_INLINED_TAS 1542 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) { 1543 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1544 } else 1545 #elif KMP_USE_INLINED_FUTEX 1546 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) { 1547 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1548 } else 1549 #endif 1550 { 1551 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1552 } 1553 } else { 1554 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1555 lck = ilk->lock; 1556 if (__kmp_env_consistency_check) { 1557 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1558 __kmp_map_hint_to_lock(hint)); 1559 } 1560 #if USE_ITT_BUILD 1561 __kmp_itt_critical_acquiring(lck); 1562 #endif 1563 #if OMPT_SUPPORT && OMPT_OPTIONAL 1564 if (ompt_enabled.enabled) { 1565 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1566 /* OMPT state update */ 1567 prev_state = ti.state; 1568 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1569 ti.state = ompt_state_wait_critical; 1570 1571 /* OMPT event callback */ 1572 if (ompt_enabled.ompt_callback_mutex_acquire) { 1573 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1574 ompt_mutex_critical, (unsigned int)hint, 1575 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1576 codeptr); 1577 } 1578 } 1579 #endif 1580 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1581 } 1582 KMP_POP_PARTITIONED_TIMER(); 1583 1584 #if USE_ITT_BUILD 1585 __kmp_itt_critical_acquired(lck); 1586 #endif /* USE_ITT_BUILD */ 1587 #if OMPT_SUPPORT && OMPT_OPTIONAL 1588 if (ompt_enabled.enabled) { 1589 /* OMPT state update */ 1590 ti.state = prev_state; 1591 ti.wait_id = 0; 1592 1593 /* OMPT event callback */ 1594 if (ompt_enabled.ompt_callback_mutex_acquired) { 1595 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1596 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1597 } 1598 } 1599 #endif 1600 1601 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1602 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1603 } // __kmpc_critical_with_hint 1604 1605 #endif // KMP_USE_DYNAMIC_LOCK 1606 1607 /*! 
1608 @ingroup WORK_SHARING 1609 @param loc source location information. 1610 @param global_tid global thread number . 1611 @param crit identity of the critical section. This could be a pointer to a lock 1612 associated with the critical section, or some other suitably unique value. 1613 1614 Leave a critical section, releasing any lock that was held during its execution. 1615 */ 1616 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1617 kmp_critical_name *crit) { 1618 kmp_user_lock_p lck; 1619 1620 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1621 1622 #if KMP_USE_DYNAMIC_LOCK 1623 int locktag = KMP_EXTRACT_D_TAG(crit); 1624 if (locktag) { 1625 lck = (kmp_user_lock_p)crit; 1626 KMP_ASSERT(lck != NULL); 1627 if (__kmp_env_consistency_check) { 1628 __kmp_pop_sync(global_tid, ct_critical, loc); 1629 } 1630 #if USE_ITT_BUILD 1631 __kmp_itt_critical_releasing(lck); 1632 #endif 1633 #if KMP_USE_INLINED_TAS 1634 if (locktag == locktag_tas && !__kmp_env_consistency_check) { 1635 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1636 } else 1637 #elif KMP_USE_INLINED_FUTEX 1638 if (locktag == locktag_futex && !__kmp_env_consistency_check) { 1639 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1640 } else 1641 #endif 1642 { 1643 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1644 } 1645 } else { 1646 kmp_indirect_lock_t *ilk = 1647 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1648 KMP_ASSERT(ilk != NULL); 1649 lck = ilk->lock; 1650 if (__kmp_env_consistency_check) { 1651 __kmp_pop_sync(global_tid, ct_critical, loc); 1652 } 1653 #if USE_ITT_BUILD 1654 __kmp_itt_critical_releasing(lck); 1655 #endif 1656 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1657 } 1658 1659 #else // KMP_USE_DYNAMIC_LOCK 1660 1661 if ((__kmp_user_lock_kind == lk_tas) && 1662 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1663 lck = (kmp_user_lock_p)crit; 1664 } 1665 #if KMP_USE_FUTEX 1666 else if ((__kmp_user_lock_kind == lk_futex) && 1667 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1668 lck = (kmp_user_lock_p)crit; 1669 } 1670 #endif 1671 else { // ticket, queuing or drdpa 1672 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1673 } 1674 1675 KMP_ASSERT(lck != NULL); 1676 1677 if (__kmp_env_consistency_check) 1678 __kmp_pop_sync(global_tid, ct_critical, loc); 1679 1680 #if USE_ITT_BUILD 1681 __kmp_itt_critical_releasing(lck); 1682 #endif /* USE_ITT_BUILD */ 1683 // Value of 'crit' should be good for using as a critical_id of the critical 1684 // section directive. 1685 __kmp_release_user_lock_with_checks(lck, global_tid); 1686 1687 #endif // KMP_USE_DYNAMIC_LOCK 1688 1689 #if OMPT_SUPPORT && OMPT_OPTIONAL 1690 /* OMPT release event triggers after lock is released; place here to trigger 1691 * for all #if branches */ 1692 OMPT_STORE_RETURN_ADDRESS(global_tid); 1693 if (ompt_enabled.ompt_callback_mutex_released) { 1694 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1695 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1696 OMPT_LOAD_RETURN_ADDRESS(0)); 1697 } 1698 #endif 1699 1700 KMP_POP_PARTITIONED_TIMER(); 1701 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1702 } 1703 1704 /*! 1705 @ingroup SYNCHRONIZATION 1706 @param loc source location information 1707 @param global_tid thread id. 1708 @return one if the thread should execute the master block, zero otherwise 1709 1710 Start execution of a combined barrier and master. The barrier is executed inside 1711 this function. 
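
An illustrative pairing (sketch only; the master-only body is assumed):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... code executed by the primary thread while the others wait ...
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode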
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the other threads in the team do
not wait for the thread that executes the master block, so there is nothing
for an "end" call to release.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1786 } 1787 __kmp_check_barrier(global_tid, ct_barrier, loc); 1788 } 1789 1790 #if OMPT_SUPPORT 1791 ompt_frame_t *ompt_frame; 1792 if (ompt_enabled.enabled) { 1793 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1794 if (ompt_frame->enter_frame.ptr == NULL) 1795 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1796 } 1797 OMPT_STORE_RETURN_ADDRESS(global_tid); 1798 #endif 1799 #if USE_ITT_NOTIFY 1800 __kmp_threads[global_tid]->th.th_ident = loc; 1801 #endif 1802 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1803 #if OMPT_SUPPORT && OMPT_OPTIONAL 1804 if (ompt_enabled.enabled) { 1805 ompt_frame->enter_frame = ompt_data_none; 1806 } 1807 #endif 1808 1809 ret = __kmpc_master(loc, global_tid); 1810 1811 if (__kmp_env_consistency_check) { 1812 /* there's no __kmpc_end_master called; so the (stats) */ 1813 /* actions of __kmpc_end_master are done here */ 1814 if (ret) { 1815 /* only one thread should do the pop since only */ 1816 /* one did the push (see __kmpc_master()) */ 1817 __kmp_pop_sync(global_tid, ct_master, loc); 1818 } 1819 } 1820 1821 return (ret); 1822 } 1823 1824 /* The BARRIER for a SINGLE process section is always explicit */ 1825 /*! 1826 @ingroup WORK_SHARING 1827 @param loc source location information 1828 @param global_tid global thread number 1829 @return One if this thread should execute the single construct, zero otherwise. 1830 1831 Test whether to execute a <tt>single</tt> construct. 1832 There are no implicit barriers in the two "single" calls, rather the compiler 1833 should introduce an explicit barrier if it is required. 1834 */ 1835 1836 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1837 __kmp_assert_valid_gtid(global_tid); 1838 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1839 1840 if (rc) { 1841 // We are going to execute the single statement, so we should count it. 1842 KMP_COUNT_BLOCK(OMP_SINGLE); 1843 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1844 } 1845 1846 #if OMPT_SUPPORT && OMPT_OPTIONAL 1847 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1848 kmp_team_t *team = this_thr->th.th_team; 1849 int tid = __kmp_tid_from_gtid(global_tid); 1850 1851 if (ompt_enabled.enabled) { 1852 if (rc) { 1853 if (ompt_enabled.ompt_callback_work) { 1854 ompt_callbacks.ompt_callback(ompt_callback_work)( 1855 ompt_work_single_executor, ompt_scope_begin, 1856 &(team->t.ompt_team_info.parallel_data), 1857 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1858 1, OMPT_GET_RETURN_ADDRESS(0)); 1859 } 1860 } else { 1861 if (ompt_enabled.ompt_callback_work) { 1862 ompt_callbacks.ompt_callback(ompt_callback_work)( 1863 ompt_work_single_other, ompt_scope_begin, 1864 &(team->t.ompt_team_info.parallel_data), 1865 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1866 1, OMPT_GET_RETURN_ADDRESS(0)); 1867 ompt_callbacks.ompt_callback(ompt_callback_work)( 1868 ompt_work_single_other, ompt_scope_end, 1869 &(team->t.ompt_team_info.parallel_data), 1870 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1871 1, OMPT_GET_RETURN_ADDRESS(0)); 1872 } 1873 } 1874 } 1875 #endif 1876 1877 return rc; 1878 } 1879 1880 /*! 1881 @ingroup WORK_SHARING 1882 @param loc source location information 1883 @param global_tid global thread number 1884 1885 Mark the end of a <tt>single</tt> construct. This function should 1886 only be called by the thread that executed the block of code protected 1887 by the `single` construct. 
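
A purely illustrative sketch of one possible lowering of a <tt>single</tt>
construct without a nowait clause (code generation details vary by compiler):
@code
if (__kmpc_single(loc, gtid)) {
  // ... body of the single construct, executed by exactly one thread ...
  __kmpc_end_single(loc, gtid);
}
__kmpc_barrier(loc, gtid); // explicit barrier inserted by the compiler
@endcode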
1888 */ 1889 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1890 __kmp_assert_valid_gtid(global_tid); 1891 __kmp_exit_single(global_tid); 1892 KMP_POP_PARTITIONED_TIMER(); 1893 1894 #if OMPT_SUPPORT && OMPT_OPTIONAL 1895 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1896 kmp_team_t *team = this_thr->th.th_team; 1897 int tid = __kmp_tid_from_gtid(global_tid); 1898 1899 if (ompt_enabled.ompt_callback_work) { 1900 ompt_callbacks.ompt_callback(ompt_callback_work)( 1901 ompt_work_single_executor, ompt_scope_end, 1902 &(team->t.ompt_team_info.parallel_data), 1903 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1904 OMPT_GET_RETURN_ADDRESS(0)); 1905 } 1906 #endif 1907 } 1908 1909 /*! 1910 @ingroup WORK_SHARING 1911 @param loc Source location 1912 @param global_tid Global thread id 1913 1914 Mark the end of a statically scheduled loop. 1915 */ 1916 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1917 KMP_POP_PARTITIONED_TIMER(); 1918 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1919 1920 #if OMPT_SUPPORT && OMPT_OPTIONAL 1921 if (ompt_enabled.ompt_callback_work) { 1922 ompt_work_t ompt_work_type = ompt_work_loop; 1923 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1924 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1925 // Determine workshare type 1926 if (loc != NULL) { 1927 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1928 ompt_work_type = ompt_work_loop; 1929 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1930 ompt_work_type = ompt_work_sections; 1931 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1932 ompt_work_type = ompt_work_distribute; 1933 } else { 1934 // use default set above. 1935 // a warning about this case is provided in __kmpc_for_static_init 1936 } 1937 KMP_DEBUG_ASSERT(ompt_work_type); 1938 } 1939 ompt_callbacks.ompt_callback(ompt_callback_work)( 1940 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1941 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1942 } 1943 #endif 1944 if (__kmp_env_consistency_check) 1945 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1946 } 1947 1948 // User routines which take C-style arguments (call by value) 1949 // different from the Fortran equivalent routines 1950 1951 void ompc_set_num_threads(int arg) { 1952 // !!!!! TODO: check the per-task binding 1953 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1954 } 1955 1956 void ompc_set_dynamic(int flag) { 1957 kmp_info_t *thread; 1958 1959 /* For the thread-private implementation of the internal controls */ 1960 thread = __kmp_entry_thread(); 1961 1962 __kmp_save_internal_controls(thread); 1963 1964 set__dynamic(thread, flag ? true : false); 1965 } 1966 1967 void ompc_set_nested(int flag) { 1968 kmp_info_t *thread; 1969 1970 /* For the thread-private internal controls implementation */ 1971 thread = __kmp_entry_thread(); 1972 1973 __kmp_save_internal_controls(thread); 1974 1975 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1976 } 1977 1978 void ompc_set_max_active_levels(int max_active_levels) { 1979 /* TO DO */ 1980 /* we want per-task implementation of this internal control */ 1981 1982 /* For the per-thread internal controls implementation */ 1983 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1984 } 1985 1986 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1987 // !!!!! 
TODO: check the per-task binding 1988 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1989 } 1990 1991 int ompc_get_ancestor_thread_num(int level) { 1992 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1993 } 1994 1995 int ompc_get_team_size(int level) { 1996 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1997 } 1998 1999 /* OpenMP 5.0 Affinity Format API */ 2000 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 2001 if (!__kmp_init_serial) { 2002 __kmp_serial_initialize(); 2003 } 2004 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2005 format, KMP_STRLEN(format) + 1); 2006 } 2007 2008 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2009 size_t format_size; 2010 if (!__kmp_init_serial) { 2011 __kmp_serial_initialize(); 2012 } 2013 format_size = KMP_STRLEN(__kmp_affinity_format); 2014 if (buffer && size) { 2015 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2016 format_size + 1); 2017 } 2018 return format_size; 2019 } 2020 2021 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2022 int gtid; 2023 if (!TCR_4(__kmp_init_middle)) { 2024 __kmp_middle_initialize(); 2025 } 2026 __kmp_assign_root_init_mask(); 2027 gtid = __kmp_get_gtid(); 2028 __kmp_aux_display_affinity(gtid, format); 2029 } 2030 2031 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2032 char const *format) { 2033 int gtid; 2034 size_t num_required; 2035 kmp_str_buf_t capture_buf; 2036 if (!TCR_4(__kmp_init_middle)) { 2037 __kmp_middle_initialize(); 2038 } 2039 __kmp_assign_root_init_mask(); 2040 gtid = __kmp_get_gtid(); 2041 __kmp_str_buf_init(&capture_buf); 2042 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2043 if (buffer && buf_size) { 2044 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2045 capture_buf.used + 1); 2046 } 2047 __kmp_str_buf_free(&capture_buf); 2048 return num_required; 2049 } 2050 2051 void kmpc_set_stacksize(int arg) { 2052 // __kmp_aux_set_stacksize initializes the library if needed 2053 __kmp_aux_set_stacksize(arg); 2054 } 2055 2056 void kmpc_set_stacksize_s(size_t arg) { 2057 // __kmp_aux_set_stacksize initializes the library if needed 2058 __kmp_aux_set_stacksize(arg); 2059 } 2060 2061 void kmpc_set_blocktime(int arg) { 2062 int gtid, tid; 2063 kmp_info_t *thread; 2064 2065 gtid = __kmp_entry_gtid(); 2066 tid = __kmp_tid_from_gtid(gtid); 2067 thread = __kmp_thread_from_gtid(gtid); 2068 2069 __kmp_aux_set_blocktime(arg, thread, tid); 2070 } 2071 2072 void kmpc_set_library(int arg) { 2073 // __kmp_user_set_library initializes the library if needed 2074 __kmp_user_set_library((enum library_type)arg); 2075 } 2076 2077 void kmpc_set_defaults(char const *str) { 2078 // __kmp_aux_set_defaults initializes the library if needed 2079 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2080 } 2081 2082 void kmpc_set_disp_num_buffers(int arg) { 2083 // ignore after initialization because some teams have already 2084 // allocated dispatch buffers 2085 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF && 2086 arg <= KMP_MAX_DISP_NUM_BUFF) { 2087 __kmp_dispatch_num_buffers = arg; 2088 } 2089 } 2090 2091 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2092 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2093 return -1; 2094 #else 2095 if (!TCR_4(__kmp_init_middle)) { 2096 __kmp_middle_initialize(); 2097 } 2098 __kmp_assign_root_init_mask(); 2099 return __kmp_aux_set_affinity_mask_proc(proc, 
mask); 2100 #endif 2101 } 2102 2103 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2104 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2105 return -1; 2106 #else 2107 if (!TCR_4(__kmp_init_middle)) { 2108 __kmp_middle_initialize(); 2109 } 2110 __kmp_assign_root_init_mask(); 2111 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2112 #endif 2113 } 2114 2115 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2116 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2117 return -1; 2118 #else 2119 if (!TCR_4(__kmp_init_middle)) { 2120 __kmp_middle_initialize(); 2121 } 2122 __kmp_assign_root_init_mask(); 2123 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2124 #endif 2125 } 2126 2127 /* -------------------------------------------------------------------------- */ 2128 /*! 2129 @ingroup THREADPRIVATE 2130 @param loc source location information 2131 @param gtid global thread number 2132 @param cpy_size size of the cpy_data buffer 2133 @param cpy_data pointer to data to be copied 2134 @param cpy_func helper function to call for copying data 2135 @param didit flag variable: 1=single thread; 0=not single thread 2136 2137 __kmpc_copyprivate implements the interface for the private data broadcast 2138 needed for the copyprivate clause associated with a single region in an 2139 OpenMP<sup>*</sup> program (both C and Fortran). 2140 All threads participating in the parallel region call this routine. 2141 One of the threads (called the single thread) should have the <tt>didit</tt> 2142 variable set to 1 and all other threads should have that variable set to 0. 2143 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2144 2145 The OpenMP specification forbids the use of nowait on the single region when a 2146 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2147 barrier internally to avoid race conditions, so the code generation for the 2148 single region should avoid generating a barrier after the call to @ref 2149 __kmpc_copyprivate. 2150 2151 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2152 The <tt>loc</tt> parameter is a pointer to source location information. 2153 2154 Internal implementation: The single thread will first copy its descriptor 2155 address (cpy_data) to a team-private location, then the other threads will each 2156 call the function pointed to by the parameter cpy_func, which carries out the 2157 copy by copying the data using the cpy_data buffer. 2158 2159 The cpy_func routine used for the copy and the contents of the data area defined 2160 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2161 to be done. For instance, the cpy_data buffer can hold the actual data to be 2162 copied or it may hold a list of pointers to the data. The cpy_func routine must 2163 interpret the cpy_data buffer appropriately. 2164 2165 The interface to cpy_func is as follows: 2166 @code 2167 void cpy_func( void *destination, void *source ) 2168 @endcode 2169 where void *destination is the cpy_data pointer for the thread being copied to 2170 and void *source is the cpy_data pointer for the thread being copied from. 
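
As a purely illustrative example (the helper name and buffer layout below are
hypothetical, not mandated by this interface), a copyprivate clause naming an
int and a double could pass cpy_data as an array of two pointers together with
a helper such as:
@code
void example_cpy_func(void *destination, void *source) {
  void **dst = (void **)destination; // pointer list built by this thread
  void **src = (void **)source;      // pointer list built by the single thread
  *(int *)dst[0] = *(int *)src[0];       // copy the int variable
  *(double *)dst[1] = *(double *)src[1]; // copy the double variable
}
@endcode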
2171 */ 2172 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2173 void *cpy_data, void (*cpy_func)(void *, void *), 2174 kmp_int32 didit) { 2175 void **data_ptr; 2176 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2177 __kmp_assert_valid_gtid(gtid); 2178 2179 KMP_MB(); 2180 2181 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2182 2183 if (__kmp_env_consistency_check) { 2184 if (loc == 0) { 2185 KMP_WARNING(ConstructIdentInvalid); 2186 } 2187 } 2188 2189 // ToDo: Optimize the following two barriers into some kind of split barrier 2190 2191 if (didit) 2192 *data_ptr = cpy_data; 2193 2194 #if OMPT_SUPPORT 2195 ompt_frame_t *ompt_frame; 2196 if (ompt_enabled.enabled) { 2197 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2198 if (ompt_frame->enter_frame.ptr == NULL) 2199 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2200 } 2201 OMPT_STORE_RETURN_ADDRESS(gtid); 2202 #endif 2203 /* This barrier is not a barrier region boundary */ 2204 #if USE_ITT_NOTIFY 2205 __kmp_threads[gtid]->th.th_ident = loc; 2206 #endif 2207 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2208 2209 if (!didit) 2210 (*cpy_func)(cpy_data, *data_ptr); 2211 2212 // Consider next barrier a user-visible barrier for barrier region boundaries 2213 // Nesting checks are already handled by the single construct checks 2214 { 2215 #if OMPT_SUPPORT 2216 OMPT_STORE_RETURN_ADDRESS(gtid); 2217 #endif 2218 #if USE_ITT_NOTIFY 2219 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2220 // tasks can overwrite the location) 2221 #endif 2222 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2223 #if OMPT_SUPPORT && OMPT_OPTIONAL 2224 if (ompt_enabled.enabled) { 2225 ompt_frame->enter_frame = ompt_data_none; 2226 } 2227 #endif 2228 } 2229 } 2230 2231 /* -------------------------------------------------------------------------- */ 2232 2233 #define INIT_LOCK __kmp_init_user_lock_with_checks 2234 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2235 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2236 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2237 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2238 #define ACQUIRE_NESTED_LOCK_TIMED \ 2239 __kmp_acquire_nested_user_lock_with_checks_timed 2240 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2241 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2242 #define TEST_LOCK __kmp_test_user_lock_with_checks 2243 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2244 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2245 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2246 2247 // TODO: Make check abort messages use location info & pass it into 2248 // with_checks routines 2249 2250 #if KMP_USE_DYNAMIC_LOCK 2251 2252 // internal lock initializer 2253 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2254 kmp_dyna_lockseq_t seq) { 2255 if (KMP_IS_D_LOCK(seq)) { 2256 KMP_INIT_D_LOCK(lock, seq); 2257 #if USE_ITT_BUILD 2258 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2259 #endif 2260 } else { 2261 KMP_INIT_I_LOCK(lock, seq); 2262 #if USE_ITT_BUILD 2263 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2264 __kmp_itt_lock_creating(ilk->lock, loc); 2265 #endif 2266 } 2267 } 2268 2269 // internal nest lock initializer 2270 static __forceinline void 2271 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2272 kmp_dyna_lockseq_t seq) { 2273 #if KMP_USE_TSX 2274 // Don't have nested lock implementation for speculative locks 2275 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2276 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2277 seq = __kmp_user_lock_seq; 2278 #endif 2279 switch (seq) { 2280 case lockseq_tas: 2281 seq = lockseq_nested_tas; 2282 break; 2283 #if KMP_USE_FUTEX 2284 case lockseq_futex: 2285 seq = lockseq_nested_futex; 2286 break; 2287 #endif 2288 case lockseq_ticket: 2289 seq = lockseq_nested_ticket; 2290 break; 2291 case lockseq_queuing: 2292 seq = lockseq_nested_queuing; 2293 break; 2294 case lockseq_drdpa: 2295 seq = lockseq_nested_drdpa; 2296 break; 2297 default: 2298 seq = lockseq_nested_queuing; 2299 } 2300 KMP_INIT_I_LOCK(lock, seq); 2301 #if USE_ITT_BUILD 2302 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2303 __kmp_itt_lock_creating(ilk->lock, loc); 2304 #endif 2305 } 2306 2307 /* initialize the lock with a hint */ 2308 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2309 uintptr_t hint) { 2310 KMP_DEBUG_ASSERT(__kmp_init_serial); 2311 if (__kmp_env_consistency_check && user_lock == NULL) { 2312 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2313 } 2314 2315 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2316 2317 #if OMPT_SUPPORT && OMPT_OPTIONAL 2318 // This is the case, if called from omp_init_lock_with_hint: 2319 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2320 if (!codeptr) 2321 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2322 if (ompt_enabled.ompt_callback_lock_init) { 2323 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2324 ompt_mutex_lock, (omp_lock_hint_t)hint, 2325 __ompt_get_mutex_impl_type(user_lock), 2326 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2327 } 2328 #endif 2329 } 2330 2331 /* initialize the lock with a hint */ 2332 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2333 void **user_lock, uintptr_t hint) { 2334 KMP_DEBUG_ASSERT(__kmp_init_serial); 2335 if (__kmp_env_consistency_check && user_lock == NULL) { 2336 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2337 } 2338 2339 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2340 2341 #if OMPT_SUPPORT && OMPT_OPTIONAL 2342 // This is the case, if called from omp_init_lock_with_hint: 2343 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2344 if (!codeptr) 2345 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2346 if (ompt_enabled.ompt_callback_lock_init) { 2347 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2348 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2349 __ompt_get_mutex_impl_type(user_lock), 2350 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2351 } 2352 #endif 2353 } 2354 2355 #endif // KMP_USE_DYNAMIC_LOCK 2356 2357 /* initialize the lock */ 2358 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2359 #if KMP_USE_DYNAMIC_LOCK 2360 2361 KMP_DEBUG_ASSERT(__kmp_init_serial); 2362 if (__kmp_env_consistency_check && user_lock == NULL) { 2363 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2364 } 2365 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2366 2367 #if OMPT_SUPPORT && OMPT_OPTIONAL 2368 // This is the case, if called from omp_init_lock_with_hint: 2369 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2370 if (!codeptr) 2371 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2372 if (ompt_enabled.ompt_callback_lock_init) { 2373 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2374 
ompt_mutex_lock, omp_lock_hint_none, 2375 __ompt_get_mutex_impl_type(user_lock), 2376 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2377 } 2378 #endif 2379 2380 #else // KMP_USE_DYNAMIC_LOCK 2381 2382 static char const *const func = "omp_init_lock"; 2383 kmp_user_lock_p lck; 2384 KMP_DEBUG_ASSERT(__kmp_init_serial); 2385 2386 if (__kmp_env_consistency_check) { 2387 if (user_lock == NULL) { 2388 KMP_FATAL(LockIsUninitialized, func); 2389 } 2390 } 2391 2392 KMP_CHECK_USER_LOCK_INIT(); 2393 2394 if ((__kmp_user_lock_kind == lk_tas) && 2395 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2396 lck = (kmp_user_lock_p)user_lock; 2397 } 2398 #if KMP_USE_FUTEX 2399 else if ((__kmp_user_lock_kind == lk_futex) && 2400 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2401 lck = (kmp_user_lock_p)user_lock; 2402 } 2403 #endif 2404 else { 2405 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2406 } 2407 INIT_LOCK(lck); 2408 __kmp_set_user_lock_location(lck, loc); 2409 2410 #if OMPT_SUPPORT && OMPT_OPTIONAL 2411 // This is the case, if called from omp_init_lock_with_hint: 2412 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2413 if (!codeptr) 2414 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2415 if (ompt_enabled.ompt_callback_lock_init) { 2416 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2417 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2418 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2419 } 2420 #endif 2421 2422 #if USE_ITT_BUILD 2423 __kmp_itt_lock_creating(lck); 2424 #endif /* USE_ITT_BUILD */ 2425 2426 #endif // KMP_USE_DYNAMIC_LOCK 2427 } // __kmpc_init_lock 2428 2429 /* initialize the lock */ 2430 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2431 #if KMP_USE_DYNAMIC_LOCK 2432 2433 KMP_DEBUG_ASSERT(__kmp_init_serial); 2434 if (__kmp_env_consistency_check && user_lock == NULL) { 2435 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2436 } 2437 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2438 2439 #if OMPT_SUPPORT && OMPT_OPTIONAL 2440 // This is the case, if called from omp_init_lock_with_hint: 2441 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2442 if (!codeptr) 2443 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2444 if (ompt_enabled.ompt_callback_lock_init) { 2445 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2446 ompt_mutex_nest_lock, omp_lock_hint_none, 2447 __ompt_get_mutex_impl_type(user_lock), 2448 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2449 } 2450 #endif 2451 2452 #else // KMP_USE_DYNAMIC_LOCK 2453 2454 static char const *const func = "omp_init_nest_lock"; 2455 kmp_user_lock_p lck; 2456 KMP_DEBUG_ASSERT(__kmp_init_serial); 2457 2458 if (__kmp_env_consistency_check) { 2459 if (user_lock == NULL) { 2460 KMP_FATAL(LockIsUninitialized, func); 2461 } 2462 } 2463 2464 KMP_CHECK_USER_LOCK_INIT(); 2465 2466 if ((__kmp_user_lock_kind == lk_tas) && 2467 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2468 OMP_NEST_LOCK_T_SIZE)) { 2469 lck = (kmp_user_lock_p)user_lock; 2470 } 2471 #if KMP_USE_FUTEX 2472 else if ((__kmp_user_lock_kind == lk_futex) && 2473 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2474 OMP_NEST_LOCK_T_SIZE)) { 2475 lck = (kmp_user_lock_p)user_lock; 2476 } 2477 #endif 2478 else { 2479 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2480 } 2481 2482 INIT_NESTED_LOCK(lck); 2483 __kmp_set_user_lock_location(lck, loc); 2484 2485 #if OMPT_SUPPORT && OMPT_OPTIONAL 2486 // This is the case, if called from omp_init_lock_with_hint: 2487 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2488 if (!codeptr) 2489 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2490 if (ompt_enabled.ompt_callback_lock_init) { 2491 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2492 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2493 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2494 } 2495 #endif 2496 2497 #if USE_ITT_BUILD 2498 __kmp_itt_lock_creating(lck); 2499 #endif /* USE_ITT_BUILD */ 2500 2501 #endif // KMP_USE_DYNAMIC_LOCK 2502 } // __kmpc_init_nest_lock 2503 2504 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2505 #if KMP_USE_DYNAMIC_LOCK 2506 2507 #if USE_ITT_BUILD 2508 kmp_user_lock_p lck; 2509 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2510 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2511 } else { 2512 lck = (kmp_user_lock_p)user_lock; 2513 } 2514 __kmp_itt_lock_destroyed(lck); 2515 #endif 2516 #if OMPT_SUPPORT && OMPT_OPTIONAL 2517 // This is the case, if called from omp_init_lock_with_hint: 2518 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2519 if (!codeptr) 2520 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2521 if (ompt_enabled.ompt_callback_lock_destroy) { 2522 kmp_user_lock_p lck; 2523 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2524 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2525 } else { 2526 lck = (kmp_user_lock_p)user_lock; 2527 } 2528 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2529 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2530 } 2531 #endif 2532 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2533 #else 2534 kmp_user_lock_p lck; 2535 2536 if ((__kmp_user_lock_kind == lk_tas) && 2537 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2538 lck = (kmp_user_lock_p)user_lock; 2539 } 2540 #if KMP_USE_FUTEX 2541 else if ((__kmp_user_lock_kind == lk_futex) && 2542 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2543 lck = (kmp_user_lock_p)user_lock; 2544 } 2545 #endif 2546 else { 2547 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2548 } 2549 2550 #if OMPT_SUPPORT && OMPT_OPTIONAL 2551 // This is the case, if called from omp_init_lock_with_hint: 2552 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2553 if (!codeptr) 2554 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2555 if (ompt_enabled.ompt_callback_lock_destroy) { 2556 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2557 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2558 } 2559 #endif 2560 2561 #if USE_ITT_BUILD 2562 __kmp_itt_lock_destroyed(lck); 2563 #endif /* USE_ITT_BUILD */ 2564 DESTROY_LOCK(lck); 2565 2566 if ((__kmp_user_lock_kind == lk_tas) && 2567 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2568 ; 2569 } 2570 #if KMP_USE_FUTEX 2571 else if ((__kmp_user_lock_kind == lk_futex) && 2572 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2573 ; 2574 } 2575 #endif 2576 else { 2577 __kmp_user_lock_free(user_lock, gtid, lck); 2578 } 2579 #endif // KMP_USE_DYNAMIC_LOCK 2580 } // __kmpc_destroy_lock 2581 2582 /* destroy the lock */ 2583 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2584 #if KMP_USE_DYNAMIC_LOCK 2585 2586 #if USE_ITT_BUILD 2587 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2588 __kmp_itt_lock_destroyed(ilk->lock); 2589 #endif 2590 #if OMPT_SUPPORT && OMPT_OPTIONAL 2591 // This is the case, if called from omp_init_lock_with_hint: 2592 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2593 if (!codeptr) 2594 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2595 if (ompt_enabled.ompt_callback_lock_destroy) { 2596 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2597 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2598 } 2599 #endif 2600 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2601 2602 #else // KMP_USE_DYNAMIC_LOCK 2603 2604 kmp_user_lock_p lck; 2605 2606 if ((__kmp_user_lock_kind == lk_tas) && 2607 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2608 OMP_NEST_LOCK_T_SIZE)) { 2609 lck = (kmp_user_lock_p)user_lock; 2610 } 2611 #if KMP_USE_FUTEX 2612 else if ((__kmp_user_lock_kind == lk_futex) && 2613 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2614 OMP_NEST_LOCK_T_SIZE)) { 2615 lck = (kmp_user_lock_p)user_lock; 2616 } 2617 #endif 2618 else { 2619 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2620 } 2621 2622 #if OMPT_SUPPORT && OMPT_OPTIONAL 2623 // This is the case, if called from omp_init_lock_with_hint: 2624 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2625 if (!codeptr) 2626 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2627 if (ompt_enabled.ompt_callback_lock_destroy) { 2628 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2629 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2630 } 2631 #endif 2632 2633 #if USE_ITT_BUILD 2634 __kmp_itt_lock_destroyed(lck); 2635 #endif /* USE_ITT_BUILD */ 2636 2637 DESTROY_NESTED_LOCK(lck); 2638 2639 if ((__kmp_user_lock_kind == lk_tas) && 2640 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2641 OMP_NEST_LOCK_T_SIZE)) { 2642 ; 2643 } 2644 #if KMP_USE_FUTEX 2645 else if ((__kmp_user_lock_kind == lk_futex) && 2646 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2647 OMP_NEST_LOCK_T_SIZE)) { 2648 ; 2649 } 2650 #endif 2651 else { 2652 __kmp_user_lock_free(user_lock, gtid, lck); 2653 } 2654 #endif // KMP_USE_DYNAMIC_LOCK 2655 } // __kmpc_destroy_nest_lock 2656 2657 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2658 KMP_COUNT_BLOCK(OMP_set_lock); 2659 #if KMP_USE_DYNAMIC_LOCK 2660 int tag = KMP_EXTRACT_D_TAG(user_lock); 2661 #if USE_ITT_BUILD 2662 __kmp_itt_lock_acquiring( 2663 (kmp_user_lock_p) 2664 user_lock); // itt function will get to the right lock object. 
2665 #endif 2666 #if OMPT_SUPPORT && OMPT_OPTIONAL 2667 // This is the case, if called from omp_init_lock_with_hint: 2668 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2669 if (!codeptr) 2670 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2671 if (ompt_enabled.ompt_callback_mutex_acquire) { 2672 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2673 ompt_mutex_lock, omp_lock_hint_none, 2674 __ompt_get_mutex_impl_type(user_lock), 2675 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2676 } 2677 #endif 2678 #if KMP_USE_INLINED_TAS 2679 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2680 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2681 } else 2682 #elif KMP_USE_INLINED_FUTEX 2683 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2684 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2685 } else 2686 #endif 2687 { 2688 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2689 } 2690 #if USE_ITT_BUILD 2691 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2692 #endif 2693 #if OMPT_SUPPORT && OMPT_OPTIONAL 2694 if (ompt_enabled.ompt_callback_mutex_acquired) { 2695 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2696 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2697 } 2698 #endif 2699 2700 #else // KMP_USE_DYNAMIC_LOCK 2701 2702 kmp_user_lock_p lck; 2703 2704 if ((__kmp_user_lock_kind == lk_tas) && 2705 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2706 lck = (kmp_user_lock_p)user_lock; 2707 } 2708 #if KMP_USE_FUTEX 2709 else if ((__kmp_user_lock_kind == lk_futex) && 2710 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2711 lck = (kmp_user_lock_p)user_lock; 2712 } 2713 #endif 2714 else { 2715 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2716 } 2717 2718 #if USE_ITT_BUILD 2719 __kmp_itt_lock_acquiring(lck); 2720 #endif /* USE_ITT_BUILD */ 2721 #if OMPT_SUPPORT && OMPT_OPTIONAL 2722 // This is the case, if called from omp_init_lock_with_hint: 2723 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2724 if (!codeptr) 2725 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2726 if (ompt_enabled.ompt_callback_mutex_acquire) { 2727 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2728 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2729 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2730 } 2731 #endif 2732 2733 ACQUIRE_LOCK(lck, gtid); 2734 2735 #if USE_ITT_BUILD 2736 __kmp_itt_lock_acquired(lck); 2737 #endif /* USE_ITT_BUILD */ 2738 2739 #if OMPT_SUPPORT && OMPT_OPTIONAL 2740 if (ompt_enabled.ompt_callback_mutex_acquired) { 2741 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2742 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2743 } 2744 #endif 2745 2746 #endif // KMP_USE_DYNAMIC_LOCK 2747 } 2748 2749 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2750 #if KMP_USE_DYNAMIC_LOCK 2751 2752 #if USE_ITT_BUILD 2753 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2754 #endif 2755 #if OMPT_SUPPORT && OMPT_OPTIONAL 2756 // This is the case, if called from omp_init_lock_with_hint: 2757 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2758 if (!codeptr) 2759 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2760 if (ompt_enabled.enabled) { 2761 if (ompt_enabled.ompt_callback_mutex_acquire) { 2762 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2763 ompt_mutex_nest_lock, omp_lock_hint_none, 2764 __ompt_get_mutex_impl_type(user_lock), 2765 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2766 } 2767 } 2768 #endif 2769 int acquire_status = 2770 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2771 (void)acquire_status; 2772 #if USE_ITT_BUILD 2773 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2774 #endif 2775 2776 #if OMPT_SUPPORT && OMPT_OPTIONAL 2777 if (ompt_enabled.enabled) { 2778 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2779 if (ompt_enabled.ompt_callback_mutex_acquired) { 2780 // lock_first 2781 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2782 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2783 codeptr); 2784 } 2785 } else { 2786 if (ompt_enabled.ompt_callback_nest_lock) { 2787 // lock_next 2788 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2789 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2790 } 2791 } 2792 } 2793 #endif 2794 2795 #else // KMP_USE_DYNAMIC_LOCK 2796 int acquire_status; 2797 kmp_user_lock_p lck; 2798 2799 if ((__kmp_user_lock_kind == lk_tas) && 2800 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2801 OMP_NEST_LOCK_T_SIZE)) { 2802 lck = (kmp_user_lock_p)user_lock; 2803 } 2804 #if KMP_USE_FUTEX 2805 else if ((__kmp_user_lock_kind == lk_futex) && 2806 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2807 OMP_NEST_LOCK_T_SIZE)) { 2808 lck = (kmp_user_lock_p)user_lock; 2809 } 2810 #endif 2811 else { 2812 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2813 } 2814 2815 #if USE_ITT_BUILD 2816 __kmp_itt_lock_acquiring(lck); 2817 #endif /* USE_ITT_BUILD */ 2818 #if OMPT_SUPPORT && OMPT_OPTIONAL 2819 // This is the case, if called from omp_init_lock_with_hint: 2820 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2821 if (!codeptr) 2822 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2823 if (ompt_enabled.enabled) { 2824 if (ompt_enabled.ompt_callback_mutex_acquire) { 2825 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2826 ompt_mutex_nest_lock, omp_lock_hint_none, 2827 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2828 codeptr); 2829 } 2830 } 2831 #endif 2832 2833 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2834 2835 #if USE_ITT_BUILD 2836 __kmp_itt_lock_acquired(lck); 2837 #endif /* USE_ITT_BUILD */ 2838 2839 #if OMPT_SUPPORT && OMPT_OPTIONAL 2840 if (ompt_enabled.enabled) { 2841 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2842 if (ompt_enabled.ompt_callback_mutex_acquired) { 2843 // lock_first 2844 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2845 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2846 } 2847 } else { 2848 if (ompt_enabled.ompt_callback_nest_lock) { 2849 // lock_next 2850 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2851 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2852 } 2853 } 2854 } 2855 #endif 2856 2857 #endif // KMP_USE_DYNAMIC_LOCK 2858 } 2859 2860 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2861 #if KMP_USE_DYNAMIC_LOCK 2862 2863 int tag = KMP_EXTRACT_D_TAG(user_lock); 2864 #if USE_ITT_BUILD 2865 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2866 #endif 2867 #if KMP_USE_INLINED_TAS 2868 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2869 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2870 } else 2871 #elif KMP_USE_INLINED_FUTEX 2872 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2873 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2874 } else 2875 #endif 2876 { 2877 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2878 } 2879 2880 #if OMPT_SUPPORT && OMPT_OPTIONAL 2881 // This is the case, if called from omp_init_lock_with_hint: 2882 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2883 if (!codeptr) 2884 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2885 if (ompt_enabled.ompt_callback_mutex_released) { 2886 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2887 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2888 } 2889 #endif 2890 2891 #else // KMP_USE_DYNAMIC_LOCK 2892 2893 kmp_user_lock_p lck; 2894 2895 /* Can't use serial interval since not block structured */ 2896 /* release the lock */ 2897 2898 if ((__kmp_user_lock_kind == lk_tas) && 2899 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2900 #if KMP_OS_LINUX && \ 2901 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2902 // "fast" path implemented to fix customer performance issue 2903 #if USE_ITT_BUILD 2904 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2905 #endif /* USE_ITT_BUILD */ 2906 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2907 KMP_MB(); 2908 2909 #if OMPT_SUPPORT && OMPT_OPTIONAL 2910 // This is the case, if called from omp_init_lock_with_hint: 2911 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2912 if (!codeptr) 2913 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2914 if (ompt_enabled.ompt_callback_mutex_released) { 2915 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2916 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2917 } 2918 #endif 2919 2920 return; 2921 #else 2922 lck = (kmp_user_lock_p)user_lock; 2923 #endif 2924 } 2925 #if KMP_USE_FUTEX 2926 else if ((__kmp_user_lock_kind == lk_futex) && 2927 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2928 lck = (kmp_user_lock_p)user_lock; 2929 } 2930 #endif 2931 else { 2932 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2933 } 2934 2935 #if USE_ITT_BUILD 2936 __kmp_itt_lock_releasing(lck); 2937 #endif /* USE_ITT_BUILD */ 2938 2939 RELEASE_LOCK(lck, gtid); 2940 2941 #if OMPT_SUPPORT && OMPT_OPTIONAL 2942 // This is the case, if called from omp_init_lock_with_hint: 2943 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2944 if (!codeptr) 2945 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2946 if (ompt_enabled.ompt_callback_mutex_released) { 2947 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2948 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2949 } 2950 #endif 2951 2952 #endif // KMP_USE_DYNAMIC_LOCK 2953 } 2954 2955 /* release the lock */ 2956 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2957 #if KMP_USE_DYNAMIC_LOCK 2958 2959 #if USE_ITT_BUILD 2960 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2961 #endif 2962 int release_status = 2963 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2964 (void)release_status; 2965 2966 #if OMPT_SUPPORT && OMPT_OPTIONAL 2967 // This is the case, if called from omp_init_lock_with_hint: 2968 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2969 if (!codeptr) 2970 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2971 if (ompt_enabled.enabled) { 2972 if (release_status == KMP_LOCK_RELEASED) { 2973 if (ompt_enabled.ompt_callback_mutex_released) { 2974 // release_lock_last 2975 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2976 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2977 codeptr); 2978 } 2979 } else if (ompt_enabled.ompt_callback_nest_lock) { 2980 // release_lock_prev 2981 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2982 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2983 } 2984 } 2985 #endif 2986 2987 #else // KMP_USE_DYNAMIC_LOCK 2988 2989 kmp_user_lock_p lck; 
2990 2991 /* Can't use serial interval since not block structured */ 2992 2993 if ((__kmp_user_lock_kind == lk_tas) && 2994 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2995 OMP_NEST_LOCK_T_SIZE)) { 2996 #if KMP_OS_LINUX && \ 2997 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2998 // "fast" path implemented to fix customer performance issue 2999 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 3000 #if USE_ITT_BUILD 3001 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3002 #endif /* USE_ITT_BUILD */ 3003 3004 #if OMPT_SUPPORT && OMPT_OPTIONAL 3005 int release_status = KMP_LOCK_STILL_HELD; 3006 #endif 3007 3008 if (--(tl->lk.depth_locked) == 0) { 3009 TCW_4(tl->lk.poll, 0); 3010 #if OMPT_SUPPORT && OMPT_OPTIONAL 3011 release_status = KMP_LOCK_RELEASED; 3012 #endif 3013 } 3014 KMP_MB(); 3015 3016 #if OMPT_SUPPORT && OMPT_OPTIONAL 3017 // This is the case, if called from omp_init_lock_with_hint: 3018 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3019 if (!codeptr) 3020 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3021 if (ompt_enabled.enabled) { 3022 if (release_status == KMP_LOCK_RELEASED) { 3023 if (ompt_enabled.ompt_callback_mutex_released) { 3024 // release_lock_last 3025 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3026 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3027 } 3028 } else if (ompt_enabled.ompt_callback_nest_lock) { 3029 // release_lock_previous 3030 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3031 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3032 } 3033 } 3034 #endif 3035 3036 return; 3037 #else 3038 lck = (kmp_user_lock_p)user_lock; 3039 #endif 3040 } 3041 #if KMP_USE_FUTEX 3042 else if ((__kmp_user_lock_kind == lk_futex) && 3043 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3044 OMP_NEST_LOCK_T_SIZE)) { 3045 lck = (kmp_user_lock_p)user_lock; 3046 } 3047 #endif 3048 else { 3049 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3050 } 3051 3052 #if USE_ITT_BUILD 3053 __kmp_itt_lock_releasing(lck); 3054 #endif /* USE_ITT_BUILD */ 3055 3056 int release_status; 3057 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3058 #if OMPT_SUPPORT && OMPT_OPTIONAL 3059 // This is the case, if called from omp_init_lock_with_hint: 3060 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3061 if (!codeptr) 3062 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3063 if (ompt_enabled.enabled) { 3064 if (release_status == KMP_LOCK_RELEASED) { 3065 if (ompt_enabled.ompt_callback_mutex_released) { 3066 // release_lock_last 3067 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3068 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3069 } 3070 } else if (ompt_enabled.ompt_callback_nest_lock) { 3071 // release_lock_previous 3072 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3073 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3074 } 3075 } 3076 #endif 3077 3078 #endif // KMP_USE_DYNAMIC_LOCK 3079 } 3080 3081 /* try to acquire the lock */ 3082 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3083 KMP_COUNT_BLOCK(OMP_test_lock); 3084 3085 #if KMP_USE_DYNAMIC_LOCK 3086 int rc; 3087 int tag = KMP_EXTRACT_D_TAG(user_lock); 3088 #if USE_ITT_BUILD 3089 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3090 #endif 3091 #if OMPT_SUPPORT && OMPT_OPTIONAL 3092 // This is the case, if called from omp_init_lock_with_hint: 3093 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3094 if (!codeptr) 3095 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 3096 if (ompt_enabled.ompt_callback_mutex_acquire) { 3097 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3098 ompt_mutex_lock, omp_lock_hint_none, 3099 __ompt_get_mutex_impl_type(user_lock), 3100 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3101 } 3102 #endif 3103 #if KMP_USE_INLINED_TAS 3104 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3105 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3106 } else 3107 #elif KMP_USE_INLINED_FUTEX 3108 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3109 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3110 } else 3111 #endif 3112 { 3113 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3114 } 3115 if (rc) { 3116 #if USE_ITT_BUILD 3117 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3118 #endif 3119 #if OMPT_SUPPORT && OMPT_OPTIONAL 3120 if (ompt_enabled.ompt_callback_mutex_acquired) { 3121 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3122 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3123 } 3124 #endif 3125 return FTN_TRUE; 3126 } else { 3127 #if USE_ITT_BUILD 3128 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3129 #endif 3130 return FTN_FALSE; 3131 } 3132 3133 #else // KMP_USE_DYNAMIC_LOCK 3134 3135 kmp_user_lock_p lck; 3136 int rc; 3137 3138 if ((__kmp_user_lock_kind == lk_tas) && 3139 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3140 lck = (kmp_user_lock_p)user_lock; 3141 } 3142 #if KMP_USE_FUTEX 3143 else if ((__kmp_user_lock_kind == lk_futex) && 3144 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3145 lck = (kmp_user_lock_p)user_lock; 3146 } 3147 #endif 3148 else { 3149 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3150 } 3151 3152 #if USE_ITT_BUILD 3153 __kmp_itt_lock_acquiring(lck); 3154 #endif /* USE_ITT_BUILD */ 3155 #if OMPT_SUPPORT && OMPT_OPTIONAL 3156 // This is the case, if called from omp_init_lock_with_hint: 3157 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3158 if (!codeptr) 3159 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3160 if (ompt_enabled.ompt_callback_mutex_acquire) { 3161 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3162 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3163 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3164 } 3165 #endif 3166 3167 rc = TEST_LOCK(lck, gtid); 3168 #if USE_ITT_BUILD 3169 if (rc) { 3170 __kmp_itt_lock_acquired(lck); 3171 } else { 3172 __kmp_itt_lock_cancelled(lck); 3173 } 3174 #endif /* USE_ITT_BUILD */ 3175 #if OMPT_SUPPORT && OMPT_OPTIONAL 3176 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3177 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3178 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3179 } 3180 #endif 3181 3182 return (rc ? 
FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next 3294 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3295 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3296 } 3297 } 3298 } 3299 #endif 3300 return rc; 3301 3302 /* Can't use serial interval since not block structured */ 3303 3304 #endif // KMP_USE_DYNAMIC_LOCK 3305 } 3306 3307 // Interface to fast scalable reduce methods routines 3308 3309 // keep the selected method in a thread local structure for cross-function 3310 // usage: will be used in __kmpc_end_reduce* functions; 3311 // another solution: to re-determine the method one more time in 3312 // __kmpc_end_reduce* functions (new prototype required then) 3313 // AT: which solution is better? 3314 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3315 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3316 3317 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3318 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3319 3320 // description of the packed_reduction_method variable: look at the macros in 3321 // kmp.h 3322 3323 // used in a critical section reduce block 3324 static __forceinline void 3325 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3326 kmp_critical_name *crit) { 3327 3328 // this lock was visible to a customer and to the threading profile tool as a 3329 // serial overhead span (although it's used for an internal purpose only) 3330 // why was it visible in previous implementation? 3331 // should we keep it visible in new reduce block? 3332 kmp_user_lock_p lck; 3333 3334 #if KMP_USE_DYNAMIC_LOCK 3335 3336 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3337 // Check if it is initialized. 3338 if (*lk == 0) { 3339 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3340 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3341 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3342 } else { 3343 __kmp_init_indirect_csptr(crit, loc, global_tid, 3344 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3345 } 3346 } 3347 // Branch for accessing the actual lock object and set operation. This 3348 // branching is inevitable since this lock initialization does not follow the 3349 // normal dispatch path (lock table is not used). 3350 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3351 lck = (kmp_user_lock_p)lk; 3352 KMP_DEBUG_ASSERT(lck != NULL); 3353 if (__kmp_env_consistency_check) { 3354 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3355 } 3356 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3357 } else { 3358 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3359 lck = ilk->lock; 3360 KMP_DEBUG_ASSERT(lck != NULL); 3361 if (__kmp_env_consistency_check) { 3362 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3363 } 3364 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3365 } 3366 3367 #else // KMP_USE_DYNAMIC_LOCK 3368 3369 // We know that the fast reduction code is only emitted by Intel compilers 3370 // with 32 byte critical sections. If there isn't enough space, then we 3371 // have to use a pointer. 
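  // Illustration: a test-and-set lock, for instance, fits inside the 32-byte
  // kmp_critical_name buffer, so *crit can serve as the lock itself; larger
  // lock types are allocated elsewhere and reached through a pointer that is
  // cached in *crit by __kmp_get_critical_section_ptr().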
3372 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3373 lck = (kmp_user_lock_p)crit; 3374 } else { 3375 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3376 } 3377 KMP_DEBUG_ASSERT(lck != NULL); 3378 3379 if (__kmp_env_consistency_check) 3380 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3381 3382 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3383 3384 #endif // KMP_USE_DYNAMIC_LOCK 3385 } 3386 3387 // used in a critical section reduce block 3388 static __forceinline void 3389 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3390 kmp_critical_name *crit) { 3391 3392 kmp_user_lock_p lck; 3393 3394 #if KMP_USE_DYNAMIC_LOCK 3395 3396 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3397 lck = (kmp_user_lock_p)crit; 3398 if (__kmp_env_consistency_check) 3399 __kmp_pop_sync(global_tid, ct_critical, loc); 3400 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3401 } else { 3402 kmp_indirect_lock_t *ilk = 3403 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3404 if (__kmp_env_consistency_check) 3405 __kmp_pop_sync(global_tid, ct_critical, loc); 3406 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3407 } 3408 3409 #else // KMP_USE_DYNAMIC_LOCK 3410 3411 // We know that the fast reduction code is only emitted by Intel compilers 3412 // with 32 byte critical sections. If there isn't enough space, then we have 3413 // to use a pointer. 3414 if (__kmp_base_user_lock_size > 32) { 3415 lck = *((kmp_user_lock_p *)crit); 3416 KMP_ASSERT(lck != NULL); 3417 } else { 3418 lck = (kmp_user_lock_p)crit; 3419 } 3420 3421 if (__kmp_env_consistency_check) 3422 __kmp_pop_sync(global_tid, ct_critical, loc); 3423 3424 __kmp_release_user_lock_with_checks(lck, global_tid); 3425 3426 #endif // KMP_USE_DYNAMIC_LOCK 3427 } // __kmp_end_critical_section_reduce_block 3428 3429 static __forceinline int 3430 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3431 int *task_state) { 3432 kmp_team_t *team; 3433 3434 // Check if we are inside the teams construct? 3435 if (th->th.th_teams_microtask) { 3436 *team_p = team = th->th.th_team; 3437 if (team->t.t_level == th->th.th_teams_level) { 3438 // This is reduction at teams construct. 3439 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3440 // Let's swap teams temporarily for the reduction. 3441 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3442 th->th.th_team = team->t.t_parent; 3443 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3444 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3445 *task_state = th->th.th_task_state; 3446 th->th.th_task_state = 0; 3447 3448 return 1; 3449 } 3450 } 3451 return 0; 3452 } 3453 3454 static __forceinline void 3455 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3456 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3457 th->th.th_info.ds.ds_tid = 0; 3458 th->th.th_team = team; 3459 th->th.th_team_nproc = team->t.t_nproc; 3460 th->th.th_task_team = team->t.t_task_team[task_state]; 3461 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3462 } 3463 3464 /* 2.a.i. Reduce Block without a terminating barrier */ 3465 /*! 
3466 @ingroup SYNCHRONIZATION 3467 @param loc source location information 3468 @param global_tid global thread number 3469 @param num_vars number of items (variables) to be reduced 3470 @param reduce_size size of data in bytes to be reduced 3471 @param reduce_data pointer to data to be reduced 3472 @param reduce_func callback function providing reduction operation on two 3473 operands and returning result of reduction in lhs_data 3474 @param lck pointer to the unique lock data structure 3475 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3476 threads if atomic reduction needed 3477 3478 The nowait version is used for a reduce clause with the nowait argument. 3479 */ 3480 kmp_int32 3481 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3482 size_t reduce_size, void *reduce_data, 3483 void (*reduce_func)(void *lhs_data, void *rhs_data), 3484 kmp_critical_name *lck) { 3485 3486 KMP_COUNT_BLOCK(REDUCE_nowait); 3487 int retval = 0; 3488 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3489 kmp_info_t *th; 3490 kmp_team_t *team; 3491 int teams_swapped = 0, task_state; 3492 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3493 __kmp_assert_valid_gtid(global_tid); 3494 3495 // why do we need this initialization here at all? 3496 // Reduction clause can not be used as a stand-alone directive. 3497 3498 // do not call __kmp_serial_initialize(), it will be called by 3499 // __kmp_parallel_initialize() if needed 3500 // possible detection of false-positive race by the threadchecker ??? 3501 if (!TCR_4(__kmp_init_parallel)) 3502 __kmp_parallel_initialize(); 3503 3504 __kmp_resume_if_soft_paused(); 3505 3506 // check correctness of reduce block nesting 3507 #if KMP_USE_DYNAMIC_LOCK 3508 if (__kmp_env_consistency_check) 3509 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3510 #else 3511 if (__kmp_env_consistency_check) 3512 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3513 #endif 3514 3515 th = __kmp_thread_from_gtid(global_tid); 3516 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3517 3518 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3519 // the value should be kept in a variable 3520 // the variable should be either a construct-specific or thread-specific 3521 // property, not a team specific property 3522 // (a thread can reach the next reduce block on the next construct, reduce 3523 // method may differ on the next construct) 3524 // an ident_t "loc" parameter could be used as a construct-specific property 3525 // (what if loc == 0?) 
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
  // a thread-specific variable is better regarding two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes 'determine' and 'set' lines (no need to execute by one
  // thread, to avoid unnecessary extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop', but the atomic operation has not been executed yet; it will
    // be executed slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // AT: performance issue: a real barrier here
    // AT: (if the primary thread is slow, other threads are blocked here
    // waiting for the primary thread to come and release them)
    // AT: (it's not what a customer might expect when specifying the NOWAIT
    // clause)
    // AT: (specifying NOWAIT won't result in improved performance; it'll be
    // confusing to a customer)
    // AT: another implementation of *barrier_gather*nowait() (or some other
    // design) might go faster and be more in line with the sense of NOWAIT
    // AT: TO DO: run the EPCC benchmark and compare times

    // this barrier should be invisible to a customer and to the threading
    // profile tool (it's neither a terminating barrier nor customer's code;
    // it's used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary frame/return
    // addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the primary thread should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither the primary thread nor the other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it would be better to remove this else-if branch entirely;
    // after removal this value would be checked by the 'else' and would assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread gets here
    // OMPT: tree reduction is annotated in the barrier code

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
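
As a purely illustrative sketch (hedged; this is not the exact sequence any
particular compiler emits), generated code typically switches on the return
value: 1 means this thread combines the partial results itself and then calls
__kmpc_end_reduce(); 2 means the combination should be done with atomic
operations; 0 means there is nothing left for this thread to do. Roughly:

    res = __kmpc_reduce(loc, gtid, 1, sizeof(local), &local, reduce_func, &crit);
    if (res == 1) {
      // combine the thread-private copies into the original list items
      __kmpc_end_reduce(loc, gtid, &crit);
    } else if (res == 2) {
      // update the original list items with atomic operations instead
    }

Here res, local, reduce_func and crit are illustrative names only; the nowait
entry point above pairs with __kmpc_end_reduce_nowait() in the same way.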
3709 */ 3710 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3711 size_t reduce_size, void *reduce_data, 3712 void (*reduce_func)(void *lhs_data, void *rhs_data), 3713 kmp_critical_name *lck) { 3714 KMP_COUNT_BLOCK(REDUCE_wait); 3715 int retval = 0; 3716 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3717 kmp_info_t *th; 3718 kmp_team_t *team; 3719 int teams_swapped = 0, task_state; 3720 3721 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3722 __kmp_assert_valid_gtid(global_tid); 3723 3724 // why do we need this initialization here at all? 3725 // Reduction clause can not be a stand-alone directive. 3726 3727 // do not call __kmp_serial_initialize(), it will be called by 3728 // __kmp_parallel_initialize() if needed 3729 // possible detection of false-positive race by the threadchecker ??? 3730 if (!TCR_4(__kmp_init_parallel)) 3731 __kmp_parallel_initialize(); 3732 3733 __kmp_resume_if_soft_paused(); 3734 3735 // check correctness of reduce block nesting 3736 #if KMP_USE_DYNAMIC_LOCK 3737 if (__kmp_env_consistency_check) 3738 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3739 #else 3740 if (__kmp_env_consistency_check) 3741 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3742 #endif 3743 3744 th = __kmp_thread_from_gtid(global_tid); 3745 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3746 3747 packed_reduction_method = __kmp_determine_reduction_method( 3748 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3749 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3750 3751 OMPT_REDUCTION_DECL(th, global_tid); 3752 3753 if (packed_reduction_method == critical_reduce_block) { 3754 3755 OMPT_REDUCTION_BEGIN; 3756 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3757 retval = 1; 3758 3759 } else if (packed_reduction_method == empty_reduce_block) { 3760 3761 OMPT_REDUCTION_BEGIN; 3762 // usage: if team size == 1, no synchronization is required ( Intel 3763 // platforms only ) 3764 retval = 1; 3765 3766 } else if (packed_reduction_method == atomic_reduce_block) { 3767 3768 retval = 2; 3769 3770 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3771 tree_reduce_block)) { 3772 3773 // case tree_reduce_block: 3774 // this barrier should be visible to a customer and to the threading profile 3775 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3776 #if OMPT_SUPPORT 3777 ompt_frame_t *ompt_frame; 3778 if (ompt_enabled.enabled) { 3779 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3780 if (ompt_frame->enter_frame.ptr == NULL) 3781 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3782 } 3783 OMPT_STORE_RETURN_ADDRESS(global_tid); 3784 #endif 3785 #if USE_ITT_NOTIFY 3786 __kmp_threads[global_tid]->th.th_ident = 3787 loc; // needed for correct notification of frames 3788 #endif 3789 retval = 3790 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3791 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3792 retval = (retval != 0) ? 
(0) : (1); 3793 #if OMPT_SUPPORT && OMPT_OPTIONAL 3794 if (ompt_enabled.enabled) { 3795 ompt_frame->enter_frame = ompt_data_none; 3796 } 3797 #endif 3798 3799 // all other workers except primary thread should do this pop here 3800 // (none of other workers except primary will enter __kmpc_end_reduce()) 3801 if (__kmp_env_consistency_check) { 3802 if (retval == 0) { // 0: all other workers; 1: primary thread 3803 __kmp_pop_sync(global_tid, ct_reduce, loc); 3804 } 3805 } 3806 3807 } else { 3808 3809 // should never reach this block 3810 KMP_ASSERT(0); // "unexpected method" 3811 } 3812 if (teams_swapped) { 3813 __kmp_restore_swapped_teams(th, team, task_state); 3814 } 3815 3816 KA_TRACE(10, 3817 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3818 global_tid, packed_reduction_method, retval)); 3819 return retval; 3820 } 3821 3822 /*! 3823 @ingroup SYNCHRONIZATION 3824 @param loc source location information 3825 @param global_tid global thread id. 3826 @param lck pointer to the unique lock data structure 3827 3828 Finish the execution of a blocking reduce. 3829 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3830 start function. 3831 */ 3832 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3833 kmp_critical_name *lck) { 3834 3835 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3836 kmp_info_t *th; 3837 kmp_team_t *team; 3838 int teams_swapped = 0, task_state; 3839 3840 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3841 __kmp_assert_valid_gtid(global_tid); 3842 3843 th = __kmp_thread_from_gtid(global_tid); 3844 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3845 3846 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3847 3848 // this barrier should be visible to a customer and to the threading profile 3849 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3850 OMPT_REDUCTION_DECL(th, global_tid); 3851 3852 if (packed_reduction_method == critical_reduce_block) { 3853 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3854 3855 OMPT_REDUCTION_END; 3856 3857 // TODO: implicit barrier: should be exposed 3858 #if OMPT_SUPPORT 3859 ompt_frame_t *ompt_frame; 3860 if (ompt_enabled.enabled) { 3861 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3862 if (ompt_frame->enter_frame.ptr == NULL) 3863 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3864 } 3865 OMPT_STORE_RETURN_ADDRESS(global_tid); 3866 #endif 3867 #if USE_ITT_NOTIFY 3868 __kmp_threads[global_tid]->th.th_ident = loc; 3869 #endif 3870 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3871 #if OMPT_SUPPORT && OMPT_OPTIONAL 3872 if (ompt_enabled.enabled) { 3873 ompt_frame->enter_frame = ompt_data_none; 3874 } 3875 #endif 3876 3877 } else if (packed_reduction_method == empty_reduce_block) { 3878 3879 OMPT_REDUCTION_END; 3880 3881 // usage: if team size==1, no synchronization is required (Intel platforms only) 3882 3883 // TODO: implicit barrier: should be exposed 3884 #if OMPT_SUPPORT 3885 ompt_frame_t *ompt_frame; 3886 if (ompt_enabled.enabled) { 3887 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3888 if (ompt_frame->enter_frame.ptr == NULL) 3889 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3890 } 3891 OMPT_STORE_RETURN_ADDRESS(global_tid); 3892 #endif 3893 #if USE_ITT_NOTIFY 3894 __kmp_threads[global_tid]->th.th_ident = loc; 3895 #endif 3896 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3897 #if OMPT_SUPPORT && OMPT_OPTIONAL 3898 if (ompt_enabled.enabled) { 3899 ompt_frame->enter_frame = ompt_data_none; 3900 } 3901 #endif 3902 3903 } else if (packed_reduction_method == atomic_reduce_block) { 3904 3905 #if OMPT_SUPPORT 3906 ompt_frame_t *ompt_frame; 3907 if (ompt_enabled.enabled) { 3908 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3909 if (ompt_frame->enter_frame.ptr == NULL) 3910 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3911 } 3912 OMPT_STORE_RETURN_ADDRESS(global_tid); 3913 #endif 3914 // TODO: implicit barrier: should be exposed 3915 #if USE_ITT_NOTIFY 3916 __kmp_threads[global_tid]->th.th_ident = loc; 3917 #endif 3918 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3919 #if OMPT_SUPPORT && OMPT_OPTIONAL 3920 if (ompt_enabled.enabled) { 3921 ompt_frame->enter_frame = ompt_data_none; 3922 } 3923 #endif 3924 3925 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3926 tree_reduce_block)) { 3927 3928 // only primary thread executes here (primary releases all other workers) 3929 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3930 global_tid); 3931 3932 } else { 3933 3934 // should never reach this block 3935 KMP_ASSERT(0); // "unexpected method" 3936 } 3937 if (teams_swapped) { 3938 __kmp_restore_swapped_teams(th, team, task_state); 3939 } 3940 3941 if (__kmp_env_consistency_check) 3942 __kmp_pop_sync(global_tid, ct_reduce, loc); 3943 3944 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3945 global_tid, packed_reduction_method)); 3946 3947 return; 3948 } 3949 3950 #undef __KMP_GET_REDUCTION_METHOD 3951 #undef __KMP_SET_REDUCTION_METHOD 3952 3953 /* end of interface to fast scalable reduce routines */ 3954 3955 kmp_uint64 __kmpc_get_taskid() { 3956 3957 kmp_int32 gtid; 3958 kmp_info_t *thread; 3959 3960 gtid = __kmp_get_gtid(); 3961 if (gtid < 0) { 3962 return 0; 3963 } 3964 thread = __kmp_thread_from_gtid(gtid); 3965 return thread->th.th_current_task->td_task_id; 3966 3967 } // __kmpc_get_taskid 3968 3969 kmp_uint64 __kmpc_get_parent_taskid() { 3970 3971 kmp_int32 gtid; 3972 kmp_info_t *thread; 3973 kmp_taskdata_t *parent_task; 3974 3975 gtid = __kmp_get_gtid(); 3976 if (gtid < 0) { 3977 return 0; 3978 } 3979 thread = __kmp_thread_from_gtid(gtid); 3980 parent_task = thread->th.th_current_task->td_parent; 3981 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3982 3983 } // __kmpc_get_parent_taskid 3984 3985 /*! 3986 @ingroup WORK_SHARING 3987 @param loc source location information. 3988 @param gtid global thread number. 3989 @param num_dims number of associated doacross loops. 3990 @param dims info on loops bounds. 3991 3992 Initialize doacross loop information. 3993 Expect compiler send us inclusive bounds, 3994 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
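
As a purely illustrative sketch (hedged; not a claim about any particular
compiler's lowering), a doacross loop such as

    #pragma omp for ordered(1)
    for (i = 2; i < 9; i += 2) {
      #pragma omp ordered depend(sink : i - 2)
      // ... read data produced by iteration i - 2 ...
      #pragma omp ordered depend(source)
    }

might be lowered so that each thread in the team calls __kmpc_doacross_init()
once with the {lo=2, up=8, st=2} dimension above, calls __kmpc_doacross_wait()
for each depend(sink) vector and __kmpc_doacross_post() for each
depend(source), and finally calls __kmpc_doacross_fini() when it finishes the
loop.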
3995 */ 3996 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3997 const struct kmp_dim *dims) { 3998 __kmp_assert_valid_gtid(gtid); 3999 int j, idx; 4000 kmp_int64 last, trace_count; 4001 kmp_info_t *th = __kmp_threads[gtid]; 4002 kmp_team_t *team = th->th.th_team; 4003 kmp_uint32 *flags; 4004 kmp_disp_t *pr_buf = th->th.th_dispatch; 4005 dispatch_shared_info_t *sh_buf; 4006 4007 KA_TRACE( 4008 20, 4009 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4010 gtid, num_dims, !team->t.t_serialized)); 4011 KMP_DEBUG_ASSERT(dims != NULL); 4012 KMP_DEBUG_ASSERT(num_dims > 0); 4013 4014 if (team->t.t_serialized) { 4015 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4016 return; // no dependencies if team is serialized 4017 } 4018 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4019 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4020 // the next loop 4021 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4022 4023 // Save bounds info into allocated private buffer 4024 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4025 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4026 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4027 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4028 pr_buf->th_doacross_info[0] = 4029 (kmp_int64)num_dims; // first element is number of dimensions 4030 // Save also address of num_done in order to access it later without knowing 4031 // the buffer index 4032 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4033 pr_buf->th_doacross_info[2] = dims[0].lo; 4034 pr_buf->th_doacross_info[3] = dims[0].up; 4035 pr_buf->th_doacross_info[4] = dims[0].st; 4036 last = 5; 4037 for (j = 1; j < num_dims; ++j) { 4038 kmp_int64 4039 range_length; // To keep ranges of all dimensions but the first dims[0] 4040 if (dims[j].st == 1) { // most common case 4041 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4042 range_length = dims[j].up - dims[j].lo + 1; 4043 } else { 4044 if (dims[j].st > 0) { 4045 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4046 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4047 } else { // negative increment 4048 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4049 range_length = 4050 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4051 } 4052 } 4053 pr_buf->th_doacross_info[last++] = range_length; 4054 pr_buf->th_doacross_info[last++] = dims[j].lo; 4055 pr_buf->th_doacross_info[last++] = dims[j].up; 4056 pr_buf->th_doacross_info[last++] = dims[j].st; 4057 } 4058 4059 // Compute total trip count. 4060 // Start with range of dims[0] which we don't need to keep in the buffer. 
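  // Worked example (illustration only): for the single-dimension loop in the
  // comment above, lo=2, up=8, st=2, so dims[0] contributes
  // (8 - 2) / 2 + 1 = 4 iterations; with a second dimension of range_length 10
  // the total trace_count would be 4 * 10 = 40, i.e. 40 completion bits in the
  // shared flags array (one bit per iteration).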
4061 if (dims[0].st == 1) { // most common case 4062 trace_count = dims[0].up - dims[0].lo + 1; 4063 } else if (dims[0].st > 0) { 4064 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4065 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4066 } else { // negative increment 4067 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4068 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4069 } 4070 for (j = 1; j < num_dims; ++j) { 4071 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4072 } 4073 KMP_DEBUG_ASSERT(trace_count > 0); 4074 4075 // Check if shared buffer is not occupied by other loop (idx - 4076 // __kmp_dispatch_num_buffers) 4077 if (idx != sh_buf->doacross_buf_idx) { 4078 // Shared buffer is occupied, wait for it to be free 4079 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4080 __kmp_eq_4, NULL); 4081 } 4082 #if KMP_32_BIT_ARCH 4083 // Check if we are the first thread. After the CAS the first thread gets 0, 4084 // others get 1 if initialization is in progress, allocated pointer otherwise. 4085 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4086 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4087 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4088 #else 4089 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4090 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4091 #endif 4092 if (flags == NULL) { 4093 // we are the first thread, allocate the array of flags 4094 size_t size = 4095 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4096 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4097 KMP_MB(); 4098 sh_buf->doacross_flags = flags; 4099 } else if (flags == (kmp_uint32 *)1) { 4100 #if KMP_32_BIT_ARCH 4101 // initialization is still in progress, need to wait 4102 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4103 #else 4104 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4105 #endif 4106 KMP_YIELD(TRUE); 4107 KMP_MB(); 4108 } else { 4109 KMP_MB(); 4110 } 4111 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4112 pr_buf->th_doacross_flags = 4113 sh_buf->doacross_flags; // save private copy in order to not 4114 // touch shared buffer on each iteration 4115 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4116 } 4117 4118 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4119 __kmp_assert_valid_gtid(gtid); 4120 kmp_int64 shft; 4121 size_t num_dims, i; 4122 kmp_uint32 flag; 4123 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4124 kmp_info_t *th = __kmp_threads[gtid]; 4125 kmp_team_t *team = th->th.th_team; 4126 kmp_disp_t *pr_buf; 4127 kmp_int64 lo, up, st; 4128 4129 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4130 if (team->t.t_serialized) { 4131 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4132 return; // no dependencies if team is serialized 4133 } 4134 4135 // calculate sequential iteration number and check out-of-bounds condition 4136 pr_buf = th->th.th_dispatch; 4137 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4138 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4139 lo = pr_buf->th_doacross_info[2]; 4140 up = pr_buf->th_doacross_info[3]; 4141 st = pr_buf->th_doacross_info[4]; 4142 #if OMPT_SUPPORT && OMPT_OPTIONAL 4143 ompt_dependence_t deps[num_dims]; 4144 #endif 4145 if (st == 1) { // most common case 4146 if (vec[0] < lo || vec[0] > up) { 4147 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4148 "bounds [%lld,%lld]\n", 4149 gtid, vec[0], lo, up)); 4150 return; 4151 } 4152 iter_number = vec[0] - lo; 4153 } else if (st > 0) { 4154 if (vec[0] < lo || vec[0] > up) { 4155 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4156 "bounds [%lld,%lld]\n", 4157 gtid, vec[0], lo, up)); 4158 return; 4159 } 4160 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4161 } else { // negative increment 4162 if (vec[0] > lo || vec[0] < up) { 4163 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4164 "bounds [%lld,%lld]\n", 4165 gtid, vec[0], lo, up)); 4166 return; 4167 } 4168 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4169 } 4170 #if OMPT_SUPPORT && OMPT_OPTIONAL 4171 deps[0].variable.value = iter_number; 4172 deps[0].dependence_type = ompt_dependence_type_sink; 4173 #endif 4174 for (i = 1; i < num_dims; ++i) { 4175 kmp_int64 iter, ln; 4176 size_t j = i * 4; 4177 ln = pr_buf->th_doacross_info[j + 1]; 4178 lo = pr_buf->th_doacross_info[j + 2]; 4179 up = pr_buf->th_doacross_info[j + 3]; 4180 st = pr_buf->th_doacross_info[j + 4]; 4181 if (st == 1) { 4182 if (vec[i] < lo || vec[i] > up) { 4183 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4184 "bounds [%lld,%lld]\n", 4185 gtid, vec[i], lo, up)); 4186 return; 4187 } 4188 iter = vec[i] - lo; 4189 } else if (st > 0) { 4190 if (vec[i] < lo || vec[i] > up) { 4191 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4192 "bounds [%lld,%lld]\n", 4193 gtid, vec[i], lo, up)); 4194 return; 4195 } 4196 iter = (kmp_uint64)(vec[i] - lo) / st; 4197 } else { // st < 0 4198 if (vec[i] > lo || vec[i] < up) { 4199 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4200 "bounds [%lld,%lld]\n", 4201 gtid, vec[i], lo, up)); 4202 return; 4203 } 4204 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4205 } 4206 iter_number = iter + ln * iter_number; 4207 #if OMPT_SUPPORT && OMPT_OPTIONAL 4208 deps[i].variable.value = iter; 4209 deps[i].dependence_type = ompt_dependence_type_sink; 4210 #endif 4211 } 4212 shft = iter_number % 32; // use 32-bit granularity 4213 iter_number >>= 5; // divided by 32 4214 flag = 1 << shft; 4215 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4216 KMP_YIELD(TRUE); 4217 } 4218 KMP_MB(); 4219 #if OMPT_SUPPORT && OMPT_OPTIONAL 4220 if (ompt_enabled.ompt_callback_dependences) { 4221 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4222 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4223 } 4224 #endif 4225 KA_TRACE(20, 4226 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4227 gtid, (iter_number << 5) + shft)); 4228 } 4229 4230 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4231 __kmp_assert_valid_gtid(gtid); 4232 kmp_int64 shft; 4233 size_t num_dims, i; 4234 kmp_uint32 flag; 4235 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4236 kmp_info_t *th = __kmp_threads[gtid]; 4237 kmp_team_t *team = th->th.th_team; 4238 kmp_disp_t *pr_buf; 4239 kmp_int64 lo, st; 4240 4241 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4242 if (team->t.t_serialized) { 4243 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4244 return; // no dependencies if team is serialized 4245 } 4246 4247 // calculate sequential iteration number (same as in "wait" but no 4248 // out-of-bounds checks) 4249 pr_buf = th->th.th_dispatch; 4250 
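  // Illustration only: the loop below linearizes the normalized iteration
  // vector in row-major order via iter_number = iter + ln * iter_number.
  // E.g. for a 2-D nest whose second dimension has range_length ln = 10, the
  // normalized vector (3, 7) maps to 7 + 10 * 3 = 37, i.e. bit 37 % 32 = 5 of
  // flag word 37 >> 5 = 1.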
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4251 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4252 lo = pr_buf->th_doacross_info[2]; 4253 st = pr_buf->th_doacross_info[4]; 4254 #if OMPT_SUPPORT && OMPT_OPTIONAL 4255 ompt_dependence_t deps[num_dims]; 4256 #endif 4257 if (st == 1) { // most common case 4258 iter_number = vec[0] - lo; 4259 } else if (st > 0) { 4260 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4261 } else { // negative increment 4262 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4263 } 4264 #if OMPT_SUPPORT && OMPT_OPTIONAL 4265 deps[0].variable.value = iter_number; 4266 deps[0].dependence_type = ompt_dependence_type_source; 4267 #endif 4268 for (i = 1; i < num_dims; ++i) { 4269 kmp_int64 iter, ln; 4270 size_t j = i * 4; 4271 ln = pr_buf->th_doacross_info[j + 1]; 4272 lo = pr_buf->th_doacross_info[j + 2]; 4273 st = pr_buf->th_doacross_info[j + 4]; 4274 if (st == 1) { 4275 iter = vec[i] - lo; 4276 } else if (st > 0) { 4277 iter = (kmp_uint64)(vec[i] - lo) / st; 4278 } else { // st < 0 4279 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4280 } 4281 iter_number = iter + ln * iter_number; 4282 #if OMPT_SUPPORT && OMPT_OPTIONAL 4283 deps[i].variable.value = iter; 4284 deps[i].dependence_type = ompt_dependence_type_source; 4285 #endif 4286 } 4287 #if OMPT_SUPPORT && OMPT_OPTIONAL 4288 if (ompt_enabled.ompt_callback_dependences) { 4289 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4290 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4291 } 4292 #endif 4293 shft = iter_number % 32; // use 32-bit granularity 4294 iter_number >>= 5; // divided by 32 4295 flag = 1 << shft; 4296 KMP_MB(); 4297 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4298 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4299 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4300 (iter_number << 5) + shft)); 4301 } 4302 4303 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4304 __kmp_assert_valid_gtid(gtid); 4305 kmp_int32 num_done; 4306 kmp_info_t *th = __kmp_threads[gtid]; 4307 kmp_team_t *team = th->th.th_team; 4308 kmp_disp_t *pr_buf = th->th.th_dispatch; 4309 4310 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4311 if (team->t.t_serialized) { 4312 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4313 return; // nothing to do 4314 } 4315 num_done = 4316 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4317 if (num_done == th->th.th_team_nproc) { 4318 // we are the last thread, need to free shared resources 4319 int idx = pr_buf->th_doacross_buf_idx - 1; 4320 dispatch_shared_info_t *sh_buf = 4321 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4322 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4323 (kmp_int64)&sh_buf->doacross_num_done); 4324 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4325 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4326 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4327 sh_buf->doacross_flags = NULL; 4328 sh_buf->doacross_num_done = 0; 4329 sh_buf->doacross_buf_idx += 4330 __kmp_dispatch_num_buffers; // free buffer for future re-use 4331 } 4332 // free private resources (need to keep buffer index forever) 4333 pr_buf->th_doacross_flags = NULL; 4334 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4335 pr_buf->th_doacross_info = NULL; 4336 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4337 } 4338 4339 /* omp_alloc/omp_calloc/omp_free only defined for 
C/C++, not for Fortran */ 4340 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4341 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4342 } 4343 4344 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4345 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); 4346 } 4347 4348 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4349 omp_allocator_handle_t free_allocator) { 4350 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4351 free_allocator); 4352 } 4353 4354 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4355 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4356 } 4357 4358 int __kmpc_get_target_offload(void) { 4359 if (!__kmp_init_serial) { 4360 __kmp_serial_initialize(); 4361 } 4362 return __kmp_target_offload; 4363 } 4364 4365 int __kmpc_pause_resource(kmp_pause_status_t level) { 4366 if (!__kmp_init_serial) { 4367 return 1; // Can't pause if runtime is not initialized 4368 } 4369 return __kmp_pause_resource(level); 4370 } 4371 4372 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4373 if (!__kmp_init_serial) 4374 __kmp_serial_initialize(); 4375 4376 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4377 4378 #if OMPT_SUPPORT 4379 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4380 ompt_callbacks.ompt_callback(ompt_callback_error)( 4381 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4382 OMPT_GET_RETURN_ADDRESS(0)); 4383 } 4384 #endif // OMPT_SUPPORT 4385 4386 char *src_loc; 4387 if (loc && loc->psource) { 4388 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4389 src_loc = 4390 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col); 4391 __kmp_str_loc_free(&str_loc); 4392 } else { 4393 src_loc = __kmp_str_format("unknown"); 4394 } 4395 4396 if (severity == severity_warning) 4397 KMP_WARNING(UserDirectedWarning, src_loc, message); 4398 else 4399 KMP_FATAL(UserDirectedError, src_loc, message); 4400 4401 __kmp_str_free(&src_loc); 4402 } 4403 4404 #ifdef KMP_USE_VERSION_SYMBOLS 4405 // For GOMP compatibility there are two versions of each omp_* API. 4406 // One is the plain C symbol and one is the Fortran symbol with an appended 4407 // underscore. When we implement a specific ompc_* version of an omp_* 4408 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4409 // instead of the Fortran versions in kmp_ftn_entry.h 4410 extern "C" { 4411 // Have to undef these from omp.h so they aren't translated into 4412 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4413 #ifdef omp_set_affinity_format 4414 #undef omp_set_affinity_format 4415 #endif 4416 #ifdef omp_get_affinity_format 4417 #undef omp_get_affinity_format 4418 #endif 4419 #ifdef omp_display_affinity 4420 #undef omp_display_affinity 4421 #endif 4422 #ifdef omp_capture_affinity 4423 #undef omp_capture_affinity 4424 #endif 4425 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4426 "OMP_5.0"); 4427 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4428 "OMP_5.0"); 4429 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4430 "OMP_5.0"); 4431 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4432 "OMP_5.0"); 4433 } // extern "C" 4434 #endif 4435