1 /* 2 * kmp_csupport.cpp -- kfront linkage support for OpenMP. 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 #define __KMP_IMP 14 #include "omp.h" /* extern "C" declarations of user-visible routines */ 15 #include "kmp.h" 16 #include "kmp_error.h" 17 #include "kmp_i18n.h" 18 #include "kmp_itt.h" 19 #include "kmp_lock.h" 20 #include "kmp_stats.h" 21 #include "ompt-specific.h" 22 23 #define MAX_MESSAGE 512 24 25 // flags will be used in future, e.g. to implement openmp_strict library 26 // restrictions 27 28 /*! 29 * @ingroup STARTUP_SHUTDOWN 30 * @param loc in source location information 31 * @param flags in for future use (currently ignored) 32 * 33 * Initialize the runtime library. This call is optional; if it is not made then 34 * it will be implicitly called by attempts to use other library functions. 35 */ 36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) { 37 // By default __kmpc_begin() is no-op. 38 char *env; 39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL && 40 __kmp_str_match_true(env)) { 41 __kmp_middle_initialize(); 42 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n")); 43 } else if (__kmp_ignore_mppbeg() == FALSE) { 44 // By default __kmp_ignore_mppbeg() returns TRUE. 45 __kmp_internal_begin(); 46 KC_TRACE(10, ("__kmpc_begin: called\n")); 47 } 48 } 49 50 /*! 51 * @ingroup STARTUP_SHUTDOWN 52 * @param loc source location information 53 * 54 * Shutdown the runtime library. This is also optional, and even if called will 55 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to 56 * zero. 57 */ 58 void __kmpc_end(ident_t *loc) { 59 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() 60 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND 61 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() 62 // returns FALSE and __kmpc_end() will unregister this root (it can cause 63 // library shut down). 64 if (__kmp_ignore_mppend() == FALSE) { 65 KC_TRACE(10, ("__kmpc_end: called\n")); 66 KA_TRACE(30, ("__kmpc_end\n")); 67 68 __kmp_internal_end_thread(-1); 69 } 70 #if KMP_OS_WINDOWS && OMPT_SUPPORT 71 // Normal exit process on Windows does not allow worker threads of the final 72 // parallel region to finish reporting their events, so shutting down the 73 // library here fixes the issue at least for the cases where __kmpc_end() is 74 // placed properly. 75 if (ompt_enabled.enabled) 76 __kmp_internal_end_library(__kmp_gtid_get_specific()); 77 #endif 78 } 79 80 /*! 81 @ingroup THREAD_STATES 82 @param loc Source location information. 83 @return The global thread index of the active thread. 84 85 This function can be called in any context. 86 87 If the runtime has ony been entered at the outermost level from a 88 single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is 89 that which would be returned by omp_get_thread_num() in the outermost 90 active parallel construct. (Or zero if there is no active parallel 91 construct, since the master thread is necessarily thread zero). 
92 93 If multiple non-OpenMP threads all enter an OpenMP construct then this 94 will be a unique thread identifier among all the threads created by 95 the OpenMP runtime (but the value cannot be defined in terms of 96 OpenMP thread ids returned by omp_get_thread_num()). 97 */ 98 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 99 kmp_int32 gtid = __kmp_entry_gtid(); 100 101 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 102 103 return gtid; 104 } 105 106 /*! 107 @ingroup THREAD_STATES 108 @param loc Source location information. 109 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 110 111 This function can be called in any context. 112 It returns the total number of threads under the control of the OpenMP runtime. 113 That is not a number that can be determined by any OpenMP standard calls, since 114 the library may be called from more than one non-OpenMP thread, and this 115 reflects the total over all such calls. Similarly the runtime maintains 116 underlying threads even when they are not active (since the cost of creating 117 and destroying OS threads is high), this call counts all such threads even if 118 they are not waiting for work. 119 */ 120 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 121 KC_TRACE(10, 122 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 123 124 return TCR_4(__kmp_all_nth); 125 } 126 127 /*! 128 @ingroup THREAD_STATES 129 @param loc Source location information. 130 @return The thread number of the calling thread in the innermost active parallel 131 construct. 132 */ 133 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 134 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 135 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 136 } 137 138 /*! 139 @ingroup THREAD_STATES 140 @param loc Source location information. 141 @return The number of threads in the innermost active parallel construct. 142 */ 143 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 144 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 145 146 return __kmp_entry_thread()->th.th_team->t.t_nproc; 147 } 148 149 /*! 150 * @ingroup DEPRECATED 151 * @param loc location description 152 * 153 * This function need not be called. It always returns TRUE. 
154 */ 155 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 156 #ifndef KMP_DEBUG 157 158 return TRUE; 159 160 #else 161 162 const char *semi2; 163 const char *semi3; 164 int line_no; 165 166 if (__kmp_par_range == 0) { 167 return TRUE; 168 } 169 semi2 = loc->psource; 170 if (semi2 == NULL) { 171 return TRUE; 172 } 173 semi2 = strchr(semi2, ';'); 174 if (semi2 == NULL) { 175 return TRUE; 176 } 177 semi2 = strchr(semi2 + 1, ';'); 178 if (semi2 == NULL) { 179 return TRUE; 180 } 181 if (__kmp_par_range_filename[0]) { 182 const char *name = semi2 - 1; 183 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 184 name--; 185 } 186 if ((*name == '/') || (*name == ';')) { 187 name++; 188 } 189 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 190 return __kmp_par_range < 0; 191 } 192 } 193 semi3 = strchr(semi2 + 1, ';'); 194 if (__kmp_par_range_routine[0]) { 195 if ((semi3 != NULL) && (semi3 > semi2) && 196 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 197 return __kmp_par_range < 0; 198 } 199 } 200 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 201 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 202 return __kmp_par_range > 0; 203 } 204 return __kmp_par_range < 0; 205 } 206 return TRUE; 207 208 #endif /* KMP_DEBUG */ 209 } 210 211 /*! 212 @ingroup THREAD_STATES 213 @param loc Source location information. 214 @return 1 if this thread is executing inside an active parallel region, zero if 215 not. 216 */ 217 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 218 return __kmp_entry_thread()->th.th_root->r.r_active; 219 } 220 221 /*! 222 @ingroup PARALLEL 223 @param loc source location information 224 @param global_tid global thread number 225 @param num_threads number of threads requested for this parallel construct 226 227 Set the number of threads to be used by the next fork spawned by this thread. 228 This call is only required if the parallel construct has a `num_threads` clause. 229 */ 230 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 231 kmp_int32 num_threads) { 232 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 233 global_tid, num_threads)); 234 __kmp_assert_valid_gtid(global_tid); 235 __kmp_push_num_threads(loc, global_tid, num_threads); 236 } 237 238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 239 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 240 /* the num_threads are automatically popped */ 241 } 242 243 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 244 kmp_int32 proc_bind) { 245 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 246 proc_bind)); 247 __kmp_assert_valid_gtid(global_tid); 248 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 249 } 250 251 /*! 252 @ingroup PARALLEL 253 @param loc source location information 254 @param argc total number of arguments in the ellipsis 255 @param microtask pointer to callback routine consisting of outlined parallel 256 construct 257 @param ... pointers to shared variables that aren't global 258 259 Do the actual fork and call the microtask in the relevant number of threads. 260 */ 261 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{ 262 int gtid = __kmp_entry_gtid(); 263 264 #if (KMP_STATS_ENABLED) 265 // If we were in a serial region, then stop the serial timer, record 266 // the event, and start parallel region timer 267 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 268 if (previous_state == stats_state_e::SERIAL_REGION) { 269 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 270 } else { 271 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 272 } 273 int inParallel = __kmpc_in_parallel(loc); 274 if (inParallel) { 275 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 276 } else { 277 KMP_COUNT_BLOCK(OMP_PARALLEL); 278 } 279 #endif 280 281 // maybe to save thr_state is enough here 282 { 283 va_list ap; 284 va_start(ap, microtask); 285 286 #if OMPT_SUPPORT 287 ompt_frame_t *ompt_frame; 288 if (ompt_enabled.enabled) { 289 kmp_info_t *master_th = __kmp_threads[gtid]; 290 kmp_team_t *parent_team = master_th->th.th_team; 291 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 292 if (lwt) 293 ompt_frame = &(lwt->ompt_task_info.frame); 294 else { 295 int tid = __kmp_tid_from_gtid(gtid); 296 ompt_frame = &( 297 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 298 } 299 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 300 } 301 OMPT_STORE_RETURN_ADDRESS(gtid); 302 #endif 303 304 #if INCLUDE_SSC_MARKS 305 SSC_MARK_FORKING(); 306 #endif 307 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 308 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 309 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 310 kmp_va_addr_of(ap)); 311 #if INCLUDE_SSC_MARKS 312 SSC_MARK_JOINING(); 313 #endif 314 __kmp_join_call(loc, gtid 315 #if OMPT_SUPPORT 316 , 317 fork_context_intel 318 #endif 319 ); 320 321 va_end(ap); 322 } 323 324 #if KMP_STATS_ENABLED 325 if (previous_state == stats_state_e::SERIAL_REGION) { 326 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 327 KMP_SET_THREAD_STATE(previous_state); 328 } else { 329 KMP_POP_PARTITIONED_TIMER(); 330 } 331 #endif // KMP_STATS_ENABLED 332 } 333 334 /*! 335 @ingroup PARALLEL 336 @param loc source location information 337 @param global_tid global thread number 338 @param num_teams number of teams requested for the teams construct 339 @param num_threads number of threads per team requested for the teams construct 340 341 Set the number of teams to be used by the teams construct. 342 This call is only required if the teams construct has a `num_teams` clause 343 or a `thread_limit` clause (or both). 344 */ 345 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 346 kmp_int32 num_teams, kmp_int32 num_threads) { 347 KA_TRACE(20, 348 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 349 global_tid, num_teams, num_threads)); 350 __kmp_assert_valid_gtid(global_tid); 351 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 352 } 353 354 /*! 355 @ingroup PARALLEL 356 @param loc source location information 357 @param global_tid global thread number 358 @param num_teams_lo lower bound on number of teams requested for the teams 359 construct 360 @param num_teams_up upper bound on number of teams requested for the teams 361 construct 362 @param num_threads number of threads per team requested for the teams construct 363 364 Set the number of teams to be used by the teams construct. The number of initial 365 teams cretaed will be greater than or equal to the lower bound and less than or 366 equal to the upper bound. 
367 This call is only required if the teams construct has a `num_teams` clause 368 or a `thread_limit` clause (or both). 369 */ 370 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, 371 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, 372 kmp_int32 num_threads) { 373 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d" 374 " num_teams_ub=%d num_threads=%d\n", 375 global_tid, num_teams_lb, num_teams_ub, num_threads)); 376 __kmp_assert_valid_gtid(global_tid); 377 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub, 378 num_threads); 379 } 380 381 /*! 382 @ingroup PARALLEL 383 @param loc source location information 384 @param argc total number of arguments in the ellipsis 385 @param microtask pointer to callback routine consisting of outlined teams 386 construct 387 @param ... pointers to shared variables that aren't global 388 389 Do the actual fork and call the microtask in the relevant number of threads. 390 */ 391 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 392 ...) { 393 int gtid = __kmp_entry_gtid(); 394 kmp_info_t *this_thr = __kmp_threads[gtid]; 395 va_list ap; 396 va_start(ap, microtask); 397 398 #if KMP_STATS_ENABLED 399 KMP_COUNT_BLOCK(OMP_TEAMS); 400 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 401 if (previous_state == stats_state_e::SERIAL_REGION) { 402 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 403 } else { 404 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 405 } 406 #endif 407 408 // remember teams entry point and nesting level 409 this_thr->th.th_teams_microtask = microtask; 410 this_thr->th.th_teams_level = 411 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 412 413 #if OMPT_SUPPORT 414 kmp_team_t *parent_team = this_thr->th.th_team; 415 int tid = __kmp_tid_from_gtid(gtid); 416 if (ompt_enabled.enabled) { 417 parent_team->t.t_implicit_task_taskdata[tid] 418 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 419 } 420 OMPT_STORE_RETURN_ADDRESS(gtid); 421 #endif 422 423 // check if __kmpc_push_num_teams called, set default number of teams 424 // otherwise 425 if (this_thr->th.th_teams_size.nteams == 0) { 426 __kmp_push_num_teams(loc, gtid, 0, 0); 427 } 428 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 429 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 430 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 431 432 __kmp_fork_call( 433 loc, gtid, fork_context_intel, argc, 434 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task 435 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); 436 __kmp_join_call(loc, gtid 437 #if OMPT_SUPPORT 438 , 439 fork_context_intel 440 #endif 441 ); 442 443 // Pop current CG root off list 444 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 445 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 446 this_thr->th.th_cg_roots = tmp->up; 447 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 448 " to node %p. 
cg_nthreads was %d\n", 449 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); 450 KMP_DEBUG_ASSERT(tmp->cg_nthreads); 451 int i = tmp->cg_nthreads--; 452 if (i == 1) { // check is we are the last thread in CG (not always the case) 453 __kmp_free(tmp); 454 } 455 // Restore current task's thread_limit from CG root 456 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 457 this_thr->th.th_current_task->td_icvs.thread_limit = 458 this_thr->th.th_cg_roots->cg_thread_limit; 459 460 this_thr->th.th_teams_microtask = NULL; 461 this_thr->th.th_teams_level = 0; 462 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; 463 va_end(ap); 464 #if KMP_STATS_ENABLED 465 if (previous_state == stats_state_e::SERIAL_REGION) { 466 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 467 KMP_SET_THREAD_STATE(previous_state); 468 } else { 469 KMP_POP_PARTITIONED_TIMER(); 470 } 471 #endif // KMP_STATS_ENABLED 472 } 473 474 // I don't think this function should ever have been exported. 475 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated 476 // openmp code ever called it, but it's been exported from the RTL for so 477 // long that I'm afraid to remove the definition. 478 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } 479 480 /*! 481 @ingroup PARALLEL 482 @param loc source location information 483 @param global_tid global thread number 484 485 Enter a serialized parallel construct. This interface is used to handle a 486 conditional parallel region, like this, 487 @code 488 #pragma omp parallel if (condition) 489 @endcode 490 when the condition is false. 491 */ 492 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 493 // The implementation is now in kmp_runtime.cpp so that it can share static 494 // functions with kmp_fork_call since the tasks to be done are similar in 495 // each case. 496 __kmp_assert_valid_gtid(global_tid); 497 #if OMPT_SUPPORT 498 OMPT_STORE_RETURN_ADDRESS(global_tid); 499 #endif 500 __kmp_serialized_parallel(loc, global_tid); 501 } 502 503 /*! 504 @ingroup PARALLEL 505 @param loc source location information 506 @param global_tid global thread number 507 508 Leave a serialized parallel construct. 
509 */ 510 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 511 kmp_internal_control_t *top; 512 kmp_info_t *this_thr; 513 kmp_team_t *serial_team; 514 515 KC_TRACE(10, 516 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 517 518 /* skip all this code for autopar serialized loops since it results in 519 unacceptable overhead */ 520 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 521 return; 522 523 // Not autopar code 524 __kmp_assert_valid_gtid(global_tid); 525 if (!TCR_4(__kmp_init_parallel)) 526 __kmp_parallel_initialize(); 527 528 __kmp_resume_if_soft_paused(); 529 530 this_thr = __kmp_threads[global_tid]; 531 serial_team = this_thr->th.th_serial_team; 532 533 kmp_task_team_t *task_team = this_thr->th.th_task_team; 534 // we need to wait for the proxy tasks before finishing the thread 535 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 536 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 537 538 KMP_MB(); 539 KMP_DEBUG_ASSERT(serial_team); 540 KMP_ASSERT(serial_team->t.t_serialized); 541 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 542 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 543 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 544 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 545 546 #if OMPT_SUPPORT 547 if (ompt_enabled.enabled && 548 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 549 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 550 if (ompt_enabled.ompt_callback_implicit_task) { 551 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 552 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 553 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 554 } 555 556 // reset clear the task id only after unlinking the task 557 ompt_data_t *parent_task_data; 558 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 559 560 if (ompt_enabled.ompt_callback_parallel_end) { 561 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 562 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 563 ompt_parallel_invoker_program | ompt_parallel_team, 564 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 565 } 566 __ompt_lw_taskteam_unlink(this_thr); 567 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 568 } 569 #endif 570 571 /* If necessary, pop the internal control stack values and replace the team 572 * values */ 573 top = serial_team->t.t_control_stack_top; 574 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 575 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 576 serial_team->t.t_control_stack_top = top->next; 577 __kmp_free(top); 578 } 579 580 // if( serial_team -> t.t_serialized > 1 ) 581 serial_team->t.t_level--; 582 583 /* pop dispatch buffers stack */ 584 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 585 { 586 dispatch_private_info_t *disp_buffer = 587 serial_team->t.t_dispatch->th_disp_buffer; 588 serial_team->t.t_dispatch->th_disp_buffer = 589 serial_team->t.t_dispatch->th_disp_buffer->next; 590 __kmp_free(disp_buffer); 591 } 592 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 593 594 --serial_team->t.t_serialized; 595 if (serial_team->t.t_serialized == 0) { 596 597 /* return to the parallel section */ 598 599 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 600 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 601 __kmp_clear_x87_fpu_status_word(); 602 
__kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 603 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 604 } 605 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 606 607 this_thr->th.th_team = serial_team->t.t_parent; 608 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 609 610 /* restore values cached in the thread */ 611 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 612 this_thr->th.th_team_master = 613 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 614 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 615 616 /* TODO the below shouldn't need to be adjusted for serialized teams */ 617 this_thr->th.th_dispatch = 618 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 619 620 __kmp_pop_current_task_from_thread(this_thr); 621 622 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 623 this_thr->th.th_current_task->td_flags.executing = 1; 624 625 if (__kmp_tasking_mode != tskm_immediate_exec) { 626 // Copy the task team from the new child / old parent team to the thread. 627 this_thr->th.th_task_team = 628 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 629 KA_TRACE(20, 630 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 631 "team %p\n", 632 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 633 } 634 } else { 635 if (__kmp_tasking_mode != tskm_immediate_exec) { 636 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 637 "depth of serial team %p to %d\n", 638 global_tid, serial_team, serial_team->t.t_serialized)); 639 } 640 } 641 642 if (__kmp_env_consistency_check) 643 __kmp_pop_parallel(global_tid, NULL); 644 #if OMPT_SUPPORT 645 if (ompt_enabled.enabled) 646 this_thr->th.ompt_thread_info.state = 647 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 648 : ompt_state_work_parallel); 649 #endif 650 } 651 652 /*! 653 @ingroup SYNCHRONIZATION 654 @param loc source location information. 655 656 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 657 depending on the memory ordering convention obeyed by the compiler 658 even that may not be necessary). 659 */ 660 void __kmpc_flush(ident_t *loc) { 661 KC_TRACE(10, ("__kmpc_flush: called\n")); 662 663 /* need explicit __mf() here since use volatile instead in library */ 664 KMP_MB(); /* Flush all pending memory write invalidates. */ 665 666 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 667 #if KMP_MIC 668 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 669 // We shouldn't need it, though, since the ABI rules require that 670 // * If the compiler generates NGO stores it also generates the fence 671 // * If users hand-code NGO stores they should insert the fence 672 // therefore no incomplete unordered stores should be visible. 673 #else 674 // C74404 675 // This is to address non-temporal store instructions (sfence needed). 676 // The clflush instruction is addressed either (mfence needed). 677 // Probably the non-temporal load monvtdqa instruction should also be 678 // addressed. 679 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2. 680 if (!__kmp_cpuinfo.initialized) { 681 __kmp_query_cpuid(&__kmp_cpuinfo); 682 } 683 if (!__kmp_cpuinfo.sse2) { 684 // CPU cannot execute SSE2 instructions. 
685 } else { 686 #if KMP_COMPILER_ICC 687 _mm_mfence(); 688 #elif KMP_COMPILER_MSVC 689 MemoryBarrier(); 690 #else 691 __sync_synchronize(); 692 #endif // KMP_COMPILER_ICC 693 } 694 #endif // KMP_MIC 695 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ 696 KMP_ARCH_RISCV64) 697 // Nothing to see here move along 698 #elif KMP_ARCH_PPC64 699 // Nothing needed here (we have a real MB above). 700 #else 701 #error Unknown or unsupported architecture 702 #endif 703 704 #if OMPT_SUPPORT && OMPT_OPTIONAL 705 if (ompt_enabled.ompt_callback_flush) { 706 ompt_callbacks.ompt_callback(ompt_callback_flush)( 707 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 708 } 709 #endif 710 } 711 712 /* -------------------------------------------------------------------------- */ 713 /*! 714 @ingroup SYNCHRONIZATION 715 @param loc source location information 716 @param global_tid thread id. 717 718 Execute a barrier. 719 */ 720 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 721 KMP_COUNT_BLOCK(OMP_BARRIER); 722 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 723 __kmp_assert_valid_gtid(global_tid); 724 725 if (!TCR_4(__kmp_init_parallel)) 726 __kmp_parallel_initialize(); 727 728 __kmp_resume_if_soft_paused(); 729 730 if (__kmp_env_consistency_check) { 731 if (loc == 0) { 732 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 733 } 734 __kmp_check_barrier(global_tid, ct_barrier, loc); 735 } 736 737 #if OMPT_SUPPORT 738 ompt_frame_t *ompt_frame; 739 if (ompt_enabled.enabled) { 740 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 741 if (ompt_frame->enter_frame.ptr == NULL) 742 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 743 } 744 OMPT_STORE_RETURN_ADDRESS(global_tid); 745 #endif 746 __kmp_threads[global_tid]->th.th_ident = loc; 747 // TODO: explicit barrier_wait_id: 748 // this function is called when 'barrier' directive is present or 749 // implicit barrier at the end of a worksharing construct. 750 // 1) better to add a per-thread barrier counter to a thread data structure 751 // 2) set to 0 when a new team is created 752 // 4) no sync is required 753 754 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 755 #if OMPT_SUPPORT && OMPT_OPTIONAL 756 if (ompt_enabled.enabled) { 757 ompt_frame->enter_frame = ompt_data_none; 758 } 759 #endif 760 } 761 762 /* The BARRIER for a MASTER section is always explicit */ 763 /*! 764 @ingroup WORK_SHARING 765 @param loc source location information. 766 @param global_tid global thread number . 767 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
768 */ 769 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 770 int status = 0; 771 772 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 773 __kmp_assert_valid_gtid(global_tid); 774 775 if (!TCR_4(__kmp_init_parallel)) 776 __kmp_parallel_initialize(); 777 778 __kmp_resume_if_soft_paused(); 779 780 if (KMP_MASTER_GTID(global_tid)) { 781 KMP_COUNT_BLOCK(OMP_MASTER); 782 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 783 status = 1; 784 } 785 786 #if OMPT_SUPPORT && OMPT_OPTIONAL 787 if (status) { 788 if (ompt_enabled.ompt_callback_masked) { 789 kmp_info_t *this_thr = __kmp_threads[global_tid]; 790 kmp_team_t *team = this_thr->th.th_team; 791 792 int tid = __kmp_tid_from_gtid(global_tid); 793 ompt_callbacks.ompt_callback(ompt_callback_masked)( 794 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 795 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 796 OMPT_GET_RETURN_ADDRESS(0)); 797 } 798 } 799 #endif 800 801 if (__kmp_env_consistency_check) { 802 #if KMP_USE_DYNAMIC_LOCK 803 if (status) 804 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 805 else 806 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 807 #else 808 if (status) 809 __kmp_push_sync(global_tid, ct_master, loc, NULL); 810 else 811 __kmp_check_sync(global_tid, ct_master, loc, NULL); 812 #endif 813 } 814 815 return status; 816 } 817 818 /*! 819 @ingroup WORK_SHARING 820 @param loc source location information. 821 @param global_tid global thread number . 822 823 Mark the end of a <tt>master</tt> region. This should only be called by the 824 thread that executes the <tt>master</tt> region. 825 */ 826 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 827 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 828 __kmp_assert_valid_gtid(global_tid); 829 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 830 KMP_POP_PARTITIONED_TIMER(); 831 832 #if OMPT_SUPPORT && OMPT_OPTIONAL 833 kmp_info_t *this_thr = __kmp_threads[global_tid]; 834 kmp_team_t *team = this_thr->th.th_team; 835 if (ompt_enabled.ompt_callback_masked) { 836 int tid = __kmp_tid_from_gtid(global_tid); 837 ompt_callbacks.ompt_callback(ompt_callback_masked)( 838 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 839 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 840 OMPT_GET_RETURN_ADDRESS(0)); 841 } 842 #endif 843 844 if (__kmp_env_consistency_check) { 845 if (KMP_MASTER_GTID(global_tid)) 846 __kmp_pop_sync(global_tid, ct_master, loc); 847 } 848 } 849 850 /*! 851 @ingroup WORK_SHARING 852 @param loc source location information. 853 @param gtid global thread number. 854 855 Start execution of an <tt>ordered</tt> construct. 
856 */ 857 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { 858 int cid = 0; 859 kmp_info_t *th; 860 KMP_DEBUG_ASSERT(__kmp_init_serial); 861 862 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid)); 863 __kmp_assert_valid_gtid(gtid); 864 865 if (!TCR_4(__kmp_init_parallel)) 866 __kmp_parallel_initialize(); 867 868 __kmp_resume_if_soft_paused(); 869 870 #if USE_ITT_BUILD 871 __kmp_itt_ordered_prep(gtid); 872 // TODO: ordered_wait_id 873 #endif /* USE_ITT_BUILD */ 874 875 th = __kmp_threads[gtid]; 876 877 #if OMPT_SUPPORT && OMPT_OPTIONAL 878 kmp_team_t *team; 879 ompt_wait_id_t lck; 880 void *codeptr_ra; 881 OMPT_STORE_RETURN_ADDRESS(gtid); 882 if (ompt_enabled.enabled) { 883 team = __kmp_team_from_gtid(gtid); 884 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value; 885 /* OMPT state update */ 886 th->th.ompt_thread_info.wait_id = lck; 887 th->th.ompt_thread_info.state = ompt_state_wait_ordered; 888 889 /* OMPT event callback */ 890 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 891 if (ompt_enabled.ompt_callback_mutex_acquire) { 892 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 893 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck, 894 codeptr_ra); 895 } 896 } 897 #endif 898 899 if (th->th.th_dispatch->th_deo_fcn != 0) 900 (*th->th.th_dispatch->th_deo_fcn)(>id, &cid, loc); 901 else 902 __kmp_parallel_deo(>id, &cid, loc); 903 904 #if OMPT_SUPPORT && OMPT_OPTIONAL 905 if (ompt_enabled.enabled) { 906 /* OMPT state update */ 907 th->th.ompt_thread_info.state = ompt_state_work_parallel; 908 th->th.ompt_thread_info.wait_id = 0; 909 910 /* OMPT event callback */ 911 if (ompt_enabled.ompt_callback_mutex_acquired) { 912 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 913 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 914 } 915 } 916 #endif 917 918 #if USE_ITT_BUILD 919 __kmp_itt_ordered_start(gtid); 920 #endif /* USE_ITT_BUILD */ 921 } 922 923 /*! 924 @ingroup WORK_SHARING 925 @param loc source location information. 926 @param gtid global thread number. 927 928 End execution of an <tt>ordered</tt> construct. 929 */ 930 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { 931 int cid = 0; 932 kmp_info_t *th; 933 934 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid)); 935 __kmp_assert_valid_gtid(gtid); 936 937 #if USE_ITT_BUILD 938 __kmp_itt_ordered_end(gtid); 939 // TODO: ordered_wait_id 940 #endif /* USE_ITT_BUILD */ 941 942 th = __kmp_threads[gtid]; 943 944 if (th->th.th_dispatch->th_dxo_fcn != 0) 945 (*th->th.th_dispatch->th_dxo_fcn)(>id, &cid, loc); 946 else 947 __kmp_parallel_dxo(>id, &cid, loc); 948 949 #if OMPT_SUPPORT && OMPT_OPTIONAL 950 OMPT_STORE_RETURN_ADDRESS(gtid); 951 if (ompt_enabled.ompt_callback_mutex_released) { 952 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 953 ompt_mutex_ordered, 954 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid) 955 ->t.t_ordered.dt.t_value, 956 OMPT_LOAD_RETURN_ADDRESS(gtid)); 957 } 958 #endif 959 } 960 961 #if KMP_USE_DYNAMIC_LOCK 962 963 static __forceinline void 964 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc, 965 kmp_int32 gtid, kmp_indirect_locktag_t tag) { 966 // Pointer to the allocated indirect lock is written to crit, while indexing 967 // is ignored. 
968 void *idx; 969 kmp_indirect_lock_t **lck; 970 lck = (kmp_indirect_lock_t **)crit; 971 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 972 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 973 KMP_SET_I_LOCK_LOCATION(ilk, loc); 974 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 975 KA_TRACE(20, 976 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 977 #if USE_ITT_BUILD 978 __kmp_itt_critical_creating(ilk->lock, loc); 979 #endif 980 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 981 if (status == 0) { 982 #if USE_ITT_BUILD 983 __kmp_itt_critical_destroyed(ilk->lock); 984 #endif 985 // We don't really need to destroy the unclaimed lock here since it will be 986 // cleaned up at program exit. 987 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 988 } 989 KMP_DEBUG_ASSERT(*lck != NULL); 990 } 991 992 // Fast-path acquire tas lock 993 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 994 { \ 995 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 996 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 997 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 998 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 999 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 1000 kmp_uint32 spins; \ 1001 KMP_FSYNC_PREPARE(l); \ 1002 KMP_INIT_YIELD(spins); \ 1003 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1004 do { \ 1005 if (TCR_4(__kmp_nth) > \ 1006 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1007 KMP_YIELD(TRUE); \ 1008 } else { \ 1009 KMP_YIELD_SPIN(spins); \ 1010 } \ 1011 __kmp_spin_backoff(&backoff); \ 1012 } while ( \ 1013 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1014 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1015 } \ 1016 KMP_FSYNC_ACQUIRED(l); \ 1017 } 1018 1019 // Fast-path test tas lock 1020 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1021 { \ 1022 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1023 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1024 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1025 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1026 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1027 } 1028 1029 // Fast-path release tas lock 1030 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1031 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1032 1033 #if KMP_USE_FUTEX 1034 1035 #include <sys/syscall.h> 1036 #include <unistd.h> 1037 #ifndef FUTEX_WAIT 1038 #define FUTEX_WAIT 0 1039 #endif 1040 #ifndef FUTEX_WAKE 1041 #define FUTEX_WAKE 1 1042 #endif 1043 1044 // Fast-path acquire futex lock 1045 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1046 { \ 1047 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1048 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1049 KMP_MB(); \ 1050 KMP_FSYNC_PREPARE(ftx); \ 1051 kmp_int32 poll_val; \ 1052 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1053 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1054 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1055 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1056 if (!cond) { \ 1057 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1058 poll_val | \ 1059 KMP_LOCK_BUSY(1, futex))) { \ 1060 continue; \ 1061 } \ 1062 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1063 } \ 1064 kmp_int32 rc; \ 1065 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1066 NULL, NULL, 0)) != 0) { \ 1067 continue; \ 1068 } \ 1069 gtid_code |= 1; \ 1070 } \ 1071 KMP_FSYNC_ACQUIRED(ftx); \ 1072 } 1073 1074 // Fast-path test futex lock 
1075 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1076 { \ 1077 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1078 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1079 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1080 KMP_FSYNC_ACQUIRED(ftx); \ 1081 rc = TRUE; \ 1082 } else { \ 1083 rc = FALSE; \ 1084 } \ 1085 } 1086 1087 // Fast-path release futex lock 1088 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1089 { \ 1090 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1091 KMP_MB(); \ 1092 KMP_FSYNC_RELEASING(ftx); \ 1093 kmp_int32 poll_val = \ 1094 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1095 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1096 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1097 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1098 } \ 1099 KMP_MB(); \ 1100 KMP_YIELD_OVERSUB(); \ 1101 } 1102 1103 #endif // KMP_USE_FUTEX 1104 1105 #else // KMP_USE_DYNAMIC_LOCK 1106 1107 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1108 ident_t const *loc, 1109 kmp_int32 gtid) { 1110 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1111 1112 // Because of the double-check, the following load doesn't need to be volatile 1113 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1114 1115 if (lck == NULL) { 1116 void *idx; 1117 1118 // Allocate & initialize the lock. 1119 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1120 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1121 __kmp_init_user_lock_with_checks(lck); 1122 __kmp_set_user_lock_location(lck, loc); 1123 #if USE_ITT_BUILD 1124 __kmp_itt_critical_creating(lck); 1125 // __kmp_itt_critical_creating() should be called *before* the first usage 1126 // of underlying lock. It is the only place where we can guarantee it. There 1127 // are chances the lock will destroyed with no usage, but it is not a 1128 // problem, because this is not real event seen by user but rather setting 1129 // name for object (lock). See more details in kmp_itt.h. 1130 #endif /* USE_ITT_BUILD */ 1131 1132 // Use a cmpxchg instruction to slam the start of the critical section with 1133 // the lock pointer. If another thread beat us to it, deallocate the lock, 1134 // and use the lock that the other thread allocated. 1135 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1136 1137 if (status == 0) { 1138 // Deallocate the lock and reload the value. 1139 #if USE_ITT_BUILD 1140 __kmp_itt_critical_destroyed(lck); 1141 // Let ITT know the lock is destroyed and the same memory location may be reused 1142 // for another purpose. 1143 #endif /* USE_ITT_BUILD */ 1144 __kmp_destroy_user_lock_with_checks(lck); 1145 __kmp_user_lock_free(&idx, gtid, lck); 1146 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1147 KMP_DEBUG_ASSERT(lck != NULL); 1148 } 1149 } 1150 return lck; 1151 } 1152 1153 #endif // KMP_USE_DYNAMIC_LOCK 1154 1155 /*! 1156 @ingroup WORK_SHARING 1157 @param loc source location information. 1158 @param global_tid global thread number. 1159 @param crit identity of the critical section. This could be a pointer to a lock 1160 associated with the critical section, or some other suitably unique value. 1161 1162 Enter code protected by a `critical` construct. 1163 This function blocks until the executing thread can enter the critical section. 
1164 */ 1165 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1166 kmp_critical_name *crit) { 1167 #if KMP_USE_DYNAMIC_LOCK 1168 #if OMPT_SUPPORT && OMPT_OPTIONAL 1169 OMPT_STORE_RETURN_ADDRESS(global_tid); 1170 #endif // OMPT_SUPPORT 1171 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1172 #else 1173 KMP_COUNT_BLOCK(OMP_CRITICAL); 1174 #if OMPT_SUPPORT && OMPT_OPTIONAL 1175 ompt_state_t prev_state = ompt_state_undefined; 1176 ompt_thread_info_t ti; 1177 #endif 1178 kmp_user_lock_p lck; 1179 1180 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1181 __kmp_assert_valid_gtid(global_tid); 1182 1183 // TODO: add THR_OVHD_STATE 1184 1185 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1186 KMP_CHECK_USER_LOCK_INIT(); 1187 1188 if ((__kmp_user_lock_kind == lk_tas) && 1189 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1190 lck = (kmp_user_lock_p)crit; 1191 } 1192 #if KMP_USE_FUTEX 1193 else if ((__kmp_user_lock_kind == lk_futex) && 1194 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1195 lck = (kmp_user_lock_p)crit; 1196 } 1197 #endif 1198 else { // ticket, queuing or drdpa 1199 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1200 } 1201 1202 if (__kmp_env_consistency_check) 1203 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1204 1205 // since the critical directive binds to all threads, not just the current 1206 // team we have to check this even if we are in a serialized team. 1207 // also, even if we are the uber thread, we still have to conduct the lock, 1208 // as we have to contend with sibling threads. 1209 1210 #if USE_ITT_BUILD 1211 __kmp_itt_critical_acquiring(lck); 1212 #endif /* USE_ITT_BUILD */ 1213 #if OMPT_SUPPORT && OMPT_OPTIONAL 1214 OMPT_STORE_RETURN_ADDRESS(gtid); 1215 void *codeptr_ra = NULL; 1216 if (ompt_enabled.enabled) { 1217 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1218 /* OMPT state update */ 1219 prev_state = ti.state; 1220 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1221 ti.state = ompt_state_wait_critical; 1222 1223 /* OMPT event callback */ 1224 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1225 if (ompt_enabled.ompt_callback_mutex_acquire) { 1226 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1227 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1228 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1229 } 1230 } 1231 #endif 1232 // Value of 'crit' should be good for using as a critical_id of the critical 1233 // section directive. 
1234 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1235 1236 #if USE_ITT_BUILD 1237 __kmp_itt_critical_acquired(lck); 1238 #endif /* USE_ITT_BUILD */ 1239 #if OMPT_SUPPORT && OMPT_OPTIONAL 1240 if (ompt_enabled.enabled) { 1241 /* OMPT state update */ 1242 ti.state = prev_state; 1243 ti.wait_id = 0; 1244 1245 /* OMPT event callback */ 1246 if (ompt_enabled.ompt_callback_mutex_acquired) { 1247 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1248 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1249 } 1250 } 1251 #endif 1252 KMP_POP_PARTITIONED_TIMER(); 1253 1254 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1255 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1256 #endif // KMP_USE_DYNAMIC_LOCK 1257 } 1258 1259 #if KMP_USE_DYNAMIC_LOCK 1260 1261 // Converts the given hint to an internal lock implementation 1262 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1263 #if KMP_USE_TSX 1264 #define KMP_TSX_LOCK(seq) lockseq_##seq 1265 #else 1266 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1267 #endif 1268 1269 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1270 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1271 #else 1272 #define KMP_CPUINFO_RTM 0 1273 #endif 1274 1275 // Hints that do not require further logic 1276 if (hint & kmp_lock_hint_hle) 1277 return KMP_TSX_LOCK(hle); 1278 if (hint & kmp_lock_hint_rtm) 1279 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; 1280 if (hint & kmp_lock_hint_adaptive) 1281 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1282 1283 // Rule out conflicting hints first by returning the default lock 1284 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1285 return __kmp_user_lock_seq; 1286 if ((hint & omp_lock_hint_speculative) && 1287 (hint & omp_lock_hint_nonspeculative)) 1288 return __kmp_user_lock_seq; 1289 1290 // Do not even consider speculation when it appears to be contended 1291 if (hint & omp_lock_hint_contended) 1292 return lockseq_queuing; 1293 1294 // Uncontended lock without speculation 1295 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1296 return lockseq_tas; 1297 1298 // Use RTM lock for speculation 1299 if (hint & omp_lock_hint_speculative) 1300 return KMP_CPUINFO_RTM ? 
KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; 1301 1302 return __kmp_user_lock_seq; 1303 } 1304 1305 #if OMPT_SUPPORT && OMPT_OPTIONAL 1306 #if KMP_USE_DYNAMIC_LOCK 1307 static kmp_mutex_impl_t 1308 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1309 if (user_lock) { 1310 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1311 case 0: 1312 break; 1313 #if KMP_USE_FUTEX 1314 case locktag_futex: 1315 return kmp_mutex_impl_queuing; 1316 #endif 1317 case locktag_tas: 1318 return kmp_mutex_impl_spin; 1319 #if KMP_USE_TSX 1320 case locktag_hle: 1321 case locktag_rtm_spin: 1322 return kmp_mutex_impl_speculative; 1323 #endif 1324 default: 1325 return kmp_mutex_impl_none; 1326 } 1327 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1328 } 1329 KMP_ASSERT(ilock); 1330 switch (ilock->type) { 1331 #if KMP_USE_TSX 1332 case locktag_adaptive: 1333 case locktag_rtm_queuing: 1334 return kmp_mutex_impl_speculative; 1335 #endif 1336 case locktag_nested_tas: 1337 return kmp_mutex_impl_spin; 1338 #if KMP_USE_FUTEX 1339 case locktag_nested_futex: 1340 #endif 1341 case locktag_ticket: 1342 case locktag_queuing: 1343 case locktag_drdpa: 1344 case locktag_nested_ticket: 1345 case locktag_nested_queuing: 1346 case locktag_nested_drdpa: 1347 return kmp_mutex_impl_queuing; 1348 default: 1349 return kmp_mutex_impl_none; 1350 } 1351 } 1352 #else 1353 // For locks without dynamic binding 1354 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1355 switch (__kmp_user_lock_kind) { 1356 case lk_tas: 1357 return kmp_mutex_impl_spin; 1358 #if KMP_USE_FUTEX 1359 case lk_futex: 1360 #endif 1361 case lk_ticket: 1362 case lk_queuing: 1363 case lk_drdpa: 1364 return kmp_mutex_impl_queuing; 1365 #if KMP_USE_TSX 1366 case lk_hle: 1367 case lk_rtm_queuing: 1368 case lk_rtm_spin: 1369 case lk_adaptive: 1370 return kmp_mutex_impl_speculative; 1371 #endif 1372 default: 1373 return kmp_mutex_impl_none; 1374 } 1375 } 1376 #endif // KMP_USE_DYNAMIC_LOCK 1377 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1378 1379 /*! 1380 @ingroup WORK_SHARING 1381 @param loc source location information. 1382 @param global_tid global thread number. 1383 @param crit identity of the critical section. This could be a pointer to a lock 1384 associated with the critical section, or some other suitably unique value. 1385 @param hint the lock hint. 1386 1387 Enter code protected by a `critical` construct with a hint. The hint value is 1388 used to suggest a lock implementation. This function blocks until the executing 1389 thread can enter the critical section unless the hint suggests use of 1390 speculative execution and the hardware supports it. 1391 */ 1392 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1393 kmp_critical_name *crit, uint32_t hint) { 1394 KMP_COUNT_BLOCK(OMP_CRITICAL); 1395 kmp_user_lock_p lck; 1396 #if OMPT_SUPPORT && OMPT_OPTIONAL 1397 ompt_state_t prev_state = ompt_state_undefined; 1398 ompt_thread_info_t ti; 1399 // This is the case, if called from __kmpc_critical: 1400 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1401 if (!codeptr) 1402 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1403 #endif 1404 1405 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1406 __kmp_assert_valid_gtid(global_tid); 1407 1408 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1409 // Check if it is initialized. 
1410 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1411 if (*lk == 0) { 1412 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1413 if (KMP_IS_D_LOCK(lckseq)) { 1414 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1415 KMP_GET_D_TAG(lckseq)); 1416 } else { 1417 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1418 } 1419 } 1420 // Branch for accessing the actual lock object and set operation. This 1421 // branching is inevitable since this lock initialization does not follow the 1422 // normal dispatch path (lock table is not used). 1423 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1424 lck = (kmp_user_lock_p)lk; 1425 if (__kmp_env_consistency_check) { 1426 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1427 __kmp_map_hint_to_lock(hint)); 1428 } 1429 #if USE_ITT_BUILD 1430 __kmp_itt_critical_acquiring(lck); 1431 #endif 1432 #if OMPT_SUPPORT && OMPT_OPTIONAL 1433 if (ompt_enabled.enabled) { 1434 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1435 /* OMPT state update */ 1436 prev_state = ti.state; 1437 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1438 ti.state = ompt_state_wait_critical; 1439 1440 /* OMPT event callback */ 1441 if (ompt_enabled.ompt_callback_mutex_acquire) { 1442 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1443 ompt_mutex_critical, (unsigned int)hint, 1444 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1445 codeptr); 1446 } 1447 } 1448 #endif 1449 #if KMP_USE_INLINED_TAS 1450 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1451 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1452 } else 1453 #elif KMP_USE_INLINED_FUTEX 1454 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1455 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1456 } else 1457 #endif 1458 { 1459 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1460 } 1461 } else { 1462 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1463 lck = ilk->lock; 1464 if (__kmp_env_consistency_check) { 1465 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1466 __kmp_map_hint_to_lock(hint)); 1467 } 1468 #if USE_ITT_BUILD 1469 __kmp_itt_critical_acquiring(lck); 1470 #endif 1471 #if OMPT_SUPPORT && OMPT_OPTIONAL 1472 if (ompt_enabled.enabled) { 1473 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1474 /* OMPT state update */ 1475 prev_state = ti.state; 1476 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1477 ti.state = ompt_state_wait_critical; 1478 1479 /* OMPT event callback */ 1480 if (ompt_enabled.ompt_callback_mutex_acquire) { 1481 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1482 ompt_mutex_critical, (unsigned int)hint, 1483 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1484 codeptr); 1485 } 1486 } 1487 #endif 1488 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1489 } 1490 KMP_POP_PARTITIONED_TIMER(); 1491 1492 #if USE_ITT_BUILD 1493 __kmp_itt_critical_acquired(lck); 1494 #endif /* USE_ITT_BUILD */ 1495 #if OMPT_SUPPORT && OMPT_OPTIONAL 1496 if (ompt_enabled.enabled) { 1497 /* OMPT state update */ 1498 ti.state = prev_state; 1499 ti.wait_id = 0; 1500 1501 /* OMPT event callback */ 1502 if (ompt_enabled.ompt_callback_mutex_acquired) { 1503 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1504 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1505 } 1506 } 1507 #endif 1508 1509 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1510 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1511 } // __kmpc_critical_with_hint 1512 1513 #endif // KMP_USE_DYNAMIC_LOCK 
1514 1515 /*! 1516 @ingroup WORK_SHARING 1517 @param loc source location information. 1518 @param global_tid global thread number . 1519 @param crit identity of the critical section. This could be a pointer to a lock 1520 associated with the critical section, or some other suitably unique value. 1521 1522 Leave a critical section, releasing any lock that was held during its execution. 1523 */ 1524 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1525 kmp_critical_name *crit) { 1526 kmp_user_lock_p lck; 1527 1528 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1529 1530 #if KMP_USE_DYNAMIC_LOCK 1531 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1532 lck = (kmp_user_lock_p)crit; 1533 KMP_ASSERT(lck != NULL); 1534 if (__kmp_env_consistency_check) { 1535 __kmp_pop_sync(global_tid, ct_critical, loc); 1536 } 1537 #if USE_ITT_BUILD 1538 __kmp_itt_critical_releasing(lck); 1539 #endif 1540 #if KMP_USE_INLINED_TAS 1541 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1542 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1543 } else 1544 #elif KMP_USE_INLINED_FUTEX 1545 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1546 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1547 } else 1548 #endif 1549 { 1550 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1551 } 1552 } else { 1553 kmp_indirect_lock_t *ilk = 1554 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1555 KMP_ASSERT(ilk != NULL); 1556 lck = ilk->lock; 1557 if (__kmp_env_consistency_check) { 1558 __kmp_pop_sync(global_tid, ct_critical, loc); 1559 } 1560 #if USE_ITT_BUILD 1561 __kmp_itt_critical_releasing(lck); 1562 #endif 1563 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1564 } 1565 1566 #else // KMP_USE_DYNAMIC_LOCK 1567 1568 if ((__kmp_user_lock_kind == lk_tas) && 1569 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1570 lck = (kmp_user_lock_p)crit; 1571 } 1572 #if KMP_USE_FUTEX 1573 else if ((__kmp_user_lock_kind == lk_futex) && 1574 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1575 lck = (kmp_user_lock_p)crit; 1576 } 1577 #endif 1578 else { // ticket, queuing or drdpa 1579 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1580 } 1581 1582 KMP_ASSERT(lck != NULL); 1583 1584 if (__kmp_env_consistency_check) 1585 __kmp_pop_sync(global_tid, ct_critical, loc); 1586 1587 #if USE_ITT_BUILD 1588 __kmp_itt_critical_releasing(lck); 1589 #endif /* USE_ITT_BUILD */ 1590 // Value of 'crit' should be good for using as a critical_id of the critical 1591 // section directive. 1592 __kmp_release_user_lock_with_checks(lck, global_tid); 1593 1594 #endif // KMP_USE_DYNAMIC_LOCK 1595 1596 #if OMPT_SUPPORT && OMPT_OPTIONAL 1597 /* OMPT release event triggers after lock is released; place here to trigger 1598 * for all #if branches */ 1599 OMPT_STORE_RETURN_ADDRESS(global_tid); 1600 if (ompt_enabled.ompt_callback_mutex_released) { 1601 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1602 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1603 OMPT_LOAD_RETURN_ADDRESS(0)); 1604 } 1605 #endif 1606 1607 KMP_POP_PARTITIONED_TIMER(); 1608 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1609 } 1610 1611 /*! 1612 @ingroup SYNCHRONIZATION 1613 @param loc source location information 1614 @param global_tid thread id. 1615 @return one if the thread should execute the master block, zero otherwise 1616 1617 Start execution of a combined barrier and master. The barrier is executed inside 1618 this function. 
1619 */ 1620 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1621 int status; 1622 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1623 __kmp_assert_valid_gtid(global_tid); 1624 1625 if (!TCR_4(__kmp_init_parallel)) 1626 __kmp_parallel_initialize(); 1627 1628 __kmp_resume_if_soft_paused(); 1629 1630 if (__kmp_env_consistency_check) 1631 __kmp_check_barrier(global_tid, ct_barrier, loc); 1632 1633 #if OMPT_SUPPORT 1634 ompt_frame_t *ompt_frame; 1635 if (ompt_enabled.enabled) { 1636 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1637 if (ompt_frame->enter_frame.ptr == NULL) 1638 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1639 } 1640 OMPT_STORE_RETURN_ADDRESS(global_tid); 1641 #endif 1642 #if USE_ITT_NOTIFY 1643 __kmp_threads[global_tid]->th.th_ident = loc; 1644 #endif 1645 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1646 #if OMPT_SUPPORT && OMPT_OPTIONAL 1647 if (ompt_enabled.enabled) { 1648 ompt_frame->enter_frame = ompt_data_none; 1649 } 1650 #endif 1651 1652 return (status != 0) ? 0 : 1; 1653 } 1654 1655 /*! 1656 @ingroup SYNCHRONIZATION 1657 @param loc source location information 1658 @param global_tid thread id. 1659 1660 Complete the execution of a combined barrier and master. This function should 1661 only be called at the completion of the <tt>master</tt> code. Other threads will 1662 still be waiting at the barrier and this call releases them. 1663 */ 1664 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1665 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1666 __kmp_assert_valid_gtid(global_tid); 1667 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1668 } 1669 1670 /*! 1671 @ingroup SYNCHRONIZATION 1672 @param loc source location information 1673 @param global_tid thread id. 1674 @return one if the thread should execute the master block, zero otherwise 1675 1676 Start execution of a combined barrier and master(nowait) construct. 1677 The barrier is executed inside this function. 1678 There is no equivalent "end" function, since the 1679 */ 1680 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1681 kmp_int32 ret; 1682 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1683 __kmp_assert_valid_gtid(global_tid); 1684 1685 if (!TCR_4(__kmp_init_parallel)) 1686 __kmp_parallel_initialize(); 1687 1688 __kmp_resume_if_soft_paused(); 1689 1690 if (__kmp_env_consistency_check) { 1691 if (loc == 0) { 1692 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 
1693 } 1694 __kmp_check_barrier(global_tid, ct_barrier, loc); 1695 } 1696 1697 #if OMPT_SUPPORT 1698 ompt_frame_t *ompt_frame; 1699 if (ompt_enabled.enabled) { 1700 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1701 if (ompt_frame->enter_frame.ptr == NULL) 1702 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1703 } 1704 OMPT_STORE_RETURN_ADDRESS(global_tid); 1705 #endif 1706 #if USE_ITT_NOTIFY 1707 __kmp_threads[global_tid]->th.th_ident = loc; 1708 #endif 1709 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1710 #if OMPT_SUPPORT && OMPT_OPTIONAL 1711 if (ompt_enabled.enabled) { 1712 ompt_frame->enter_frame = ompt_data_none; 1713 } 1714 #endif 1715 1716 ret = __kmpc_master(loc, global_tid); 1717 1718 if (__kmp_env_consistency_check) { 1719 /* there's no __kmpc_end_master called; so the (stats) */ 1720 /* actions of __kmpc_end_master are done here */ 1721 if (ret) { 1722 /* only one thread should do the pop since only */ 1723 /* one did the push (see __kmpc_master()) */ 1724 __kmp_pop_sync(global_tid, ct_master, loc); 1725 } 1726 } 1727 1728 return (ret); 1729 } 1730 1731 /* The BARRIER for a SINGLE process section is always explicit */ 1732 /*! 1733 @ingroup WORK_SHARING 1734 @param loc source location information 1735 @param global_tid global thread number 1736 @return One if this thread should execute the single construct, zero otherwise. 1737 1738 Test whether to execute a <tt>single</tt> construct. 1739 There are no implicit barriers in the two "single" calls, rather the compiler 1740 should introduce an explicit barrier if it is required. 1741 */ 1742 1743 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1744 __kmp_assert_valid_gtid(global_tid); 1745 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1746 1747 if (rc) { 1748 // We are going to execute the single statement, so we should count it. 1749 KMP_COUNT_BLOCK(OMP_SINGLE); 1750 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1751 } 1752 1753 #if OMPT_SUPPORT && OMPT_OPTIONAL 1754 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1755 kmp_team_t *team = this_thr->th.th_team; 1756 int tid = __kmp_tid_from_gtid(global_tid); 1757 1758 if (ompt_enabled.enabled) { 1759 if (rc) { 1760 if (ompt_enabled.ompt_callback_work) { 1761 ompt_callbacks.ompt_callback(ompt_callback_work)( 1762 ompt_work_single_executor, ompt_scope_begin, 1763 &(team->t.ompt_team_info.parallel_data), 1764 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1765 1, OMPT_GET_RETURN_ADDRESS(0)); 1766 } 1767 } else { 1768 if (ompt_enabled.ompt_callback_work) { 1769 ompt_callbacks.ompt_callback(ompt_callback_work)( 1770 ompt_work_single_other, ompt_scope_begin, 1771 &(team->t.ompt_team_info.parallel_data), 1772 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1773 1, OMPT_GET_RETURN_ADDRESS(0)); 1774 ompt_callbacks.ompt_callback(ompt_callback_work)( 1775 ompt_work_single_other, ompt_scope_end, 1776 &(team->t.ompt_team_info.parallel_data), 1777 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1778 1, OMPT_GET_RETURN_ADDRESS(0)); 1779 } 1780 } 1781 } 1782 #endif 1783 1784 return rc; 1785 } 1786 1787 /*! 1788 @ingroup WORK_SHARING 1789 @param loc source location information 1790 @param global_tid global thread number 1791 1792 Mark the end of a <tt>single</tt> construct. This function should 1793 only be called by the thread that executed the block of code protected 1794 by the `single` construct. 
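A minimal sketch of how the two calls pair up is shown below; it is illustrative
only and not the exact sequence any particular compiler emits. <tt>loc</tt> and
<tt>gtid</tt> are assumed to be available, <tt>do_single_work()</tt> is a
hypothetical placeholder, and the trailing barrier is shown explicitly because,
as noted in the documentation of __kmpc_single(), neither call contains an
implicit barrier.
@code
if (__kmpc_single(&loc, gtid)) {
  // Exactly one thread of the team executes the protected block.
  do_single_work();             // hypothetical body of the single construct
  __kmpc_end_single(&loc, gtid);
}
// If the construct has no 'nowait' clause, the compiler emits the barrier
// itself, e.g. via __kmpc_barrier(&loc, gtid).
@endcode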
1795 */ 1796 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1797 __kmp_assert_valid_gtid(global_tid); 1798 __kmp_exit_single(global_tid); 1799 KMP_POP_PARTITIONED_TIMER(); 1800 1801 #if OMPT_SUPPORT && OMPT_OPTIONAL 1802 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1803 kmp_team_t *team = this_thr->th.th_team; 1804 int tid = __kmp_tid_from_gtid(global_tid); 1805 1806 if (ompt_enabled.ompt_callback_work) { 1807 ompt_callbacks.ompt_callback(ompt_callback_work)( 1808 ompt_work_single_executor, ompt_scope_end, 1809 &(team->t.ompt_team_info.parallel_data), 1810 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1811 OMPT_GET_RETURN_ADDRESS(0)); 1812 } 1813 #endif 1814 } 1815 1816 /*! 1817 @ingroup WORK_SHARING 1818 @param loc Source location 1819 @param global_tid Global thread id 1820 1821 Mark the end of a statically scheduled loop. 1822 */ 1823 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1824 KMP_POP_PARTITIONED_TIMER(); 1825 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1826 1827 #if OMPT_SUPPORT && OMPT_OPTIONAL 1828 if (ompt_enabled.ompt_callback_work) { 1829 ompt_work_t ompt_work_type = ompt_work_loop; 1830 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1831 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1832 // Determine workshare type 1833 if (loc != NULL) { 1834 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1835 ompt_work_type = ompt_work_loop; 1836 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1837 ompt_work_type = ompt_work_sections; 1838 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1839 ompt_work_type = ompt_work_distribute; 1840 } else { 1841 // use default set above. 1842 // a warning about this case is provided in __kmpc_for_static_init 1843 } 1844 KMP_DEBUG_ASSERT(ompt_work_type); 1845 } 1846 ompt_callbacks.ompt_callback(ompt_callback_work)( 1847 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1848 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1849 } 1850 #endif 1851 if (__kmp_env_consistency_check) 1852 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1853 } 1854 1855 // User routines which take C-style arguments (call by value) 1856 // different from the Fortran equivalent routines 1857 1858 void ompc_set_num_threads(int arg) { 1859 // !!!!! TODO: check the per-task binding 1860 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1861 } 1862 1863 void ompc_set_dynamic(int flag) { 1864 kmp_info_t *thread; 1865 1866 /* For the thread-private implementation of the internal controls */ 1867 thread = __kmp_entry_thread(); 1868 1869 __kmp_save_internal_controls(thread); 1870 1871 set__dynamic(thread, flag ? true : false); 1872 } 1873 1874 void ompc_set_nested(int flag) { 1875 kmp_info_t *thread; 1876 1877 /* For the thread-private internal controls implementation */ 1878 thread = __kmp_entry_thread(); 1879 1880 __kmp_save_internal_controls(thread); 1881 1882 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1883 } 1884 1885 void ompc_set_max_active_levels(int max_active_levels) { 1886 /* TO DO */ 1887 /* we want per-task implementation of this internal control */ 1888 1889 /* For the per-thread internal controls implementation */ 1890 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1891 } 1892 1893 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1894 // !!!!! 
TODO: check the per-task binding 1895 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1896 } 1897 1898 int ompc_get_ancestor_thread_num(int level) { 1899 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1900 } 1901 1902 int ompc_get_team_size(int level) { 1903 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1904 } 1905 1906 /* OpenMP 5.0 Affinity Format API */ 1907 1908 void ompc_set_affinity_format(char const *format) { 1909 if (!__kmp_init_serial) { 1910 __kmp_serial_initialize(); 1911 } 1912 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1913 format, KMP_STRLEN(format) + 1); 1914 } 1915 1916 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1917 size_t format_size; 1918 if (!__kmp_init_serial) { 1919 __kmp_serial_initialize(); 1920 } 1921 format_size = KMP_STRLEN(__kmp_affinity_format); 1922 if (buffer && size) { 1923 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1924 format_size + 1); 1925 } 1926 return format_size; 1927 } 1928 1929 void ompc_display_affinity(char const *format) { 1930 int gtid; 1931 if (!TCR_4(__kmp_init_middle)) { 1932 __kmp_middle_initialize(); 1933 } 1934 gtid = __kmp_get_gtid(); 1935 __kmp_aux_display_affinity(gtid, format); 1936 } 1937 1938 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1939 char const *format) { 1940 int gtid; 1941 size_t num_required; 1942 kmp_str_buf_t capture_buf; 1943 if (!TCR_4(__kmp_init_middle)) { 1944 __kmp_middle_initialize(); 1945 } 1946 gtid = __kmp_get_gtid(); 1947 __kmp_str_buf_init(&capture_buf); 1948 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1949 if (buffer && buf_size) { 1950 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1951 capture_buf.used + 1); 1952 } 1953 __kmp_str_buf_free(&capture_buf); 1954 return num_required; 1955 } 1956 1957 void kmpc_set_stacksize(int arg) { 1958 // __kmp_aux_set_stacksize initializes the library if needed 1959 __kmp_aux_set_stacksize(arg); 1960 } 1961 1962 void kmpc_set_stacksize_s(size_t arg) { 1963 // __kmp_aux_set_stacksize initializes the library if needed 1964 __kmp_aux_set_stacksize(arg); 1965 } 1966 1967 void kmpc_set_blocktime(int arg) { 1968 int gtid, tid; 1969 kmp_info_t *thread; 1970 1971 gtid = __kmp_entry_gtid(); 1972 tid = __kmp_tid_from_gtid(gtid); 1973 thread = __kmp_thread_from_gtid(gtid); 1974 1975 __kmp_aux_set_blocktime(arg, thread, tid); 1976 } 1977 1978 void kmpc_set_library(int arg) { 1979 // __kmp_user_set_library initializes the library if needed 1980 __kmp_user_set_library((enum library_type)arg); 1981 } 1982 1983 void kmpc_set_defaults(char const *str) { 1984 // __kmp_aux_set_defaults initializes the library if needed 1985 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1986 } 1987 1988 void kmpc_set_disp_num_buffers(int arg) { 1989 // ignore after initialization because some teams have already 1990 // allocated dispatch buffers 1991 if (__kmp_init_serial == 0 && arg > 0) 1992 __kmp_dispatch_num_buffers = arg; 1993 } 1994 1995 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1996 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1997 return -1; 1998 #else 1999 if (!TCR_4(__kmp_init_middle)) { 2000 __kmp_middle_initialize(); 2001 } 2002 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2003 #endif 2004 } 2005 2006 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2007 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2008 return -1; 2009 #else 2010 if (!TCR_4(__kmp_init_middle)) { 2011 __kmp_middle_initialize(); 
2012 } 2013 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2014 #endif 2015 } 2016 2017 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2018 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2019 return -1; 2020 #else 2021 if (!TCR_4(__kmp_init_middle)) { 2022 __kmp_middle_initialize(); 2023 } 2024 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2025 #endif 2026 } 2027 2028 /* -------------------------------------------------------------------------- */ 2029 /*! 2030 @ingroup THREADPRIVATE 2031 @param loc source location information 2032 @param gtid global thread number 2033 @param cpy_size size of the cpy_data buffer 2034 @param cpy_data pointer to data to be copied 2035 @param cpy_func helper function to call for copying data 2036 @param didit flag variable: 1=single thread; 0=not single thread 2037 2038 __kmpc_copyprivate implements the interface for the private data broadcast 2039 needed for the copyprivate clause associated with a single region in an 2040 OpenMP<sup>*</sup> program (both C and Fortran). 2041 All threads participating in the parallel region call this routine. 2042 One of the threads (called the single thread) should have the <tt>didit</tt> 2043 variable set to 1 and all other threads should have that variable set to 0. 2044 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2045 2046 The OpenMP specification forbids the use of nowait on the single region when a 2047 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2048 barrier internally to avoid race conditions, so the code generation for the 2049 single region should avoid generating a barrier after the call to @ref 2050 __kmpc_copyprivate. 2051 2052 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2053 The <tt>loc</tt> parameter is a pointer to source location information. 2054 2055 Internal implementation: The single thread will first copy its descriptor 2056 address (cpy_data) to a team-private location, then the other threads will each 2057 call the function pointed to by the parameter cpy_func, which carries out the 2058 copy by copying the data using the cpy_data buffer. 2059 2060 The cpy_func routine used for the copy and the contents of the data area defined 2061 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2062 to be done. For instance, the cpy_data buffer can hold the actual data to be 2063 copied or it may hold a list of pointers to the data. The cpy_func routine must 2064 interpret the cpy_data buffer appropriately. 2065 2066 The interface to cpy_func is as follows: 2067 @code 2068 void cpy_func( void *destination, void *source ) 2069 @endcode 2070 where void *destination is the cpy_data pointer for the thread being copied to 2071 and void *source is the cpy_data pointer for the thread being copied from. 
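As an illustration only, the fragment below sketches (by hand, not as the output
of any particular compiler) how a <tt>single</tt> construct with a
<tt>copyprivate(x)</tt> clause might be lowered. Here <tt>x</tt> is each
thread's private int, <tt>produce_value()</tt> is a hypothetical placeholder,
and <tt>loc</tt> and <tt>gtid</tt> are assumed to be the source-location
structure and global thread id already available in the outlined region.
@code
static void copy_x(void *dst, void *src) {
  // dst and src are the cpy_data pointers of the destination and source
  // threads; in this sketch cpy_data is simply the address of x.
  *(int *)dst = *(int *)src;
}

// ... executed by every thread in the team ...
kmp_int32 didit = 0;
if (__kmpc_single(&loc, gtid)) {
  x = produce_value(); // hypothetical: only the single thread computes x
  __kmpc_end_single(&loc, gtid);
  didit = 1;
}
__kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_x, didit);
// No extra barrier is generated here: __kmpc_copyprivate synchronizes
// internally, as described above.
@endcode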
2072 */ 2073 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2074 void *cpy_data, void (*cpy_func)(void *, void *), 2075 kmp_int32 didit) { 2076 void **data_ptr; 2077 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2078 __kmp_assert_valid_gtid(gtid); 2079 2080 KMP_MB(); 2081 2082 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2083 2084 if (__kmp_env_consistency_check) { 2085 if (loc == 0) { 2086 KMP_WARNING(ConstructIdentInvalid); 2087 } 2088 } 2089 2090 // ToDo: Optimize the following two barriers into some kind of split barrier 2091 2092 if (didit) 2093 *data_ptr = cpy_data; 2094 2095 #if OMPT_SUPPORT 2096 ompt_frame_t *ompt_frame; 2097 if (ompt_enabled.enabled) { 2098 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2099 if (ompt_frame->enter_frame.ptr == NULL) 2100 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2101 } 2102 OMPT_STORE_RETURN_ADDRESS(gtid); 2103 #endif 2104 /* This barrier is not a barrier region boundary */ 2105 #if USE_ITT_NOTIFY 2106 __kmp_threads[gtid]->th.th_ident = loc; 2107 #endif 2108 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2109 2110 if (!didit) 2111 (*cpy_func)(cpy_data, *data_ptr); 2112 2113 // Consider next barrier a user-visible barrier for barrier region boundaries 2114 // Nesting checks are already handled by the single construct checks 2115 { 2116 #if OMPT_SUPPORT 2117 OMPT_STORE_RETURN_ADDRESS(gtid); 2118 #endif 2119 #if USE_ITT_NOTIFY 2120 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2121 // tasks can overwrite the location) 2122 #endif 2123 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2124 #if OMPT_SUPPORT && OMPT_OPTIONAL 2125 if (ompt_enabled.enabled) { 2126 ompt_frame->enter_frame = ompt_data_none; 2127 } 2128 #endif 2129 } 2130 } 2131 2132 /* -------------------------------------------------------------------------- */ 2133 2134 #define INIT_LOCK __kmp_init_user_lock_with_checks 2135 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2136 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2137 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2138 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2139 #define ACQUIRE_NESTED_LOCK_TIMED \ 2140 __kmp_acquire_nested_user_lock_with_checks_timed 2141 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2142 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2143 #define TEST_LOCK __kmp_test_user_lock_with_checks 2144 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2145 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2146 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2147 2148 // TODO: Make check abort messages use location info & pass it into 2149 // with_checks routines 2150 2151 #if KMP_USE_DYNAMIC_LOCK 2152 2153 // internal lock initializer 2154 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2155 kmp_dyna_lockseq_t seq) { 2156 if (KMP_IS_D_LOCK(seq)) { 2157 KMP_INIT_D_LOCK(lock, seq); 2158 #if USE_ITT_BUILD 2159 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2160 #endif 2161 } else { 2162 KMP_INIT_I_LOCK(lock, seq); 2163 #if USE_ITT_BUILD 2164 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2165 __kmp_itt_lock_creating(ilk->lock, loc); 2166 #endif 2167 } 2168 } 2169 2170 // internal nest lock initializer 2171 static __forceinline void 2172 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2173 kmp_dyna_lockseq_t seq) { 2174 #if KMP_USE_TSX 2175 // Don't have nested lock implementation for speculative locks 2176 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2177 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2178 seq = __kmp_user_lock_seq; 2179 #endif 2180 switch (seq) { 2181 case lockseq_tas: 2182 seq = lockseq_nested_tas; 2183 break; 2184 #if KMP_USE_FUTEX 2185 case lockseq_futex: 2186 seq = lockseq_nested_futex; 2187 break; 2188 #endif 2189 case lockseq_ticket: 2190 seq = lockseq_nested_ticket; 2191 break; 2192 case lockseq_queuing: 2193 seq = lockseq_nested_queuing; 2194 break; 2195 case lockseq_drdpa: 2196 seq = lockseq_nested_drdpa; 2197 break; 2198 default: 2199 seq = lockseq_nested_queuing; 2200 } 2201 KMP_INIT_I_LOCK(lock, seq); 2202 #if USE_ITT_BUILD 2203 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2204 __kmp_itt_lock_creating(ilk->lock, loc); 2205 #endif 2206 } 2207 2208 /* initialize the lock with a hint */ 2209 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2210 uintptr_t hint) { 2211 KMP_DEBUG_ASSERT(__kmp_init_serial); 2212 if (__kmp_env_consistency_check && user_lock == NULL) { 2213 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2214 } 2215 2216 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2217 2218 #if OMPT_SUPPORT && OMPT_OPTIONAL 2219 // This is the case, if called from omp_init_lock_with_hint: 2220 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2221 if (!codeptr) 2222 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2223 if (ompt_enabled.ompt_callback_lock_init) { 2224 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2225 ompt_mutex_lock, (omp_lock_hint_t)hint, 2226 __ompt_get_mutex_impl_type(user_lock), 2227 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2228 } 2229 #endif 2230 } 2231 2232 /* initialize the lock with a hint */ 2233 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2234 void **user_lock, uintptr_t hint) { 2235 KMP_DEBUG_ASSERT(__kmp_init_serial); 2236 if (__kmp_env_consistency_check && user_lock == NULL) { 2237 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2238 } 2239 2240 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2241 2242 #if OMPT_SUPPORT && OMPT_OPTIONAL 2243 // This is the case, if called from omp_init_lock_with_hint: 2244 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2245 if (!codeptr) 2246 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2247 if (ompt_enabled.ompt_callback_lock_init) { 2248 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2249 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2250 __ompt_get_mutex_impl_type(user_lock), 2251 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2252 } 2253 #endif 2254 } 2255 2256 #endif // KMP_USE_DYNAMIC_LOCK 2257 2258 /* initialize the lock */ 2259 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2260 #if KMP_USE_DYNAMIC_LOCK 2261 2262 KMP_DEBUG_ASSERT(__kmp_init_serial); 2263 if (__kmp_env_consistency_check && user_lock == NULL) { 2264 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2265 } 2266 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2267 2268 #if OMPT_SUPPORT && OMPT_OPTIONAL 2269 // This is the case, if called from omp_init_lock_with_hint: 2270 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2271 if (!codeptr) 2272 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2273 if (ompt_enabled.ompt_callback_lock_init) { 2274 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2275 
ompt_mutex_lock, omp_lock_hint_none, 2276 __ompt_get_mutex_impl_type(user_lock), 2277 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2278 } 2279 #endif 2280 2281 #else // KMP_USE_DYNAMIC_LOCK 2282 2283 static char const *const func = "omp_init_lock"; 2284 kmp_user_lock_p lck; 2285 KMP_DEBUG_ASSERT(__kmp_init_serial); 2286 2287 if (__kmp_env_consistency_check) { 2288 if (user_lock == NULL) { 2289 KMP_FATAL(LockIsUninitialized, func); 2290 } 2291 } 2292 2293 KMP_CHECK_USER_LOCK_INIT(); 2294 2295 if ((__kmp_user_lock_kind == lk_tas) && 2296 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2297 lck = (kmp_user_lock_p)user_lock; 2298 } 2299 #if KMP_USE_FUTEX 2300 else if ((__kmp_user_lock_kind == lk_futex) && 2301 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2302 lck = (kmp_user_lock_p)user_lock; 2303 } 2304 #endif 2305 else { 2306 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2307 } 2308 INIT_LOCK(lck); 2309 __kmp_set_user_lock_location(lck, loc); 2310 2311 #if OMPT_SUPPORT && OMPT_OPTIONAL 2312 // This is the case, if called from omp_init_lock_with_hint: 2313 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2314 if (!codeptr) 2315 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2316 if (ompt_enabled.ompt_callback_lock_init) { 2317 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2318 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2319 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2320 } 2321 #endif 2322 2323 #if USE_ITT_BUILD 2324 __kmp_itt_lock_creating(lck); 2325 #endif /* USE_ITT_BUILD */ 2326 2327 #endif // KMP_USE_DYNAMIC_LOCK 2328 } // __kmpc_init_lock 2329 2330 /* initialize the lock */ 2331 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2332 #if KMP_USE_DYNAMIC_LOCK 2333 2334 KMP_DEBUG_ASSERT(__kmp_init_serial); 2335 if (__kmp_env_consistency_check && user_lock == NULL) { 2336 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2337 } 2338 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2339 2340 #if OMPT_SUPPORT && OMPT_OPTIONAL 2341 // This is the case, if called from omp_init_lock_with_hint: 2342 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2343 if (!codeptr) 2344 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2345 if (ompt_enabled.ompt_callback_lock_init) { 2346 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2347 ompt_mutex_nest_lock, omp_lock_hint_none, 2348 __ompt_get_mutex_impl_type(user_lock), 2349 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2350 } 2351 #endif 2352 2353 #else // KMP_USE_DYNAMIC_LOCK 2354 2355 static char const *const func = "omp_init_nest_lock"; 2356 kmp_user_lock_p lck; 2357 KMP_DEBUG_ASSERT(__kmp_init_serial); 2358 2359 if (__kmp_env_consistency_check) { 2360 if (user_lock == NULL) { 2361 KMP_FATAL(LockIsUninitialized, func); 2362 } 2363 } 2364 2365 KMP_CHECK_USER_LOCK_INIT(); 2366 2367 if ((__kmp_user_lock_kind == lk_tas) && 2368 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2369 OMP_NEST_LOCK_T_SIZE)) { 2370 lck = (kmp_user_lock_p)user_lock; 2371 } 2372 #if KMP_USE_FUTEX 2373 else if ((__kmp_user_lock_kind == lk_futex) && 2374 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2375 OMP_NEST_LOCK_T_SIZE)) { 2376 lck = (kmp_user_lock_p)user_lock; 2377 } 2378 #endif 2379 else { 2380 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2381 } 2382 2383 INIT_NESTED_LOCK(lck); 2384 __kmp_set_user_lock_location(lck, loc); 2385 2386 #if OMPT_SUPPORT && OMPT_OPTIONAL 2387 // This is the case, if called from omp_init_lock_with_hint: 2388 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2389 if (!codeptr) 2390 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2391 if (ompt_enabled.ompt_callback_lock_init) { 2392 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2393 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2394 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2395 } 2396 #endif 2397 2398 #if USE_ITT_BUILD 2399 __kmp_itt_lock_creating(lck); 2400 #endif /* USE_ITT_BUILD */ 2401 2402 #endif // KMP_USE_DYNAMIC_LOCK 2403 } // __kmpc_init_nest_lock 2404 2405 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2406 #if KMP_USE_DYNAMIC_LOCK 2407 2408 #if USE_ITT_BUILD 2409 kmp_user_lock_p lck; 2410 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2411 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2412 } else { 2413 lck = (kmp_user_lock_p)user_lock; 2414 } 2415 __kmp_itt_lock_destroyed(lck); 2416 #endif 2417 #if OMPT_SUPPORT && OMPT_OPTIONAL 2418 // This is the case, if called from omp_init_lock_with_hint: 2419 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2420 if (!codeptr) 2421 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2422 if (ompt_enabled.ompt_callback_lock_destroy) { 2423 kmp_user_lock_p lck; 2424 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2425 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2426 } else { 2427 lck = (kmp_user_lock_p)user_lock; 2428 } 2429 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2430 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2431 } 2432 #endif 2433 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2434 #else 2435 kmp_user_lock_p lck; 2436 2437 if ((__kmp_user_lock_kind == lk_tas) && 2438 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2439 lck = (kmp_user_lock_p)user_lock; 2440 } 2441 #if KMP_USE_FUTEX 2442 else if ((__kmp_user_lock_kind == lk_futex) && 2443 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2444 lck = (kmp_user_lock_p)user_lock; 2445 } 2446 #endif 2447 else { 2448 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2449 } 2450 2451 #if OMPT_SUPPORT && OMPT_OPTIONAL 2452 // This is the case, if called from omp_init_lock_with_hint: 2453 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2454 if (!codeptr) 2455 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2456 if (ompt_enabled.ompt_callback_lock_destroy) { 2457 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2458 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2459 } 2460 #endif 2461 2462 #if USE_ITT_BUILD 2463 __kmp_itt_lock_destroyed(lck); 2464 #endif /* USE_ITT_BUILD */ 2465 DESTROY_LOCK(lck); 2466 2467 if ((__kmp_user_lock_kind == lk_tas) && 2468 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2469 ; 2470 } 2471 #if KMP_USE_FUTEX 2472 else if ((__kmp_user_lock_kind == lk_futex) && 2473 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2474 ; 2475 } 2476 #endif 2477 else { 2478 __kmp_user_lock_free(user_lock, gtid, lck); 2479 } 2480 #endif // KMP_USE_DYNAMIC_LOCK 2481 } // __kmpc_destroy_lock 2482 2483 /* destroy the lock */ 2484 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2485 #if KMP_USE_DYNAMIC_LOCK 2486 2487 #if USE_ITT_BUILD 2488 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2489 __kmp_itt_lock_destroyed(ilk->lock); 2490 #endif 2491 #if OMPT_SUPPORT && OMPT_OPTIONAL 2492 // This is the case, if called from omp_init_lock_with_hint: 2493 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2494 if (!codeptr) 2495 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2496 if (ompt_enabled.ompt_callback_lock_destroy) { 2497 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2498 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2499 } 2500 #endif 2501 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2502 2503 #else // KMP_USE_DYNAMIC_LOCK 2504 2505 kmp_user_lock_p lck; 2506 2507 if ((__kmp_user_lock_kind == lk_tas) && 2508 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2509 OMP_NEST_LOCK_T_SIZE)) { 2510 lck = (kmp_user_lock_p)user_lock; 2511 } 2512 #if KMP_USE_FUTEX 2513 else if ((__kmp_user_lock_kind == lk_futex) && 2514 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2515 OMP_NEST_LOCK_T_SIZE)) { 2516 lck = (kmp_user_lock_p)user_lock; 2517 } 2518 #endif 2519 else { 2520 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2521 } 2522 2523 #if OMPT_SUPPORT && OMPT_OPTIONAL 2524 // This is the case, if called from omp_init_lock_with_hint: 2525 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2526 if (!codeptr) 2527 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2528 if (ompt_enabled.ompt_callback_lock_destroy) { 2529 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2530 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2531 } 2532 #endif 2533 2534 #if USE_ITT_BUILD 2535 __kmp_itt_lock_destroyed(lck); 2536 #endif /* USE_ITT_BUILD */ 2537 2538 DESTROY_NESTED_LOCK(lck); 2539 2540 if ((__kmp_user_lock_kind == lk_tas) && 2541 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2542 OMP_NEST_LOCK_T_SIZE)) { 2543 ; 2544 } 2545 #if KMP_USE_FUTEX 2546 else if ((__kmp_user_lock_kind == lk_futex) && 2547 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2548 OMP_NEST_LOCK_T_SIZE)) { 2549 ; 2550 } 2551 #endif 2552 else { 2553 __kmp_user_lock_free(user_lock, gtid, lck); 2554 } 2555 #endif // KMP_USE_DYNAMIC_LOCK 2556 } // __kmpc_destroy_nest_lock 2557 2558 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2559 KMP_COUNT_BLOCK(OMP_set_lock); 2560 #if KMP_USE_DYNAMIC_LOCK 2561 int tag = KMP_EXTRACT_D_TAG(user_lock); 2562 #if USE_ITT_BUILD 2563 __kmp_itt_lock_acquiring( 2564 (kmp_user_lock_p) 2565 user_lock); // itt function will get to the right lock object. 
2566 #endif 2567 #if OMPT_SUPPORT && OMPT_OPTIONAL 2568 // This is the case, if called from omp_init_lock_with_hint: 2569 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2570 if (!codeptr) 2571 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2572 if (ompt_enabled.ompt_callback_mutex_acquire) { 2573 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2574 ompt_mutex_lock, omp_lock_hint_none, 2575 __ompt_get_mutex_impl_type(user_lock), 2576 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2577 } 2578 #endif 2579 #if KMP_USE_INLINED_TAS 2580 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2581 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2582 } else 2583 #elif KMP_USE_INLINED_FUTEX 2584 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2585 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2586 } else 2587 #endif 2588 { 2589 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2590 } 2591 #if USE_ITT_BUILD 2592 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2593 #endif 2594 #if OMPT_SUPPORT && OMPT_OPTIONAL 2595 if (ompt_enabled.ompt_callback_mutex_acquired) { 2596 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2597 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2598 } 2599 #endif 2600 2601 #else // KMP_USE_DYNAMIC_LOCK 2602 2603 kmp_user_lock_p lck; 2604 2605 if ((__kmp_user_lock_kind == lk_tas) && 2606 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2607 lck = (kmp_user_lock_p)user_lock; 2608 } 2609 #if KMP_USE_FUTEX 2610 else if ((__kmp_user_lock_kind == lk_futex) && 2611 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2612 lck = (kmp_user_lock_p)user_lock; 2613 } 2614 #endif 2615 else { 2616 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2617 } 2618 2619 #if USE_ITT_BUILD 2620 __kmp_itt_lock_acquiring(lck); 2621 #endif /* USE_ITT_BUILD */ 2622 #if OMPT_SUPPORT && OMPT_OPTIONAL 2623 // This is the case, if called from omp_init_lock_with_hint: 2624 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2625 if (!codeptr) 2626 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2627 if (ompt_enabled.ompt_callback_mutex_acquire) { 2628 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2629 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2630 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2631 } 2632 #endif 2633 2634 ACQUIRE_LOCK(lck, gtid); 2635 2636 #if USE_ITT_BUILD 2637 __kmp_itt_lock_acquired(lck); 2638 #endif /* USE_ITT_BUILD */ 2639 2640 #if OMPT_SUPPORT && OMPT_OPTIONAL 2641 if (ompt_enabled.ompt_callback_mutex_acquired) { 2642 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2643 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2644 } 2645 #endif 2646 2647 #endif // KMP_USE_DYNAMIC_LOCK 2648 } 2649 2650 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2651 #if KMP_USE_DYNAMIC_LOCK 2652 2653 #if USE_ITT_BUILD 2654 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2655 #endif 2656 #if OMPT_SUPPORT && OMPT_OPTIONAL 2657 // This is the case, if called from omp_init_lock_with_hint: 2658 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2659 if (!codeptr) 2660 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2661 if (ompt_enabled.enabled) { 2662 if (ompt_enabled.ompt_callback_mutex_acquire) { 2663 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2664 ompt_mutex_nest_lock, omp_lock_hint_none, 2665 __ompt_get_mutex_impl_type(user_lock), 2666 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2667 } 2668 } 2669 #endif 2670 int acquire_status = 2671 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2672 (void)acquire_status; 2673 #if USE_ITT_BUILD 2674 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2675 #endif 2676 2677 #if OMPT_SUPPORT && OMPT_OPTIONAL 2678 if (ompt_enabled.enabled) { 2679 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2680 if (ompt_enabled.ompt_callback_mutex_acquired) { 2681 // lock_first 2682 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2683 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2684 codeptr); 2685 } 2686 } else { 2687 if (ompt_enabled.ompt_callback_nest_lock) { 2688 // lock_next 2689 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2690 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2691 } 2692 } 2693 } 2694 #endif 2695 2696 #else // KMP_USE_DYNAMIC_LOCK 2697 int acquire_status; 2698 kmp_user_lock_p lck; 2699 2700 if ((__kmp_user_lock_kind == lk_tas) && 2701 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2702 OMP_NEST_LOCK_T_SIZE)) { 2703 lck = (kmp_user_lock_p)user_lock; 2704 } 2705 #if KMP_USE_FUTEX 2706 else if ((__kmp_user_lock_kind == lk_futex) && 2707 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2708 OMP_NEST_LOCK_T_SIZE)) { 2709 lck = (kmp_user_lock_p)user_lock; 2710 } 2711 #endif 2712 else { 2713 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2714 } 2715 2716 #if USE_ITT_BUILD 2717 __kmp_itt_lock_acquiring(lck); 2718 #endif /* USE_ITT_BUILD */ 2719 #if OMPT_SUPPORT && OMPT_OPTIONAL 2720 // This is the case, if called from omp_init_lock_with_hint: 2721 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2722 if (!codeptr) 2723 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2724 if (ompt_enabled.enabled) { 2725 if (ompt_enabled.ompt_callback_mutex_acquire) { 2726 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2727 ompt_mutex_nest_lock, omp_lock_hint_none, 2728 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2729 codeptr); 2730 } 2731 } 2732 #endif 2733 2734 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2735 2736 #if USE_ITT_BUILD 2737 __kmp_itt_lock_acquired(lck); 2738 #endif /* USE_ITT_BUILD */ 2739 2740 #if OMPT_SUPPORT && OMPT_OPTIONAL 2741 if (ompt_enabled.enabled) { 2742 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2743 if (ompt_enabled.ompt_callback_mutex_acquired) { 2744 // lock_first 2745 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2746 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2747 } 2748 } else { 2749 if (ompt_enabled.ompt_callback_nest_lock) { 2750 // lock_next 2751 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2752 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2753 } 2754 } 2755 } 2756 #endif 2757 2758 #endif // KMP_USE_DYNAMIC_LOCK 2759 } 2760 2761 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2762 #if KMP_USE_DYNAMIC_LOCK 2763 2764 int tag = KMP_EXTRACT_D_TAG(user_lock); 2765 #if USE_ITT_BUILD 2766 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2767 #endif 2768 #if KMP_USE_INLINED_TAS 2769 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2770 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2771 } else 2772 #elif KMP_USE_INLINED_FUTEX 2773 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2774 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2775 } else 2776 #endif 2777 { 2778 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2779 } 2780 2781 #if OMPT_SUPPORT && OMPT_OPTIONAL 2782 // This is the case, if called from omp_init_lock_with_hint: 2783 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2784 if (!codeptr) 2785 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2786 if (ompt_enabled.ompt_callback_mutex_released) { 2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2788 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2789 } 2790 #endif 2791 2792 #else // KMP_USE_DYNAMIC_LOCK 2793 2794 kmp_user_lock_p lck; 2795 2796 /* Can't use serial interval since not block structured */ 2797 /* release the lock */ 2798 2799 if ((__kmp_user_lock_kind == lk_tas) && 2800 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2801 #if KMP_OS_LINUX && \ 2802 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2803 // "fast" path implemented to fix customer performance issue 2804 #if USE_ITT_BUILD 2805 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2806 #endif /* USE_ITT_BUILD */ 2807 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2808 KMP_MB(); 2809 2810 #if OMPT_SUPPORT && OMPT_OPTIONAL 2811 // This is the case, if called from omp_init_lock_with_hint: 2812 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2813 if (!codeptr) 2814 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2815 if (ompt_enabled.ompt_callback_mutex_released) { 2816 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2817 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2818 } 2819 #endif 2820 2821 return; 2822 #else 2823 lck = (kmp_user_lock_p)user_lock; 2824 #endif 2825 } 2826 #if KMP_USE_FUTEX 2827 else if ((__kmp_user_lock_kind == lk_futex) && 2828 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2829 lck = (kmp_user_lock_p)user_lock; 2830 } 2831 #endif 2832 else { 2833 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2834 } 2835 2836 #if USE_ITT_BUILD 2837 __kmp_itt_lock_releasing(lck); 2838 #endif /* USE_ITT_BUILD */ 2839 2840 RELEASE_LOCK(lck, gtid); 2841 2842 #if OMPT_SUPPORT && OMPT_OPTIONAL 2843 // This is the case, if called from omp_init_lock_with_hint: 2844 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2845 if (!codeptr) 2846 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2847 if (ompt_enabled.ompt_callback_mutex_released) { 2848 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2849 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2850 } 2851 #endif 2852 2853 #endif // KMP_USE_DYNAMIC_LOCK 2854 } 2855 2856 /* release the lock */ 2857 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2858 #if KMP_USE_DYNAMIC_LOCK 2859 2860 #if USE_ITT_BUILD 2861 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2862 #endif 2863 int release_status = 2864 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2865 (void)release_status; 2866 2867 #if OMPT_SUPPORT && OMPT_OPTIONAL 2868 // This is the case, if called from omp_init_lock_with_hint: 2869 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2870 if (!codeptr) 2871 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2872 if (ompt_enabled.enabled) { 2873 if (release_status == KMP_LOCK_RELEASED) { 2874 if (ompt_enabled.ompt_callback_mutex_released) { 2875 // release_lock_last 2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2877 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2878 codeptr); 2879 } 2880 } else if (ompt_enabled.ompt_callback_nest_lock) { 2881 // release_lock_prev 2882 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2883 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2884 } 2885 } 2886 #endif 2887 2888 #else // KMP_USE_DYNAMIC_LOCK 2889 2890 kmp_user_lock_p lck; 
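  // If the nestable TAS (or futex) lock fits inside the omp_nest_lock_t
  // object, the user lock storage is used in place; otherwise the real lock
  // object is obtained through __kmp_lookup_user_lock().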
2891 2892 /* Can't use serial interval since not block structured */ 2893 2894 if ((__kmp_user_lock_kind == lk_tas) && 2895 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2896 OMP_NEST_LOCK_T_SIZE)) { 2897 #if KMP_OS_LINUX && \ 2898 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2899 // "fast" path implemented to fix customer performance issue 2900 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2901 #if USE_ITT_BUILD 2902 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2903 #endif /* USE_ITT_BUILD */ 2904 2905 #if OMPT_SUPPORT && OMPT_OPTIONAL 2906 int release_status = KMP_LOCK_STILL_HELD; 2907 #endif 2908 2909 if (--(tl->lk.depth_locked) == 0) { 2910 TCW_4(tl->lk.poll, 0); 2911 #if OMPT_SUPPORT && OMPT_OPTIONAL 2912 release_status = KMP_LOCK_RELEASED; 2913 #endif 2914 } 2915 KMP_MB(); 2916 2917 #if OMPT_SUPPORT && OMPT_OPTIONAL 2918 // This is the case, if called from omp_init_lock_with_hint: 2919 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2920 if (!codeptr) 2921 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2922 if (ompt_enabled.enabled) { 2923 if (release_status == KMP_LOCK_RELEASED) { 2924 if (ompt_enabled.ompt_callback_mutex_released) { 2925 // release_lock_last 2926 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2927 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2928 } 2929 } else if (ompt_enabled.ompt_callback_nest_lock) { 2930 // release_lock_previous 2931 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2932 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2933 } 2934 } 2935 #endif 2936 2937 return; 2938 #else 2939 lck = (kmp_user_lock_p)user_lock; 2940 #endif 2941 } 2942 #if KMP_USE_FUTEX 2943 else if ((__kmp_user_lock_kind == lk_futex) && 2944 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2945 OMP_NEST_LOCK_T_SIZE)) { 2946 lck = (kmp_user_lock_p)user_lock; 2947 } 2948 #endif 2949 else { 2950 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2951 } 2952 2953 #if USE_ITT_BUILD 2954 __kmp_itt_lock_releasing(lck); 2955 #endif /* USE_ITT_BUILD */ 2956 2957 int release_status; 2958 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2959 #if OMPT_SUPPORT && OMPT_OPTIONAL 2960 // This is the case, if called from omp_init_lock_with_hint: 2961 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2962 if (!codeptr) 2963 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2964 if (ompt_enabled.enabled) { 2965 if (release_status == KMP_LOCK_RELEASED) { 2966 if (ompt_enabled.ompt_callback_mutex_released) { 2967 // release_lock_last 2968 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2969 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2970 } 2971 } else if (ompt_enabled.ompt_callback_nest_lock) { 2972 // release_lock_previous 2973 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2974 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2975 } 2976 } 2977 #endif 2978 2979 #endif // KMP_USE_DYNAMIC_LOCK 2980 } 2981 2982 /* try to acquire the lock */ 2983 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2984 KMP_COUNT_BLOCK(OMP_test_lock); 2985 2986 #if KMP_USE_DYNAMIC_LOCK 2987 int rc; 2988 int tag = KMP_EXTRACT_D_TAG(user_lock); 2989 #if USE_ITT_BUILD 2990 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2991 #endif 2992 #if OMPT_SUPPORT && OMPT_OPTIONAL 2993 // This is the case, if called from omp_init_lock_with_hint: 2994 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2995 if (!codeptr) 2996 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2997 if (ompt_enabled.ompt_callback_mutex_acquire) { 2998 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2999 ompt_mutex_lock, omp_lock_hint_none, 3000 __ompt_get_mutex_impl_type(user_lock), 3001 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3002 } 3003 #endif 3004 #if KMP_USE_INLINED_TAS 3005 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3006 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3007 } else 3008 #elif KMP_USE_INLINED_FUTEX 3009 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3010 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3011 } else 3012 #endif 3013 { 3014 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3015 } 3016 if (rc) { 3017 #if USE_ITT_BUILD 3018 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3019 #endif 3020 #if OMPT_SUPPORT && OMPT_OPTIONAL 3021 if (ompt_enabled.ompt_callback_mutex_acquired) { 3022 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3023 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3024 } 3025 #endif 3026 return FTN_TRUE; 3027 } else { 3028 #if USE_ITT_BUILD 3029 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3030 #endif 3031 return FTN_FALSE; 3032 } 3033 3034 #else // KMP_USE_DYNAMIC_LOCK 3035 3036 kmp_user_lock_p lck; 3037 int rc; 3038 3039 if ((__kmp_user_lock_kind == lk_tas) && 3040 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3041 lck = (kmp_user_lock_p)user_lock; 3042 } 3043 #if KMP_USE_FUTEX 3044 else if ((__kmp_user_lock_kind == lk_futex) && 3045 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3046 lck = (kmp_user_lock_p)user_lock; 3047 } 3048 #endif 3049 else { 3050 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3051 } 3052 3053 #if USE_ITT_BUILD 3054 __kmp_itt_lock_acquiring(lck); 3055 #endif /* USE_ITT_BUILD */ 3056 #if OMPT_SUPPORT && OMPT_OPTIONAL 3057 // This is the case, if called from omp_init_lock_with_hint: 3058 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3059 if (!codeptr) 3060 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3061 if (ompt_enabled.ompt_callback_mutex_acquire) { 3062 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3063 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3064 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3065 } 3066 #endif 3067 3068 rc = TEST_LOCK(lck, gtid); 3069 #if USE_ITT_BUILD 3070 if (rc) { 3071 __kmp_itt_lock_acquired(lck); 3072 } else { 3073 __kmp_itt_lock_cancelled(lck); 3074 } 3075 #endif /* USE_ITT_BUILD */ 3076 #if OMPT_SUPPORT && OMPT_OPTIONAL 3077 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3078 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3079 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3080 } 3081 #endif 3082 3083 return (rc ? 
              FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next 3195 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3196 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3197 } 3198 } 3199 } 3200 #endif 3201 return rc; 3202 3203 /* Can't use serial interval since not block structured */ 3204 3205 #endif // KMP_USE_DYNAMIC_LOCK 3206 } 3207 3208 // Interface to fast scalable reduce methods routines 3209 3210 // keep the selected method in a thread local structure for cross-function 3211 // usage: will be used in __kmpc_end_reduce* functions; 3212 // another solution: to re-determine the method one more time in 3213 // __kmpc_end_reduce* functions (new prototype required then) 3214 // AT: which solution is better? 3215 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3216 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3217 3218 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3219 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3220 3221 // description of the packed_reduction_method variable: look at the macros in 3222 // kmp.h 3223 3224 // used in a critical section reduce block 3225 static __forceinline void 3226 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3227 kmp_critical_name *crit) { 3228 3229 // this lock was visible to a customer and to the threading profile tool as a 3230 // serial overhead span (although it's used for an internal purpose only) 3231 // why was it visible in previous implementation? 3232 // should we keep it visible in new reduce block? 3233 kmp_user_lock_p lck; 3234 3235 #if KMP_USE_DYNAMIC_LOCK 3236 3237 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3238 // Check if it is initialized. 3239 if (*lk == 0) { 3240 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3241 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3242 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3243 } else { 3244 __kmp_init_indirect_csptr(crit, loc, global_tid, 3245 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3246 } 3247 } 3248 // Branch for accessing the actual lock object and set operation. This 3249 // branching is inevitable since this lock initialization does not follow the 3250 // normal dispatch path (lock table is not used). 3251 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3252 lck = (kmp_user_lock_p)lk; 3253 KMP_DEBUG_ASSERT(lck != NULL); 3254 if (__kmp_env_consistency_check) { 3255 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3256 } 3257 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3258 } else { 3259 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3260 lck = ilk->lock; 3261 KMP_DEBUG_ASSERT(lck != NULL); 3262 if (__kmp_env_consistency_check) { 3263 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3264 } 3265 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3266 } 3267 3268 #else // KMP_USE_DYNAMIC_LOCK 3269 3270 // We know that the fast reduction code is only emitted by Intel compilers 3271 // with 32 byte critical sections. If there isn't enough space, then we 3272 // have to use a pointer. 
3273 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3274 lck = (kmp_user_lock_p)crit; 3275 } else { 3276 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3277 } 3278 KMP_DEBUG_ASSERT(lck != NULL); 3279 3280 if (__kmp_env_consistency_check) 3281 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3282 3283 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3284 3285 #endif // KMP_USE_DYNAMIC_LOCK 3286 } 3287 3288 // used in a critical section reduce block 3289 static __forceinline void 3290 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3291 kmp_critical_name *crit) { 3292 3293 kmp_user_lock_p lck; 3294 3295 #if KMP_USE_DYNAMIC_LOCK 3296 3297 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3298 lck = (kmp_user_lock_p)crit; 3299 if (__kmp_env_consistency_check) 3300 __kmp_pop_sync(global_tid, ct_critical, loc); 3301 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3302 } else { 3303 kmp_indirect_lock_t *ilk = 3304 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3305 if (__kmp_env_consistency_check) 3306 __kmp_pop_sync(global_tid, ct_critical, loc); 3307 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3308 } 3309 3310 #else // KMP_USE_DYNAMIC_LOCK 3311 3312 // We know that the fast reduction code is only emitted by Intel compilers 3313 // with 32 byte critical sections. If there isn't enough space, then we have 3314 // to use a pointer. 3315 if (__kmp_base_user_lock_size > 32) { 3316 lck = *((kmp_user_lock_p *)crit); 3317 KMP_ASSERT(lck != NULL); 3318 } else { 3319 lck = (kmp_user_lock_p)crit; 3320 } 3321 3322 if (__kmp_env_consistency_check) 3323 __kmp_pop_sync(global_tid, ct_critical, loc); 3324 3325 __kmp_release_user_lock_with_checks(lck, global_tid); 3326 3327 #endif // KMP_USE_DYNAMIC_LOCK 3328 } // __kmp_end_critical_section_reduce_block 3329 3330 static __forceinline int 3331 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3332 int *task_state) { 3333 kmp_team_t *team; 3334 3335 // Check if we are inside the teams construct? 3336 if (th->th.th_teams_microtask) { 3337 *team_p = team = th->th.th_team; 3338 if (team->t.t_level == th->th.th_teams_level) { 3339 // This is reduction at teams construct. 3340 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3341 // Let's swap teams temporarily for the reduction. 3342 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3343 th->th.th_team = team->t.t_parent; 3344 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3345 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3346 *task_state = th->th.th_task_state; 3347 th->th.th_task_state = 0; 3348 3349 return 1; 3350 } 3351 } 3352 return 0; 3353 } 3354 3355 static __forceinline void 3356 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3357 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3358 th->th.th_info.ds.ds_tid = 0; 3359 th->th.th_team = team; 3360 th->th.th_team_nproc = team->t.t_nproc; 3361 th->th.th_task_team = team->t.t_task_team[task_state]; 3362 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3363 } 3364 3365 /* 2.a.i. Reduce Block without a terminating barrier */ 3366 /*! 
3367 @ingroup SYNCHRONIZATION 3368 @param loc source location information 3369 @param global_tid global thread number 3370 @param num_vars number of items (variables) to be reduced 3371 @param reduce_size size of data in bytes to be reduced 3372 @param reduce_data pointer to data to be reduced 3373 @param reduce_func callback function providing reduction operation on two 3374 operands and returning result of reduction in lhs_data 3375 @param lck pointer to the unique lock data structure 3376 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3377 threads if atomic reduction needed 3378 3379 The nowait version is used for a reduce clause with the nowait argument. 3380 */ 3381 kmp_int32 3382 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3383 size_t reduce_size, void *reduce_data, 3384 void (*reduce_func)(void *lhs_data, void *rhs_data), 3385 kmp_critical_name *lck) { 3386 3387 KMP_COUNT_BLOCK(REDUCE_nowait); 3388 int retval = 0; 3389 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3390 kmp_info_t *th; 3391 kmp_team_t *team; 3392 int teams_swapped = 0, task_state; 3393 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3394 __kmp_assert_valid_gtid(global_tid); 3395 3396 // why do we need this initialization here at all? 3397 // Reduction clause can not be used as a stand-alone directive. 3398 3399 // do not call __kmp_serial_initialize(), it will be called by 3400 // __kmp_parallel_initialize() if needed 3401 // possible detection of false-positive race by the threadchecker ??? 3402 if (!TCR_4(__kmp_init_parallel)) 3403 __kmp_parallel_initialize(); 3404 3405 __kmp_resume_if_soft_paused(); 3406 3407 // check correctness of reduce block nesting 3408 #if KMP_USE_DYNAMIC_LOCK 3409 if (__kmp_env_consistency_check) 3410 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3411 #else 3412 if (__kmp_env_consistency_check) 3413 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3414 #endif 3415 3416 th = __kmp_thread_from_gtid(global_tid); 3417 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3418 3419 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3420 // the value should be kept in a variable 3421 // the variable should be either a construct-specific or thread-specific 3422 // property, not a team specific property 3423 // (a thread can reach the next reduce block on the next construct, reduce 3424 // method may differ on the next construct) 3425 // an ident_t "loc" parameter could be used as a construct-specific property 3426 // (what if loc == 0?) 
3427 // (if both construct-specific and team-specific variables were shared,
3428 // then unnecessary extra syncs would be needed)
3429 // a thread-specific variable is better regarding the two issues above (next
3430 // construct and extra syncs)
3431 // a thread-specific "th_local.reduction_method" variable is used currently
3432 // each thread executes 'determine' and 'set' lines (no need to execute by one
3433 // thread, to avoid unnecessary extra syncs)
3434
3435   packed_reduction_method = __kmp_determine_reduction_method(
3436       loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3437   __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3438
3439   OMPT_REDUCTION_DECL(th, global_tid);
3440   if (packed_reduction_method == critical_reduce_block) {
3441
3442     OMPT_REDUCTION_BEGIN;
3443
3444     __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3445     retval = 1;
3446
3447   } else if (packed_reduction_method == empty_reduce_block) {
3448
3449     OMPT_REDUCTION_BEGIN;
3450
3451     // usage: if team size == 1, no synchronization is required (Intel
3452     // platforms only)
3453     retval = 1;
3454
3455   } else if (packed_reduction_method == atomic_reduce_block) {
3456
3457     retval = 2;
3458
3459     // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3460     // won't be called by the code gen)
3461     // (this is not ideal: the checking block has already been closed by this
3462     // 'pop',
3463     // but the atomic operation has not been executed yet; it will execute
3464     // slightly later, literally on the next instruction)
3465     if (__kmp_env_consistency_check)
3466       __kmp_pop_sync(global_tid, ct_reduce, loc);
3467
3468   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3469                                    tree_reduce_block)) {
3470
3471     // AT: performance issue: a real barrier here
3472     // AT: (if the master is slow, other threads are blocked here waiting for
3473     // the master to come and release them)
3474     // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3475     // AT: (specifying NOWAIT won't result in improved performance; it'll just
3476     // be confusing to a customer)
3477     // AT: another implementation of *barrier_gather*nowait() (or some other
3478     // design) might be faster and more in line with the sense of NOWAIT
3479     // AT: TODO: run the EPCC benchmark and compare times
3480
3481     // this barrier should be invisible to a customer and to the threading profile
3482     // tool (it's neither a terminating barrier nor customer's code; it's
3483     // used for an internal purpose)
3484 #if OMPT_SUPPORT
3485     // JP: can this barrier potentially lead to task scheduling?
3486     // JP: as long as there is a barrier in the implementation, OMPT should and
3487     // will provide the barrier events,
3488     // so we set up the necessary frame/return addresses.
3489     ompt_frame_t *ompt_frame;
3490     if (ompt_enabled.enabled) {
3491       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3492       if (ompt_frame->enter_frame.ptr == NULL)
3493         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3494     }
3495     OMPT_STORE_RETURN_ADDRESS(global_tid);
3496 #endif
3497 #if USE_ITT_NOTIFY
3498     __kmp_threads[global_tid]->th.th_ident = loc;
3499 #endif
3500     retval =
3501         __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3502                       global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3503     retval = (retval != 0) ? (0) : (1);
3504 #if OMPT_SUPPORT && OMPT_OPTIONAL
3505     if (ompt_enabled.enabled) {
3506       ompt_frame->enter_frame = ompt_data_none;
3507     }
3508 #endif
3509
3510     // all other workers except master should do this pop here
3511     // (none of the other workers will get to __kmpc_end_reduce_nowait())
3512     if (__kmp_env_consistency_check) {
3513       if (retval == 0) {
3514         __kmp_pop_sync(global_tid, ct_reduce, loc);
3515       }
3516     }
3517
3518   } else {
3519
3520     // should never reach this block
3521     KMP_ASSERT(0); // "unexpected method"
3522   }
3523   if (teams_swapped) {
3524     __kmp_restore_swapped_teams(th, team, task_state);
3525   }
3526   KA_TRACE(
3527       10,
3528       ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3529        global_tid, packed_reduction_method, retval));
3530
3531   return retval;
3532 }
3533
3534 /*!
3535 @ingroup SYNCHRONIZATION
3536 @param loc source location information
3537 @param global_tid global thread id.
3538 @param lck pointer to the unique lock data structure
3539
3540 Finish the execution of a reduce nowait.
3541 */
3542 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3543                               kmp_critical_name *lck) {
3544
3545   PACKED_REDUCTION_METHOD_T packed_reduction_method;
3546
3547   KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3548   __kmp_assert_valid_gtid(global_tid);
3549
3550   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3551
3552   OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3553
3554   if (packed_reduction_method == critical_reduce_block) {
3555
3556     __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3557     OMPT_REDUCTION_END;
3558
3559   } else if (packed_reduction_method == empty_reduce_block) {
3560
3561     // usage: if team size == 1, no synchronization is required (on Intel
3562     // platforms only)
3563
3564     OMPT_REDUCTION_END;
3565
3566   } else if (packed_reduction_method == atomic_reduce_block) {
3567
3568     // neither master nor other workers should get here
3569     // (code gen does not generate this call in case 2: atomic reduce block)
3570     // it would actually be better to remove this else-if entirely; after its
3571     // removal this value would be caught by the 'else' branch and assert
3572
3573   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3574                                    tree_reduce_block)) {
3575
3576     // only master gets here
3577     // OMPT: tree reduction is annotated in the barrier code
3578
3579   } else {
3580
3581     // should never reach this block
3582     KMP_ASSERT(0); // "unexpected method"
3583   }
3584
3585   if (__kmp_env_consistency_check)
3586     __kmp_pop_sync(global_tid, ct_reduce, loc);
3587
3588   KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3589                 global_tid, packed_reduction_method));
3590
3591   return;
3592 }
3593
3594 /* 2.a.ii. Reduce Block with a terminating barrier */
3595
3596 /*!
3597 @ingroup SYNCHRONIZATION
3598 @param loc source location information
3599 @param global_tid global thread number
3600 @param num_vars number of items (variables) to be reduced
3601 @param reduce_size size of data in bytes to be reduced
3602 @param reduce_data pointer to data to be reduced
3603 @param reduce_func callback function providing reduction operation on two
3604 operands and returning result of reduction in lhs_data
3605 @param lck pointer to the unique lock data structure
3606 @result 1 for the master thread, 0 for all other team threads, 2 for all team
3607 threads if atomic reduction needed
3608
3609 A blocking reduce that includes an implicit barrier.
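
As a rough, hedged sketch of how this interface is typically used (illustrative
only, not the exact lowering of any particular compiler; <tt>sum</tt>,
<tt>sum_local</tt>, <tt>reduce_func</tt>, <tt>loc</tt>, <tt>gtid</tt> and
<tt>crit</tt> are assumed names), code generated for a <tt>reduction(+:sum)</tt>
clause may dispatch on the return value like this:

@code
double sum_local = 0.0; // thread-private partial result
// ... loop body accumulates into sum_local ...
switch (__kmpc_reduce(&loc, gtid, 1, sizeof(double), &sum_local, reduce_func,
                      &crit)) {
case 1: // combine serially (possibly inside a critical section), then finish
  sum += sum_local;
  __kmpc_end_reduce(&loc, gtid, &crit);
  break;
case 2: // atomic path: every thread combines with an atomic update
  // (the addition below is meant to be performed as an atomic add)
  sum += sum_local;
  __kmpc_end_reduce(&loc, gtid, &crit);
  break;
default: // 0: the runtime already combined this thread's contribution
  break;
}
@endcode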
3610 */ 3611 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3612 size_t reduce_size, void *reduce_data, 3613 void (*reduce_func)(void *lhs_data, void *rhs_data), 3614 kmp_critical_name *lck) { 3615 KMP_COUNT_BLOCK(REDUCE_wait); 3616 int retval = 0; 3617 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3618 kmp_info_t *th; 3619 kmp_team_t *team; 3620 int teams_swapped = 0, task_state; 3621 3622 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3623 __kmp_assert_valid_gtid(global_tid); 3624 3625 // why do we need this initialization here at all? 3626 // Reduction clause can not be a stand-alone directive. 3627 3628 // do not call __kmp_serial_initialize(), it will be called by 3629 // __kmp_parallel_initialize() if needed 3630 // possible detection of false-positive race by the threadchecker ??? 3631 if (!TCR_4(__kmp_init_parallel)) 3632 __kmp_parallel_initialize(); 3633 3634 __kmp_resume_if_soft_paused(); 3635 3636 // check correctness of reduce block nesting 3637 #if KMP_USE_DYNAMIC_LOCK 3638 if (__kmp_env_consistency_check) 3639 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3640 #else 3641 if (__kmp_env_consistency_check) 3642 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3643 #endif 3644 3645 th = __kmp_thread_from_gtid(global_tid); 3646 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3647 3648 packed_reduction_method = __kmp_determine_reduction_method( 3649 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3650 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3651 3652 OMPT_REDUCTION_DECL(th, global_tid); 3653 3654 if (packed_reduction_method == critical_reduce_block) { 3655 3656 OMPT_REDUCTION_BEGIN; 3657 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3658 retval = 1; 3659 3660 } else if (packed_reduction_method == empty_reduce_block) { 3661 3662 OMPT_REDUCTION_BEGIN; 3663 // usage: if team size == 1, no synchronization is required ( Intel 3664 // platforms only ) 3665 retval = 1; 3666 3667 } else if (packed_reduction_method == atomic_reduce_block) { 3668 3669 retval = 2; 3670 3671 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3672 tree_reduce_block)) { 3673 3674 // case tree_reduce_block: 3675 // this barrier should be visible to a customer and to the threading profile 3676 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3677 #if OMPT_SUPPORT 3678 ompt_frame_t *ompt_frame; 3679 if (ompt_enabled.enabled) { 3680 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3681 if (ompt_frame->enter_frame.ptr == NULL) 3682 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3683 } 3684 OMPT_STORE_RETURN_ADDRESS(global_tid); 3685 #endif 3686 #if USE_ITT_NOTIFY 3687 __kmp_threads[global_tid]->th.th_ident = 3688 loc; // needed for correct notification of frames 3689 #endif 3690 retval = 3691 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3692 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3693 retval = (retval != 0) ? 
(0) : (1); 3694 #if OMPT_SUPPORT && OMPT_OPTIONAL 3695 if (ompt_enabled.enabled) { 3696 ompt_frame->enter_frame = ompt_data_none; 3697 } 3698 #endif 3699 3700 // all other workers except master should do this pop here 3701 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3702 if (__kmp_env_consistency_check) { 3703 if (retval == 0) { // 0: all other workers; 1: master 3704 __kmp_pop_sync(global_tid, ct_reduce, loc); 3705 } 3706 } 3707 3708 } else { 3709 3710 // should never reach this block 3711 KMP_ASSERT(0); // "unexpected method" 3712 } 3713 if (teams_swapped) { 3714 __kmp_restore_swapped_teams(th, team, task_state); 3715 } 3716 3717 KA_TRACE(10, 3718 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3719 global_tid, packed_reduction_method, retval)); 3720 return retval; 3721 } 3722 3723 /*! 3724 @ingroup SYNCHRONIZATION 3725 @param loc source location information 3726 @param global_tid global thread id. 3727 @param lck pointer to the unique lock data structure 3728 3729 Finish the execution of a blocking reduce. 3730 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3731 start function. 3732 */ 3733 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3734 kmp_critical_name *lck) { 3735 3736 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3737 kmp_info_t *th; 3738 kmp_team_t *team; 3739 int teams_swapped = 0, task_state; 3740 3741 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3742 __kmp_assert_valid_gtid(global_tid); 3743 3744 th = __kmp_thread_from_gtid(global_tid); 3745 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3746 3747 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3748 3749 // this barrier should be visible to a customer and to the threading profile 3750 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3751 OMPT_REDUCTION_DECL(th, global_tid); 3752 3753 if (packed_reduction_method == critical_reduce_block) { 3754 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3755 3756 OMPT_REDUCTION_END; 3757 3758 // TODO: implicit barrier: should be exposed 3759 #if OMPT_SUPPORT 3760 ompt_frame_t *ompt_frame; 3761 if (ompt_enabled.enabled) { 3762 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3763 if (ompt_frame->enter_frame.ptr == NULL) 3764 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3765 } 3766 OMPT_STORE_RETURN_ADDRESS(global_tid); 3767 #endif 3768 #if USE_ITT_NOTIFY 3769 __kmp_threads[global_tid]->th.th_ident = loc; 3770 #endif 3771 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3772 #if OMPT_SUPPORT && OMPT_OPTIONAL 3773 if (ompt_enabled.enabled) { 3774 ompt_frame->enter_frame = ompt_data_none; 3775 } 3776 #endif 3777 3778 } else if (packed_reduction_method == empty_reduce_block) { 3779 3780 OMPT_REDUCTION_END; 3781 3782 // usage: if team size==1, no synchronization is required (Intel platforms only) 3783 3784 // TODO: implicit barrier: should be exposed 3785 #if OMPT_SUPPORT 3786 ompt_frame_t *ompt_frame; 3787 if (ompt_enabled.enabled) { 3788 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3789 if (ompt_frame->enter_frame.ptr == NULL) 3790 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3791 } 3792 OMPT_STORE_RETURN_ADDRESS(global_tid); 3793 #endif 3794 #if USE_ITT_NOTIFY 3795 __kmp_threads[global_tid]->th.th_ident = loc; 3796 #endif 3797 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 
3798 #if OMPT_SUPPORT && OMPT_OPTIONAL
3799     if (ompt_enabled.enabled) {
3800       ompt_frame->enter_frame = ompt_data_none;
3801     }
3802 #endif
3803
3804   } else if (packed_reduction_method == atomic_reduce_block) {
3805
3806 #if OMPT_SUPPORT
3807     ompt_frame_t *ompt_frame;
3808     if (ompt_enabled.enabled) {
3809       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3810       if (ompt_frame->enter_frame.ptr == NULL)
3811         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3812     }
3813     OMPT_STORE_RETURN_ADDRESS(global_tid);
3814 #endif
3815     // TODO: implicit barrier: should be exposed
3816 #if USE_ITT_NOTIFY
3817     __kmp_threads[global_tid]->th.th_ident = loc;
3818 #endif
3819     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3820 #if OMPT_SUPPORT && OMPT_OPTIONAL
3821     if (ompt_enabled.enabled) {
3822       ompt_frame->enter_frame = ompt_data_none;
3823     }
3824 #endif
3825
3826   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3827                                    tree_reduce_block)) {
3828
3829     // only master executes here (master releases all other workers)
3830     __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3831                             global_tid);
3832
3833   } else {
3834
3835     // should never reach this block
3836     KMP_ASSERT(0); // "unexpected method"
3837   }
3838   if (teams_swapped) {
3839     __kmp_restore_swapped_teams(th, team, task_state);
3840   }
3841
3842   if (__kmp_env_consistency_check)
3843     __kmp_pop_sync(global_tid, ct_reduce, loc);
3844
3845   KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3846                 global_tid, packed_reduction_method));
3847
3848   return;
3849 }
3850
3851 #undef __KMP_GET_REDUCTION_METHOD
3852 #undef __KMP_SET_REDUCTION_METHOD
3853
3854 /* end of interface to fast scalable reduce routines */
3855
3856 kmp_uint64 __kmpc_get_taskid() {
3857
3858   kmp_int32 gtid;
3859   kmp_info_t *thread;
3860
3861   gtid = __kmp_get_gtid();
3862   if (gtid < 0) {
3863     return 0;
3864   }
3865   thread = __kmp_thread_from_gtid(gtid);
3866   return thread->th.th_current_task->td_task_id;
3867
3868 } // __kmpc_get_taskid
3869
3870 kmp_uint64 __kmpc_get_parent_taskid() {
3871
3872   kmp_int32 gtid;
3873   kmp_info_t *thread;
3874   kmp_taskdata_t *parent_task;
3875
3876   gtid = __kmp_get_gtid();
3877   if (gtid < 0) {
3878     return 0;
3879   }
3880   thread = __kmp_thread_from_gtid(gtid);
3881   parent_task = thread->th.th_current_task->td_parent;
3882   return (parent_task == NULL ? 0 : parent_task->td_task_id);
3883
3884 } // __kmpc_get_parent_taskid
3885
3886 /*!
3887 @ingroup WORK_SHARING
3888 @param loc source location information.
3889 @param gtid global thread number.
3890 @param num_dims number of associated doacross loops.
3891 @param dims info on the loop bounds of each dimension.
3892
3893 Initialize doacross loop information.
3894 The compiler is expected to send us inclusive bounds,
3895 e.g. for(i=2;i<9;i+=2) gives lo=2, up=8, st=2.
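
As a hedged sketch (illustrative only; the chunk bounds <tt>lb</tt>/<tt>ub</tt>
and the exact lowering are assumptions, not the code generation of any
particular compiler), a compiler might map the loop above, with cross-iteration
dependences <tt>depend(sink: i-2)</tt> and <tt>depend(source)</tt>, onto the
doacross entry points like this:

@code
struct kmp_dim dims = {2, 8, 2}; // lo=2, up=8, st=2 for for(i=2;i<9;i+=2)
__kmpc_doacross_init(&loc, gtid, 1, &dims);
for (kmp_int64 i = lb; i <= ub; i += 2) { // this thread's chunk of iterations
  kmp_int64 sink = i - 2;
  __kmpc_doacross_wait(&loc, gtid, &sink); // wait until iteration i-2 is posted
  // ... loop body ...
  __kmpc_doacross_post(&loc, gtid, &i); // mark iteration i as completed
}
__kmpc_doacross_fini(&loc, gtid);
@endcode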
3896 */ 3897 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3898 const struct kmp_dim *dims) { 3899 __kmp_assert_valid_gtid(gtid); 3900 int j, idx; 3901 kmp_int64 last, trace_count; 3902 kmp_info_t *th = __kmp_threads[gtid]; 3903 kmp_team_t *team = th->th.th_team; 3904 kmp_uint32 *flags; 3905 kmp_disp_t *pr_buf = th->th.th_dispatch; 3906 dispatch_shared_info_t *sh_buf; 3907 3908 KA_TRACE( 3909 20, 3910 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3911 gtid, num_dims, !team->t.t_serialized)); 3912 KMP_DEBUG_ASSERT(dims != NULL); 3913 KMP_DEBUG_ASSERT(num_dims > 0); 3914 3915 if (team->t.t_serialized) { 3916 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3917 return; // no dependencies if team is serialized 3918 } 3919 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3920 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3921 // the next loop 3922 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3923 3924 // Save bounds info into allocated private buffer 3925 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3926 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3927 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3928 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3929 pr_buf->th_doacross_info[0] = 3930 (kmp_int64)num_dims; // first element is number of dimensions 3931 // Save also address of num_done in order to access it later without knowing 3932 // the buffer index 3933 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3934 pr_buf->th_doacross_info[2] = dims[0].lo; 3935 pr_buf->th_doacross_info[3] = dims[0].up; 3936 pr_buf->th_doacross_info[4] = dims[0].st; 3937 last = 5; 3938 for (j = 1; j < num_dims; ++j) { 3939 kmp_int64 3940 range_length; // To keep ranges of all dimensions but the first dims[0] 3941 if (dims[j].st == 1) { // most common case 3942 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3943 range_length = dims[j].up - dims[j].lo + 1; 3944 } else { 3945 if (dims[j].st > 0) { 3946 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3947 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3948 } else { // negative increment 3949 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3950 range_length = 3951 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3952 } 3953 } 3954 pr_buf->th_doacross_info[last++] = range_length; 3955 pr_buf->th_doacross_info[last++] = dims[j].lo; 3956 pr_buf->th_doacross_info[last++] = dims[j].up; 3957 pr_buf->th_doacross_info[last++] = dims[j].st; 3958 } 3959 3960 // Compute total trip count. 3961 // Start with range of dims[0] which we don't need to keep in the buffer. 
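  // For the example in the routine's header comment (lo=2, up=8, st=2), the
  // st > 0 branch below gives (8 - 2) / 2 + 1 = 4 iterations.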
3962 if (dims[0].st == 1) { // most common case 3963 trace_count = dims[0].up - dims[0].lo + 1; 3964 } else if (dims[0].st > 0) { 3965 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3966 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3967 } else { // negative increment 3968 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3969 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3970 } 3971 for (j = 1; j < num_dims; ++j) { 3972 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3973 } 3974 KMP_DEBUG_ASSERT(trace_count > 0); 3975 3976 // Check if shared buffer is not occupied by other loop (idx - 3977 // __kmp_dispatch_num_buffers) 3978 if (idx != sh_buf->doacross_buf_idx) { 3979 // Shared buffer is occupied, wait for it to be free 3980 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3981 __kmp_eq_4, NULL); 3982 } 3983 #if KMP_32_BIT_ARCH 3984 // Check if we are the first thread. After the CAS the first thread gets 0, 3985 // others get 1 if initialization is in progress, allocated pointer otherwise. 3986 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3987 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3988 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3989 #else 3990 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3991 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3992 #endif 3993 if (flags == NULL) { 3994 // we are the first thread, allocate the array of flags 3995 size_t size = 3996 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 3997 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3998 KMP_MB(); 3999 sh_buf->doacross_flags = flags; 4000 } else if (flags == (kmp_uint32 *)1) { 4001 #if KMP_32_BIT_ARCH 4002 // initialization is still in progress, need to wait 4003 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4004 #else 4005 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4006 #endif 4007 KMP_YIELD(TRUE); 4008 KMP_MB(); 4009 } else { 4010 KMP_MB(); 4011 } 4012 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4013 pr_buf->th_doacross_flags = 4014 sh_buf->doacross_flags; // save private copy in order to not 4015 // touch shared buffer on each iteration 4016 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4017 } 4018 4019 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4020 __kmp_assert_valid_gtid(gtid); 4021 kmp_int64 shft; 4022 size_t num_dims, i; 4023 kmp_uint32 flag; 4024 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4025 kmp_info_t *th = __kmp_threads[gtid]; 4026 kmp_team_t *team = th->th.th_team; 4027 kmp_disp_t *pr_buf; 4028 kmp_int64 lo, up, st; 4029 4030 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4031 if (team->t.t_serialized) { 4032 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4033 return; // no dependencies if team is serialized 4034 } 4035 4036 // calculate sequential iteration number and check out-of-bounds condition 4037 pr_buf = th->th.th_dispatch; 4038 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4039 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4040 lo = pr_buf->th_doacross_info[2]; 4041 up = pr_buf->th_doacross_info[3]; 4042 st = pr_buf->th_doacross_info[4]; 4043 #if OMPT_SUPPORT && OMPT_OPTIONAL 4044 ompt_dependence_t deps[num_dims]; 4045 #endif 4046 if (st == 1) { // most common case 4047 if (vec[0] < lo || vec[0] > up) { 4048 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4049 "bounds [%lld,%lld]\n", 4050 gtid, vec[0], lo, up)); 4051 return; 4052 } 4053 iter_number = vec[0] - lo; 4054 } else if (st > 0) { 4055 if (vec[0] < lo || vec[0] > up) { 4056 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4057 "bounds [%lld,%lld]\n", 4058 gtid, vec[0], lo, up)); 4059 return; 4060 } 4061 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4062 } else { // negative increment 4063 if (vec[0] > lo || vec[0] < up) { 4064 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4065 "bounds [%lld,%lld]\n", 4066 gtid, vec[0], lo, up)); 4067 return; 4068 } 4069 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4070 } 4071 #if OMPT_SUPPORT && OMPT_OPTIONAL 4072 deps[0].variable.value = iter_number; 4073 deps[0].dependence_type = ompt_dependence_type_sink; 4074 #endif 4075 for (i = 1; i < num_dims; ++i) { 4076 kmp_int64 iter, ln; 4077 size_t j = i * 4; 4078 ln = pr_buf->th_doacross_info[j + 1]; 4079 lo = pr_buf->th_doacross_info[j + 2]; 4080 up = pr_buf->th_doacross_info[j + 3]; 4081 st = pr_buf->th_doacross_info[j + 4]; 4082 if (st == 1) { 4083 if (vec[i] < lo || vec[i] > up) { 4084 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4085 "bounds [%lld,%lld]\n", 4086 gtid, vec[i], lo, up)); 4087 return; 4088 } 4089 iter = vec[i] - lo; 4090 } else if (st > 0) { 4091 if (vec[i] < lo || vec[i] > up) { 4092 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4093 "bounds [%lld,%lld]\n", 4094 gtid, vec[i], lo, up)); 4095 return; 4096 } 4097 iter = (kmp_uint64)(vec[i] - lo) / st; 4098 } else { // st < 0 4099 if (vec[i] > lo || vec[i] < up) { 4100 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4101 "bounds [%lld,%lld]\n", 4102 gtid, vec[i], lo, up)); 4103 return; 4104 } 4105 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4106 } 4107 iter_number = iter + ln * iter_number; 4108 #if OMPT_SUPPORT && OMPT_OPTIONAL 4109 deps[i].variable.value = iter; 4110 deps[i].dependence_type = ompt_dependence_type_sink; 4111 #endif 4112 } 4113 shft = iter_number % 32; // use 32-bit granularity 4114 iter_number >>= 5; // divided by 32 4115 flag = 1 << shft; 4116 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4117 KMP_YIELD(TRUE); 4118 } 4119 KMP_MB(); 4120 #if OMPT_SUPPORT && OMPT_OPTIONAL 4121 if (ompt_enabled.ompt_callback_dependences) { 4122 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4123 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4124 } 4125 #endif 4126 KA_TRACE(20, 4127 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4128 gtid, (iter_number << 5) + shft)); 4129 } 4130 4131 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4132 __kmp_assert_valid_gtid(gtid); 4133 kmp_int64 shft; 4134 size_t num_dims, i; 4135 kmp_uint32 flag; 4136 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4137 kmp_info_t *th = __kmp_threads[gtid]; 4138 kmp_team_t *team = th->th.th_team; 4139 kmp_disp_t *pr_buf; 4140 kmp_int64 lo, st; 4141 4142 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4143 if (team->t.t_serialized) { 4144 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4145 return; // no dependencies if team is serialized 4146 } 4147 4148 // calculate sequential iteration number (same as in "wait" but no 4149 // out-of-bounds checks) 4150 pr_buf = th->th.th_dispatch; 4151 
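  // As in __kmpc_doacross_wait(), the code below linearizes the iteration
  // vector as iter_number = (..((v0 * ln1 + v1) * ln2 + v2)..), where v_i is
  // the normalized iteration index of dimension i and ln_i its stored range
  // length; the resulting index selects a single bit in the shared flags.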
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4152 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4153 lo = pr_buf->th_doacross_info[2]; 4154 st = pr_buf->th_doacross_info[4]; 4155 #if OMPT_SUPPORT && OMPT_OPTIONAL 4156 ompt_dependence_t deps[num_dims]; 4157 #endif 4158 if (st == 1) { // most common case 4159 iter_number = vec[0] - lo; 4160 } else if (st > 0) { 4161 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4162 } else { // negative increment 4163 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4164 } 4165 #if OMPT_SUPPORT && OMPT_OPTIONAL 4166 deps[0].variable.value = iter_number; 4167 deps[0].dependence_type = ompt_dependence_type_source; 4168 #endif 4169 for (i = 1; i < num_dims; ++i) { 4170 kmp_int64 iter, ln; 4171 size_t j = i * 4; 4172 ln = pr_buf->th_doacross_info[j + 1]; 4173 lo = pr_buf->th_doacross_info[j + 2]; 4174 st = pr_buf->th_doacross_info[j + 4]; 4175 if (st == 1) { 4176 iter = vec[i] - lo; 4177 } else if (st > 0) { 4178 iter = (kmp_uint64)(vec[i] - lo) / st; 4179 } else { // st < 0 4180 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4181 } 4182 iter_number = iter + ln * iter_number; 4183 #if OMPT_SUPPORT && OMPT_OPTIONAL 4184 deps[i].variable.value = iter; 4185 deps[i].dependence_type = ompt_dependence_type_source; 4186 #endif 4187 } 4188 #if OMPT_SUPPORT && OMPT_OPTIONAL 4189 if (ompt_enabled.ompt_callback_dependences) { 4190 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4191 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4192 } 4193 #endif 4194 shft = iter_number % 32; // use 32-bit granularity 4195 iter_number >>= 5; // divided by 32 4196 flag = 1 << shft; 4197 KMP_MB(); 4198 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4199 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4200 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4201 (iter_number << 5) + shft)); 4202 } 4203 4204 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4205 __kmp_assert_valid_gtid(gtid); 4206 kmp_int32 num_done; 4207 kmp_info_t *th = __kmp_threads[gtid]; 4208 kmp_team_t *team = th->th.th_team; 4209 kmp_disp_t *pr_buf = th->th.th_dispatch; 4210 4211 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4212 if (team->t.t_serialized) { 4213 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4214 return; // nothing to do 4215 } 4216 num_done = 4217 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4218 if (num_done == th->th.th_team_nproc) { 4219 // we are the last thread, need to free shared resources 4220 int idx = pr_buf->th_doacross_buf_idx - 1; 4221 dispatch_shared_info_t *sh_buf = 4222 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4223 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4224 (kmp_int64)&sh_buf->doacross_num_done); 4225 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4226 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4227 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4228 sh_buf->doacross_flags = NULL; 4229 sh_buf->doacross_num_done = 0; 4230 sh_buf->doacross_buf_idx += 4231 __kmp_dispatch_num_buffers; // free buffer for future re-use 4232 } 4233 // free private resources (need to keep buffer index forever) 4234 pr_buf->th_doacross_flags = NULL; 4235 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4236 pr_buf->th_doacross_info = NULL; 4237 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4238 } 4239 4240 /* omp_alloc/omp_calloc/omp_free only defined for 
C/C++, not for Fortran */ 4241 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4242 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4243 } 4244 4245 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4246 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); 4247 } 4248 4249 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4250 omp_allocator_handle_t free_allocator) { 4251 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4252 free_allocator); 4253 } 4254 4255 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4256 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4257 } 4258 4259 int __kmpc_get_target_offload(void) { 4260 if (!__kmp_init_serial) { 4261 __kmp_serial_initialize(); 4262 } 4263 return __kmp_target_offload; 4264 } 4265 4266 int __kmpc_pause_resource(kmp_pause_status_t level) { 4267 if (!__kmp_init_serial) { 4268 return 1; // Can't pause if runtime is not initialized 4269 } 4270 return __kmp_pause_resource(level); 4271 } 4272
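
/* Illustrative usage of the omp_alloc/omp_free entry points defined above
   (a minimal sketch only; it assumes the predefined omp_default_mem_alloc
   handle declared in omp.h):

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/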