1 /* 2 * kmp_csupport.cpp -- kfront linkage support for OpenMP. 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 #define __KMP_IMP 14 #include "omp.h" /* extern "C" declarations of user-visible routines */ 15 #include "kmp.h" 16 #include "kmp_error.h" 17 #include "kmp_i18n.h" 18 #include "kmp_itt.h" 19 #include "kmp_lock.h" 20 #include "kmp_stats.h" 21 #include "ompt-specific.h" 22 23 #define MAX_MESSAGE 512 24 25 // flags will be used in future, e.g. to implement openmp_strict library 26 // restrictions 27 28 /*! 29 * @ingroup STARTUP_SHUTDOWN 30 * @param loc in source location information 31 * @param flags in for future use (currently ignored) 32 * 33 * Initialize the runtime library. This call is optional; if it is not made then 34 * it will be implicitly called by attempts to use other library functions. 35 */ 36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) { 37 // By default __kmpc_begin() is no-op. 38 char *env; 39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL && 40 __kmp_str_match_true(env)) { 41 __kmp_middle_initialize(); 42 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n")); 43 } else if (__kmp_ignore_mppbeg() == FALSE) { 44 // By default __kmp_ignore_mppbeg() returns TRUE. 45 __kmp_internal_begin(); 46 KC_TRACE(10, ("__kmpc_begin: called\n")); 47 } 48 } 49 50 /*! 51 * @ingroup STARTUP_SHUTDOWN 52 * @param loc source location information 53 * 54 * Shutdown the runtime library. This is also optional, and even if called will 55 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to 56 * zero. 57 */ 58 void __kmpc_end(ident_t *loc) { 59 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() 60 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND 61 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() 62 // returns FALSE and __kmpc_end() will unregister this root (it can cause 63 // library shut down). 64 if (__kmp_ignore_mppend() == FALSE) { 65 KC_TRACE(10, ("__kmpc_end: called\n")); 66 KA_TRACE(30, ("__kmpc_end\n")); 67 68 __kmp_internal_end_thread(-1); 69 } 70 #if KMP_OS_WINDOWS && OMPT_SUPPORT 71 // Normal exit process on Windows does not allow worker threads of the final 72 // parallel region to finish reporting their events, so shutting down the 73 // library here fixes the issue at least for the cases where __kmpc_end() is 74 // placed properly. 75 if (ompt_enabled.enabled) 76 __kmp_internal_end_library(__kmp_gtid_get_specific()); 77 #endif 78 } 79 80 /*! 81 @ingroup THREAD_STATES 82 @param loc Source location information. 83 @return The global thread index of the active thread. 84 85 This function can be called in any context. 86 87 If the runtime has ony been entered at the outermost level from a 88 single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is 89 that which would be returned by omp_get_thread_num() in the outermost 90 active parallel construct. (Or zero if there is no active parallel 91 construct, since the master thread is necessarily thread zero). 
92 93 If multiple non-OpenMP threads all enter an OpenMP construct then this 94 will be a unique thread identifier among all the threads created by 95 the OpenMP runtime (but the value cannot be defined in terms of 96 OpenMP thread ids returned by omp_get_thread_num()). 97 */ 98 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 99 kmp_int32 gtid = __kmp_entry_gtid(); 100 101 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 102 103 return gtid; 104 } 105 106 /*! 107 @ingroup THREAD_STATES 108 @param loc Source location information. 109 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 110 111 This function can be called in any context. 112 It returns the total number of threads under the control of the OpenMP runtime. 113 That is not a number that can be determined by any OpenMP standard calls, since 114 the library may be called from more than one non-OpenMP thread, and this 115 reflects the total over all such calls. Similarly the runtime maintains 116 underlying threads even when they are not active (since the cost of creating 117 and destroying OS threads is high), this call counts all such threads even if 118 they are not waiting for work. 119 */ 120 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 121 KC_TRACE(10, 122 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 123 124 return TCR_4(__kmp_all_nth); 125 } 126 127 /*! 128 @ingroup THREAD_STATES 129 @param loc Source location information. 130 @return The thread number of the calling thread in the innermost active parallel 131 construct. 132 */ 133 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 134 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 135 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 136 } 137 138 /*! 139 @ingroup THREAD_STATES 140 @param loc Source location information. 141 @return The number of threads in the innermost active parallel construct. 142 */ 143 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 144 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 145 146 return __kmp_entry_thread()->th.th_team->t.t_nproc; 147 } 148 149 /*! 150 * @ingroup DEPRECATED 151 * @param loc location description 152 * 153 * This function need not be called. It always returns TRUE. 
154 */ 155 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 156 #ifndef KMP_DEBUG 157 158 return TRUE; 159 160 #else 161 162 const char *semi2; 163 const char *semi3; 164 int line_no; 165 166 if (__kmp_par_range == 0) { 167 return TRUE; 168 } 169 semi2 = loc->psource; 170 if (semi2 == NULL) { 171 return TRUE; 172 } 173 semi2 = strchr(semi2, ';'); 174 if (semi2 == NULL) { 175 return TRUE; 176 } 177 semi2 = strchr(semi2 + 1, ';'); 178 if (semi2 == NULL) { 179 return TRUE; 180 } 181 if (__kmp_par_range_filename[0]) { 182 const char *name = semi2 - 1; 183 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 184 name--; 185 } 186 if ((*name == '/') || (*name == ';')) { 187 name++; 188 } 189 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 190 return __kmp_par_range < 0; 191 } 192 } 193 semi3 = strchr(semi2 + 1, ';'); 194 if (__kmp_par_range_routine[0]) { 195 if ((semi3 != NULL) && (semi3 > semi2) && 196 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 197 return __kmp_par_range < 0; 198 } 199 } 200 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 201 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 202 return __kmp_par_range > 0; 203 } 204 return __kmp_par_range < 0; 205 } 206 return TRUE; 207 208 #endif /* KMP_DEBUG */ 209 } 210 211 /*! 212 @ingroup THREAD_STATES 213 @param loc Source location information. 214 @return 1 if this thread is executing inside an active parallel region, zero if 215 not. 216 */ 217 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 218 return __kmp_entry_thread()->th.th_root->r.r_active; 219 } 220 221 /*! 222 @ingroup PARALLEL 223 @param loc source location information 224 @param global_tid global thread number 225 @param num_threads number of threads requested for this parallel construct 226 227 Set the number of threads to be used by the next fork spawned by this thread. 228 This call is only required if the parallel construct has a `num_threads` clause. 229 */ 230 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 231 kmp_int32 num_threads) { 232 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 233 global_tid, num_threads)); 234 __kmp_assert_valid_gtid(global_tid); 235 __kmp_push_num_threads(loc, global_tid, num_threads); 236 } 237 238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 239 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 240 /* the num_threads are automatically popped */ 241 } 242 243 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 244 kmp_int32 proc_bind) { 245 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 246 proc_bind)); 247 __kmp_assert_valid_gtid(global_tid); 248 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 249 } 250 251 /*! 252 @ingroup PARALLEL 253 @param loc source location information 254 @param argc total number of arguments in the ellipsis 255 @param microtask pointer to callback routine consisting of outlined parallel 256 construct 257 @param ... pointers to shared variables that aren't global 258 259 Do the actual fork and call the microtask in the relevant number of threads. 260 */ 261 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{ 262 int gtid = __kmp_entry_gtid(); 263 264 #if (KMP_STATS_ENABLED) 265 // If we were in a serial region, then stop the serial timer, record 266 // the event, and start parallel region timer 267 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 268 if (previous_state == stats_state_e::SERIAL_REGION) { 269 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 270 } else { 271 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 272 } 273 int inParallel = __kmpc_in_parallel(loc); 274 if (inParallel) { 275 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 276 } else { 277 KMP_COUNT_BLOCK(OMP_PARALLEL); 278 } 279 #endif 280 281 // maybe to save thr_state is enough here 282 { 283 va_list ap; 284 va_start(ap, microtask); 285 286 #if OMPT_SUPPORT 287 ompt_frame_t *ompt_frame; 288 if (ompt_enabled.enabled) { 289 kmp_info_t *master_th = __kmp_threads[gtid]; 290 kmp_team_t *parent_team = master_th->th.th_team; 291 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 292 if (lwt) 293 ompt_frame = &(lwt->ompt_task_info.frame); 294 else { 295 int tid = __kmp_tid_from_gtid(gtid); 296 ompt_frame = &( 297 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 298 } 299 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 300 OMPT_STORE_RETURN_ADDRESS(gtid); 301 } 302 #endif 303 304 #if INCLUDE_SSC_MARKS 305 SSC_MARK_FORKING(); 306 #endif 307 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 308 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 309 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 310 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 311 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 312 &ap 313 #else 314 ap 315 #endif 316 ); 317 #if INCLUDE_SSC_MARKS 318 SSC_MARK_JOINING(); 319 #endif 320 __kmp_join_call(loc, gtid 321 #if OMPT_SUPPORT 322 , 323 fork_context_intel 324 #endif 325 ); 326 327 va_end(ap); 328 } 329 330 #if KMP_STATS_ENABLED 331 if (previous_state == stats_state_e::SERIAL_REGION) { 332 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 333 } else { 334 KMP_POP_PARTITIONED_TIMER(); 335 } 336 #endif // KMP_STATS_ENABLED 337 } 338 339 /*! 340 @ingroup PARALLEL 341 @param loc source location information 342 @param global_tid global thread number 343 @param num_teams number of teams requested for the teams construct 344 @param num_threads number of threads per team requested for the teams construct 345 346 Set the number of teams to be used by the teams construct. 347 This call is only required if the teams construct has a `num_teams` clause 348 or a `thread_limit` clause (or both). 349 */ 350 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 351 kmp_int32 num_teams, kmp_int32 num_threads) { 352 KA_TRACE(20, 353 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 354 global_tid, num_teams, num_threads)); 355 __kmp_assert_valid_gtid(global_tid); 356 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 357 } 358 359 /*! 360 @ingroup PARALLEL 361 @param loc source location information 362 @param argc total number of arguments in the ellipsis 363 @param microtask pointer to callback routine consisting of outlined teams 364 construct 365 @param ... pointers to shared variables that aren't global 366 367 Do the actual fork and call the microtask in the relevant number of threads. 368 */ 369 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 370 ...) 
{ 371 int gtid = __kmp_entry_gtid(); 372 kmp_info_t *this_thr = __kmp_threads[gtid]; 373 va_list ap; 374 va_start(ap, microtask); 375 376 #if KMP_STATS_ENABLED 377 KMP_COUNT_BLOCK(OMP_TEAMS); 378 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 379 if (previous_state == stats_state_e::SERIAL_REGION) { 380 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 381 } else { 382 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 383 } 384 #endif 385 386 // remember teams entry point and nesting level 387 this_thr->th.th_teams_microtask = microtask; 388 this_thr->th.th_teams_level = 389 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 390 391 #if OMPT_SUPPORT 392 kmp_team_t *parent_team = this_thr->th.th_team; 393 int tid = __kmp_tid_from_gtid(gtid); 394 if (ompt_enabled.enabled) { 395 parent_team->t.t_implicit_task_taskdata[tid] 396 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 397 } 398 OMPT_STORE_RETURN_ADDRESS(gtid); 399 #endif 400 401 // check if __kmpc_push_num_teams called, set default number of teams 402 // otherwise 403 if (this_thr->th.th_teams_size.nteams == 0) { 404 __kmp_push_num_teams(loc, gtid, 0, 0); 405 } 406 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 407 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 408 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 409 410 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 411 VOLATILE_CAST(microtask_t) 412 __kmp_teams_master, // "wrapped" task 413 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, 414 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 415 &ap 416 #else 417 ap 418 #endif 419 ); 420 __kmp_join_call(loc, gtid 421 #if OMPT_SUPPORT 422 , 423 fork_context_intel 424 #endif 425 ); 426 427 // Pop current CG root off list 428 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 429 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 430 this_thr->th.th_cg_roots = tmp->up; 431 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 432 " to node %p. cg_nthreads was %d\n", 433 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); 434 KMP_DEBUG_ASSERT(tmp->cg_nthreads); 435 int i = tmp->cg_nthreads--; 436 if (i == 1) { // check is we are the last thread in CG (not always the case) 437 __kmp_free(tmp); 438 } 439 // Restore current task's thread_limit from CG root 440 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 441 this_thr->th.th_current_task->td_icvs.thread_limit = 442 this_thr->th.th_cg_roots->cg_thread_limit; 443 444 this_thr->th.th_teams_microtask = NULL; 445 this_thr->th.th_teams_level = 0; 446 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; 447 va_end(ap); 448 #if KMP_STATS_ENABLED 449 if (previous_state == stats_state_e::SERIAL_REGION) { 450 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 451 } else { 452 KMP_POP_PARTITIONED_TIMER(); 453 } 454 #endif // KMP_STATS_ENABLED 455 } 456 457 // I don't think this function should ever have been exported. 458 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated 459 // openmp code ever called it, but it's been exported from the RTL for so 460 // long that I'm afraid to remove the definition. 461 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } 462 463 /*! 464 @ingroup PARALLEL 465 @param loc source location information 466 @param global_tid global thread number 467 468 Enter a serialized parallel construct. 
This interface is used to handle a 469 conditional parallel region, like this, 470 @code 471 #pragma omp parallel if (condition) 472 @endcode 473 when the condition is false. 474 */ 475 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 476 // The implementation is now in kmp_runtime.cpp so that it can share static 477 // functions with kmp_fork_call since the tasks to be done are similar in 478 // each case. 479 __kmp_assert_valid_gtid(global_tid); 480 #if OMPT_SUPPORT 481 OMPT_STORE_RETURN_ADDRESS(global_tid); 482 #endif 483 __kmp_serialized_parallel(loc, global_tid); 484 } 485 486 /*! 487 @ingroup PARALLEL 488 @param loc source location information 489 @param global_tid global thread number 490 491 Leave a serialized parallel construct. 492 */ 493 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 494 kmp_internal_control_t *top; 495 kmp_info_t *this_thr; 496 kmp_team_t *serial_team; 497 498 KC_TRACE(10, 499 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 500 501 /* skip all this code for autopar serialized loops since it results in 502 unacceptable overhead */ 503 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 504 return; 505 506 // Not autopar code 507 __kmp_assert_valid_gtid(global_tid); 508 if (!TCR_4(__kmp_init_parallel)) 509 __kmp_parallel_initialize(); 510 511 __kmp_resume_if_soft_paused(); 512 513 this_thr = __kmp_threads[global_tid]; 514 serial_team = this_thr->th.th_serial_team; 515 516 kmp_task_team_t *task_team = this_thr->th.th_task_team; 517 // we need to wait for the proxy tasks before finishing the thread 518 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 519 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 520 521 KMP_MB(); 522 KMP_DEBUG_ASSERT(serial_team); 523 KMP_ASSERT(serial_team->t.t_serialized); 524 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 525 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 526 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 527 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 528 529 #if OMPT_SUPPORT 530 if (ompt_enabled.enabled && 531 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 532 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 533 if (ompt_enabled.ompt_callback_implicit_task) { 534 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 535 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 536 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 537 } 538 539 // reset clear the task id only after unlinking the task 540 ompt_data_t *parent_task_data; 541 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 542 543 if (ompt_enabled.ompt_callback_parallel_end) { 544 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 545 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 546 ompt_parallel_invoker_program | ompt_parallel_team, 547 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 548 } 549 __ompt_lw_taskteam_unlink(this_thr); 550 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 551 } 552 #endif 553 554 /* If necessary, pop the internal control stack values and replace the team 555 * values */ 556 top = serial_team->t.t_control_stack_top; 557 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 558 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 559 serial_team->t.t_control_stack_top = top->next; 560 __kmp_free(top); 561 } 562 563 // if( serial_team -> t.t_serialized 
> 1 ) 564 serial_team->t.t_level--; 565 566 /* pop dispatch buffers stack */ 567 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 568 { 569 dispatch_private_info_t *disp_buffer = 570 serial_team->t.t_dispatch->th_disp_buffer; 571 serial_team->t.t_dispatch->th_disp_buffer = 572 serial_team->t.t_dispatch->th_disp_buffer->next; 573 __kmp_free(disp_buffer); 574 } 575 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 576 577 --serial_team->t.t_serialized; 578 if (serial_team->t.t_serialized == 0) { 579 580 /* return to the parallel section */ 581 582 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 583 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 584 __kmp_clear_x87_fpu_status_word(); 585 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 586 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 587 } 588 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 589 590 this_thr->th.th_team = serial_team->t.t_parent; 591 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 592 593 /* restore values cached in the thread */ 594 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 595 this_thr->th.th_team_master = 596 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 597 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 598 599 /* TODO the below shouldn't need to be adjusted for serialized teams */ 600 this_thr->th.th_dispatch = 601 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 602 603 __kmp_pop_current_task_from_thread(this_thr); 604 605 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 606 this_thr->th.th_current_task->td_flags.executing = 1; 607 608 if (__kmp_tasking_mode != tskm_immediate_exec) { 609 // Copy the task team from the new child / old parent team to the thread. 610 this_thr->th.th_task_team = 611 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 612 KA_TRACE(20, 613 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 614 "team %p\n", 615 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 616 } 617 } else { 618 if (__kmp_tasking_mode != tskm_immediate_exec) { 619 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 620 "depth of serial team %p to %d\n", 621 global_tid, serial_team, serial_team->t.t_serialized)); 622 } 623 } 624 625 if (__kmp_env_consistency_check) 626 __kmp_pop_parallel(global_tid, NULL); 627 #if OMPT_SUPPORT 628 if (ompt_enabled.enabled) 629 this_thr->th.ompt_thread_info.state = 630 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 631 : ompt_state_work_parallel); 632 #endif 633 } 634 635 /*! 636 @ingroup SYNCHRONIZATION 637 @param loc source location information. 638 639 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 640 depending on the memory ordering convention obeyed by the compiler 641 even that may not be necessary). 642 */ 643 void __kmpc_flush(ident_t *loc) { 644 KC_TRACE(10, ("__kmpc_flush: called\n")); 645 646 /* need explicit __mf() here since use volatile instead in library */ 647 KMP_MB(); /* Flush all pending memory write invalidates. */ 648 649 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 650 #if KMP_MIC 651 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 
652 // We shouldn't need it, though, since the ABI rules require that 653 // * If the compiler generates NGO stores it also generates the fence 654 // * If users hand-code NGO stores they should insert the fence 655 // therefore no incomplete unordered stores should be visible. 656 #else 657 // C74404 658 // This is to address non-temporal store instructions (sfence needed). 659 // The clflush instruction is addressed either (mfence needed). 660 // Probably the non-temporal load monvtdqa instruction should also be 661 // addressed. 662 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2. 663 if (!__kmp_cpuinfo.initialized) { 664 __kmp_query_cpuid(&__kmp_cpuinfo); 665 } 666 if (!__kmp_cpuinfo.sse2) { 667 // CPU cannot execute SSE2 instructions. 668 } else { 669 #if KMP_COMPILER_ICC 670 _mm_mfence(); 671 #elif KMP_COMPILER_MSVC 672 MemoryBarrier(); 673 #else 674 __sync_synchronize(); 675 #endif // KMP_COMPILER_ICC 676 } 677 #endif // KMP_MIC 678 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ 679 KMP_ARCH_RISCV64) 680 // Nothing to see here move along 681 #elif KMP_ARCH_PPC64 682 // Nothing needed here (we have a real MB above). 683 #else 684 #error Unknown or unsupported architecture 685 #endif 686 687 #if OMPT_SUPPORT && OMPT_OPTIONAL 688 if (ompt_enabled.ompt_callback_flush) { 689 ompt_callbacks.ompt_callback(ompt_callback_flush)( 690 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 691 } 692 #endif 693 } 694 695 /* -------------------------------------------------------------------------- */ 696 /*! 697 @ingroup SYNCHRONIZATION 698 @param loc source location information 699 @param global_tid thread id. 700 701 Execute a barrier. 702 */ 703 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 704 KMP_COUNT_BLOCK(OMP_BARRIER); 705 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 706 __kmp_assert_valid_gtid(global_tid); 707 708 if (!TCR_4(__kmp_init_parallel)) 709 __kmp_parallel_initialize(); 710 711 __kmp_resume_if_soft_paused(); 712 713 if (__kmp_env_consistency_check) { 714 if (loc == 0) { 715 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 716 } 717 __kmp_check_barrier(global_tid, ct_barrier, loc); 718 } 719 720 #if OMPT_SUPPORT 721 ompt_frame_t *ompt_frame; 722 if (ompt_enabled.enabled) { 723 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 724 if (ompt_frame->enter_frame.ptr == NULL) 725 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 726 OMPT_STORE_RETURN_ADDRESS(global_tid); 727 } 728 #endif 729 __kmp_threads[global_tid]->th.th_ident = loc; 730 // TODO: explicit barrier_wait_id: 731 // this function is called when 'barrier' directive is present or 732 // implicit barrier at the end of a worksharing construct. 733 // 1) better to add a per-thread barrier counter to a thread data structure 734 // 2) set to 0 when a new team is created 735 // 4) no sync is required 736 737 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 738 #if OMPT_SUPPORT && OMPT_OPTIONAL 739 if (ompt_enabled.enabled) { 740 ompt_frame->enter_frame = ompt_data_none; 741 } 742 #endif 743 } 744 745 /* The BARRIER for a MASTER section is always explicit */ 746 /*! 747 @ingroup WORK_SHARING 748 @param loc source location information. 749 @param global_tid global thread number . 750 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
751 */ 752 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 753 int status = 0; 754 755 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 756 __kmp_assert_valid_gtid(global_tid); 757 758 if (!TCR_4(__kmp_init_parallel)) 759 __kmp_parallel_initialize(); 760 761 __kmp_resume_if_soft_paused(); 762 763 if (KMP_MASTER_GTID(global_tid)) { 764 KMP_COUNT_BLOCK(OMP_MASTER); 765 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 766 status = 1; 767 } 768 769 #if OMPT_SUPPORT && OMPT_OPTIONAL 770 if (status) { 771 if (ompt_enabled.ompt_callback_master) { 772 kmp_info_t *this_thr = __kmp_threads[global_tid]; 773 kmp_team_t *team = this_thr->th.th_team; 774 775 int tid = __kmp_tid_from_gtid(global_tid); 776 ompt_callbacks.ompt_callback(ompt_callback_master)( 777 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 778 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 779 OMPT_GET_RETURN_ADDRESS(0)); 780 } 781 } 782 #endif 783 784 if (__kmp_env_consistency_check) { 785 #if KMP_USE_DYNAMIC_LOCK 786 if (status) 787 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 788 else 789 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 790 #else 791 if (status) 792 __kmp_push_sync(global_tid, ct_master, loc, NULL); 793 else 794 __kmp_check_sync(global_tid, ct_master, loc, NULL); 795 #endif 796 } 797 798 return status; 799 } 800 801 /*! 802 @ingroup WORK_SHARING 803 @param loc source location information. 804 @param global_tid global thread number . 805 806 Mark the end of a <tt>master</tt> region. This should only be called by the 807 thread that executes the <tt>master</tt> region. 808 */ 809 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 810 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 811 __kmp_assert_valid_gtid(global_tid); 812 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 813 KMP_POP_PARTITIONED_TIMER(); 814 815 #if OMPT_SUPPORT && OMPT_OPTIONAL 816 kmp_info_t *this_thr = __kmp_threads[global_tid]; 817 kmp_team_t *team = this_thr->th.th_team; 818 if (ompt_enabled.ompt_callback_master) { 819 int tid = __kmp_tid_from_gtid(global_tid); 820 ompt_callbacks.ompt_callback(ompt_callback_master)( 821 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 822 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 823 OMPT_GET_RETURN_ADDRESS(0)); 824 } 825 #endif 826 827 if (__kmp_env_consistency_check) { 828 if (KMP_MASTER_GTID(global_tid)) 829 __kmp_pop_sync(global_tid, ct_master, loc); 830 } 831 } 832 833 /*! 834 @ingroup WORK_SHARING 835 @param loc source location information. 836 @param gtid global thread number. 837 838 Start execution of an <tt>ordered</tt> construct. 
839 */ 840 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { 841 int cid = 0; 842 kmp_info_t *th; 843 KMP_DEBUG_ASSERT(__kmp_init_serial); 844 845 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid)); 846 __kmp_assert_valid_gtid(gtid); 847 848 if (!TCR_4(__kmp_init_parallel)) 849 __kmp_parallel_initialize(); 850 851 __kmp_resume_if_soft_paused(); 852 853 #if USE_ITT_BUILD 854 __kmp_itt_ordered_prep(gtid); 855 // TODO: ordered_wait_id 856 #endif /* USE_ITT_BUILD */ 857 858 th = __kmp_threads[gtid]; 859 860 #if OMPT_SUPPORT && OMPT_OPTIONAL 861 kmp_team_t *team; 862 ompt_wait_id_t lck; 863 void *codeptr_ra; 864 if (ompt_enabled.enabled) { 865 OMPT_STORE_RETURN_ADDRESS(gtid); 866 team = __kmp_team_from_gtid(gtid); 867 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value; 868 /* OMPT state update */ 869 th->th.ompt_thread_info.wait_id = lck; 870 th->th.ompt_thread_info.state = ompt_state_wait_ordered; 871 872 /* OMPT event callback */ 873 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 874 if (ompt_enabled.ompt_callback_mutex_acquire) { 875 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 876 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck, 877 codeptr_ra); 878 } 879 } 880 #endif 881 882 if (th->th.th_dispatch->th_deo_fcn != 0) 883 (*th->th.th_dispatch->th_deo_fcn)(>id, &cid, loc); 884 else 885 __kmp_parallel_deo(>id, &cid, loc); 886 887 #if OMPT_SUPPORT && OMPT_OPTIONAL 888 if (ompt_enabled.enabled) { 889 /* OMPT state update */ 890 th->th.ompt_thread_info.state = ompt_state_work_parallel; 891 th->th.ompt_thread_info.wait_id = 0; 892 893 /* OMPT event callback */ 894 if (ompt_enabled.ompt_callback_mutex_acquired) { 895 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 896 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 897 } 898 } 899 #endif 900 901 #if USE_ITT_BUILD 902 __kmp_itt_ordered_start(gtid); 903 #endif /* USE_ITT_BUILD */ 904 } 905 906 /*! 907 @ingroup WORK_SHARING 908 @param loc source location information. 909 @param gtid global thread number. 910 911 End execution of an <tt>ordered</tt> construct. 912 */ 913 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { 914 int cid = 0; 915 kmp_info_t *th; 916 917 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid)); 918 __kmp_assert_valid_gtid(gtid); 919 920 #if USE_ITT_BUILD 921 __kmp_itt_ordered_end(gtid); 922 // TODO: ordered_wait_id 923 #endif /* USE_ITT_BUILD */ 924 925 th = __kmp_threads[gtid]; 926 927 if (th->th.th_dispatch->th_dxo_fcn != 0) 928 (*th->th.th_dispatch->th_dxo_fcn)(>id, &cid, loc); 929 else 930 __kmp_parallel_dxo(>id, &cid, loc); 931 932 #if OMPT_SUPPORT && OMPT_OPTIONAL 933 OMPT_STORE_RETURN_ADDRESS(gtid); 934 if (ompt_enabled.ompt_callback_mutex_released) { 935 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 936 ompt_mutex_ordered, 937 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid) 938 ->t.t_ordered.dt.t_value, 939 OMPT_LOAD_RETURN_ADDRESS(gtid)); 940 } 941 #endif 942 } 943 944 #if KMP_USE_DYNAMIC_LOCK 945 946 static __forceinline void 947 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc, 948 kmp_int32 gtid, kmp_indirect_locktag_t tag) { 949 // Pointer to the allocated indirect lock is written to crit, while indexing 950 // is ignored. 
951 void *idx; 952 kmp_indirect_lock_t **lck; 953 lck = (kmp_indirect_lock_t **)crit; 954 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 955 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 956 KMP_SET_I_LOCK_LOCATION(ilk, loc); 957 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 958 KA_TRACE(20, 959 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 960 #if USE_ITT_BUILD 961 __kmp_itt_critical_creating(ilk->lock, loc); 962 #endif 963 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 964 if (status == 0) { 965 #if USE_ITT_BUILD 966 __kmp_itt_critical_destroyed(ilk->lock); 967 #endif 968 // We don't really need to destroy the unclaimed lock here since it will be 969 // cleaned up at program exit. 970 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 971 } 972 KMP_DEBUG_ASSERT(*lck != NULL); 973 } 974 975 // Fast-path acquire tas lock 976 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 977 { \ 978 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 979 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 980 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 981 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 982 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 983 kmp_uint32 spins; \ 984 KMP_FSYNC_PREPARE(l); \ 985 KMP_INIT_YIELD(spins); \ 986 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 987 do { \ 988 if (TCR_4(__kmp_nth) > \ 989 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 990 KMP_YIELD(TRUE); \ 991 } else { \ 992 KMP_YIELD_SPIN(spins); \ 993 } \ 994 __kmp_spin_backoff(&backoff); \ 995 } while ( \ 996 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 997 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 998 } \ 999 KMP_FSYNC_ACQUIRED(l); \ 1000 } 1001 1002 // Fast-path test tas lock 1003 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1004 { \ 1005 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1006 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1007 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1008 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1009 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1010 } 1011 1012 // Fast-path release tas lock 1013 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1014 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1015 1016 #if KMP_USE_FUTEX 1017 1018 #include <sys/syscall.h> 1019 #include <unistd.h> 1020 #ifndef FUTEX_WAIT 1021 #define FUTEX_WAIT 0 1022 #endif 1023 #ifndef FUTEX_WAKE 1024 #define FUTEX_WAKE 1 1025 #endif 1026 1027 // Fast-path acquire futex lock 1028 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1029 { \ 1030 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1031 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1032 KMP_MB(); \ 1033 KMP_FSYNC_PREPARE(ftx); \ 1034 kmp_int32 poll_val; \ 1035 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1036 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1037 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1038 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1039 if (!cond) { \ 1040 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1041 poll_val | \ 1042 KMP_LOCK_BUSY(1, futex))) { \ 1043 continue; \ 1044 } \ 1045 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1046 } \ 1047 kmp_int32 rc; \ 1048 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1049 NULL, NULL, 0)) != 0) { \ 1050 continue; \ 1051 } \ 1052 gtid_code |= 1; \ 1053 } \ 1054 KMP_FSYNC_ACQUIRED(ftx); \ 1055 } 1056 1057 // Fast-path test futex lock 1058 #define 
KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1059 { \ 1060 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1061 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1062 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1063 KMP_FSYNC_ACQUIRED(ftx); \ 1064 rc = TRUE; \ 1065 } else { \ 1066 rc = FALSE; \ 1067 } \ 1068 } 1069 1070 // Fast-path release futex lock 1071 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1072 { \ 1073 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1074 KMP_MB(); \ 1075 KMP_FSYNC_RELEASING(ftx); \ 1076 kmp_int32 poll_val = \ 1077 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1078 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1079 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1080 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1081 } \ 1082 KMP_MB(); \ 1083 KMP_YIELD_OVERSUB(); \ 1084 } 1085 1086 #endif // KMP_USE_FUTEX 1087 1088 #else // KMP_USE_DYNAMIC_LOCK 1089 1090 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1091 ident_t const *loc, 1092 kmp_int32 gtid) { 1093 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1094 1095 // Because of the double-check, the following load doesn't need to be volatile 1096 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1097 1098 if (lck == NULL) { 1099 void *idx; 1100 1101 // Allocate & initialize the lock. 1102 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1103 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1104 __kmp_init_user_lock_with_checks(lck); 1105 __kmp_set_user_lock_location(lck, loc); 1106 #if USE_ITT_BUILD 1107 __kmp_itt_critical_creating(lck); 1108 // __kmp_itt_critical_creating() should be called *before* the first usage 1109 // of underlying lock. It is the only place where we can guarantee it. There 1110 // are chances the lock will destroyed with no usage, but it is not a 1111 // problem, because this is not real event seen by user but rather setting 1112 // name for object (lock). See more details in kmp_itt.h. 1113 #endif /* USE_ITT_BUILD */ 1114 1115 // Use a cmpxchg instruction to slam the start of the critical section with 1116 // the lock pointer. If another thread beat us to it, deallocate the lock, 1117 // and use the lock that the other thread allocated. 1118 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1119 1120 if (status == 0) { 1121 // Deallocate the lock and reload the value. 1122 #if USE_ITT_BUILD 1123 __kmp_itt_critical_destroyed(lck); 1124 // Let ITT know the lock is destroyed and the same memory location may be reused 1125 // for another purpose. 1126 #endif /* USE_ITT_BUILD */ 1127 __kmp_destroy_user_lock_with_checks(lck); 1128 __kmp_user_lock_free(&idx, gtid, lck); 1129 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1130 KMP_DEBUG_ASSERT(lck != NULL); 1131 } 1132 } 1133 return lck; 1134 } 1135 1136 #endif // KMP_USE_DYNAMIC_LOCK 1137 1138 /*! 1139 @ingroup WORK_SHARING 1140 @param loc source location information. 1141 @param global_tid global thread number. 1142 @param crit identity of the critical section. This could be a pointer to a lock 1143 associated with the critical section, or some other suitably unique value. 1144 1145 Enter code protected by a `critical` construct. 1146 This function blocks until the executing thread can enter the critical section. 
1147 */ 1148 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1149 kmp_critical_name *crit) { 1150 #if KMP_USE_DYNAMIC_LOCK 1151 #if OMPT_SUPPORT && OMPT_OPTIONAL 1152 OMPT_STORE_RETURN_ADDRESS(global_tid); 1153 #endif // OMPT_SUPPORT 1154 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1155 #else 1156 KMP_COUNT_BLOCK(OMP_CRITICAL); 1157 #if OMPT_SUPPORT && OMPT_OPTIONAL 1158 ompt_state_t prev_state = ompt_state_undefined; 1159 ompt_thread_info_t ti; 1160 #endif 1161 kmp_user_lock_p lck; 1162 1163 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1164 __kmp_assert_valid_gtid(global_tid); 1165 1166 // TODO: add THR_OVHD_STATE 1167 1168 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1169 KMP_CHECK_USER_LOCK_INIT(); 1170 1171 if ((__kmp_user_lock_kind == lk_tas) && 1172 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1173 lck = (kmp_user_lock_p)crit; 1174 } 1175 #if KMP_USE_FUTEX 1176 else if ((__kmp_user_lock_kind == lk_futex) && 1177 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1178 lck = (kmp_user_lock_p)crit; 1179 } 1180 #endif 1181 else { // ticket, queuing or drdpa 1182 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1183 } 1184 1185 if (__kmp_env_consistency_check) 1186 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1187 1188 // since the critical directive binds to all threads, not just the current 1189 // team we have to check this even if we are in a serialized team. 1190 // also, even if we are the uber thread, we still have to conduct the lock, 1191 // as we have to contend with sibling threads. 1192 1193 #if USE_ITT_BUILD 1194 __kmp_itt_critical_acquiring(lck); 1195 #endif /* USE_ITT_BUILD */ 1196 #if OMPT_SUPPORT && OMPT_OPTIONAL 1197 OMPT_STORE_RETURN_ADDRESS(gtid); 1198 void *codeptr_ra = NULL; 1199 if (ompt_enabled.enabled) { 1200 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1201 /* OMPT state update */ 1202 prev_state = ti.state; 1203 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1204 ti.state = ompt_state_wait_critical; 1205 1206 /* OMPT event callback */ 1207 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1208 if (ompt_enabled.ompt_callback_mutex_acquire) { 1209 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1210 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1211 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1212 } 1213 } 1214 #endif 1215 // Value of 'crit' should be good for using as a critical_id of the critical 1216 // section directive. 
1217 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1218 1219 #if USE_ITT_BUILD 1220 __kmp_itt_critical_acquired(lck); 1221 #endif /* USE_ITT_BUILD */ 1222 #if OMPT_SUPPORT && OMPT_OPTIONAL 1223 if (ompt_enabled.enabled) { 1224 /* OMPT state update */ 1225 ti.state = prev_state; 1226 ti.wait_id = 0; 1227 1228 /* OMPT event callback */ 1229 if (ompt_enabled.ompt_callback_mutex_acquired) { 1230 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1231 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1232 } 1233 } 1234 #endif 1235 KMP_POP_PARTITIONED_TIMER(); 1236 1237 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1238 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1239 #endif // KMP_USE_DYNAMIC_LOCK 1240 } 1241 1242 #if KMP_USE_DYNAMIC_LOCK 1243 1244 // Converts the given hint to an internal lock implementation 1245 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1246 #if KMP_USE_TSX 1247 #define KMP_TSX_LOCK(seq) lockseq_##seq 1248 #else 1249 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1250 #endif 1251 1252 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1253 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1254 #else 1255 #define KMP_CPUINFO_RTM 0 1256 #endif 1257 1258 // Hints that do not require further logic 1259 if (hint & kmp_lock_hint_hle) 1260 return KMP_TSX_LOCK(hle); 1261 if (hint & kmp_lock_hint_rtm) 1262 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; 1263 if (hint & kmp_lock_hint_adaptive) 1264 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1265 1266 // Rule out conflicting hints first by returning the default lock 1267 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1268 return __kmp_user_lock_seq; 1269 if ((hint & omp_lock_hint_speculative) && 1270 (hint & omp_lock_hint_nonspeculative)) 1271 return __kmp_user_lock_seq; 1272 1273 // Do not even consider speculation when it appears to be contended 1274 if (hint & omp_lock_hint_contended) 1275 return lockseq_queuing; 1276 1277 // Uncontended lock without speculation 1278 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1279 return lockseq_tas; 1280 1281 // HLE lock for speculation 1282 if (hint & omp_lock_hint_speculative) 1283 return KMP_TSX_LOCK(hle); 1284 1285 return __kmp_user_lock_seq; 1286 } 1287 1288 #if OMPT_SUPPORT && OMPT_OPTIONAL 1289 #if KMP_USE_DYNAMIC_LOCK 1290 static kmp_mutex_impl_t 1291 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1292 if (user_lock) { 1293 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1294 case 0: 1295 break; 1296 #if KMP_USE_FUTEX 1297 case locktag_futex: 1298 return kmp_mutex_impl_queuing; 1299 #endif 1300 case locktag_tas: 1301 return kmp_mutex_impl_spin; 1302 #if KMP_USE_TSX 1303 case locktag_hle: 1304 return kmp_mutex_impl_speculative; 1305 #endif 1306 default: 1307 return kmp_mutex_impl_none; 1308 } 1309 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1310 } 1311 KMP_ASSERT(ilock); 1312 switch (ilock->type) { 1313 #if KMP_USE_TSX 1314 case locktag_adaptive: 1315 case locktag_rtm: 1316 return kmp_mutex_impl_speculative; 1317 #endif 1318 case locktag_nested_tas: 1319 return kmp_mutex_impl_spin; 1320 #if KMP_USE_FUTEX 1321 case locktag_nested_futex: 1322 #endif 1323 case locktag_ticket: 1324 case locktag_queuing: 1325 case locktag_drdpa: 1326 case locktag_nested_ticket: 1327 case locktag_nested_queuing: 1328 case locktag_nested_drdpa: 1329 return kmp_mutex_impl_queuing; 1330 default: 1331 return kmp_mutex_impl_none; 
1332 } 1333 } 1334 #else 1335 // For locks without dynamic binding 1336 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1337 switch (__kmp_user_lock_kind) { 1338 case lk_tas: 1339 return kmp_mutex_impl_spin; 1340 #if KMP_USE_FUTEX 1341 case lk_futex: 1342 #endif 1343 case lk_ticket: 1344 case lk_queuing: 1345 case lk_drdpa: 1346 return kmp_mutex_impl_queuing; 1347 #if KMP_USE_TSX 1348 case lk_hle: 1349 case lk_rtm: 1350 case lk_adaptive: 1351 return kmp_mutex_impl_speculative; 1352 #endif 1353 default: 1354 return kmp_mutex_impl_none; 1355 } 1356 } 1357 #endif // KMP_USE_DYNAMIC_LOCK 1358 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1359 1360 /*! 1361 @ingroup WORK_SHARING 1362 @param loc source location information. 1363 @param global_tid global thread number. 1364 @param crit identity of the critical section. This could be a pointer to a lock 1365 associated with the critical section, or some other suitably unique value. 1366 @param hint the lock hint. 1367 1368 Enter code protected by a `critical` construct with a hint. The hint value is 1369 used to suggest a lock implementation. This function blocks until the executing 1370 thread can enter the critical section unless the hint suggests use of 1371 speculative execution and the hardware supports it. 1372 */ 1373 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1374 kmp_critical_name *crit, uint32_t hint) { 1375 KMP_COUNT_BLOCK(OMP_CRITICAL); 1376 kmp_user_lock_p lck; 1377 #if OMPT_SUPPORT && OMPT_OPTIONAL 1378 ompt_state_t prev_state = ompt_state_undefined; 1379 ompt_thread_info_t ti; 1380 // This is the case, if called from __kmpc_critical: 1381 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1382 if (!codeptr) 1383 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1384 #endif 1385 1386 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1387 __kmp_assert_valid_gtid(global_tid); 1388 1389 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1390 // Check if it is initialized. 1391 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1392 if (*lk == 0) { 1393 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1394 if (KMP_IS_D_LOCK(lckseq)) { 1395 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1396 KMP_GET_D_TAG(lckseq)); 1397 } else { 1398 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1399 } 1400 } 1401 // Branch for accessing the actual lock object and set operation. This 1402 // branching is inevitable since this lock initialization does not follow the 1403 // normal dispatch path (lock table is not used). 
1404 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1405 lck = (kmp_user_lock_p)lk; 1406 if (__kmp_env_consistency_check) { 1407 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1408 __kmp_map_hint_to_lock(hint)); 1409 } 1410 #if USE_ITT_BUILD 1411 __kmp_itt_critical_acquiring(lck); 1412 #endif 1413 #if OMPT_SUPPORT && OMPT_OPTIONAL 1414 if (ompt_enabled.enabled) { 1415 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1416 /* OMPT state update */ 1417 prev_state = ti.state; 1418 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1419 ti.state = ompt_state_wait_critical; 1420 1421 /* OMPT event callback */ 1422 if (ompt_enabled.ompt_callback_mutex_acquire) { 1423 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1424 ompt_mutex_critical, (unsigned int)hint, 1425 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1426 codeptr); 1427 } 1428 } 1429 #endif 1430 #if KMP_USE_INLINED_TAS 1431 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1432 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1433 } else 1434 #elif KMP_USE_INLINED_FUTEX 1435 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1436 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1437 } else 1438 #endif 1439 { 1440 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1441 } 1442 } else { 1443 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1444 lck = ilk->lock; 1445 if (__kmp_env_consistency_check) { 1446 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1447 __kmp_map_hint_to_lock(hint)); 1448 } 1449 #if USE_ITT_BUILD 1450 __kmp_itt_critical_acquiring(lck); 1451 #endif 1452 #if OMPT_SUPPORT && OMPT_OPTIONAL 1453 if (ompt_enabled.enabled) { 1454 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1455 /* OMPT state update */ 1456 prev_state = ti.state; 1457 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1458 ti.state = ompt_state_wait_critical; 1459 1460 /* OMPT event callback */ 1461 if (ompt_enabled.ompt_callback_mutex_acquire) { 1462 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1463 ompt_mutex_critical, (unsigned int)hint, 1464 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1465 codeptr); 1466 } 1467 } 1468 #endif 1469 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1470 } 1471 KMP_POP_PARTITIONED_TIMER(); 1472 1473 #if USE_ITT_BUILD 1474 __kmp_itt_critical_acquired(lck); 1475 #endif /* USE_ITT_BUILD */ 1476 #if OMPT_SUPPORT && OMPT_OPTIONAL 1477 if (ompt_enabled.enabled) { 1478 /* OMPT state update */ 1479 ti.state = prev_state; 1480 ti.wait_id = 0; 1481 1482 /* OMPT event callback */ 1483 if (ompt_enabled.ompt_callback_mutex_acquired) { 1484 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1485 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1486 } 1487 } 1488 #endif 1489 1490 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1491 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1492 } // __kmpc_critical_with_hint 1493 1494 #endif // KMP_USE_DYNAMIC_LOCK 1495 1496 /*! 1497 @ingroup WORK_SHARING 1498 @param loc source location information. 1499 @param global_tid global thread number . 1500 @param crit identity of the critical section. This could be a pointer to a lock 1501 associated with the critical section, or some other suitably unique value. 1502 1503 Leave a critical section, releasing any lock that was held during its execution. 
1504 */ 1505 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1506 kmp_critical_name *crit) { 1507 kmp_user_lock_p lck; 1508 1509 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1510 1511 #if KMP_USE_DYNAMIC_LOCK 1512 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1513 lck = (kmp_user_lock_p)crit; 1514 KMP_ASSERT(lck != NULL); 1515 if (__kmp_env_consistency_check) { 1516 __kmp_pop_sync(global_tid, ct_critical, loc); 1517 } 1518 #if USE_ITT_BUILD 1519 __kmp_itt_critical_releasing(lck); 1520 #endif 1521 #if KMP_USE_INLINED_TAS 1522 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1523 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1524 } else 1525 #elif KMP_USE_INLINED_FUTEX 1526 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1527 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1528 } else 1529 #endif 1530 { 1531 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1532 } 1533 } else { 1534 kmp_indirect_lock_t *ilk = 1535 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1536 KMP_ASSERT(ilk != NULL); 1537 lck = ilk->lock; 1538 if (__kmp_env_consistency_check) { 1539 __kmp_pop_sync(global_tid, ct_critical, loc); 1540 } 1541 #if USE_ITT_BUILD 1542 __kmp_itt_critical_releasing(lck); 1543 #endif 1544 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1545 } 1546 1547 #else // KMP_USE_DYNAMIC_LOCK 1548 1549 if ((__kmp_user_lock_kind == lk_tas) && 1550 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1551 lck = (kmp_user_lock_p)crit; 1552 } 1553 #if KMP_USE_FUTEX 1554 else if ((__kmp_user_lock_kind == lk_futex) && 1555 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1556 lck = (kmp_user_lock_p)crit; 1557 } 1558 #endif 1559 else { // ticket, queuing or drdpa 1560 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1561 } 1562 1563 KMP_ASSERT(lck != NULL); 1564 1565 if (__kmp_env_consistency_check) 1566 __kmp_pop_sync(global_tid, ct_critical, loc); 1567 1568 #if USE_ITT_BUILD 1569 __kmp_itt_critical_releasing(lck); 1570 #endif /* USE_ITT_BUILD */ 1571 // Value of 'crit' should be good for using as a critical_id of the critical 1572 // section directive. 1573 __kmp_release_user_lock_with_checks(lck, global_tid); 1574 1575 #endif // KMP_USE_DYNAMIC_LOCK 1576 1577 #if OMPT_SUPPORT && OMPT_OPTIONAL 1578 /* OMPT release event triggers after lock is released; place here to trigger 1579 * for all #if branches */ 1580 OMPT_STORE_RETURN_ADDRESS(global_tid); 1581 if (ompt_enabled.ompt_callback_mutex_released) { 1582 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1583 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1584 OMPT_LOAD_RETURN_ADDRESS(0)); 1585 } 1586 #endif 1587 1588 KMP_POP_PARTITIONED_TIMER(); 1589 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1590 } 1591 1592 /*! 1593 @ingroup SYNCHRONIZATION 1594 @param loc source location information 1595 @param global_tid thread id. 1596 @return one if the thread should execute the master block, zero otherwise 1597 1598 Start execution of a combined barrier and master. The barrier is executed inside 1599 this function. 
1600 */ 1601 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1602 int status; 1603 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1604 __kmp_assert_valid_gtid(global_tid); 1605 1606 if (!TCR_4(__kmp_init_parallel)) 1607 __kmp_parallel_initialize(); 1608 1609 __kmp_resume_if_soft_paused(); 1610 1611 if (__kmp_env_consistency_check) 1612 __kmp_check_barrier(global_tid, ct_barrier, loc); 1613 1614 #if OMPT_SUPPORT 1615 ompt_frame_t *ompt_frame; 1616 if (ompt_enabled.enabled) { 1617 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1618 if (ompt_frame->enter_frame.ptr == NULL) 1619 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1620 OMPT_STORE_RETURN_ADDRESS(global_tid); 1621 } 1622 #endif 1623 #if USE_ITT_NOTIFY 1624 __kmp_threads[global_tid]->th.th_ident = loc; 1625 #endif 1626 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1627 #if OMPT_SUPPORT && OMPT_OPTIONAL 1628 if (ompt_enabled.enabled) { 1629 ompt_frame->enter_frame = ompt_data_none; 1630 } 1631 #endif 1632 1633 return (status != 0) ? 0 : 1; 1634 } 1635 1636 /*! 1637 @ingroup SYNCHRONIZATION 1638 @param loc source location information 1639 @param global_tid thread id. 1640 1641 Complete the execution of a combined barrier and master. This function should 1642 only be called at the completion of the <tt>master</tt> code. Other threads will 1643 still be waiting at the barrier and this call releases them. 1644 */ 1645 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1646 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1647 __kmp_assert_valid_gtid(global_tid); 1648 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1649 } 1650 1651 /*! 1652 @ingroup SYNCHRONIZATION 1653 @param loc source location information 1654 @param global_tid thread id. 1655 @return one if the thread should execute the master block, zero otherwise 1656 1657 Start execution of a combined barrier and master(nowait) construct. 1658 The barrier is executed inside this function. 1659 There is no equivalent "end" function, since the 1660 */ 1661 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1662 kmp_int32 ret; 1663 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1664 __kmp_assert_valid_gtid(global_tid); 1665 1666 if (!TCR_4(__kmp_init_parallel)) 1667 __kmp_parallel_initialize(); 1668 1669 __kmp_resume_if_soft_paused(); 1670 1671 if (__kmp_env_consistency_check) { 1672 if (loc == 0) { 1673 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 
1674 } 1675 __kmp_check_barrier(global_tid, ct_barrier, loc); 1676 } 1677 1678 #if OMPT_SUPPORT 1679 ompt_frame_t *ompt_frame; 1680 if (ompt_enabled.enabled) { 1681 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1682 if (ompt_frame->enter_frame.ptr == NULL) 1683 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1684 OMPT_STORE_RETURN_ADDRESS(global_tid); 1685 } 1686 #endif 1687 #if USE_ITT_NOTIFY 1688 __kmp_threads[global_tid]->th.th_ident = loc; 1689 #endif 1690 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1691 #if OMPT_SUPPORT && OMPT_OPTIONAL 1692 if (ompt_enabled.enabled) { 1693 ompt_frame->enter_frame = ompt_data_none; 1694 } 1695 #endif 1696 1697 ret = __kmpc_master(loc, global_tid); 1698 1699 if (__kmp_env_consistency_check) { 1700 /* there's no __kmpc_end_master called; so the (stats) */ 1701 /* actions of __kmpc_end_master are done here */ 1702 if (ret) { 1703 /* only one thread should do the pop since only */ 1704 /* one did the push (see __kmpc_master()) */ 1705 __kmp_pop_sync(global_tid, ct_master, loc); 1706 } 1707 } 1708 1709 return (ret); 1710 } 1711 1712 /* The BARRIER for a SINGLE process section is always explicit */ 1713 /*! 1714 @ingroup WORK_SHARING 1715 @param loc source location information 1716 @param global_tid global thread number 1717 @return One if this thread should execute the single construct, zero otherwise. 1718 1719 Test whether to execute a <tt>single</tt> construct. 1720 There are no implicit barriers in the two "single" calls, rather the compiler 1721 should introduce an explicit barrier if it is required. 1722 */ 1723 1724 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1725 __kmp_assert_valid_gtid(global_tid); 1726 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1727 1728 if (rc) { 1729 // We are going to execute the single statement, so we should count it. 1730 KMP_COUNT_BLOCK(OMP_SINGLE); 1731 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1732 } 1733 1734 #if OMPT_SUPPORT && OMPT_OPTIONAL 1735 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1736 kmp_team_t *team = this_thr->th.th_team; 1737 int tid = __kmp_tid_from_gtid(global_tid); 1738 1739 if (ompt_enabled.enabled) { 1740 if (rc) { 1741 if (ompt_enabled.ompt_callback_work) { 1742 ompt_callbacks.ompt_callback(ompt_callback_work)( 1743 ompt_work_single_executor, ompt_scope_begin, 1744 &(team->t.ompt_team_info.parallel_data), 1745 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1746 1, OMPT_GET_RETURN_ADDRESS(0)); 1747 } 1748 } else { 1749 if (ompt_enabled.ompt_callback_work) { 1750 ompt_callbacks.ompt_callback(ompt_callback_work)( 1751 ompt_work_single_other, ompt_scope_begin, 1752 &(team->t.ompt_team_info.parallel_data), 1753 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1754 1, OMPT_GET_RETURN_ADDRESS(0)); 1755 ompt_callbacks.ompt_callback(ompt_callback_work)( 1756 ompt_work_single_other, ompt_scope_end, 1757 &(team->t.ompt_team_info.parallel_data), 1758 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1759 1, OMPT_GET_RETURN_ADDRESS(0)); 1760 } 1761 } 1762 } 1763 #endif 1764 1765 return rc; 1766 } 1767 1768 /*! 1769 @ingroup WORK_SHARING 1770 @param loc source location information 1771 @param global_tid global thread number 1772 1773 Mark the end of a <tt>single</tt> construct. This function should 1774 only be called by the thread that executed the block of code protected 1775 by the `single` construct. 
1776 */ 1777 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1778 __kmp_assert_valid_gtid(global_tid); 1779 __kmp_exit_single(global_tid); 1780 KMP_POP_PARTITIONED_TIMER(); 1781 1782 #if OMPT_SUPPORT && OMPT_OPTIONAL 1783 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1784 kmp_team_t *team = this_thr->th.th_team; 1785 int tid = __kmp_tid_from_gtid(global_tid); 1786 1787 if (ompt_enabled.ompt_callback_work) { 1788 ompt_callbacks.ompt_callback(ompt_callback_work)( 1789 ompt_work_single_executor, ompt_scope_end, 1790 &(team->t.ompt_team_info.parallel_data), 1791 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1792 OMPT_GET_RETURN_ADDRESS(0)); 1793 } 1794 #endif 1795 } 1796 1797 /*! 1798 @ingroup WORK_SHARING 1799 @param loc Source location 1800 @param global_tid Global thread id 1801 1802 Mark the end of a statically scheduled loop. 1803 */ 1804 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1805 KMP_POP_PARTITIONED_TIMER(); 1806 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1807 1808 #if OMPT_SUPPORT && OMPT_OPTIONAL 1809 if (ompt_enabled.ompt_callback_work) { 1810 ompt_work_t ompt_work_type = ompt_work_loop; 1811 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1812 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1813 // Determine workshare type 1814 if (loc != NULL) { 1815 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1816 ompt_work_type = ompt_work_loop; 1817 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1818 ompt_work_type = ompt_work_sections; 1819 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1820 ompt_work_type = ompt_work_distribute; 1821 } else { 1822 // use default set above. 1823 // a warning about this case is provided in __kmpc_for_static_init 1824 } 1825 KMP_DEBUG_ASSERT(ompt_work_type); 1826 } 1827 ompt_callbacks.ompt_callback(ompt_callback_work)( 1828 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1829 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1830 } 1831 #endif 1832 if (__kmp_env_consistency_check) 1833 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1834 } 1835 1836 // User routines which take C-style arguments (call by value) 1837 // different from the Fortran equivalent routines 1838 1839 void ompc_set_num_threads(int arg) { 1840 // !!!!! TODO: check the per-task binding 1841 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1842 } 1843 1844 void ompc_set_dynamic(int flag) { 1845 kmp_info_t *thread; 1846 1847 /* For the thread-private implementation of the internal controls */ 1848 thread = __kmp_entry_thread(); 1849 1850 __kmp_save_internal_controls(thread); 1851 1852 set__dynamic(thread, flag ? TRUE : FALSE); 1853 } 1854 1855 void ompc_set_nested(int flag) { 1856 kmp_info_t *thread; 1857 1858 /* For the thread-private internal controls implementation */ 1859 thread = __kmp_entry_thread(); 1860 1861 __kmp_save_internal_controls(thread); 1862 1863 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1864 } 1865 1866 void ompc_set_max_active_levels(int max_active_levels) { 1867 /* TO DO */ 1868 /* we want per-task implementation of this internal control */ 1869 1870 /* For the per-thread internal controls implementation */ 1871 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1872 } 1873 1874 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1875 // !!!!! 
TODO: check the per-task binding 1876 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1877 } 1878 1879 int ompc_get_ancestor_thread_num(int level) { 1880 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1881 } 1882 1883 int ompc_get_team_size(int level) { 1884 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1885 } 1886 1887 /* OpenMP 5.0 Affinity Format API */ 1888 1889 void ompc_set_affinity_format(char const *format) { 1890 if (!__kmp_init_serial) { 1891 __kmp_serial_initialize(); 1892 } 1893 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1894 format, KMP_STRLEN(format) + 1); 1895 } 1896 1897 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1898 size_t format_size; 1899 if (!__kmp_init_serial) { 1900 __kmp_serial_initialize(); 1901 } 1902 format_size = KMP_STRLEN(__kmp_affinity_format); 1903 if (buffer && size) { 1904 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1905 format_size + 1); 1906 } 1907 return format_size; 1908 } 1909 1910 void ompc_display_affinity(char const *format) { 1911 int gtid; 1912 if (!TCR_4(__kmp_init_middle)) { 1913 __kmp_middle_initialize(); 1914 } 1915 gtid = __kmp_get_gtid(); 1916 __kmp_aux_display_affinity(gtid, format); 1917 } 1918 1919 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1920 char const *format) { 1921 int gtid; 1922 size_t num_required; 1923 kmp_str_buf_t capture_buf; 1924 if (!TCR_4(__kmp_init_middle)) { 1925 __kmp_middle_initialize(); 1926 } 1927 gtid = __kmp_get_gtid(); 1928 __kmp_str_buf_init(&capture_buf); 1929 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1930 if (buffer && buf_size) { 1931 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1932 capture_buf.used + 1); 1933 } 1934 __kmp_str_buf_free(&capture_buf); 1935 return num_required; 1936 } 1937 1938 void kmpc_set_stacksize(int arg) { 1939 // __kmp_aux_set_stacksize initializes the library if needed 1940 __kmp_aux_set_stacksize(arg); 1941 } 1942 1943 void kmpc_set_stacksize_s(size_t arg) { 1944 // __kmp_aux_set_stacksize initializes the library if needed 1945 __kmp_aux_set_stacksize(arg); 1946 } 1947 1948 void kmpc_set_blocktime(int arg) { 1949 int gtid, tid; 1950 kmp_info_t *thread; 1951 1952 gtid = __kmp_entry_gtid(); 1953 tid = __kmp_tid_from_gtid(gtid); 1954 thread = __kmp_thread_from_gtid(gtid); 1955 1956 __kmp_aux_set_blocktime(arg, thread, tid); 1957 } 1958 1959 void kmpc_set_library(int arg) { 1960 // __kmp_user_set_library initializes the library if needed 1961 __kmp_user_set_library((enum library_type)arg); 1962 } 1963 1964 void kmpc_set_defaults(char const *str) { 1965 // __kmp_aux_set_defaults initializes the library if needed 1966 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1967 } 1968 1969 void kmpc_set_disp_num_buffers(int arg) { 1970 // ignore after initialization because some teams have already 1971 // allocated dispatch buffers 1972 if (__kmp_init_serial == 0 && arg > 0) 1973 __kmp_dispatch_num_buffers = arg; 1974 } 1975 1976 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1977 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1978 return -1; 1979 #else 1980 if (!TCR_4(__kmp_init_middle)) { 1981 __kmp_middle_initialize(); 1982 } 1983 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1984 #endif 1985 } 1986 1987 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 1988 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1989 return -1; 1990 #else 1991 if (!TCR_4(__kmp_init_middle)) { 1992 __kmp_middle_initialize(); 
1993 } 1994 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 1995 #endif 1996 } 1997 1998 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 1999 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2000 return -1; 2001 #else 2002 if (!TCR_4(__kmp_init_middle)) { 2003 __kmp_middle_initialize(); 2004 } 2005 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2006 #endif 2007 } 2008 2009 /* -------------------------------------------------------------------------- */ 2010 /*! 2011 @ingroup THREADPRIVATE 2012 @param loc source location information 2013 @param gtid global thread number 2014 @param cpy_size size of the cpy_data buffer 2015 @param cpy_data pointer to data to be copied 2016 @param cpy_func helper function to call for copying data 2017 @param didit flag variable: 1=single thread; 0=not single thread 2018 2019 __kmpc_copyprivate implements the interface for the private data broadcast 2020 needed for the copyprivate clause associated with a single region in an 2021 OpenMP<sup>*</sup> program (both C and Fortran). 2022 All threads participating in the parallel region call this routine. 2023 One of the threads (called the single thread) should have the <tt>didit</tt> 2024 variable set to 1 and all other threads should have that variable set to 0. 2025 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2026 2027 The OpenMP specification forbids the use of nowait on the single region when a 2028 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2029 barrier internally to avoid race conditions, so the code generation for the 2030 single region should avoid generating a barrier after the call to @ref 2031 __kmpc_copyprivate. 2032 2033 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2034 The <tt>loc</tt> parameter is a pointer to source location information. 2035 2036 Internal implementation: The single thread will first copy its descriptor 2037 address (cpy_data) to a team-private location, then the other threads will each 2038 call the function pointed to by the parameter cpy_func, which carries out the 2039 copy by copying the data using the cpy_data buffer. 2040 2041 The cpy_func routine used for the copy and the contents of the data area defined 2042 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2043 to be done. For instance, the cpy_data buffer can hold the actual data to be 2044 copied or it may hold a list of pointers to the data. The cpy_func routine must 2045 interpret the cpy_data buffer appropriately. 2046 2047 The interface to cpy_func is as follows: 2048 @code 2049 void cpy_func( void *destination, void *source ) 2050 @endcode 2051 where void *destination is the cpy_data pointer for the thread being copied to 2052 and void *source is the cpy_data pointer for the thread being copied from. 
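For illustration only (not part of the runtime), a minimal <tt>cpy_func</tt> for
the case where <tt>cpy_data</tt> points directly at a single scalar to be
broadcast might look like:
@code
void copy_one_int( void *destination, void *source )
{
  // overwrite this thread's private copy with the single thread's value
  *(int *)destination = *(int *)source;
}
@endcode
When several items are subject to copyprivate, <tt>cpy_data</tt> would more
typically point to a compiler-built structure of pointers, and the routine
would copy each pointed-to item in turn.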
2053 */ 2054 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2055 void *cpy_data, void (*cpy_func)(void *, void *), 2056 kmp_int32 didit) { 2057 void **data_ptr; 2058 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2059 __kmp_assert_valid_gtid(gtid); 2060 2061 KMP_MB(); 2062 2063 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2064 2065 if (__kmp_env_consistency_check) { 2066 if (loc == 0) { 2067 KMP_WARNING(ConstructIdentInvalid); 2068 } 2069 } 2070 2071 // ToDo: Optimize the following two barriers into some kind of split barrier 2072 2073 if (didit) 2074 *data_ptr = cpy_data; 2075 2076 #if OMPT_SUPPORT 2077 ompt_frame_t *ompt_frame; 2078 if (ompt_enabled.enabled) { 2079 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2080 if (ompt_frame->enter_frame.ptr == NULL) 2081 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2082 OMPT_STORE_RETURN_ADDRESS(gtid); 2083 } 2084 #endif 2085 /* This barrier is not a barrier region boundary */ 2086 #if USE_ITT_NOTIFY 2087 __kmp_threads[gtid]->th.th_ident = loc; 2088 #endif 2089 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2090 2091 if (!didit) 2092 (*cpy_func)(cpy_data, *data_ptr); 2093 2094 // Consider next barrier a user-visible barrier for barrier region boundaries 2095 // Nesting checks are already handled by the single construct checks 2096 2097 #if OMPT_SUPPORT 2098 if (ompt_enabled.enabled) { 2099 OMPT_STORE_RETURN_ADDRESS(gtid); 2100 } 2101 #endif 2102 #if USE_ITT_NOTIFY 2103 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2104 // tasks can overwrite the location) 2105 #endif 2106 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2107 #if OMPT_SUPPORT && OMPT_OPTIONAL 2108 if (ompt_enabled.enabled) { 2109 ompt_frame->enter_frame = ompt_data_none; 2110 } 2111 #endif 2112 } 2113 2114 /* -------------------------------------------------------------------------- */ 2115 2116 #define INIT_LOCK __kmp_init_user_lock_with_checks 2117 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2118 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2119 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2120 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2121 #define ACQUIRE_NESTED_LOCK_TIMED \ 2122 __kmp_acquire_nested_user_lock_with_checks_timed 2123 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2124 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2125 #define TEST_LOCK __kmp_test_user_lock_with_checks 2126 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2127 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2128 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2129 2130 // TODO: Make check abort messages use location info & pass it into 2131 // with_checks routines 2132 2133 #if KMP_USE_DYNAMIC_LOCK 2134 2135 // internal lock initializer 2136 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2137 kmp_dyna_lockseq_t seq) { 2138 if (KMP_IS_D_LOCK(seq)) { 2139 KMP_INIT_D_LOCK(lock, seq); 2140 #if USE_ITT_BUILD 2141 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2142 #endif 2143 } else { 2144 KMP_INIT_I_LOCK(lock, seq); 2145 #if USE_ITT_BUILD 2146 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2147 __kmp_itt_lock_creating(ilk->lock, loc); 2148 #endif 2149 } 2150 } 2151 2152 // internal nest lock initializer 2153 static __forceinline void 2154 
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2155 kmp_dyna_lockseq_t seq) { 2156 #if KMP_USE_TSX 2157 // Don't have nested lock implementation for speculative locks 2158 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2159 seq = __kmp_user_lock_seq; 2160 #endif 2161 switch (seq) { 2162 case lockseq_tas: 2163 seq = lockseq_nested_tas; 2164 break; 2165 #if KMP_USE_FUTEX 2166 case lockseq_futex: 2167 seq = lockseq_nested_futex; 2168 break; 2169 #endif 2170 case lockseq_ticket: 2171 seq = lockseq_nested_ticket; 2172 break; 2173 case lockseq_queuing: 2174 seq = lockseq_nested_queuing; 2175 break; 2176 case lockseq_drdpa: 2177 seq = lockseq_nested_drdpa; 2178 break; 2179 default: 2180 seq = lockseq_nested_queuing; 2181 } 2182 KMP_INIT_I_LOCK(lock, seq); 2183 #if USE_ITT_BUILD 2184 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2185 __kmp_itt_lock_creating(ilk->lock, loc); 2186 #endif 2187 } 2188 2189 /* initialize the lock with a hint */ 2190 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2191 uintptr_t hint) { 2192 KMP_DEBUG_ASSERT(__kmp_init_serial); 2193 if (__kmp_env_consistency_check && user_lock == NULL) { 2194 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2195 } 2196 2197 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2198 2199 #if OMPT_SUPPORT && OMPT_OPTIONAL 2200 // This is the case, if called from omp_init_lock_with_hint: 2201 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2202 if (!codeptr) 2203 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2204 if (ompt_enabled.ompt_callback_lock_init) { 2205 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2206 ompt_mutex_lock, (omp_lock_hint_t)hint, 2207 __ompt_get_mutex_impl_type(user_lock), 2208 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2209 } 2210 #endif 2211 } 2212 2213 /* initialize the lock with a hint */ 2214 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2215 void **user_lock, uintptr_t hint) { 2216 KMP_DEBUG_ASSERT(__kmp_init_serial); 2217 if (__kmp_env_consistency_check && user_lock == NULL) { 2218 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2219 } 2220 2221 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2222 2223 #if OMPT_SUPPORT && OMPT_OPTIONAL 2224 // This is the case, if called from omp_init_lock_with_hint: 2225 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2226 if (!codeptr) 2227 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2228 if (ompt_enabled.ompt_callback_lock_init) { 2229 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2230 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2231 __ompt_get_mutex_impl_type(user_lock), 2232 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2233 } 2234 #endif 2235 } 2236 2237 #endif // KMP_USE_DYNAMIC_LOCK 2238 2239 /* initialize the lock */ 2240 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2241 #if KMP_USE_DYNAMIC_LOCK 2242 2243 KMP_DEBUG_ASSERT(__kmp_init_serial); 2244 if (__kmp_env_consistency_check && user_lock == NULL) { 2245 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2246 } 2247 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2248 2249 #if OMPT_SUPPORT && OMPT_OPTIONAL 2250 // This is the case, if called from omp_init_lock_with_hint: 2251 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2252 if (!codeptr) 2253 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2254 if (ompt_enabled.ompt_callback_lock_init) { 2255 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2256 
ompt_mutex_lock, omp_lock_hint_none, 2257 __ompt_get_mutex_impl_type(user_lock), 2258 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2259 } 2260 #endif 2261 2262 #else // KMP_USE_DYNAMIC_LOCK 2263 2264 static char const *const func = "omp_init_lock"; 2265 kmp_user_lock_p lck; 2266 KMP_DEBUG_ASSERT(__kmp_init_serial); 2267 2268 if (__kmp_env_consistency_check) { 2269 if (user_lock == NULL) { 2270 KMP_FATAL(LockIsUninitialized, func); 2271 } 2272 } 2273 2274 KMP_CHECK_USER_LOCK_INIT(); 2275 2276 if ((__kmp_user_lock_kind == lk_tas) && 2277 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2278 lck = (kmp_user_lock_p)user_lock; 2279 } 2280 #if KMP_USE_FUTEX 2281 else if ((__kmp_user_lock_kind == lk_futex) && 2282 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2283 lck = (kmp_user_lock_p)user_lock; 2284 } 2285 #endif 2286 else { 2287 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2288 } 2289 INIT_LOCK(lck); 2290 __kmp_set_user_lock_location(lck, loc); 2291 2292 #if OMPT_SUPPORT && OMPT_OPTIONAL 2293 // This is the case, if called from omp_init_lock_with_hint: 2294 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2295 if (!codeptr) 2296 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2297 if (ompt_enabled.ompt_callback_lock_init) { 2298 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2299 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2300 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2301 } 2302 #endif 2303 2304 #if USE_ITT_BUILD 2305 __kmp_itt_lock_creating(lck); 2306 #endif /* USE_ITT_BUILD */ 2307 2308 #endif // KMP_USE_DYNAMIC_LOCK 2309 } // __kmpc_init_lock 2310 2311 /* initialize the lock */ 2312 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2313 #if KMP_USE_DYNAMIC_LOCK 2314 2315 KMP_DEBUG_ASSERT(__kmp_init_serial); 2316 if (__kmp_env_consistency_check && user_lock == NULL) { 2317 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2318 } 2319 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2320 2321 #if OMPT_SUPPORT && OMPT_OPTIONAL 2322 // This is the case, if called from omp_init_lock_with_hint: 2323 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2324 if (!codeptr) 2325 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2326 if (ompt_enabled.ompt_callback_lock_init) { 2327 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2328 ompt_mutex_nest_lock, omp_lock_hint_none, 2329 __ompt_get_mutex_impl_type(user_lock), 2330 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2331 } 2332 #endif 2333 2334 #else // KMP_USE_DYNAMIC_LOCK 2335 2336 static char const *const func = "omp_init_nest_lock"; 2337 kmp_user_lock_p lck; 2338 KMP_DEBUG_ASSERT(__kmp_init_serial); 2339 2340 if (__kmp_env_consistency_check) { 2341 if (user_lock == NULL) { 2342 KMP_FATAL(LockIsUninitialized, func); 2343 } 2344 } 2345 2346 KMP_CHECK_USER_LOCK_INIT(); 2347 2348 if ((__kmp_user_lock_kind == lk_tas) && 2349 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2350 OMP_NEST_LOCK_T_SIZE)) { 2351 lck = (kmp_user_lock_p)user_lock; 2352 } 2353 #if KMP_USE_FUTEX 2354 else if ((__kmp_user_lock_kind == lk_futex) && 2355 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2356 OMP_NEST_LOCK_T_SIZE)) { 2357 lck = (kmp_user_lock_p)user_lock; 2358 } 2359 #endif 2360 else { 2361 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2362 } 2363 2364 INIT_NESTED_LOCK(lck); 2365 __kmp_set_user_lock_location(lck, loc); 2366 2367 #if OMPT_SUPPORT && OMPT_OPTIONAL 2368 // This is the case, if called from omp_init_lock_with_hint: 2369 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2370 if (!codeptr) 2371 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2372 if (ompt_enabled.ompt_callback_lock_init) { 2373 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2374 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2375 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2376 } 2377 #endif 2378 2379 #if USE_ITT_BUILD 2380 __kmp_itt_lock_creating(lck); 2381 #endif /* USE_ITT_BUILD */ 2382 2383 #endif // KMP_USE_DYNAMIC_LOCK 2384 } // __kmpc_init_nest_lock 2385 2386 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2387 #if KMP_USE_DYNAMIC_LOCK 2388 2389 #if USE_ITT_BUILD 2390 kmp_user_lock_p lck; 2391 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2392 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2393 } else { 2394 lck = (kmp_user_lock_p)user_lock; 2395 } 2396 __kmp_itt_lock_destroyed(lck); 2397 #endif 2398 #if OMPT_SUPPORT && OMPT_OPTIONAL 2399 // This is the case, if called from omp_init_lock_with_hint: 2400 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2401 if (!codeptr) 2402 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2403 if (ompt_enabled.ompt_callback_lock_destroy) { 2404 kmp_user_lock_p lck; 2405 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2406 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2407 } else { 2408 lck = (kmp_user_lock_p)user_lock; 2409 } 2410 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2411 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2412 } 2413 #endif 2414 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2415 #else 2416 kmp_user_lock_p lck; 2417 2418 if ((__kmp_user_lock_kind == lk_tas) && 2419 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2420 lck = (kmp_user_lock_p)user_lock; 2421 } 2422 #if KMP_USE_FUTEX 2423 else if ((__kmp_user_lock_kind == lk_futex) && 2424 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2425 lck = (kmp_user_lock_p)user_lock; 2426 } 2427 #endif 2428 else { 2429 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2430 } 2431 2432 #if OMPT_SUPPORT && OMPT_OPTIONAL 2433 // This is the case, if called from omp_init_lock_with_hint: 2434 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2435 if (!codeptr) 2436 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2437 if (ompt_enabled.ompt_callback_lock_destroy) { 2438 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2439 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2440 } 2441 #endif 2442 2443 #if USE_ITT_BUILD 2444 __kmp_itt_lock_destroyed(lck); 2445 #endif /* USE_ITT_BUILD */ 2446 DESTROY_LOCK(lck); 2447 2448 if ((__kmp_user_lock_kind == lk_tas) && 2449 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2450 ; 2451 } 2452 #if KMP_USE_FUTEX 2453 else if ((__kmp_user_lock_kind == lk_futex) && 2454 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2455 ; 2456 } 2457 #endif 2458 else { 2459 __kmp_user_lock_free(user_lock, gtid, lck); 2460 } 2461 #endif // KMP_USE_DYNAMIC_LOCK 2462 } // __kmpc_destroy_lock 2463 2464 /* destroy the lock */ 2465 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2466 #if KMP_USE_DYNAMIC_LOCK 2467 2468 #if USE_ITT_BUILD 2469 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2470 __kmp_itt_lock_destroyed(ilk->lock); 2471 #endif 2472 #if OMPT_SUPPORT && OMPT_OPTIONAL 2473 // This is the case, if called from omp_init_lock_with_hint: 2474 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2475 if (!codeptr) 2476 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2477 if (ompt_enabled.ompt_callback_lock_destroy) { 2478 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2479 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2480 } 2481 #endif 2482 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2483 2484 #else // KMP_USE_DYNAMIC_LOCK 2485 2486 kmp_user_lock_p lck; 2487 2488 if ((__kmp_user_lock_kind == lk_tas) && 2489 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2490 OMP_NEST_LOCK_T_SIZE)) { 2491 lck = (kmp_user_lock_p)user_lock; 2492 } 2493 #if KMP_USE_FUTEX 2494 else if ((__kmp_user_lock_kind == lk_futex) && 2495 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2496 OMP_NEST_LOCK_T_SIZE)) { 2497 lck = (kmp_user_lock_p)user_lock; 2498 } 2499 #endif 2500 else { 2501 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2502 } 2503 2504 #if OMPT_SUPPORT && OMPT_OPTIONAL 2505 // This is the case, if called from omp_init_lock_with_hint: 2506 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2507 if (!codeptr) 2508 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2509 if (ompt_enabled.ompt_callback_lock_destroy) { 2510 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2511 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2512 } 2513 #endif 2514 2515 #if USE_ITT_BUILD 2516 __kmp_itt_lock_destroyed(lck); 2517 #endif /* USE_ITT_BUILD */ 2518 2519 DESTROY_NESTED_LOCK(lck); 2520 2521 if ((__kmp_user_lock_kind == lk_tas) && 2522 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2523 OMP_NEST_LOCK_T_SIZE)) { 2524 ; 2525 } 2526 #if KMP_USE_FUTEX 2527 else if ((__kmp_user_lock_kind == lk_futex) && 2528 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2529 OMP_NEST_LOCK_T_SIZE)) { 2530 ; 2531 } 2532 #endif 2533 else { 2534 __kmp_user_lock_free(user_lock, gtid, lck); 2535 } 2536 #endif // KMP_USE_DYNAMIC_LOCK 2537 } // __kmpc_destroy_nest_lock 2538 2539 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2540 KMP_COUNT_BLOCK(OMP_set_lock); 2541 #if KMP_USE_DYNAMIC_LOCK 2542 int tag = KMP_EXTRACT_D_TAG(user_lock); 2543 #if USE_ITT_BUILD 2544 __kmp_itt_lock_acquiring( 2545 (kmp_user_lock_p) 2546 user_lock); // itt function will get to the right lock object. 
2547 #endif 2548 #if OMPT_SUPPORT && OMPT_OPTIONAL 2549 // This is the case, if called from omp_init_lock_with_hint: 2550 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2551 if (!codeptr) 2552 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2553 if (ompt_enabled.ompt_callback_mutex_acquire) { 2554 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2555 ompt_mutex_lock, omp_lock_hint_none, 2556 __ompt_get_mutex_impl_type(user_lock), 2557 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2558 } 2559 #endif 2560 #if KMP_USE_INLINED_TAS 2561 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2562 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2563 } else 2564 #elif KMP_USE_INLINED_FUTEX 2565 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2566 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2567 } else 2568 #endif 2569 { 2570 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2571 } 2572 #if USE_ITT_BUILD 2573 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2574 #endif 2575 #if OMPT_SUPPORT && OMPT_OPTIONAL 2576 if (ompt_enabled.ompt_callback_mutex_acquired) { 2577 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2578 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2579 } 2580 #endif 2581 2582 #else // KMP_USE_DYNAMIC_LOCK 2583 2584 kmp_user_lock_p lck; 2585 2586 if ((__kmp_user_lock_kind == lk_tas) && 2587 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2588 lck = (kmp_user_lock_p)user_lock; 2589 } 2590 #if KMP_USE_FUTEX 2591 else if ((__kmp_user_lock_kind == lk_futex) && 2592 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2593 lck = (kmp_user_lock_p)user_lock; 2594 } 2595 #endif 2596 else { 2597 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2598 } 2599 2600 #if USE_ITT_BUILD 2601 __kmp_itt_lock_acquiring(lck); 2602 #endif /* USE_ITT_BUILD */ 2603 #if OMPT_SUPPORT && OMPT_OPTIONAL 2604 // This is the case, if called from omp_init_lock_with_hint: 2605 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2606 if (!codeptr) 2607 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2608 if (ompt_enabled.ompt_callback_mutex_acquire) { 2609 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2610 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2611 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2612 } 2613 #endif 2614 2615 ACQUIRE_LOCK(lck, gtid); 2616 2617 #if USE_ITT_BUILD 2618 __kmp_itt_lock_acquired(lck); 2619 #endif /* USE_ITT_BUILD */ 2620 2621 #if OMPT_SUPPORT && OMPT_OPTIONAL 2622 if (ompt_enabled.ompt_callback_mutex_acquired) { 2623 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2624 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2625 } 2626 #endif 2627 2628 #endif // KMP_USE_DYNAMIC_LOCK 2629 } 2630 2631 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2632 #if KMP_USE_DYNAMIC_LOCK 2633 2634 #if USE_ITT_BUILD 2635 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2636 #endif 2637 #if OMPT_SUPPORT && OMPT_OPTIONAL 2638 // This is the case, if called from omp_init_lock_with_hint: 2639 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2640 if (!codeptr) 2641 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2642 if (ompt_enabled.enabled) { 2643 if (ompt_enabled.ompt_callback_mutex_acquire) { 2644 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2645 ompt_mutex_nest_lock, omp_lock_hint_none, 2646 __ompt_get_mutex_impl_type(user_lock), 2647 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2648 } 2649 } 2650 #endif 2651 int acquire_status = 2652 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2653 (void) acquire_status; 2654 #if USE_ITT_BUILD 2655 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2656 #endif 2657 2658 #if OMPT_SUPPORT && OMPT_OPTIONAL 2659 if (ompt_enabled.enabled) { 2660 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2661 if (ompt_enabled.ompt_callback_mutex_acquired) { 2662 // lock_first 2663 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2664 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2665 codeptr); 2666 } 2667 } else { 2668 if (ompt_enabled.ompt_callback_nest_lock) { 2669 // lock_next 2670 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2671 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2672 } 2673 } 2674 } 2675 #endif 2676 2677 #else // KMP_USE_DYNAMIC_LOCK 2678 int acquire_status; 2679 kmp_user_lock_p lck; 2680 2681 if ((__kmp_user_lock_kind == lk_tas) && 2682 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2683 OMP_NEST_LOCK_T_SIZE)) { 2684 lck = (kmp_user_lock_p)user_lock; 2685 } 2686 #if KMP_USE_FUTEX 2687 else if ((__kmp_user_lock_kind == lk_futex) && 2688 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2689 OMP_NEST_LOCK_T_SIZE)) { 2690 lck = (kmp_user_lock_p)user_lock; 2691 } 2692 #endif 2693 else { 2694 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2695 } 2696 2697 #if USE_ITT_BUILD 2698 __kmp_itt_lock_acquiring(lck); 2699 #endif /* USE_ITT_BUILD */ 2700 #if OMPT_SUPPORT && OMPT_OPTIONAL 2701 // This is the case, if called from omp_init_lock_with_hint: 2702 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2703 if (!codeptr) 2704 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2705 if (ompt_enabled.enabled) { 2706 if (ompt_enabled.ompt_callback_mutex_acquire) { 2707 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2708 ompt_mutex_nest_lock, omp_lock_hint_none, 2709 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2710 codeptr); 2711 } 2712 } 2713 #endif 2714 2715 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2716 2717 #if USE_ITT_BUILD 2718 __kmp_itt_lock_acquired(lck); 2719 #endif /* USE_ITT_BUILD */ 2720 2721 #if OMPT_SUPPORT && OMPT_OPTIONAL 2722 if (ompt_enabled.enabled) { 2723 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2724 if (ompt_enabled.ompt_callback_mutex_acquired) { 2725 // lock_first 2726 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2727 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2728 } 2729 } else { 2730 if (ompt_enabled.ompt_callback_nest_lock) { 2731 // lock_next 2732 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2733 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2734 } 2735 } 2736 } 2737 #endif 2738 2739 #endif // KMP_USE_DYNAMIC_LOCK 2740 } 2741 2742 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2743 #if KMP_USE_DYNAMIC_LOCK 2744 2745 int tag = KMP_EXTRACT_D_TAG(user_lock); 2746 #if USE_ITT_BUILD 2747 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2748 #endif 2749 #if KMP_USE_INLINED_TAS 2750 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2751 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2752 } else 2753 #elif KMP_USE_INLINED_FUTEX 2754 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2755 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2756 } else 2757 #endif 2758 { 2759 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2760 } 2761 2762 #if OMPT_SUPPORT && OMPT_OPTIONAL 2763 // This is the case, if called from omp_init_lock_with_hint: 2764 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2765 if (!codeptr) 2766 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2767 if (ompt_enabled.ompt_callback_mutex_released) { 2768 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2769 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2770 } 2771 #endif 2772 2773 #else // KMP_USE_DYNAMIC_LOCK 2774 2775 kmp_user_lock_p lck; 2776 2777 /* Can't use serial interval since not block structured */ 2778 /* release the lock */ 2779 2780 if ((__kmp_user_lock_kind == lk_tas) && 2781 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2782 #if KMP_OS_LINUX && \ 2783 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2784 // "fast" path implemented to fix customer performance issue 2785 #if USE_ITT_BUILD 2786 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2787 #endif /* USE_ITT_BUILD */ 2788 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2789 KMP_MB(); 2790 2791 #if OMPT_SUPPORT && OMPT_OPTIONAL 2792 // This is the case, if called from omp_init_lock_with_hint: 2793 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2794 if (!codeptr) 2795 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2796 if (ompt_enabled.ompt_callback_mutex_released) { 2797 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2798 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2799 } 2800 #endif 2801 2802 return; 2803 #else 2804 lck = (kmp_user_lock_p)user_lock; 2805 #endif 2806 } 2807 #if KMP_USE_FUTEX 2808 else if ((__kmp_user_lock_kind == lk_futex) && 2809 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2810 lck = (kmp_user_lock_p)user_lock; 2811 } 2812 #endif 2813 else { 2814 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2815 } 2816 2817 #if USE_ITT_BUILD 2818 __kmp_itt_lock_releasing(lck); 2819 #endif /* USE_ITT_BUILD */ 2820 2821 RELEASE_LOCK(lck, gtid); 2822 2823 #if OMPT_SUPPORT && OMPT_OPTIONAL 2824 // This is the case, if called from omp_init_lock_with_hint: 2825 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2826 if (!codeptr) 2827 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2828 if (ompt_enabled.ompt_callback_mutex_released) { 2829 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2830 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2831 } 2832 #endif 2833 2834 #endif // KMP_USE_DYNAMIC_LOCK 2835 } 2836 2837 /* release the lock */ 2838 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2839 #if KMP_USE_DYNAMIC_LOCK 2840 2841 #if USE_ITT_BUILD 2842 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2843 #endif 2844 int release_status = 2845 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2846 (void) release_status; 2847 2848 #if OMPT_SUPPORT && OMPT_OPTIONAL 2849 // This is the case, if called from omp_init_lock_with_hint: 2850 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2851 if (!codeptr) 2852 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2853 if (ompt_enabled.enabled) { 2854 if (release_status == KMP_LOCK_RELEASED) { 2855 if (ompt_enabled.ompt_callback_mutex_released) { 2856 // release_lock_last 2857 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2858 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2859 codeptr); 2860 } 2861 } else if (ompt_enabled.ompt_callback_nest_lock) { 2862 // release_lock_prev 2863 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2864 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2865 } 2866 } 2867 #endif 2868 2869 #else // KMP_USE_DYNAMIC_LOCK 2870 2871 kmp_user_lock_p lck; 
2872 2873 /* Can't use serial interval since not block structured */ 2874 2875 if ((__kmp_user_lock_kind == lk_tas) && 2876 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2877 OMP_NEST_LOCK_T_SIZE)) { 2878 #if KMP_OS_LINUX && \ 2879 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2880 // "fast" path implemented to fix customer performance issue 2881 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2882 #if USE_ITT_BUILD 2883 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2884 #endif /* USE_ITT_BUILD */ 2885 2886 #if OMPT_SUPPORT && OMPT_OPTIONAL 2887 int release_status = KMP_LOCK_STILL_HELD; 2888 #endif 2889 2890 if (--(tl->lk.depth_locked) == 0) { 2891 TCW_4(tl->lk.poll, 0); 2892 #if OMPT_SUPPORT && OMPT_OPTIONAL 2893 release_status = KMP_LOCK_RELEASED; 2894 #endif 2895 } 2896 KMP_MB(); 2897 2898 #if OMPT_SUPPORT && OMPT_OPTIONAL 2899 // This is the case, if called from omp_init_lock_with_hint: 2900 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2901 if (!codeptr) 2902 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2903 if (ompt_enabled.enabled) { 2904 if (release_status == KMP_LOCK_RELEASED) { 2905 if (ompt_enabled.ompt_callback_mutex_released) { 2906 // release_lock_last 2907 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2908 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2909 } 2910 } else if (ompt_enabled.ompt_callback_nest_lock) { 2911 // release_lock_previous 2912 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2913 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2914 } 2915 } 2916 #endif 2917 2918 return; 2919 #else 2920 lck = (kmp_user_lock_p)user_lock; 2921 #endif 2922 } 2923 #if KMP_USE_FUTEX 2924 else if ((__kmp_user_lock_kind == lk_futex) && 2925 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2926 OMP_NEST_LOCK_T_SIZE)) { 2927 lck = (kmp_user_lock_p)user_lock; 2928 } 2929 #endif 2930 else { 2931 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2932 } 2933 2934 #if USE_ITT_BUILD 2935 __kmp_itt_lock_releasing(lck); 2936 #endif /* USE_ITT_BUILD */ 2937 2938 int release_status; 2939 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2940 #if OMPT_SUPPORT && OMPT_OPTIONAL 2941 // This is the case, if called from omp_init_lock_with_hint: 2942 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2943 if (!codeptr) 2944 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2945 if (ompt_enabled.enabled) { 2946 if (release_status == KMP_LOCK_RELEASED) { 2947 if (ompt_enabled.ompt_callback_mutex_released) { 2948 // release_lock_last 2949 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2950 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2951 } 2952 } else if (ompt_enabled.ompt_callback_nest_lock) { 2953 // release_lock_previous 2954 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2955 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2956 } 2957 } 2958 #endif 2959 2960 #endif // KMP_USE_DYNAMIC_LOCK 2961 } 2962 2963 /* try to acquire the lock */ 2964 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2965 KMP_COUNT_BLOCK(OMP_test_lock); 2966 2967 #if KMP_USE_DYNAMIC_LOCK 2968 int rc; 2969 int tag = KMP_EXTRACT_D_TAG(user_lock); 2970 #if USE_ITT_BUILD 2971 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2972 #endif 2973 #if OMPT_SUPPORT && OMPT_OPTIONAL 2974 // This is the case, if called from omp_init_lock_with_hint: 2975 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2976 if (!codeptr) 2977 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2978 if (ompt_enabled.ompt_callback_mutex_acquire) { 2979 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2980 ompt_mutex_lock, omp_lock_hint_none, 2981 __ompt_get_mutex_impl_type(user_lock), 2982 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2983 } 2984 #endif 2985 #if KMP_USE_INLINED_TAS 2986 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2987 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 2988 } else 2989 #elif KMP_USE_INLINED_FUTEX 2990 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2991 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 2992 } else 2993 #endif 2994 { 2995 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2996 } 2997 if (rc) { 2998 #if USE_ITT_BUILD 2999 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3000 #endif 3001 #if OMPT_SUPPORT && OMPT_OPTIONAL 3002 if (ompt_enabled.ompt_callback_mutex_acquired) { 3003 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3004 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3005 } 3006 #endif 3007 return FTN_TRUE; 3008 } else { 3009 #if USE_ITT_BUILD 3010 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3011 #endif 3012 return FTN_FALSE; 3013 } 3014 3015 #else // KMP_USE_DYNAMIC_LOCK 3016 3017 kmp_user_lock_p lck; 3018 int rc; 3019 3020 if ((__kmp_user_lock_kind == lk_tas) && 3021 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3022 lck = (kmp_user_lock_p)user_lock; 3023 } 3024 #if KMP_USE_FUTEX 3025 else if ((__kmp_user_lock_kind == lk_futex) && 3026 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3027 lck = (kmp_user_lock_p)user_lock; 3028 } 3029 #endif 3030 else { 3031 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3032 } 3033 3034 #if USE_ITT_BUILD 3035 __kmp_itt_lock_acquiring(lck); 3036 #endif /* USE_ITT_BUILD */ 3037 #if OMPT_SUPPORT && OMPT_OPTIONAL 3038 // This is the case, if called from omp_init_lock_with_hint: 3039 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3040 if (!codeptr) 3041 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3042 if (ompt_enabled.ompt_callback_mutex_acquire) { 3043 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3044 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3045 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3046 } 3047 #endif 3048 3049 rc = TEST_LOCK(lck, gtid); 3050 #if USE_ITT_BUILD 3051 if (rc) { 3052 __kmp_itt_lock_acquired(lck); 3053 } else { 3054 __kmp_itt_lock_cancelled(lck); 3055 } 3056 #endif /* USE_ITT_BUILD */ 3057 #if OMPT_SUPPORT && OMPT_OPTIONAL 3058 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3059 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3060 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3061 } 3062 #endif 3063 3064 return (rc ? 
FTN_TRUE : FTN_FALSE); 3065 3066 /* Can't use serial interval since not block structured */ 3067 3068 #endif // KMP_USE_DYNAMIC_LOCK 3069 } 3070 3071 /* try to acquire the lock */ 3072 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3073 #if KMP_USE_DYNAMIC_LOCK 3074 int rc; 3075 #if USE_ITT_BUILD 3076 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3077 #endif 3078 #if OMPT_SUPPORT && OMPT_OPTIONAL 3079 // This is the case, if called from omp_init_lock_with_hint: 3080 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3081 if (!codeptr) 3082 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3083 if (ompt_enabled.ompt_callback_mutex_acquire) { 3084 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3085 ompt_mutex_nest_lock, omp_lock_hint_none, 3086 __ompt_get_mutex_impl_type(user_lock), 3087 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3088 } 3089 #endif 3090 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3091 #if USE_ITT_BUILD 3092 if (rc) { 3093 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3094 } else { 3095 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3096 } 3097 #endif 3098 #if OMPT_SUPPORT && OMPT_OPTIONAL 3099 if (ompt_enabled.enabled && rc) { 3100 if (rc == 1) { 3101 if (ompt_enabled.ompt_callback_mutex_acquired) { 3102 // lock_first 3103 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3104 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3105 codeptr); 3106 } 3107 } else { 3108 if (ompt_enabled.ompt_callback_nest_lock) { 3109 // lock_next 3110 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3111 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3112 } 3113 } 3114 } 3115 #endif 3116 return rc; 3117 3118 #else // KMP_USE_DYNAMIC_LOCK 3119 3120 kmp_user_lock_p lck; 3121 int rc; 3122 3123 if ((__kmp_user_lock_kind == lk_tas) && 3124 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3125 OMP_NEST_LOCK_T_SIZE)) { 3126 lck = (kmp_user_lock_p)user_lock; 3127 } 3128 #if KMP_USE_FUTEX 3129 else if ((__kmp_user_lock_kind == lk_futex) && 3130 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3131 OMP_NEST_LOCK_T_SIZE)) { 3132 lck = (kmp_user_lock_p)user_lock; 3133 } 3134 #endif 3135 else { 3136 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3137 } 3138 3139 #if USE_ITT_BUILD 3140 __kmp_itt_lock_acquiring(lck); 3141 #endif /* USE_ITT_BUILD */ 3142 3143 #if OMPT_SUPPORT && OMPT_OPTIONAL 3144 // This is the case, if called from omp_init_lock_with_hint: 3145 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3146 if (!codeptr) 3147 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3148 if (ompt_enabled.enabled && 3149 ompt_enabled.ompt_callback_mutex_acquire) { 3150 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3151 ompt_mutex_nest_lock, omp_lock_hint_none, 3152 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3153 codeptr); 3154 } 3155 #endif 3156 3157 rc = TEST_NESTED_LOCK(lck, gtid); 3158 #if USE_ITT_BUILD 3159 if (rc) { 3160 __kmp_itt_lock_acquired(lck); 3161 } else { 3162 __kmp_itt_lock_cancelled(lck); 3163 } 3164 #endif /* USE_ITT_BUILD */ 3165 #if OMPT_SUPPORT && OMPT_OPTIONAL 3166 if (ompt_enabled.enabled && rc) { 3167 if (rc == 1) { 3168 if (ompt_enabled.ompt_callback_mutex_acquired) { 3169 // lock_first 3170 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3171 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3172 } 3173 } else { 3174 if (ompt_enabled.ompt_callback_nest_lock) { 3175
// lock_next 3176 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3177 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3178 } 3179 } 3180 } 3181 #endif 3182 return rc; 3183 3184 /* Can't use serial interval since not block structured */ 3185 3186 #endif // KMP_USE_DYNAMIC_LOCK 3187 } 3188 3189 // Interface to fast scalable reduce methods routines 3190 3191 // keep the selected method in a thread local structure for cross-function 3192 // usage: will be used in __kmpc_end_reduce* functions; 3193 // another solution: to re-determine the method one more time in 3194 // __kmpc_end_reduce* functions (new prototype required then) 3195 // AT: which solution is better? 3196 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3197 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3198 3199 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3200 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3201 3202 // description of the packed_reduction_method variable: look at the macros in 3203 // kmp.h 3204 3205 // used in a critical section reduce block 3206 static __forceinline void 3207 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3208 kmp_critical_name *crit) { 3209 3210 // this lock was visible to a customer and to the threading profile tool as a 3211 // serial overhead span (although it's used for an internal purpose only) 3212 // why was it visible in previous implementation? 3213 // should we keep it visible in new reduce block? 3214 kmp_user_lock_p lck; 3215 3216 #if KMP_USE_DYNAMIC_LOCK 3217 3218 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3219 // Check if it is initialized. 3220 if (*lk == 0) { 3221 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3222 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3223 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3224 } else { 3225 __kmp_init_indirect_csptr(crit, loc, global_tid, 3226 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3227 } 3228 } 3229 // Branch for accessing the actual lock object and set operation. This 3230 // branching is inevitable since this lock initialization does not follow the 3231 // normal dispatch path (lock table is not used). 3232 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3233 lck = (kmp_user_lock_p)lk; 3234 KMP_DEBUG_ASSERT(lck != NULL); 3235 if (__kmp_env_consistency_check) { 3236 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3237 } 3238 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3239 } else { 3240 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3241 lck = ilk->lock; 3242 KMP_DEBUG_ASSERT(lck != NULL); 3243 if (__kmp_env_consistency_check) { 3244 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3245 } 3246 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3247 } 3248 3249 #else // KMP_USE_DYNAMIC_LOCK 3250 3251 // We know that the fast reduction code is only emitted by Intel compilers 3252 // with 32 byte critical sections. If there isn't enough space, then we 3253 // have to use a pointer. 
3254 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3255 lck = (kmp_user_lock_p)crit; 3256 } else { 3257 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3258 } 3259 KMP_DEBUG_ASSERT(lck != NULL); 3260 3261 if (__kmp_env_consistency_check) 3262 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3263 3264 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3265 3266 #endif // KMP_USE_DYNAMIC_LOCK 3267 } 3268 3269 // used in a critical section reduce block 3270 static __forceinline void 3271 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3272 kmp_critical_name *crit) { 3273 3274 kmp_user_lock_p lck; 3275 3276 #if KMP_USE_DYNAMIC_LOCK 3277 3278 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3279 lck = (kmp_user_lock_p)crit; 3280 if (__kmp_env_consistency_check) 3281 __kmp_pop_sync(global_tid, ct_critical, loc); 3282 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3283 } else { 3284 kmp_indirect_lock_t *ilk = 3285 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3286 if (__kmp_env_consistency_check) 3287 __kmp_pop_sync(global_tid, ct_critical, loc); 3288 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3289 } 3290 3291 #else // KMP_USE_DYNAMIC_LOCK 3292 3293 // We know that the fast reduction code is only emitted by Intel compilers 3294 // with 32 byte critical sections. If there isn't enough space, then we have 3295 // to use a pointer. 3296 if (__kmp_base_user_lock_size > 32) { 3297 lck = *((kmp_user_lock_p *)crit); 3298 KMP_ASSERT(lck != NULL); 3299 } else { 3300 lck = (kmp_user_lock_p)crit; 3301 } 3302 3303 if (__kmp_env_consistency_check) 3304 __kmp_pop_sync(global_tid, ct_critical, loc); 3305 3306 __kmp_release_user_lock_with_checks(lck, global_tid); 3307 3308 #endif // KMP_USE_DYNAMIC_LOCK 3309 } // __kmp_end_critical_section_reduce_block 3310 3311 static __forceinline int 3312 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3313 int *task_state) { 3314 kmp_team_t *team; 3315 3316 // Check if we are inside the teams construct? 3317 if (th->th.th_teams_microtask) { 3318 *team_p = team = th->th.th_team; 3319 if (team->t.t_level == th->th.th_teams_level) { 3320 // This is reduction at teams construct. 3321 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3322 // Let's swap teams temporarily for the reduction. 3323 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3324 th->th.th_team = team->t.t_parent; 3325 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3326 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3327 *task_state = th->th.th_task_state; 3328 th->th.th_task_state = 0; 3329 3330 return 1; 3331 } 3332 } 3333 return 0; 3334 } 3335 3336 static __forceinline void 3337 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3338 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3339 th->th.th_info.ds.ds_tid = 0; 3340 th->th.th_team = team; 3341 th->th.th_team_nproc = team->t.t_nproc; 3342 th->th.th_task_team = team->t.t_task_team[task_state]; 3343 th->th.th_task_state = task_state; 3344 } 3345 3346 /* 2.a.i. Reduce Block without a terminating barrier */ 3347 /*! 
3348 @ingroup SYNCHRONIZATION 3349 @param loc source location information 3350 @param global_tid global thread number 3351 @param num_vars number of items (variables) to be reduced 3352 @param reduce_size size of data in bytes to be reduced 3353 @param reduce_data pointer to data to be reduced 3354 @param reduce_func callback function providing reduction operation on two 3355 operands and returning result of reduction in lhs_data 3356 @param lck pointer to the unique lock data structure 3357 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3358 threads if atomic reduction needed 3359 3360 The nowait version is used for a reduce clause with the nowait argument. 3361 */ 3362 kmp_int32 3363 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3364 size_t reduce_size, void *reduce_data, 3365 void (*reduce_func)(void *lhs_data, void *rhs_data), 3366 kmp_critical_name *lck) { 3367 3368 KMP_COUNT_BLOCK(REDUCE_nowait); 3369 int retval = 0; 3370 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3371 kmp_info_t *th; 3372 kmp_team_t *team; 3373 int teams_swapped = 0, task_state; 3374 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3375 __kmp_assert_valid_gtid(global_tid); 3376 3377 // why do we need this initialization here at all? 3378 // Reduction clause can not be used as a stand-alone directive. 3379 3380 // do not call __kmp_serial_initialize(), it will be called by 3381 // __kmp_parallel_initialize() if needed 3382 // possible detection of false-positive race by the threadchecker ??? 3383 if (!TCR_4(__kmp_init_parallel)) 3384 __kmp_parallel_initialize(); 3385 3386 __kmp_resume_if_soft_paused(); 3387 3388 // check correctness of reduce block nesting 3389 #if KMP_USE_DYNAMIC_LOCK 3390 if (__kmp_env_consistency_check) 3391 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3392 #else 3393 if (__kmp_env_consistency_check) 3394 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3395 #endif 3396 3397 th = __kmp_thread_from_gtid(global_tid); 3398 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3399 3400 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3401 // the value should be kept in a variable 3402 // the variable should be either a construct-specific or thread-specific 3403 // property, not a team specific property 3404 // (a thread can reach the next reduce block on the next construct, reduce 3405 // method may differ on the next construct) 3406 // an ident_t "loc" parameter could be used as a construct-specific property 3407 // (what if loc == 0?) 
3408 // (if both construct-specific and team-specific variables were shared, 3409 // then unnecessary extra syncs would be needed) 3410 // a thread-specific variable is better regarding two issues above (next 3411 // construct and extra syncs) 3412 // a thread-specific "th_local.reduction_method" variable is used currently 3413 // each thread executes 'determine' and 'set' lines (no need to execute by one 3414 // thread, to avoid unnecessary extra syncs) 3415 3416 packed_reduction_method = __kmp_determine_reduction_method( 3417 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3418 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3419 3420 OMPT_REDUCTION_DECL(th, global_tid); 3421 if (packed_reduction_method == critical_reduce_block) { 3422 3423 OMPT_REDUCTION_BEGIN; 3424 3425 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3426 retval = 1; 3427 3428 } else if (packed_reduction_method == empty_reduce_block) { 3429 3430 OMPT_REDUCTION_BEGIN; 3431 3432 // usage: if team size == 1, no synchronization is required ( Intel 3433 // platforms only ) 3434 retval = 1; 3435 3436 } else if (packed_reduction_method == atomic_reduce_block) { 3437 3438 retval = 2; 3439 3440 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3441 // won't be called by the code gen) 3442 // (it's not quite good, because the checking block has been closed by 3443 // this 'pop', 3444 // but the atomic operation has not been executed yet; it will be executed 3445 // slightly later, literally on the next instruction) 3446 if (__kmp_env_consistency_check) 3447 __kmp_pop_sync(global_tid, ct_reduce, loc); 3448 3449 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3450 tree_reduce_block)) { 3451 3452 // AT: performance issue: a real barrier here 3453 // AT: (if master goes slow, other threads are blocked here waiting for the 3454 // master to come and release them) 3455 // AT: (it's not what a customer might expect when specifying the NOWAIT clause) 3456 // AT: (specifying NOWAIT won't result in improved performance; it'll 3457 // be confusing to a customer) 3458 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3459 // might go faster and be more in line with the sense of NOWAIT 3460 // AT: TO DO: do epcc test and compare times 3461 3462 // this barrier should be invisible to a customer and to the threading profile 3463 // tool (it's neither a terminating barrier nor customer's code, it's 3464 // used for an internal purpose) 3465 #if OMPT_SUPPORT 3466 // JP: can this barrier potentially lead to task scheduling? 3467 // JP: as long as there is a barrier in the implementation, OMPT should and 3468 // will provide the barrier events 3469 // so we set up the necessary frame/return addresses. 3470 ompt_frame_t *ompt_frame; 3471 if (ompt_enabled.enabled) { 3472 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3473 if (ompt_frame->enter_frame.ptr == NULL) 3474 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3475 OMPT_STORE_RETURN_ADDRESS(global_tid); 3476 } 3477 #endif 3478 #if USE_ITT_NOTIFY 3479 __kmp_threads[global_tid]->th.th_ident = loc; 3480 #endif 3481 retval = 3482 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3483 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3484 retval = (retval != 0) ?
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (the code gen does not generate this call in case 2: atomic reduce
    // block)
    // it would actually be better to remove this else-if entirely; after its
    // removal this value would be checked by the 'else' branch and would
    // trigger the assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here
    // OMPT: tree reduction is annotated in the barrier code

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
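
As an illustration only (the exact sequence is produced by the compiler and may
differ), the return value of this family of routines is typically consumed as
sketched below for the nowait form; <tt>red_data</tt>, <tt>reduce_add_func</tt>,
<tt>my_partial_sum</tt> and <tt>lck</tt> are placeholder names, and
__kmpc_atomic_float8_add stands for whichever atomic entry point matches the
variable's type and operation. The blocking pair __kmpc_reduce() /
__kmpc_end_reduce() is driven the same way when the construct has no nowait
clause.

@code
struct red_data_t { double sum; } red_data = {my_partial_sum};
switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(red_data), &red_data,
                             reduce_add_func, &lck)) {
case 1: // combine into the shared variable, then close the construct
  sum += red_data.sum;
  __kmpc_end_reduce_nowait(loc, gtid, &lck);
  break;
case 2: // atomic path: each thread combines its contribution atomically
  __kmpc_atomic_float8_add(loc, gtid, &sum, red_data.sum);
  break;
default: // 0: this thread's contribution has already been combined
  break;
}
@endcode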
3591 */ 3592 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3593 size_t reduce_size, void *reduce_data, 3594 void (*reduce_func)(void *lhs_data, void *rhs_data), 3595 kmp_critical_name *lck) { 3596 KMP_COUNT_BLOCK(REDUCE_wait); 3597 int retval = 0; 3598 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3599 kmp_info_t *th; 3600 kmp_team_t *team; 3601 int teams_swapped = 0, task_state; 3602 3603 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3604 __kmp_assert_valid_gtid(global_tid); 3605 3606 // why do we need this initialization here at all? 3607 // Reduction clause can not be a stand-alone directive. 3608 3609 // do not call __kmp_serial_initialize(), it will be called by 3610 // __kmp_parallel_initialize() if needed 3611 // possible detection of false-positive race by the threadchecker ??? 3612 if (!TCR_4(__kmp_init_parallel)) 3613 __kmp_parallel_initialize(); 3614 3615 __kmp_resume_if_soft_paused(); 3616 3617 // check correctness of reduce block nesting 3618 #if KMP_USE_DYNAMIC_LOCK 3619 if (__kmp_env_consistency_check) 3620 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3621 #else 3622 if (__kmp_env_consistency_check) 3623 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3624 #endif 3625 3626 th = __kmp_thread_from_gtid(global_tid); 3627 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3628 3629 packed_reduction_method = __kmp_determine_reduction_method( 3630 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3631 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3632 3633 OMPT_REDUCTION_DECL(th, global_tid); 3634 3635 if (packed_reduction_method == critical_reduce_block) { 3636 3637 OMPT_REDUCTION_BEGIN; 3638 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3639 retval = 1; 3640 3641 } else if (packed_reduction_method == empty_reduce_block) { 3642 3643 OMPT_REDUCTION_BEGIN; 3644 // usage: if team size == 1, no synchronization is required ( Intel 3645 // platforms only ) 3646 retval = 1; 3647 3648 } else if (packed_reduction_method == atomic_reduce_block) { 3649 3650 retval = 2; 3651 3652 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3653 tree_reduce_block)) { 3654 3655 // case tree_reduce_block: 3656 // this barrier should be visible to a customer and to the threading profile 3657 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3658 #if OMPT_SUPPORT 3659 ompt_frame_t *ompt_frame; 3660 if (ompt_enabled.enabled) { 3661 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3662 if (ompt_frame->enter_frame.ptr == NULL) 3663 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3664 OMPT_STORE_RETURN_ADDRESS(global_tid); 3665 } 3666 #endif 3667 #if USE_ITT_NOTIFY 3668 __kmp_threads[global_tid]->th.th_ident = 3669 loc; // needed for correct notification of frames 3670 #endif 3671 retval = 3672 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3673 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3674 retval = (retval != 0) ? 
(0) : (1); 3675 #if OMPT_SUPPORT && OMPT_OPTIONAL 3676 if (ompt_enabled.enabled) { 3677 ompt_frame->enter_frame = ompt_data_none; 3678 } 3679 #endif 3680 3681 // all other workers except master should do this pop here 3682 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3683 if (__kmp_env_consistency_check) { 3684 if (retval == 0) { // 0: all other workers; 1: master 3685 __kmp_pop_sync(global_tid, ct_reduce, loc); 3686 } 3687 } 3688 3689 } else { 3690 3691 // should never reach this block 3692 KMP_ASSERT(0); // "unexpected method" 3693 } 3694 if (teams_swapped) { 3695 __kmp_restore_swapped_teams(th, team, task_state); 3696 } 3697 3698 KA_TRACE(10, 3699 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3700 global_tid, packed_reduction_method, retval)); 3701 return retval; 3702 } 3703 3704 /*! 3705 @ingroup SYNCHRONIZATION 3706 @param loc source location information 3707 @param global_tid global thread id. 3708 @param lck pointer to the unique lock data structure 3709 3710 Finish the execution of a blocking reduce. 3711 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3712 start function. 3713 */ 3714 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3715 kmp_critical_name *lck) { 3716 3717 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3718 kmp_info_t *th; 3719 kmp_team_t *team; 3720 int teams_swapped = 0, task_state; 3721 3722 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3723 __kmp_assert_valid_gtid(global_tid); 3724 3725 th = __kmp_thread_from_gtid(global_tid); 3726 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3727 3728 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3729 3730 // this barrier should be visible to a customer and to the threading profile 3731 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3732 OMPT_REDUCTION_DECL(th, global_tid); 3733 3734 if (packed_reduction_method == critical_reduce_block) { 3735 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3736 3737 OMPT_REDUCTION_END; 3738 3739 // TODO: implicit barrier: should be exposed 3740 #if OMPT_SUPPORT 3741 ompt_frame_t *ompt_frame; 3742 if (ompt_enabled.enabled) { 3743 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3744 if (ompt_frame->enter_frame.ptr == NULL) 3745 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3746 OMPT_STORE_RETURN_ADDRESS(global_tid); 3747 } 3748 #endif 3749 #if USE_ITT_NOTIFY 3750 __kmp_threads[global_tid]->th.th_ident = loc; 3751 #endif 3752 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3753 #if OMPT_SUPPORT && OMPT_OPTIONAL 3754 if (ompt_enabled.enabled) { 3755 ompt_frame->enter_frame = ompt_data_none; 3756 } 3757 #endif 3758 3759 } else if (packed_reduction_method == empty_reduce_block) { 3760 3761 OMPT_REDUCTION_END; 3762 3763 // usage: if team size==1, no synchronization is required (Intel platforms only) 3764 3765 // TODO: implicit barrier: should be exposed 3766 #if OMPT_SUPPORT 3767 ompt_frame_t *ompt_frame; 3768 if (ompt_enabled.enabled) { 3769 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3770 if (ompt_frame->enter_frame.ptr == NULL) 3771 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3772 OMPT_STORE_RETURN_ADDRESS(global_tid); 3773 } 3774 #endif 3775 #if USE_ITT_NOTIFY 3776 __kmp_threads[global_tid]->th.th_ident = loc; 3777 #endif 3778 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 
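    // The plain barrier above serves as the construct's terminating barrier:
    // even when no data combination is needed (team size == 1), the blocking
    // form still owes the construct an implied barrier because NOWAIT was not
    // specified.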
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    // TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loop bounds.

Initialize doacross loop information.
Expect the compiler to send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
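
For illustration only (the exact code is produced by the compiler), a loop such
as <tt>#pragma omp for ordered(1)</tt> with <tt>depend(sink: i-2)</tt> /
<tt>depend(source)</tt> clauses could be lowered roughly as follows;
<tt>lb</tt>/<tt>ub</tt> stand for the bounds of the chunk assigned to this
thread and are placeholders:

@code
struct kmp_dim dims = {2, 8, 2}; // lo=2, up=8, st=2 for for(i=2;i<9;i+=2)
__kmpc_doacross_init(loc, gtid, 1, &dims);
for (kmp_int64 i = lb; i <= ub; i += 2) {
  kmp_int64 sink_vec = i - 2; // wait until iteration i-2 has been posted
  __kmpc_doacross_wait(loc, gtid, &sink_vec);
  /* ordered loop body */
  kmp_int64 src_vec = i; // signal that iteration i is done
  __kmpc_doacross_post(loc, gtid, &src_vec);
}
__kmpc_doacross_fini(loc, gtid);
@endcode

Out-of-range sink vectors (e.g. on the first iteration) are detected and
ignored by __kmpc_doacross_wait().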
3877 */ 3878 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3879 const struct kmp_dim *dims) { 3880 __kmp_assert_valid_gtid(gtid); 3881 int j, idx; 3882 kmp_int64 last, trace_count; 3883 kmp_info_t *th = __kmp_threads[gtid]; 3884 kmp_team_t *team = th->th.th_team; 3885 kmp_uint32 *flags; 3886 kmp_disp_t *pr_buf = th->th.th_dispatch; 3887 dispatch_shared_info_t *sh_buf; 3888 3889 KA_TRACE( 3890 20, 3891 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3892 gtid, num_dims, !team->t.t_serialized)); 3893 KMP_DEBUG_ASSERT(dims != NULL); 3894 KMP_DEBUG_ASSERT(num_dims > 0); 3895 3896 if (team->t.t_serialized) { 3897 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3898 return; // no dependencies if team is serialized 3899 } 3900 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3901 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3902 // the next loop 3903 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3904 3905 // Save bounds info into allocated private buffer 3906 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3907 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3908 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3909 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3910 pr_buf->th_doacross_info[0] = 3911 (kmp_int64)num_dims; // first element is number of dimensions 3912 // Save also address of num_done in order to access it later without knowing 3913 // the buffer index 3914 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3915 pr_buf->th_doacross_info[2] = dims[0].lo; 3916 pr_buf->th_doacross_info[3] = dims[0].up; 3917 pr_buf->th_doacross_info[4] = dims[0].st; 3918 last = 5; 3919 for (j = 1; j < num_dims; ++j) { 3920 kmp_int64 3921 range_length; // To keep ranges of all dimensions but the first dims[0] 3922 if (dims[j].st == 1) { // most common case 3923 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3924 range_length = dims[j].up - dims[j].lo + 1; 3925 } else { 3926 if (dims[j].st > 0) { 3927 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3928 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3929 } else { // negative increment 3930 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3931 range_length = 3932 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3933 } 3934 } 3935 pr_buf->th_doacross_info[last++] = range_length; 3936 pr_buf->th_doacross_info[last++] = dims[j].lo; 3937 pr_buf->th_doacross_info[last++] = dims[j].up; 3938 pr_buf->th_doacross_info[last++] = dims[j].st; 3939 } 3940 3941 // Compute total trip count. 3942 // Start with range of dims[0] which we don't need to keep in the buffer. 
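  // (For each dimension the number of iterations is (up - lo) / st + 1, or
  // (lo - up) / (-st) + 1 for a negative stride; the product over all
  // dimensions gives the total trip count, and one flag bit is allocated per
  // iteration.)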
3943 if (dims[0].st == 1) { // most common case 3944 trace_count = dims[0].up - dims[0].lo + 1; 3945 } else if (dims[0].st > 0) { 3946 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3947 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3948 } else { // negative increment 3949 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3950 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3951 } 3952 for (j = 1; j < num_dims; ++j) { 3953 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3954 } 3955 KMP_DEBUG_ASSERT(trace_count > 0); 3956 3957 // Check if shared buffer is not occupied by other loop (idx - 3958 // __kmp_dispatch_num_buffers) 3959 if (idx != sh_buf->doacross_buf_idx) { 3960 // Shared buffer is occupied, wait for it to be free 3961 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3962 __kmp_eq_4, NULL); 3963 } 3964 #if KMP_32_BIT_ARCH 3965 // Check if we are the first thread. After the CAS the first thread gets 0, 3966 // others get 1 if initialization is in progress, allocated pointer otherwise. 3967 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3968 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3969 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3970 #else 3971 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3972 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3973 #endif 3974 if (flags == NULL) { 3975 // we are the first thread, allocate the array of flags 3976 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3977 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3978 KMP_MB(); 3979 sh_buf->doacross_flags = flags; 3980 } else if (flags == (kmp_uint32 *)1) { 3981 #if KMP_32_BIT_ARCH 3982 // initialization is still in progress, need to wait 3983 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 3984 #else 3985 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 3986 #endif 3987 KMP_YIELD(TRUE); 3988 KMP_MB(); 3989 } else { 3990 KMP_MB(); 3991 } 3992 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 3993 pr_buf->th_doacross_flags = 3994 sh_buf->doacross_flags; // save private copy in order to not 3995 // touch shared buffer on each iteration 3996 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 3997 } 3998 3999 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4000 __kmp_assert_valid_gtid(gtid); 4001 kmp_int32 shft, num_dims, i; 4002 kmp_uint32 flag; 4003 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4004 kmp_info_t *th = __kmp_threads[gtid]; 4005 kmp_team_t *team = th->th.th_team; 4006 kmp_disp_t *pr_buf; 4007 kmp_int64 lo, up, st; 4008 4009 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4010 if (team->t.t_serialized) { 4011 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4012 return; // no dependencies if team is serialized 4013 } 4014 4015 // calculate sequential iteration number and check out-of-bounds condition 4016 pr_buf = th->th.th_dispatch; 4017 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4018 num_dims = pr_buf->th_doacross_info[0]; 4019 lo = pr_buf->th_doacross_info[2]; 4020 up = pr_buf->th_doacross_info[3]; 4021 st = pr_buf->th_doacross_info[4]; 4022 #if OMPT_SUPPORT && OMPT_OPTIONAL 4023 ompt_dependence_t deps[num_dims]; 4024 #endif 4025 if (st == 1) { // most common case 4026 if (vec[0] < lo || vec[0] > up) { 4027 KA_TRACE(20, ("__kmpc_doacross_wait() 
exit: T#%d iter %lld is out of " 4028 "bounds [%lld,%lld]\n", 4029 gtid, vec[0], lo, up)); 4030 return; 4031 } 4032 iter_number = vec[0] - lo; 4033 } else if (st > 0) { 4034 if (vec[0] < lo || vec[0] > up) { 4035 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4036 "bounds [%lld,%lld]\n", 4037 gtid, vec[0], lo, up)); 4038 return; 4039 } 4040 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4041 } else { // negative increment 4042 if (vec[0] > lo || vec[0] < up) { 4043 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4044 "bounds [%lld,%lld]\n", 4045 gtid, vec[0], lo, up)); 4046 return; 4047 } 4048 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4049 } 4050 #if OMPT_SUPPORT && OMPT_OPTIONAL 4051 deps[0].variable.value = iter_number; 4052 deps[0].dependence_type = ompt_dependence_type_sink; 4053 #endif 4054 for (i = 1; i < num_dims; ++i) { 4055 kmp_int64 iter, ln; 4056 kmp_int32 j = i * 4; 4057 ln = pr_buf->th_doacross_info[j + 1]; 4058 lo = pr_buf->th_doacross_info[j + 2]; 4059 up = pr_buf->th_doacross_info[j + 3]; 4060 st = pr_buf->th_doacross_info[j + 4]; 4061 if (st == 1) { 4062 if (vec[i] < lo || vec[i] > up) { 4063 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4064 "bounds [%lld,%lld]\n", 4065 gtid, vec[i], lo, up)); 4066 return; 4067 } 4068 iter = vec[i] - lo; 4069 } else if (st > 0) { 4070 if (vec[i] < lo || vec[i] > up) { 4071 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4072 "bounds [%lld,%lld]\n", 4073 gtid, vec[i], lo, up)); 4074 return; 4075 } 4076 iter = (kmp_uint64)(vec[i] - lo) / st; 4077 } else { // st < 0 4078 if (vec[i] > lo || vec[i] < up) { 4079 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4080 "bounds [%lld,%lld]\n", 4081 gtid, vec[i], lo, up)); 4082 return; 4083 } 4084 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4085 } 4086 iter_number = iter + ln * iter_number; 4087 #if OMPT_SUPPORT && OMPT_OPTIONAL 4088 deps[i].variable.value = iter; 4089 deps[i].dependence_type = ompt_dependence_type_sink; 4090 #endif 4091 } 4092 shft = iter_number % 32; // use 32-bit granularity 4093 iter_number >>= 5; // divided by 32 4094 flag = 1 << shft; 4095 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4096 KMP_YIELD(TRUE); 4097 } 4098 KMP_MB(); 4099 #if OMPT_SUPPORT && OMPT_OPTIONAL 4100 if (ompt_enabled.ompt_callback_dependences) { 4101 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4102 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4103 } 4104 #endif 4105 KA_TRACE(20, 4106 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4107 gtid, (iter_number << 5) + shft)); 4108 } 4109 4110 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4111 __kmp_assert_valid_gtid(gtid); 4112 kmp_int32 shft, num_dims, i; 4113 kmp_uint32 flag; 4114 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4115 kmp_info_t *th = __kmp_threads[gtid]; 4116 kmp_team_t *team = th->th.th_team; 4117 kmp_disp_t *pr_buf; 4118 kmp_int64 lo, st; 4119 4120 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4121 if (team->t.t_serialized) { 4122 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4123 return; // no dependencies if team is serialized 4124 } 4125 4126 // calculate sequential iteration number (same as in "wait" but no 4127 // out-of-bounds checks) 4128 pr_buf = th->th.th_dispatch; 4129 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4130 num_dims = pr_buf->th_doacross_info[0]; 
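  // Layout of th_doacross_info, filled in __kmpc_doacross_init(): [0] number
  // of dimensions, [1] address of doacross_num_done, [2..4] lo/up/st of the
  // first dimension, then {range length, lo, up, st} for each remaining
  // dimension.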
4131 lo = pr_buf->th_doacross_info[2]; 4132 st = pr_buf->th_doacross_info[4]; 4133 #if OMPT_SUPPORT && OMPT_OPTIONAL 4134 ompt_dependence_t deps[num_dims]; 4135 #endif 4136 if (st == 1) { // most common case 4137 iter_number = vec[0] - lo; 4138 } else if (st > 0) { 4139 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4140 } else { // negative increment 4141 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4142 } 4143 #if OMPT_SUPPORT && OMPT_OPTIONAL 4144 deps[0].variable.value = iter_number; 4145 deps[0].dependence_type = ompt_dependence_type_source; 4146 #endif 4147 for (i = 1; i < num_dims; ++i) { 4148 kmp_int64 iter, ln; 4149 kmp_int32 j = i * 4; 4150 ln = pr_buf->th_doacross_info[j + 1]; 4151 lo = pr_buf->th_doacross_info[j + 2]; 4152 st = pr_buf->th_doacross_info[j + 4]; 4153 if (st == 1) { 4154 iter = vec[i] - lo; 4155 } else if (st > 0) { 4156 iter = (kmp_uint64)(vec[i] - lo) / st; 4157 } else { // st < 0 4158 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4159 } 4160 iter_number = iter + ln * iter_number; 4161 #if OMPT_SUPPORT && OMPT_OPTIONAL 4162 deps[i].variable.value = iter; 4163 deps[i].dependence_type = ompt_dependence_type_source; 4164 #endif 4165 } 4166 #if OMPT_SUPPORT && OMPT_OPTIONAL 4167 if (ompt_enabled.ompt_callback_dependences) { 4168 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4169 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4170 } 4171 #endif 4172 shft = iter_number % 32; // use 32-bit granularity 4173 iter_number >>= 5; // divided by 32 4174 flag = 1 << shft; 4175 KMP_MB(); 4176 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4177 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4178 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4179 (iter_number << 5) + shft)); 4180 } 4181 4182 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4183 __kmp_assert_valid_gtid(gtid); 4184 kmp_int32 num_done; 4185 kmp_info_t *th = __kmp_threads[gtid]; 4186 kmp_team_t *team = th->th.th_team; 4187 kmp_disp_t *pr_buf = th->th.th_dispatch; 4188 4189 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4190 if (team->t.t_serialized) { 4191 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4192 return; // nothing to do 4193 } 4194 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4195 if (num_done == th->th.th_team_nproc) { 4196 // we are the last thread, need to free shared resources 4197 int idx = pr_buf->th_doacross_buf_idx - 1; 4198 dispatch_shared_info_t *sh_buf = 4199 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4200 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4201 (kmp_int64)&sh_buf->doacross_num_done); 4202 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4203 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4204 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4205 sh_buf->doacross_flags = NULL; 4206 sh_buf->doacross_num_done = 0; 4207 sh_buf->doacross_buf_idx += 4208 __kmp_dispatch_num_buffers; // free buffer for future re-use 4209 } 4210 // free private resources (need to keep buffer index forever) 4211 pr_buf->th_doacross_flags = NULL; 4212 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4213 pr_buf->th_doacross_info = NULL; 4214 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4215 } 4216 4217 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */ 4218 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4219 return 
__kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4220 } 4221 4222 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4223 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4224 } 4225 4226 int __kmpc_get_target_offload(void) { 4227 if (!__kmp_init_serial) { 4228 __kmp_serial_initialize(); 4229 } 4230 return __kmp_target_offload; 4231 } 4232 4233 int __kmpc_pause_resource(kmp_pause_status_t level) { 4234 if (!__kmp_init_serial) { 4235 return 1; // Can't pause if runtime is not initialized 4236 } 4237 return __kmp_pause_resource(level); 4238 } 4239
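
/* Illustrative user-side usage of omp_alloc()/omp_free() above (not part of
   the runtime itself); omp_default_mem_alloc is the predefined default
   allocator from <omp.h>:

     double *buf = (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/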