/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).
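
An illustrative sketch of that case (assumed usage, not taken from this file;
`loc` is a placeholder ident_t):
@code
// called from the initial, non-OpenMP thread with no active parallel region
kmp_int32 gtid = __kmpc_global_thread_num(&loc); // 0, as omp_get_thread_num() would report
@endcode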

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, because the runtime maintains
underlying threads even when they are not active (the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
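  // Illustration only (an assumed clang-style lowering, not part of this
  // runtime; `outlined_fn`, `a`, and `b` are placeholder names): a directive
  //   #pragma omp parallel
  //   { use(a, b); }
  // is typically compiled into an outlined microtask plus a call such as
  //   __kmpc_fork_call(&loc, 2, (kmpc_micro)outlined_fn, &a, &b);
  // with one trailing pointer argument per shared variable.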
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
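  // Illustration only (an assumed lowering, not taken from this file;
  // `outlined_teams_fn` is a placeholder name): a construct such as
  //   #pragma omp teams num_teams(4) thread_limit(8)
  // is typically compiled into
  //   __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/4, /*thread_limit=*/8);
  //   __kmpc_fork_teams(&loc, argc, (kmpc_micro)outlined_teams_fn, ...);
  // with shared variables passed as trailing pointer arguments.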
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams was called; set the default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
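
For reference, an illustrative sketch (an assumed clang-style lowering, not
part of this runtime; `outlined_fn` and `zero` are placeholder names) of what
a compiler may emit for the conditional region above:
@code
if (condition) {
  __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
} else {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  kmp_int32 zero = 0; // bound thread id for the serialized team
  __kmpc_serialized_parallel(&loc, gtid);
  outlined_fn(&gtid, &zero /*, shared variable pointers... */);
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode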
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
// C74404
// This is to address non-temporal store instructions (sfence needed).
// The clflush instruction is also addressed (mfence needed).
// Probably the non-temporal load movntdqa instruction should also be
// addressed.
// mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here, move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  //   this function is called when the 'barrier' directive is present or at the
  //   implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
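
An illustrative sketch (an assumed lowering, not taken from this file) of how a
<tt>master</tt> construct uses this pair of entry points:
@code
if (__kmpc_master(&loc, gtid)) {
  // body of the master region, executed only by the master thread
  __kmpc_end_master(&loc, gtid);
}
@endcode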
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
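
An illustrative sketch (an assumed lowering, not taken from this file): inside
the body of an ordered loop the compiler brackets the protected statements with
@code
__kmpc_ordered(&loc, gtid);
// statements that must execute in iteration order
__kmpc_end_ordered(&loc, gtid);
@endcode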
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      kmp_uint32 spins; \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      do { \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD(TRUE); \
        } else { \
          KMP_YIELD_SPIN(spins); \
        } \
        __kmp_spin_backoff(&backoff); \
      } while ( \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    } \
    KMP_FSYNC_ACQUIRED(l); \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!cond) { \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | KMP_LOCK_BUSY(1, futex))) { \
          continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      } \
      kmp_int32 rc; \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
        continue; \
      } \
      gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it. There
// is a chance the lock will be destroyed with no usage, but that is not a
// problem, because this is not a real event seen by the user but rather the
// setting of a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
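
An illustrative sketch (an assumed lowering, not taken from this file;
`crit_name_lock` is a placeholder name) of how a named critical construct maps
onto these entry points:
@code
// one zero-initialized kmp_critical_name per critical-section name
static kmp_critical_name crit_name_lock;

__kmpc_critical(&loc, gtid, &crit_name_lock);
// protected code
__kmpc_end_critical(&loc, gtid, &crit_name_lock);
@endcode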
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team, we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
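
An illustrative usage sketch (an assumed lowering, not taken from this file):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // executed by the master thread only, after all threads have arrived
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode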
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the other threads do not wait
for the master block to complete, so there is nothing to release.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls, rather the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
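
An illustrative sketch (an assumed lowering, not taken from this file) of a
<tt>single</tt> construct; per the note above, the compiler itself adds the
trailing barrier unless `nowait` was specified:
@code
if (__kmpc_single(&loc, gtid)) {
  // body of the single region
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // omitted when nowait is present
@endcode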
1766 */ 1767 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1768 __kmp_assert_valid_gtid(global_tid); 1769 __kmp_exit_single(global_tid); 1770 KMP_POP_PARTITIONED_TIMER(); 1771 1772 #if OMPT_SUPPORT && OMPT_OPTIONAL 1773 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1774 kmp_team_t *team = this_thr->th.th_team; 1775 int tid = __kmp_tid_from_gtid(global_tid); 1776 1777 if (ompt_enabled.ompt_callback_work) { 1778 ompt_callbacks.ompt_callback(ompt_callback_work)( 1779 ompt_work_single_executor, ompt_scope_end, 1780 &(team->t.ompt_team_info.parallel_data), 1781 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1782 OMPT_GET_RETURN_ADDRESS(0)); 1783 } 1784 #endif 1785 } 1786 1787 /*! 1788 @ingroup WORK_SHARING 1789 @param loc Source location 1790 @param global_tid Global thread id 1791 1792 Mark the end of a statically scheduled loop. 1793 */ 1794 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1795 KMP_POP_PARTITIONED_TIMER(); 1796 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1797 1798 #if OMPT_SUPPORT && OMPT_OPTIONAL 1799 if (ompt_enabled.ompt_callback_work) { 1800 ompt_work_t ompt_work_type = ompt_work_loop; 1801 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1802 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1803 // Determine workshare type 1804 if (loc != NULL) { 1805 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1806 ompt_work_type = ompt_work_loop; 1807 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1808 ompt_work_type = ompt_work_sections; 1809 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1810 ompt_work_type = ompt_work_distribute; 1811 } else { 1812 // use default set above. 1813 // a warning about this case is provided in __kmpc_for_static_init 1814 } 1815 KMP_DEBUG_ASSERT(ompt_work_type); 1816 } 1817 ompt_callbacks.ompt_callback(ompt_callback_work)( 1818 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1819 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1820 } 1821 #endif 1822 if (__kmp_env_consistency_check) 1823 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1824 } 1825 1826 // User routines which take C-style arguments (call by value) 1827 // different from the Fortran equivalent routines 1828 1829 void ompc_set_num_threads(int arg) { 1830 // !!!!! TODO: check the per-task binding 1831 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1832 } 1833 1834 void ompc_set_dynamic(int flag) { 1835 kmp_info_t *thread; 1836 1837 /* For the thread-private implementation of the internal controls */ 1838 thread = __kmp_entry_thread(); 1839 1840 __kmp_save_internal_controls(thread); 1841 1842 set__dynamic(thread, flag ? TRUE : FALSE); 1843 } 1844 1845 void ompc_set_nested(int flag) { 1846 kmp_info_t *thread; 1847 1848 /* For the thread-private internal controls implementation */ 1849 thread = __kmp_entry_thread(); 1850 1851 __kmp_save_internal_controls(thread); 1852 1853 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1854 } 1855 1856 void ompc_set_max_active_levels(int max_active_levels) { 1857 /* TO DO */ 1858 /* we want per-task implementation of this internal control */ 1859 1860 /* For the per-thread internal controls implementation */ 1861 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1862 } 1863 1864 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1865 // !!!!! 
TODO: check the per-task binding 1866 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1867 } 1868 1869 int ompc_get_ancestor_thread_num(int level) { 1870 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1871 } 1872 1873 int ompc_get_team_size(int level) { 1874 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1875 } 1876 1877 /* OpenMP 5.0 Affinity Format API */ 1878 1879 void ompc_set_affinity_format(char const *format) { 1880 if (!__kmp_init_serial) { 1881 __kmp_serial_initialize(); 1882 } 1883 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1884 format, KMP_STRLEN(format) + 1); 1885 } 1886 1887 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1888 size_t format_size; 1889 if (!__kmp_init_serial) { 1890 __kmp_serial_initialize(); 1891 } 1892 format_size = KMP_STRLEN(__kmp_affinity_format); 1893 if (buffer && size) { 1894 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1895 format_size + 1); 1896 } 1897 return format_size; 1898 } 1899 1900 void ompc_display_affinity(char const *format) { 1901 int gtid; 1902 if (!TCR_4(__kmp_init_middle)) { 1903 __kmp_middle_initialize(); 1904 } 1905 gtid = __kmp_get_gtid(); 1906 __kmp_aux_display_affinity(gtid, format); 1907 } 1908 1909 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1910 char const *format) { 1911 int gtid; 1912 size_t num_required; 1913 kmp_str_buf_t capture_buf; 1914 if (!TCR_4(__kmp_init_middle)) { 1915 __kmp_middle_initialize(); 1916 } 1917 gtid = __kmp_get_gtid(); 1918 __kmp_str_buf_init(&capture_buf); 1919 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1920 if (buffer && buf_size) { 1921 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1922 capture_buf.used + 1); 1923 } 1924 __kmp_str_buf_free(&capture_buf); 1925 return num_required; 1926 } 1927 1928 void kmpc_set_stacksize(int arg) { 1929 // __kmp_aux_set_stacksize initializes the library if needed 1930 __kmp_aux_set_stacksize(arg); 1931 } 1932 1933 void kmpc_set_stacksize_s(size_t arg) { 1934 // __kmp_aux_set_stacksize initializes the library if needed 1935 __kmp_aux_set_stacksize(arg); 1936 } 1937 1938 void kmpc_set_blocktime(int arg) { 1939 int gtid, tid; 1940 kmp_info_t *thread; 1941 1942 gtid = __kmp_entry_gtid(); 1943 tid = __kmp_tid_from_gtid(gtid); 1944 thread = __kmp_thread_from_gtid(gtid); 1945 1946 __kmp_aux_set_blocktime(arg, thread, tid); 1947 } 1948 1949 void kmpc_set_library(int arg) { 1950 // __kmp_user_set_library initializes the library if needed 1951 __kmp_user_set_library((enum library_type)arg); 1952 } 1953 1954 void kmpc_set_defaults(char const *str) { 1955 // __kmp_aux_set_defaults initializes the library if needed 1956 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1957 } 1958 1959 void kmpc_set_disp_num_buffers(int arg) { 1960 // ignore after initialization because some teams have already 1961 // allocated dispatch buffers 1962 if (__kmp_init_serial == 0 && arg > 0) 1963 __kmp_dispatch_num_buffers = arg; 1964 } 1965 1966 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1967 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1968 return -1; 1969 #else 1970 if (!TCR_4(__kmp_init_middle)) { 1971 __kmp_middle_initialize(); 1972 } 1973 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1974 #endif 1975 } 1976 1977 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 1978 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1979 return -1; 1980 #else 1981 if (!TCR_4(__kmp_init_middle)) { 1982 __kmp_middle_initialize(); 
1983 } 1984 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 1985 #endif 1986 } 1987 1988 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 1989 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1990 return -1; 1991 #else 1992 if (!TCR_4(__kmp_init_middle)) { 1993 __kmp_middle_initialize(); 1994 } 1995 return __kmp_aux_get_affinity_mask_proc(proc, mask); 1996 #endif 1997 } 1998 1999 /* -------------------------------------------------------------------------- */ 2000 /*! 2001 @ingroup THREADPRIVATE 2002 @param loc source location information 2003 @param gtid global thread number 2004 @param cpy_size size of the cpy_data buffer 2005 @param cpy_data pointer to data to be copied 2006 @param cpy_func helper function to call for copying data 2007 @param didit flag variable: 1=single thread; 0=not single thread 2008 2009 __kmpc_copyprivate implements the interface for the private data broadcast 2010 needed for the copyprivate clause associated with a single region in an 2011 OpenMP<sup>*</sup> program (both C and Fortran). 2012 All threads participating in the parallel region call this routine. 2013 One of the threads (called the single thread) should have the <tt>didit</tt> 2014 variable set to 1 and all other threads should have that variable set to 0. 2015 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2016 2017 The OpenMP specification forbids the use of nowait on the single region when a 2018 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2019 barrier internally to avoid race conditions, so the code generation for the 2020 single region should avoid generating a barrier after the call to @ref 2021 __kmpc_copyprivate. 2022 2023 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2024 The <tt>loc</tt> parameter is a pointer to source location information. 2025 2026 Internal implementation: The single thread will first copy its descriptor 2027 address (cpy_data) to a team-private location, then the other threads will each 2028 call the function pointed to by the parameter cpy_func, which carries out the 2029 copy by copying the data using the cpy_data buffer. 2030 2031 The cpy_func routine used for the copy and the contents of the data area defined 2032 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2033 to be done. For instance, the cpy_data buffer can hold the actual data to be 2034 copied or it may hold a list of pointers to the data. The cpy_func routine must 2035 interpret the cpy_data buffer appropriately. 2036 2037 The interface to cpy_func is as follows: 2038 @code 2039 void cpy_func( void *destination, void *source ) 2040 @endcode 2041 where void *destination is the cpy_data pointer for the thread being copied to 2042 and void *source is the cpy_data pointer for the thread being copied from. 
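As an illustrative sketch (assumed code generation, not the exact output of any
particular compiler), a <tt>single</tt> region with <tt>copyprivate(x)</tt>
might be lowered as:
@code
void *cpy_data = &x; // every thread passes the address of its own x
kmp_int32 didit = 0;
if (__kmpc_single(&loc, gtid)) {
  x = ...; // the single thread produces the value
  __kmpc_end_single(&loc, gtid);
  didit = 1;
}
// copy_x is a hypothetical helper matching the cpy_func signature below,
// e.g. void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
__kmpc_copyprivate(&loc, gtid, sizeof(x), cpy_data, copy_x, didit);
@endcode
No barrier needs to be generated after the call, as described above.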
2043 */ 2044 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2045 void *cpy_data, void (*cpy_func)(void *, void *), 2046 kmp_int32 didit) { 2047 void **data_ptr; 2048 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2049 __kmp_assert_valid_gtid(gtid); 2050 2051 KMP_MB(); 2052 2053 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2054 2055 if (__kmp_env_consistency_check) { 2056 if (loc == 0) { 2057 KMP_WARNING(ConstructIdentInvalid); 2058 } 2059 } 2060 2061 // ToDo: Optimize the following two barriers into some kind of split barrier 2062 2063 if (didit) 2064 *data_ptr = cpy_data; 2065 2066 #if OMPT_SUPPORT 2067 ompt_frame_t *ompt_frame; 2068 if (ompt_enabled.enabled) { 2069 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2070 if (ompt_frame->enter_frame.ptr == NULL) 2071 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2072 } 2073 OMPT_STORE_RETURN_ADDRESS(gtid); 2074 #endif 2075 /* This barrier is not a barrier region boundary */ 2076 #if USE_ITT_NOTIFY 2077 __kmp_threads[gtid]->th.th_ident = loc; 2078 #endif 2079 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2080 2081 if (!didit) 2082 (*cpy_func)(cpy_data, *data_ptr); 2083 2084 // Consider next barrier a user-visible barrier for barrier region boundaries 2085 // Nesting checks are already handled by the single construct checks 2086 { 2087 #if OMPT_SUPPORT 2088 OMPT_STORE_RETURN_ADDRESS(gtid); 2089 #endif 2090 #if USE_ITT_NOTIFY 2091 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2092 // tasks can overwrite the location) 2093 #endif 2094 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2095 #if OMPT_SUPPORT && OMPT_OPTIONAL 2096 if (ompt_enabled.enabled) { 2097 ompt_frame->enter_frame = ompt_data_none; 2098 } 2099 #endif 2100 } 2101 } 2102 2103 /* -------------------------------------------------------------------------- */ 2104 2105 #define INIT_LOCK __kmp_init_user_lock_with_checks 2106 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2107 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2108 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2109 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2110 #define ACQUIRE_NESTED_LOCK_TIMED \ 2111 __kmp_acquire_nested_user_lock_with_checks_timed 2112 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2113 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2114 #define TEST_LOCK __kmp_test_user_lock_with_checks 2115 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2116 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2117 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2118 2119 // TODO: Make check abort messages use location info & pass it into 2120 // with_checks routines 2121 2122 #if KMP_USE_DYNAMIC_LOCK 2123 2124 // internal lock initializer 2125 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2126 kmp_dyna_lockseq_t seq) { 2127 if (KMP_IS_D_LOCK(seq)) { 2128 KMP_INIT_D_LOCK(lock, seq); 2129 #if USE_ITT_BUILD 2130 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2131 #endif 2132 } else { 2133 KMP_INIT_I_LOCK(lock, seq); 2134 #if USE_ITT_BUILD 2135 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2136 __kmp_itt_lock_creating(ilk->lock, loc); 2137 #endif 2138 } 2139 } 2140 2141 // internal nest lock initializer 2142 static __forceinline void 2143 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2144 kmp_dyna_lockseq_t seq) { 2145 #if KMP_USE_TSX 2146 // Don't have nested lock implementation for speculative locks 2147 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2148 seq = __kmp_user_lock_seq; 2149 #endif 2150 switch (seq) { 2151 case lockseq_tas: 2152 seq = lockseq_nested_tas; 2153 break; 2154 #if KMP_USE_FUTEX 2155 case lockseq_futex: 2156 seq = lockseq_nested_futex; 2157 break; 2158 #endif 2159 case lockseq_ticket: 2160 seq = lockseq_nested_ticket; 2161 break; 2162 case lockseq_queuing: 2163 seq = lockseq_nested_queuing; 2164 break; 2165 case lockseq_drdpa: 2166 seq = lockseq_nested_drdpa; 2167 break; 2168 default: 2169 seq = lockseq_nested_queuing; 2170 } 2171 KMP_INIT_I_LOCK(lock, seq); 2172 #if USE_ITT_BUILD 2173 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2174 __kmp_itt_lock_creating(ilk->lock, loc); 2175 #endif 2176 } 2177 2178 /* initialize the lock with a hint */ 2179 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2180 uintptr_t hint) { 2181 KMP_DEBUG_ASSERT(__kmp_init_serial); 2182 if (__kmp_env_consistency_check && user_lock == NULL) { 2183 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2184 } 2185 2186 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2187 2188 #if OMPT_SUPPORT && OMPT_OPTIONAL 2189 // This is the case, if called from omp_init_lock_with_hint: 2190 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2191 if (!codeptr) 2192 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2193 if (ompt_enabled.ompt_callback_lock_init) { 2194 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2195 ompt_mutex_lock, (omp_lock_hint_t)hint, 2196 __ompt_get_mutex_impl_type(user_lock), 2197 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2198 } 2199 #endif 2200 } 2201 2202 /* initialize the lock with a hint */ 2203 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2204 void **user_lock, uintptr_t hint) { 2205 KMP_DEBUG_ASSERT(__kmp_init_serial); 2206 if (__kmp_env_consistency_check && user_lock == NULL) { 2207 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2208 } 2209 2210 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2211 2212 #if OMPT_SUPPORT && OMPT_OPTIONAL 2213 // This is the case, if called from omp_init_lock_with_hint: 2214 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2215 if (!codeptr) 2216 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2217 if (ompt_enabled.ompt_callback_lock_init) { 2218 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2219 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2220 __ompt_get_mutex_impl_type(user_lock), 2221 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2222 } 2223 #endif 2224 } 2225 2226 #endif // KMP_USE_DYNAMIC_LOCK 2227 2228 /* initialize the lock */ 2229 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2230 #if KMP_USE_DYNAMIC_LOCK 2231 2232 KMP_DEBUG_ASSERT(__kmp_init_serial); 2233 if (__kmp_env_consistency_check && user_lock == NULL) { 2234 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2235 } 2236 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2237 2238 #if OMPT_SUPPORT && OMPT_OPTIONAL 2239 // This is the case, if called from omp_init_lock_with_hint: 2240 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2241 if (!codeptr) 2242 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2243 if (ompt_enabled.ompt_callback_lock_init) { 2244 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2245 ompt_mutex_lock, omp_lock_hint_none, 2246 
__ompt_get_mutex_impl_type(user_lock), 2247 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2248 } 2249 #endif 2250 2251 #else // KMP_USE_DYNAMIC_LOCK 2252 2253 static char const *const func = "omp_init_lock"; 2254 kmp_user_lock_p lck; 2255 KMP_DEBUG_ASSERT(__kmp_init_serial); 2256 2257 if (__kmp_env_consistency_check) { 2258 if (user_lock == NULL) { 2259 KMP_FATAL(LockIsUninitialized, func); 2260 } 2261 } 2262 2263 KMP_CHECK_USER_LOCK_INIT(); 2264 2265 if ((__kmp_user_lock_kind == lk_tas) && 2266 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2267 lck = (kmp_user_lock_p)user_lock; 2268 } 2269 #if KMP_USE_FUTEX 2270 else if ((__kmp_user_lock_kind == lk_futex) && 2271 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2272 lck = (kmp_user_lock_p)user_lock; 2273 } 2274 #endif 2275 else { 2276 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2277 } 2278 INIT_LOCK(lck); 2279 __kmp_set_user_lock_location(lck, loc); 2280 2281 #if OMPT_SUPPORT && OMPT_OPTIONAL 2282 // This is the case, if called from omp_init_lock_with_hint: 2283 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2284 if (!codeptr) 2285 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2286 if (ompt_enabled.ompt_callback_lock_init) { 2287 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2288 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2289 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2290 } 2291 #endif 2292 2293 #if USE_ITT_BUILD 2294 __kmp_itt_lock_creating(lck); 2295 #endif /* USE_ITT_BUILD */ 2296 2297 #endif // KMP_USE_DYNAMIC_LOCK 2298 } // __kmpc_init_lock 2299 2300 /* initialize the lock */ 2301 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2302 #if KMP_USE_DYNAMIC_LOCK 2303 2304 KMP_DEBUG_ASSERT(__kmp_init_serial); 2305 if (__kmp_env_consistency_check && user_lock == NULL) { 2306 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2307 } 2308 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2309 2310 #if OMPT_SUPPORT && OMPT_OPTIONAL 2311 // This is the case, if called from omp_init_lock_with_hint: 2312 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2313 if (!codeptr) 2314 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2315 if (ompt_enabled.ompt_callback_lock_init) { 2316 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2317 ompt_mutex_nest_lock, omp_lock_hint_none, 2318 __ompt_get_mutex_impl_type(user_lock), 2319 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2320 } 2321 #endif 2322 2323 #else // KMP_USE_DYNAMIC_LOCK 2324 2325 static char const *const func = "omp_init_nest_lock"; 2326 kmp_user_lock_p lck; 2327 KMP_DEBUG_ASSERT(__kmp_init_serial); 2328 2329 if (__kmp_env_consistency_check) { 2330 if (user_lock == NULL) { 2331 KMP_FATAL(LockIsUninitialized, func); 2332 } 2333 } 2334 2335 KMP_CHECK_USER_LOCK_INIT(); 2336 2337 if ((__kmp_user_lock_kind == lk_tas) && 2338 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2339 OMP_NEST_LOCK_T_SIZE)) { 2340 lck = (kmp_user_lock_p)user_lock; 2341 } 2342 #if KMP_USE_FUTEX 2343 else if ((__kmp_user_lock_kind == lk_futex) && 2344 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2345 OMP_NEST_LOCK_T_SIZE)) { 2346 lck = (kmp_user_lock_p)user_lock; 2347 } 2348 #endif 2349 else { 2350 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2351 } 2352 2353 INIT_NESTED_LOCK(lck); 2354 __kmp_set_user_lock_location(lck, loc); 2355 2356 #if OMPT_SUPPORT && OMPT_OPTIONAL 2357 // This is the case, if called from omp_init_lock_with_hint: 2358 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 
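  // A return address may have been stored by an outer OMPT-aware entry point
  // (via OMPT_STORE_RETURN_ADDRESS); if none was recorded, fall back to this
  // frame's return address below.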
2359 if (!codeptr) 2360 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2361 if (ompt_enabled.ompt_callback_lock_init) { 2362 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2363 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2364 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2365 } 2366 #endif 2367 2368 #if USE_ITT_BUILD 2369 __kmp_itt_lock_creating(lck); 2370 #endif /* USE_ITT_BUILD */ 2371 2372 #endif // KMP_USE_DYNAMIC_LOCK 2373 } // __kmpc_init_nest_lock 2374 2375 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2376 #if KMP_USE_DYNAMIC_LOCK 2377 2378 #if USE_ITT_BUILD 2379 kmp_user_lock_p lck; 2380 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2381 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2382 } else { 2383 lck = (kmp_user_lock_p)user_lock; 2384 } 2385 __kmp_itt_lock_destroyed(lck); 2386 #endif 2387 #if OMPT_SUPPORT && OMPT_OPTIONAL 2388 // This is the case, if called from omp_init_lock_with_hint: 2389 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2390 if (!codeptr) 2391 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2392 if (ompt_enabled.ompt_callback_lock_destroy) { 2393 kmp_user_lock_p lck; 2394 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2395 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2396 } else { 2397 lck = (kmp_user_lock_p)user_lock; 2398 } 2399 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2400 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2401 } 2402 #endif 2403 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2404 #else 2405 kmp_user_lock_p lck; 2406 2407 if ((__kmp_user_lock_kind == lk_tas) && 2408 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2409 lck = (kmp_user_lock_p)user_lock; 2410 } 2411 #if KMP_USE_FUTEX 2412 else if ((__kmp_user_lock_kind == lk_futex) && 2413 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2414 lck = (kmp_user_lock_p)user_lock; 2415 } 2416 #endif 2417 else { 2418 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2419 } 2420 2421 #if OMPT_SUPPORT && OMPT_OPTIONAL 2422 // This is the case, if called from omp_init_lock_with_hint: 2423 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2424 if (!codeptr) 2425 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2426 if (ompt_enabled.ompt_callback_lock_destroy) { 2427 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2428 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2429 } 2430 #endif 2431 2432 #if USE_ITT_BUILD 2433 __kmp_itt_lock_destroyed(lck); 2434 #endif /* USE_ITT_BUILD */ 2435 DESTROY_LOCK(lck); 2436 2437 if ((__kmp_user_lock_kind == lk_tas) && 2438 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2439 ; 2440 } 2441 #if KMP_USE_FUTEX 2442 else if ((__kmp_user_lock_kind == lk_futex) && 2443 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2444 ; 2445 } 2446 #endif 2447 else { 2448 __kmp_user_lock_free(user_lock, gtid, lck); 2449 } 2450 #endif // KMP_USE_DYNAMIC_LOCK 2451 } // __kmpc_destroy_lock 2452 2453 /* destroy the lock */ 2454 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2455 #if KMP_USE_DYNAMIC_LOCK 2456 2457 #if USE_ITT_BUILD 2458 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2459 __kmp_itt_lock_destroyed(ilk->lock); 2460 #endif 2461 #if OMPT_SUPPORT && OMPT_OPTIONAL 2462 // This is the case, if called from omp_init_lock_with_hint: 2463 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2464 if (!codeptr) 2465 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2466 if 
(ompt_enabled.ompt_callback_lock_destroy) { 2467 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2468 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2469 } 2470 #endif 2471 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2472 2473 #else // KMP_USE_DYNAMIC_LOCK 2474 2475 kmp_user_lock_p lck; 2476 2477 if ((__kmp_user_lock_kind == lk_tas) && 2478 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2479 OMP_NEST_LOCK_T_SIZE)) { 2480 lck = (kmp_user_lock_p)user_lock; 2481 } 2482 #if KMP_USE_FUTEX 2483 else if ((__kmp_user_lock_kind == lk_futex) && 2484 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2485 OMP_NEST_LOCK_T_SIZE)) { 2486 lck = (kmp_user_lock_p)user_lock; 2487 } 2488 #endif 2489 else { 2490 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2491 } 2492 2493 #if OMPT_SUPPORT && OMPT_OPTIONAL 2494 // This is the case, if called from omp_init_lock_with_hint: 2495 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2496 if (!codeptr) 2497 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2498 if (ompt_enabled.ompt_callback_lock_destroy) { 2499 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2500 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2501 } 2502 #endif 2503 2504 #if USE_ITT_BUILD 2505 __kmp_itt_lock_destroyed(lck); 2506 #endif /* USE_ITT_BUILD */ 2507 2508 DESTROY_NESTED_LOCK(lck); 2509 2510 if ((__kmp_user_lock_kind == lk_tas) && 2511 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2512 OMP_NEST_LOCK_T_SIZE)) { 2513 ; 2514 } 2515 #if KMP_USE_FUTEX 2516 else if ((__kmp_user_lock_kind == lk_futex) && 2517 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2518 OMP_NEST_LOCK_T_SIZE)) { 2519 ; 2520 } 2521 #endif 2522 else { 2523 __kmp_user_lock_free(user_lock, gtid, lck); 2524 } 2525 #endif // KMP_USE_DYNAMIC_LOCK 2526 } // __kmpc_destroy_nest_lock 2527 2528 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2529 KMP_COUNT_BLOCK(OMP_set_lock); 2530 #if KMP_USE_DYNAMIC_LOCK 2531 int tag = KMP_EXTRACT_D_TAG(user_lock); 2532 #if USE_ITT_BUILD 2533 __kmp_itt_lock_acquiring( 2534 (kmp_user_lock_p) 2535 user_lock); // itt function will get to the right lock object. 
2536 #endif 2537 #if OMPT_SUPPORT && OMPT_OPTIONAL 2538 // This is the case, if called from omp_init_lock_with_hint: 2539 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2540 if (!codeptr) 2541 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2542 if (ompt_enabled.ompt_callback_mutex_acquire) { 2543 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2544 ompt_mutex_lock, omp_lock_hint_none, 2545 __ompt_get_mutex_impl_type(user_lock), 2546 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2547 } 2548 #endif 2549 #if KMP_USE_INLINED_TAS 2550 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2551 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2552 } else 2553 #elif KMP_USE_INLINED_FUTEX 2554 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2555 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2556 } else 2557 #endif 2558 { 2559 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2560 } 2561 #if USE_ITT_BUILD 2562 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2563 #endif 2564 #if OMPT_SUPPORT && OMPT_OPTIONAL 2565 if (ompt_enabled.ompt_callback_mutex_acquired) { 2566 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2567 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2568 } 2569 #endif 2570 2571 #else // KMP_USE_DYNAMIC_LOCK 2572 2573 kmp_user_lock_p lck; 2574 2575 if ((__kmp_user_lock_kind == lk_tas) && 2576 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2577 lck = (kmp_user_lock_p)user_lock; 2578 } 2579 #if KMP_USE_FUTEX 2580 else if ((__kmp_user_lock_kind == lk_futex) && 2581 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2582 lck = (kmp_user_lock_p)user_lock; 2583 } 2584 #endif 2585 else { 2586 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2587 } 2588 2589 #if USE_ITT_BUILD 2590 __kmp_itt_lock_acquiring(lck); 2591 #endif /* USE_ITT_BUILD */ 2592 #if OMPT_SUPPORT && OMPT_OPTIONAL 2593 // This is the case, if called from omp_init_lock_with_hint: 2594 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2595 if (!codeptr) 2596 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2597 if (ompt_enabled.ompt_callback_mutex_acquire) { 2598 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2599 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2600 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2601 } 2602 #endif 2603 2604 ACQUIRE_LOCK(lck, gtid); 2605 2606 #if USE_ITT_BUILD 2607 __kmp_itt_lock_acquired(lck); 2608 #endif /* USE_ITT_BUILD */ 2609 2610 #if OMPT_SUPPORT && OMPT_OPTIONAL 2611 if (ompt_enabled.ompt_callback_mutex_acquired) { 2612 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2613 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2614 } 2615 #endif 2616 2617 #endif // KMP_USE_DYNAMIC_LOCK 2618 } 2619 2620 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2621 #if KMP_USE_DYNAMIC_LOCK 2622 2623 #if USE_ITT_BUILD 2624 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2625 #endif 2626 #if OMPT_SUPPORT && OMPT_OPTIONAL 2627 // This is the case, if called from omp_init_lock_with_hint: 2628 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2629 if (!codeptr) 2630 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2631 if (ompt_enabled.enabled) { 2632 if (ompt_enabled.ompt_callback_mutex_acquire) { 2633 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2634 ompt_mutex_nest_lock, omp_lock_hint_none, 2635 __ompt_get_mutex_impl_type(user_lock), 2636 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2637 } 2638 } 2639 #endif 2640 int acquire_status = 2641 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2642 (void) acquire_status; 2643 #if USE_ITT_BUILD 2644 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2645 #endif 2646 2647 #if OMPT_SUPPORT && OMPT_OPTIONAL 2648 if (ompt_enabled.enabled) { 2649 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2650 if (ompt_enabled.ompt_callback_mutex_acquired) { 2651 // lock_first 2652 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2653 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2654 codeptr); 2655 } 2656 } else { 2657 if (ompt_enabled.ompt_callback_nest_lock) { 2658 // lock_next 2659 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2660 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2661 } 2662 } 2663 } 2664 #endif 2665 2666 #else // KMP_USE_DYNAMIC_LOCK 2667 int acquire_status; 2668 kmp_user_lock_p lck; 2669 2670 if ((__kmp_user_lock_kind == lk_tas) && 2671 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2672 OMP_NEST_LOCK_T_SIZE)) { 2673 lck = (kmp_user_lock_p)user_lock; 2674 } 2675 #if KMP_USE_FUTEX 2676 else if ((__kmp_user_lock_kind == lk_futex) && 2677 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2678 OMP_NEST_LOCK_T_SIZE)) { 2679 lck = (kmp_user_lock_p)user_lock; 2680 } 2681 #endif 2682 else { 2683 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2684 } 2685 2686 #if USE_ITT_BUILD 2687 __kmp_itt_lock_acquiring(lck); 2688 #endif /* USE_ITT_BUILD */ 2689 #if OMPT_SUPPORT && OMPT_OPTIONAL 2690 // This is the case, if called from omp_init_lock_with_hint: 2691 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2692 if (!codeptr) 2693 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2694 if (ompt_enabled.enabled) { 2695 if (ompt_enabled.ompt_callback_mutex_acquire) { 2696 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2697 ompt_mutex_nest_lock, omp_lock_hint_none, 2698 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2699 codeptr); 2700 } 2701 } 2702 #endif 2703 2704 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2705 2706 #if USE_ITT_BUILD 2707 __kmp_itt_lock_acquired(lck); 2708 #endif /* USE_ITT_BUILD */ 2709 2710 #if OMPT_SUPPORT && OMPT_OPTIONAL 2711 if (ompt_enabled.enabled) { 2712 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2713 if (ompt_enabled.ompt_callback_mutex_acquired) { 2714 // lock_first 2715 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2716 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2717 } 2718 } else { 2719 if (ompt_enabled.ompt_callback_nest_lock) { 2720 // lock_next 2721 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2722 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2723 } 2724 } 2725 } 2726 #endif 2727 2728 #endif // KMP_USE_DYNAMIC_LOCK 2729 } 2730 2731 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2732 #if KMP_USE_DYNAMIC_LOCK 2733 2734 int tag = KMP_EXTRACT_D_TAG(user_lock); 2735 #if USE_ITT_BUILD 2736 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2737 #endif 2738 #if KMP_USE_INLINED_TAS 2739 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2740 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2741 } else 2742 #elif KMP_USE_INLINED_FUTEX 2743 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2744 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2745 } else 2746 #endif 2747 { 2748 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2749 } 2750 2751 #if OMPT_SUPPORT && OMPT_OPTIONAL 2752 // This is the case, if called from omp_init_lock_with_hint: 2753 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2754 if (!codeptr) 2755 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2756 if (ompt_enabled.ompt_callback_mutex_released) { 2757 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2758 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2759 } 2760 #endif 2761 2762 #else // KMP_USE_DYNAMIC_LOCK 2763 2764 kmp_user_lock_p lck; 2765 2766 /* Can't use serial interval since not block structured */ 2767 /* release the lock */ 2768 2769 if ((__kmp_user_lock_kind == lk_tas) && 2770 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2771 #if KMP_OS_LINUX && \ 2772 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2773 // "fast" path implemented to fix customer performance issue 2774 #if USE_ITT_BUILD 2775 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2776 #endif /* USE_ITT_BUILD */ 2777 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2778 KMP_MB(); 2779 2780 #if OMPT_SUPPORT && OMPT_OPTIONAL 2781 // This is the case, if called from omp_init_lock_with_hint: 2782 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2783 if (!codeptr) 2784 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2785 if (ompt_enabled.ompt_callback_mutex_released) { 2786 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2787 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2788 } 2789 #endif 2790 2791 return; 2792 #else 2793 lck = (kmp_user_lock_p)user_lock; 2794 #endif 2795 } 2796 #if KMP_USE_FUTEX 2797 else if ((__kmp_user_lock_kind == lk_futex) && 2798 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2799 lck = (kmp_user_lock_p)user_lock; 2800 } 2801 #endif 2802 else { 2803 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2804 } 2805 2806 #if USE_ITT_BUILD 2807 __kmp_itt_lock_releasing(lck); 2808 #endif /* USE_ITT_BUILD */ 2809 2810 RELEASE_LOCK(lck, gtid); 2811 2812 #if OMPT_SUPPORT && OMPT_OPTIONAL 2813 // This is the case, if called from omp_init_lock_with_hint: 2814 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2815 if (!codeptr) 2816 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2817 if (ompt_enabled.ompt_callback_mutex_released) { 2818 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2819 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2820 } 2821 #endif 2822 2823 #endif // KMP_USE_DYNAMIC_LOCK 2824 } 2825 2826 /* release the lock */ 2827 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2828 #if KMP_USE_DYNAMIC_LOCK 2829 2830 #if USE_ITT_BUILD 2831 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2832 #endif 2833 int release_status = 2834 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2835 (void) release_status; 2836 2837 #if OMPT_SUPPORT && OMPT_OPTIONAL 2838 // This is the case, if called from omp_init_lock_with_hint: 2839 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2840 if (!codeptr) 2841 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2842 if (ompt_enabled.enabled) { 2843 if (release_status == KMP_LOCK_RELEASED) { 2844 if (ompt_enabled.ompt_callback_mutex_released) { 2845 // release_lock_last 2846 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2847 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2848 codeptr); 2849 } 2850 } else if (ompt_enabled.ompt_callback_nest_lock) { 2851 // release_lock_prev 2852 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2853 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2854 } 2855 } 2856 #endif 2857 2858 #else // KMP_USE_DYNAMIC_LOCK 2859 2860 kmp_user_lock_p lck; 
2861 2862 /* Can't use serial interval since not block structured */ 2863 2864 if ((__kmp_user_lock_kind == lk_tas) && 2865 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2866 OMP_NEST_LOCK_T_SIZE)) { 2867 #if KMP_OS_LINUX && \ 2868 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2869 // "fast" path implemented to fix customer performance issue 2870 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2871 #if USE_ITT_BUILD 2872 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2873 #endif /* USE_ITT_BUILD */ 2874 2875 #if OMPT_SUPPORT && OMPT_OPTIONAL 2876 int release_status = KMP_LOCK_STILL_HELD; 2877 #endif 2878 2879 if (--(tl->lk.depth_locked) == 0) { 2880 TCW_4(tl->lk.poll, 0); 2881 #if OMPT_SUPPORT && OMPT_OPTIONAL 2882 release_status = KMP_LOCK_RELEASED; 2883 #endif 2884 } 2885 KMP_MB(); 2886 2887 #if OMPT_SUPPORT && OMPT_OPTIONAL 2888 // This is the case, if called from omp_init_lock_with_hint: 2889 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2890 if (!codeptr) 2891 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2892 if (ompt_enabled.enabled) { 2893 if (release_status == KMP_LOCK_RELEASED) { 2894 if (ompt_enabled.ompt_callback_mutex_released) { 2895 // release_lock_last 2896 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2897 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2898 } 2899 } else if (ompt_enabled.ompt_callback_nest_lock) { 2900 // release_lock_previous 2901 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2902 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2903 } 2904 } 2905 #endif 2906 2907 return; 2908 #else 2909 lck = (kmp_user_lock_p)user_lock; 2910 #endif 2911 } 2912 #if KMP_USE_FUTEX 2913 else if ((__kmp_user_lock_kind == lk_futex) && 2914 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2915 OMP_NEST_LOCK_T_SIZE)) { 2916 lck = (kmp_user_lock_p)user_lock; 2917 } 2918 #endif 2919 else { 2920 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2921 } 2922 2923 #if USE_ITT_BUILD 2924 __kmp_itt_lock_releasing(lck); 2925 #endif /* USE_ITT_BUILD */ 2926 2927 int release_status; 2928 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2929 #if OMPT_SUPPORT && OMPT_OPTIONAL 2930 // This is the case, if called from omp_init_lock_with_hint: 2931 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2932 if (!codeptr) 2933 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2934 if (ompt_enabled.enabled) { 2935 if (release_status == KMP_LOCK_RELEASED) { 2936 if (ompt_enabled.ompt_callback_mutex_released) { 2937 // release_lock_last 2938 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2939 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2940 } 2941 } else if (ompt_enabled.ompt_callback_nest_lock) { 2942 // release_lock_previous 2943 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2944 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2945 } 2946 } 2947 #endif 2948 2949 #endif // KMP_USE_DYNAMIC_LOCK 2950 } 2951 2952 /* try to acquire the lock */ 2953 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2954 KMP_COUNT_BLOCK(OMP_test_lock); 2955 2956 #if KMP_USE_DYNAMIC_LOCK 2957 int rc; 2958 int tag = KMP_EXTRACT_D_TAG(user_lock); 2959 #if USE_ITT_BUILD 2960 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2961 #endif 2962 #if OMPT_SUPPORT && OMPT_OPTIONAL 2963 // This is the case, if called from omp_init_lock_with_hint: 2964 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2965 if (!codeptr) 2966 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2967 if (ompt_enabled.ompt_callback_mutex_acquire) { 2968 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2969 ompt_mutex_lock, omp_lock_hint_none, 2970 __ompt_get_mutex_impl_type(user_lock), 2971 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2972 } 2973 #endif 2974 #if KMP_USE_INLINED_TAS 2975 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2976 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 2977 } else 2978 #elif KMP_USE_INLINED_FUTEX 2979 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2980 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 2981 } else 2982 #endif 2983 { 2984 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2985 } 2986 if (rc) { 2987 #if USE_ITT_BUILD 2988 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2989 #endif 2990 #if OMPT_SUPPORT && OMPT_OPTIONAL 2991 if (ompt_enabled.ompt_callback_mutex_acquired) { 2992 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2993 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2994 } 2995 #endif 2996 return FTN_TRUE; 2997 } else { 2998 #if USE_ITT_BUILD 2999 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3000 #endif 3001 return FTN_FALSE; 3002 } 3003 3004 #else // KMP_USE_DYNAMIC_LOCK 3005 3006 kmp_user_lock_p lck; 3007 int rc; 3008 3009 if ((__kmp_user_lock_kind == lk_tas) && 3010 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3011 lck = (kmp_user_lock_p)user_lock; 3012 } 3013 #if KMP_USE_FUTEX 3014 else if ((__kmp_user_lock_kind == lk_futex) && 3015 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3016 lck = (kmp_user_lock_p)user_lock; 3017 } 3018 #endif 3019 else { 3020 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3021 } 3022 3023 #if USE_ITT_BUILD 3024 __kmp_itt_lock_acquiring(lck); 3025 #endif /* USE_ITT_BUILD */ 3026 #if OMPT_SUPPORT && OMPT_OPTIONAL 3027 // This is the case, if called from omp_init_lock_with_hint: 3028 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3029 if (!codeptr) 3030 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3031 if (ompt_enabled.ompt_callback_mutex_acquire) { 3032 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3033 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3034 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3035 } 3036 #endif 3037 3038 rc = TEST_LOCK(lck, gtid); 3039 #if USE_ITT_BUILD 3040 if (rc) { 3041 __kmp_itt_lock_acquired(lck); 3042 } else { 3043 __kmp_itt_lock_cancelled(lck); 3044 } 3045 #endif /* USE_ITT_BUILD */ 3046 #if OMPT_SUPPORT && OMPT_OPTIONAL 3047 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3048 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3049 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3050 } 3051 #endif 3052 3053 return (rc ? 
FTN_TRUE : FTN_FALSE);
3054
3055   /* Can't use serial interval since not block structured */
3056
3057 #endif // KMP_USE_DYNAMIC_LOCK
3058 }
3059
3060 /* try to acquire the lock */
3061 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3062 #if KMP_USE_DYNAMIC_LOCK
3063   int rc;
3064 #if USE_ITT_BUILD
3065   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3066 #endif
3067 #if OMPT_SUPPORT && OMPT_OPTIONAL
3068   // This is the case, if called from omp_init_lock_with_hint:
3069   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3070   if (!codeptr)
3071     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3072   if (ompt_enabled.ompt_callback_mutex_acquire) {
3073     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3074         ompt_mutex_nest_lock, omp_lock_hint_none,
3075         __ompt_get_mutex_impl_type(user_lock),
3076         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3077   }
3078 #endif
3079   rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3080 #if USE_ITT_BUILD
3081   if (rc) {
3082     __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3083   } else {
3084     __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3085   }
3086 #endif
3087 #if OMPT_SUPPORT && OMPT_OPTIONAL
3088   if (ompt_enabled.enabled && rc) {
3089     if (rc == 1) {
3090       if (ompt_enabled.ompt_callback_mutex_acquired) {
3091         // lock_first
3092         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3093             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3094             codeptr);
3095       }
3096     } else {
3097       if (ompt_enabled.ompt_callback_nest_lock) {
3098         // lock_next
3099         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3100             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3101       }
3102     }
3103   }
3104 #endif
3105   return rc;
3106
3107 #else // KMP_USE_DYNAMIC_LOCK
3108
3109   kmp_user_lock_p lck;
3110   int rc;
3111
3112   if ((__kmp_user_lock_kind == lk_tas) &&
3113       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3114        OMP_NEST_LOCK_T_SIZE)) {
3115     lck = (kmp_user_lock_p)user_lock;
3116   }
3117 #if KMP_USE_FUTEX
3118   else if ((__kmp_user_lock_kind == lk_futex) &&
3119            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3120             OMP_NEST_LOCK_T_SIZE)) {
3121     lck = (kmp_user_lock_p)user_lock;
3122   }
3123 #endif
3124   else {
3125     lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3126   }
3127
3128 #if USE_ITT_BUILD
3129   __kmp_itt_lock_acquiring(lck);
3130 #endif /* USE_ITT_BUILD */
3131
3132 #if OMPT_SUPPORT && OMPT_OPTIONAL
3133   // This is the case, if called from omp_init_lock_with_hint:
3134   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3135   if (!codeptr)
3136     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3137   if (ompt_enabled.enabled &&
3138       ompt_enabled.ompt_callback_mutex_acquire) {
3139     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3140         ompt_mutex_nest_lock, omp_lock_hint_none,
3141         __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3142         codeptr);
3143   }
3144 #endif
3145
3146   rc = TEST_NESTED_LOCK(lck, gtid);
3147 #if USE_ITT_BUILD
3148   if (rc) {
3149     __kmp_itt_lock_acquired(lck);
3150   } else {
3151     __kmp_itt_lock_cancelled(lck);
3152   }
3153 #endif /* USE_ITT_BUILD */
3154 #if OMPT_SUPPORT && OMPT_OPTIONAL
3155   if (ompt_enabled.enabled && rc) {
3156     if (rc == 1) {
3157       if (ompt_enabled.ompt_callback_mutex_acquired) {
3158         // lock_first
3159         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3160             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3161       }
3162     } else {
3163       if (ompt_enabled.ompt_callback_nest_lock) { 3164
// lock_next 3165 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3166 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3167 } 3168 } 3169 } 3170 #endif 3171 return rc; 3172 3173 /* Can't use serial interval since not block structured */ 3174 3175 #endif // KMP_USE_DYNAMIC_LOCK 3176 } 3177 3178 // Interface to fast scalable reduce methods routines 3179 3180 // keep the selected method in a thread local structure for cross-function 3181 // usage: will be used in __kmpc_end_reduce* functions; 3182 // another solution: to re-determine the method one more time in 3183 // __kmpc_end_reduce* functions (new prototype required then) 3184 // AT: which solution is better? 3185 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3186 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3187 3188 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3189 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3190 3191 // description of the packed_reduction_method variable: look at the macros in 3192 // kmp.h 3193 3194 // used in a critical section reduce block 3195 static __forceinline void 3196 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3197 kmp_critical_name *crit) { 3198 3199 // this lock was visible to a customer and to the threading profile tool as a 3200 // serial overhead span (although it's used for an internal purpose only) 3201 // why was it visible in previous implementation? 3202 // should we keep it visible in new reduce block? 3203 kmp_user_lock_p lck; 3204 3205 #if KMP_USE_DYNAMIC_LOCK 3206 3207 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3208 // Check if it is initialized. 3209 if (*lk == 0) { 3210 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3211 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3212 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3213 } else { 3214 __kmp_init_indirect_csptr(crit, loc, global_tid, 3215 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3216 } 3217 } 3218 // Branch for accessing the actual lock object and set operation. This 3219 // branching is inevitable since this lock initialization does not follow the 3220 // normal dispatch path (lock table is not used). 3221 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3222 lck = (kmp_user_lock_p)lk; 3223 KMP_DEBUG_ASSERT(lck != NULL); 3224 if (__kmp_env_consistency_check) { 3225 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3226 } 3227 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3228 } else { 3229 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3230 lck = ilk->lock; 3231 KMP_DEBUG_ASSERT(lck != NULL); 3232 if (__kmp_env_consistency_check) { 3233 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3234 } 3235 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3236 } 3237 3238 #else // KMP_USE_DYNAMIC_LOCK 3239 3240 // We know that the fast reduction code is only emitted by Intel compilers 3241 // with 32 byte critical sections. If there isn't enough space, then we 3242 // have to use a pointer. 
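  // (kmp_critical_name is a 32-byte buffer supplied by the compiler: when the
  // configured user-lock type fits in it, the lock is stored directly in that
  // buffer; otherwise the buffer holds a pointer to a separately allocated lock.)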
3243 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3244 lck = (kmp_user_lock_p)crit; 3245 } else { 3246 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3247 } 3248 KMP_DEBUG_ASSERT(lck != NULL); 3249 3250 if (__kmp_env_consistency_check) 3251 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3252 3253 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3254 3255 #endif // KMP_USE_DYNAMIC_LOCK 3256 } 3257 3258 // used in a critical section reduce block 3259 static __forceinline void 3260 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3261 kmp_critical_name *crit) { 3262 3263 kmp_user_lock_p lck; 3264 3265 #if KMP_USE_DYNAMIC_LOCK 3266 3267 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3268 lck = (kmp_user_lock_p)crit; 3269 if (__kmp_env_consistency_check) 3270 __kmp_pop_sync(global_tid, ct_critical, loc); 3271 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3272 } else { 3273 kmp_indirect_lock_t *ilk = 3274 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3275 if (__kmp_env_consistency_check) 3276 __kmp_pop_sync(global_tid, ct_critical, loc); 3277 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3278 } 3279 3280 #else // KMP_USE_DYNAMIC_LOCK 3281 3282 // We know that the fast reduction code is only emitted by Intel compilers 3283 // with 32 byte critical sections. If there isn't enough space, then we have 3284 // to use a pointer. 3285 if (__kmp_base_user_lock_size > 32) { 3286 lck = *((kmp_user_lock_p *)crit); 3287 KMP_ASSERT(lck != NULL); 3288 } else { 3289 lck = (kmp_user_lock_p)crit; 3290 } 3291 3292 if (__kmp_env_consistency_check) 3293 __kmp_pop_sync(global_tid, ct_critical, loc); 3294 3295 __kmp_release_user_lock_with_checks(lck, global_tid); 3296 3297 #endif // KMP_USE_DYNAMIC_LOCK 3298 } // __kmp_end_critical_section_reduce_block 3299 3300 static __forceinline int 3301 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3302 int *task_state) { 3303 kmp_team_t *team; 3304 3305 // Check if we are inside the teams construct? 3306 if (th->th.th_teams_microtask) { 3307 *team_p = team = th->th.th_team; 3308 if (team->t.t_level == th->th.th_teams_level) { 3309 // This is reduction at teams construct. 3310 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3311 // Let's swap teams temporarily for the reduction. 3312 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3313 th->th.th_team = team->t.t_parent; 3314 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3315 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3316 *task_state = th->th.th_task_state; 3317 th->th.th_task_state = 0; 3318 3319 return 1; 3320 } 3321 } 3322 return 0; 3323 } 3324 3325 static __forceinline void 3326 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3327 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3328 th->th.th_info.ds.ds_tid = 0; 3329 th->th.th_team = team; 3330 th->th.th_team_nproc = team->t.t_nproc; 3331 th->th.th_task_team = team->t.t_task_team[task_state]; 3332 th->th.th_task_state = task_state; 3333 } 3334 3335 /* 2.a.i. Reduce Block without a terminating barrier */ 3336 /*! 
3337 @ingroup SYNCHRONIZATION 3338 @param loc source location information 3339 @param global_tid global thread number 3340 @param num_vars number of items (variables) to be reduced 3341 @param reduce_size size of data in bytes to be reduced 3342 @param reduce_data pointer to data to be reduced 3343 @param reduce_func callback function providing reduction operation on two 3344 operands and returning result of reduction in lhs_data 3345 @param lck pointer to the unique lock data structure 3346 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3347 threads if atomic reduction needed 3348 3349 The nowait version is used for a reduce clause with the nowait argument. 3350 */ 3351 kmp_int32 3352 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3353 size_t reduce_size, void *reduce_data, 3354 void (*reduce_func)(void *lhs_data, void *rhs_data), 3355 kmp_critical_name *lck) { 3356 3357 KMP_COUNT_BLOCK(REDUCE_nowait); 3358 int retval = 0; 3359 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3360 kmp_info_t *th; 3361 kmp_team_t *team; 3362 int teams_swapped = 0, task_state; 3363 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3364 __kmp_assert_valid_gtid(global_tid); 3365 3366 // why do we need this initialization here at all? 3367 // Reduction clause can not be used as a stand-alone directive. 3368 3369 // do not call __kmp_serial_initialize(), it will be called by 3370 // __kmp_parallel_initialize() if needed 3371 // possible detection of false-positive race by the threadchecker ??? 3372 if (!TCR_4(__kmp_init_parallel)) 3373 __kmp_parallel_initialize(); 3374 3375 __kmp_resume_if_soft_paused(); 3376 3377 // check correctness of reduce block nesting 3378 #if KMP_USE_DYNAMIC_LOCK 3379 if (__kmp_env_consistency_check) 3380 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3381 #else 3382 if (__kmp_env_consistency_check) 3383 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3384 #endif 3385 3386 th = __kmp_thread_from_gtid(global_tid); 3387 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3388 3389 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3390 // the value should be kept in a variable 3391 // the variable should be either a construct-specific or thread-specific 3392 // property, not a team specific property 3393 // (a thread can reach the next reduce block on the next construct, reduce 3394 // method may differ on the next construct) 3395 // an ident_t "loc" parameter could be used as a construct-specific property 3396 // (what if loc == 0?) 
3397   // (if both construct-specific and team-specific variables were shared,
3398   // then unnecessary extra syncs would be needed)
3399   // a thread-specific variable is better regarding two issues above (next
3400   // construct and extra syncs)
3401   // a thread-specific "th_local.reduction_method" variable is used currently
3402   // each thread executes 'determine' and 'set' lines (no need to execute by one
3403   // thread, to avoid unnecessary extra syncs)
3404
3405   packed_reduction_method = __kmp_determine_reduction_method(
3406       loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3407   __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3408
3409   OMPT_REDUCTION_DECL(th, global_tid);
3410   if (packed_reduction_method == critical_reduce_block) {
3411
3412     OMPT_REDUCTION_BEGIN;
3413
3414     __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3415     retval = 1;
3416
3417   } else if (packed_reduction_method == empty_reduce_block) {
3418
3419     OMPT_REDUCTION_BEGIN;
3420
3421     // usage: if team size == 1, no synchronization is required ( Intel
3422     // platforms only )
3423     retval = 1;
3424
3425   } else if (packed_reduction_method == atomic_reduce_block) {
3426
3427     retval = 2;
3428
3429     // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3430     // won't be called by the code gen)
3431     // (it's not quite good, because the checking block has been closed by
3432     // this 'pop',
3433     // but atomic operation has not been executed yet, will be executed
3434     // slightly later, literally on next instruction)
3435     if (__kmp_env_consistency_check)
3436       __kmp_pop_sync(global_tid, ct_reduce, loc);
3437
3438   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3439                                    tree_reduce_block)) {
3440
3441     // AT: performance issue: a real barrier here
3442     // AT: (if master goes slow, other threads are blocked here waiting for the
3443     // master to come and release them)
3444     // AT: (it's not what a customer might expect specifying NOWAIT clause)
3445     // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3446     // be confusing to a customer)
3447     // AT: another implementation of *barrier_gather*nowait() (or some other design)
3448     // might go faster and be more in line with the intent of NOWAIT
3449     // AT: TO DO: do epcc test and compare times
3450
3451     // this barrier should be invisible to a customer and to the threading profile
3452     // tool (it's neither a terminating barrier nor customer's code, it's
3453     // used for an internal purpose)
3454 #if OMPT_SUPPORT
3455     // JP: can this barrier potentially lead to task scheduling?
3456     // JP: as long as there is a barrier in the implementation, OMPT should and
3457     // will provide the barrier events
3458     // so we set up the necessary frame/return addresses.
3459     ompt_frame_t *ompt_frame;
3460     if (ompt_enabled.enabled) {
3461       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3462       if (ompt_frame->enter_frame.ptr == NULL)
3463         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3464     }
3465     OMPT_STORE_RETURN_ADDRESS(global_tid);
3466 #endif
3467 #if USE_ITT_NOTIFY
3468     __kmp_threads[global_tid]->th.th_ident = loc;
3469 #endif
3470     retval =
3471         __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3472                       global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3473     retval = (retval != 0) ?
(0) : (1);
3474 #if OMPT_SUPPORT && OMPT_OPTIONAL
3475 if (ompt_enabled.enabled) {
3476 ompt_frame->enter_frame = ompt_data_none;
3477 }
3478 #endif
3479
3480 // all workers except the master should do this pop here
3481 // (none of the other workers will get to __kmpc_end_reduce_nowait())
3482 if (__kmp_env_consistency_check) {
3483 if (retval == 0) {
3484 __kmp_pop_sync(global_tid, ct_reduce, loc);
3485 }
3486 }
3487
3488 } else {
3489
3490 // should never reach this block
3491 KMP_ASSERT(0); // "unexpected method"
3492 }
3493 if (teams_swapped) {
3494 __kmp_restore_swapped_teams(th, team, task_state);
3495 }
3496 KA_TRACE(
3497 10,
3498 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3499 global_tid, packed_reduction_method, retval));
3500
3501 return retval;
3502 }
3503
3504 /*!
3505 @ingroup SYNCHRONIZATION
3506 @param loc source location information
3507 @param global_tid global thread id.
3508 @param lck pointer to the unique lock data structure
3509
3510 Finish the execution of a reduce nowait.
3511 */
3512 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3513 kmp_critical_name *lck) {
3514
3515 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3516
3517 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3518 __kmp_assert_valid_gtid(global_tid);
3519
3520 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3521
3522 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3523
3524 if (packed_reduction_method == critical_reduce_block) {
3525
3526 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3527 OMPT_REDUCTION_END;
3528
3529 } else if (packed_reduction_method == empty_reduce_block) {
3530
3531 // usage: if team size == 1, no synchronization is required (on Intel
3532 // platforms only)
3533
3534 OMPT_REDUCTION_END;
3535
3536 } else if (packed_reduction_method == atomic_reduce_block) {
3537
3538 // neither the master nor the other workers should get here
3539 // (code gen does not generate this call in case 2: atomic reduce block)
3540 // actually it would be better to remove this else-if entirely;
3541 // after removal this value would be caught by the 'else' and would assert
3542
3543 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3544 tree_reduce_block)) {
3545
3546 // only the master gets here
3547 // OMPT: tree reduction is annotated in the barrier code
3548
3549 } else {
3550
3551 // should never reach this block
3552 KMP_ASSERT(0); // "unexpected method"
3553 }
3554
3555 if (__kmp_env_consistency_check)
3556 __kmp_pop_sync(global_tid, ct_reduce, loc);
3557
3558 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3559 global_tid, packed_reduction_method));
3560
3561 return;
3562 }
3563
3564 /* 2.a.ii. Reduce Block with a terminating barrier */
3565
3566 /*!
3567 @ingroup SYNCHRONIZATION
3568 @param loc source location information
3569 @param global_tid global thread number
3570 @param num_vars number of items (variables) to be reduced
3571 @param reduce_size size of data in bytes to be reduced
3572 @param reduce_data pointer to data to be reduced
3573 @param reduce_func callback function providing reduction operation on two
3574 operands and returning result of reduction in lhs_data
3575 @param lck pointer to the unique lock data structure
3576 @result 1 for the master thread, 0 for all other team threads, 2 for all team
3577 threads if atomic reduction needed
3578
3579 A blocking reduce that includes an implicit barrier.
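As a rough illustration only (the exact lowering is compiler-dependent, and the
names <tt>ret</tt>, <tt>n</tt>, <tt>size</tt>, <tt>data</tt> and <tt>crit_name</tt>
below are invented for this sketch), compiler-generated code around a reduction
is expected to follow a pattern like:

    ret = __kmpc_reduce(loc, gtid, n, size, data, reduce_func, &crit_name);
    if (ret == 1) {
      // the master thread combines the partial results into the originals
      __kmpc_end_reduce(loc, gtid, &crit_name);
    } else if (ret == 2) {
      // every thread combines its contribution using atomic operations
      // (the nowait variant skips the end call in this case)
      __kmpc_end_reduce(loc, gtid, &crit_name);
    }
    // ret == 0: a worker in a tree reduction; nothing more to do here

The nowait form uses __kmpc_reduce_nowait() and __kmpc_end_reduce_nowait() in
the same way.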
3580 */ 3581 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3582 size_t reduce_size, void *reduce_data, 3583 void (*reduce_func)(void *lhs_data, void *rhs_data), 3584 kmp_critical_name *lck) { 3585 KMP_COUNT_BLOCK(REDUCE_wait); 3586 int retval = 0; 3587 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3588 kmp_info_t *th; 3589 kmp_team_t *team; 3590 int teams_swapped = 0, task_state; 3591 3592 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3593 __kmp_assert_valid_gtid(global_tid); 3594 3595 // why do we need this initialization here at all? 3596 // Reduction clause can not be a stand-alone directive. 3597 3598 // do not call __kmp_serial_initialize(), it will be called by 3599 // __kmp_parallel_initialize() if needed 3600 // possible detection of false-positive race by the threadchecker ??? 3601 if (!TCR_4(__kmp_init_parallel)) 3602 __kmp_parallel_initialize(); 3603 3604 __kmp_resume_if_soft_paused(); 3605 3606 // check correctness of reduce block nesting 3607 #if KMP_USE_DYNAMIC_LOCK 3608 if (__kmp_env_consistency_check) 3609 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3610 #else 3611 if (__kmp_env_consistency_check) 3612 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3613 #endif 3614 3615 th = __kmp_thread_from_gtid(global_tid); 3616 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3617 3618 packed_reduction_method = __kmp_determine_reduction_method( 3619 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3620 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3621 3622 OMPT_REDUCTION_DECL(th, global_tid); 3623 3624 if (packed_reduction_method == critical_reduce_block) { 3625 3626 OMPT_REDUCTION_BEGIN; 3627 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3628 retval = 1; 3629 3630 } else if (packed_reduction_method == empty_reduce_block) { 3631 3632 OMPT_REDUCTION_BEGIN; 3633 // usage: if team size == 1, no synchronization is required ( Intel 3634 // platforms only ) 3635 retval = 1; 3636 3637 } else if (packed_reduction_method == atomic_reduce_block) { 3638 3639 retval = 2; 3640 3641 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3642 tree_reduce_block)) { 3643 3644 // case tree_reduce_block: 3645 // this barrier should be visible to a customer and to the threading profile 3646 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3647 #if OMPT_SUPPORT 3648 ompt_frame_t *ompt_frame; 3649 if (ompt_enabled.enabled) { 3650 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3651 if (ompt_frame->enter_frame.ptr == NULL) 3652 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3653 } 3654 OMPT_STORE_RETURN_ADDRESS(global_tid); 3655 #endif 3656 #if USE_ITT_NOTIFY 3657 __kmp_threads[global_tid]->th.th_ident = 3658 loc; // needed for correct notification of frames 3659 #endif 3660 retval = 3661 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3662 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3663 retval = (retval != 0) ? 
(0) : (1); 3664 #if OMPT_SUPPORT && OMPT_OPTIONAL 3665 if (ompt_enabled.enabled) { 3666 ompt_frame->enter_frame = ompt_data_none; 3667 } 3668 #endif 3669 3670 // all other workers except master should do this pop here 3671 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3672 if (__kmp_env_consistency_check) { 3673 if (retval == 0) { // 0: all other workers; 1: master 3674 __kmp_pop_sync(global_tid, ct_reduce, loc); 3675 } 3676 } 3677 3678 } else { 3679 3680 // should never reach this block 3681 KMP_ASSERT(0); // "unexpected method" 3682 } 3683 if (teams_swapped) { 3684 __kmp_restore_swapped_teams(th, team, task_state); 3685 } 3686 3687 KA_TRACE(10, 3688 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3689 global_tid, packed_reduction_method, retval)); 3690 return retval; 3691 } 3692 3693 /*! 3694 @ingroup SYNCHRONIZATION 3695 @param loc source location information 3696 @param global_tid global thread id. 3697 @param lck pointer to the unique lock data structure 3698 3699 Finish the execution of a blocking reduce. 3700 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3701 start function. 3702 */ 3703 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3704 kmp_critical_name *lck) { 3705 3706 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3707 kmp_info_t *th; 3708 kmp_team_t *team; 3709 int teams_swapped = 0, task_state; 3710 3711 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3712 __kmp_assert_valid_gtid(global_tid); 3713 3714 th = __kmp_thread_from_gtid(global_tid); 3715 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3716 3717 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3718 3719 // this barrier should be visible to a customer and to the threading profile 3720 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3721 OMPT_REDUCTION_DECL(th, global_tid); 3722 3723 if (packed_reduction_method == critical_reduce_block) { 3724 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3725 3726 OMPT_REDUCTION_END; 3727 3728 // TODO: implicit barrier: should be exposed 3729 #if OMPT_SUPPORT 3730 ompt_frame_t *ompt_frame; 3731 if (ompt_enabled.enabled) { 3732 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3733 if (ompt_frame->enter_frame.ptr == NULL) 3734 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3735 } 3736 OMPT_STORE_RETURN_ADDRESS(global_tid); 3737 #endif 3738 #if USE_ITT_NOTIFY 3739 __kmp_threads[global_tid]->th.th_ident = loc; 3740 #endif 3741 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3742 #if OMPT_SUPPORT && OMPT_OPTIONAL 3743 if (ompt_enabled.enabled) { 3744 ompt_frame->enter_frame = ompt_data_none; 3745 } 3746 #endif 3747 3748 } else if (packed_reduction_method == empty_reduce_block) { 3749 3750 OMPT_REDUCTION_END; 3751 3752 // usage: if team size==1, no synchronization is required (Intel platforms only) 3753 3754 // TODO: implicit barrier: should be exposed 3755 #if OMPT_SUPPORT 3756 ompt_frame_t *ompt_frame; 3757 if (ompt_enabled.enabled) { 3758 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3759 if (ompt_frame->enter_frame.ptr == NULL) 3760 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3761 } 3762 OMPT_STORE_RETURN_ADDRESS(global_tid); 3763 #endif 3764 #if USE_ITT_NOTIFY 3765 __kmp_threads[global_tid]->th.th_ident = loc; 3766 #endif 3767 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 
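// Note: even though the reduction itself needed no synchronization in this
// case, the blocking form of the construct still owes the team its
// terminating barrier; the plain barrier above provides it.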
3768 #if OMPT_SUPPORT && OMPT_OPTIONAL 3769 if (ompt_enabled.enabled) { 3770 ompt_frame->enter_frame = ompt_data_none; 3771 } 3772 #endif 3773 3774 } else if (packed_reduction_method == atomic_reduce_block) { 3775 3776 #if OMPT_SUPPORT 3777 ompt_frame_t *ompt_frame; 3778 if (ompt_enabled.enabled) { 3779 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3780 if (ompt_frame->enter_frame.ptr == NULL) 3781 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3782 } 3783 OMPT_STORE_RETURN_ADDRESS(global_tid); 3784 #endif 3785 // TODO: implicit barrier: should be exposed 3786 #if USE_ITT_NOTIFY 3787 __kmp_threads[global_tid]->th.th_ident = loc; 3788 #endif 3789 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3790 #if OMPT_SUPPORT && OMPT_OPTIONAL 3791 if (ompt_enabled.enabled) { 3792 ompt_frame->enter_frame = ompt_data_none; 3793 } 3794 #endif 3795 3796 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3797 tree_reduce_block)) { 3798 3799 // only master executes here (master releases all other workers) 3800 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3801 global_tid); 3802 3803 } else { 3804 3805 // should never reach this block 3806 KMP_ASSERT(0); // "unexpected method" 3807 } 3808 if (teams_swapped) { 3809 __kmp_restore_swapped_teams(th, team, task_state); 3810 } 3811 3812 if (__kmp_env_consistency_check) 3813 __kmp_pop_sync(global_tid, ct_reduce, loc); 3814 3815 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3816 global_tid, packed_reduction_method)); 3817 3818 return; 3819 } 3820 3821 #undef __KMP_GET_REDUCTION_METHOD 3822 #undef __KMP_SET_REDUCTION_METHOD 3823 3824 /* end of interface to fast scalable reduce routines */ 3825 3826 kmp_uint64 __kmpc_get_taskid() { 3827 3828 kmp_int32 gtid; 3829 kmp_info_t *thread; 3830 3831 gtid = __kmp_get_gtid(); 3832 if (gtid < 0) { 3833 return 0; 3834 } 3835 thread = __kmp_thread_from_gtid(gtid); 3836 return thread->th.th_current_task->td_task_id; 3837 3838 } // __kmpc_get_taskid 3839 3840 kmp_uint64 __kmpc_get_parent_taskid() { 3841 3842 kmp_int32 gtid; 3843 kmp_info_t *thread; 3844 kmp_taskdata_t *parent_task; 3845 3846 gtid = __kmp_get_gtid(); 3847 if (gtid < 0) { 3848 return 0; 3849 } 3850 thread = __kmp_thread_from_gtid(gtid); 3851 parent_task = thread->th.th_current_task->td_parent; 3852 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3853 3854 } // __kmpc_get_parent_taskid 3855 3856 /*! 3857 @ingroup WORK_SHARING 3858 @param loc source location information. 3859 @param gtid global thread number. 3860 @param num_dims number of associated doacross loops. 3861 @param dims info on loops bounds. 3862 3863 Initialize doacross loop information. 3864 Expect compiler send us inclusive bounds, 3865 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
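As an informal sketch (the 1-D loop and the exact sequence shown here are an
illustrative assumption, not the only possible lowering), a doacross loop such as

    #pragma omp for ordered(1)
    for (i = 2; i < 9; i += 2) {
      #pragma omp ordered depend(sink: i - 2)
      // ... consume the result of iteration i - 2 ...
      #pragma omp ordered depend(source)
      // ... produce the result of iteration i ...
    }

is expected to make each thread call __kmpc_doacross_init() once with
num_dims = 1 and dims = {lo = 2, up = 8, st = 2}, then, inside the loop body,
call __kmpc_doacross_wait() with the sink iteration vector (here {i - 2}) and
__kmpc_doacross_post() with the current iteration vector (here {i}), and
finally call __kmpc_doacross_fini() after finishing the loop.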
3866 */ 3867 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3868 const struct kmp_dim *dims) { 3869 __kmp_assert_valid_gtid(gtid); 3870 int j, idx; 3871 kmp_int64 last, trace_count; 3872 kmp_info_t *th = __kmp_threads[gtid]; 3873 kmp_team_t *team = th->th.th_team; 3874 kmp_uint32 *flags; 3875 kmp_disp_t *pr_buf = th->th.th_dispatch; 3876 dispatch_shared_info_t *sh_buf; 3877 3878 KA_TRACE( 3879 20, 3880 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3881 gtid, num_dims, !team->t.t_serialized)); 3882 KMP_DEBUG_ASSERT(dims != NULL); 3883 KMP_DEBUG_ASSERT(num_dims > 0); 3884 3885 if (team->t.t_serialized) { 3886 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3887 return; // no dependencies if team is serialized 3888 } 3889 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3890 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3891 // the next loop 3892 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3893 3894 // Save bounds info into allocated private buffer 3895 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3896 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3897 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3898 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3899 pr_buf->th_doacross_info[0] = 3900 (kmp_int64)num_dims; // first element is number of dimensions 3901 // Save also address of num_done in order to access it later without knowing 3902 // the buffer index 3903 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3904 pr_buf->th_doacross_info[2] = dims[0].lo; 3905 pr_buf->th_doacross_info[3] = dims[0].up; 3906 pr_buf->th_doacross_info[4] = dims[0].st; 3907 last = 5; 3908 for (j = 1; j < num_dims; ++j) { 3909 kmp_int64 3910 range_length; // To keep ranges of all dimensions but the first dims[0] 3911 if (dims[j].st == 1) { // most common case 3912 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3913 range_length = dims[j].up - dims[j].lo + 1; 3914 } else { 3915 if (dims[j].st > 0) { 3916 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3917 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3918 } else { // negative increment 3919 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3920 range_length = 3921 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3922 } 3923 } 3924 pr_buf->th_doacross_info[last++] = range_length; 3925 pr_buf->th_doacross_info[last++] = dims[j].lo; 3926 pr_buf->th_doacross_info[last++] = dims[j].up; 3927 pr_buf->th_doacross_info[last++] = dims[j].st; 3928 } 3929 3930 // Compute total trip count. 3931 // Start with range of dims[0] which we don't need to keep in the buffer. 
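// For example, with the loop from the comment above, for(i=2;i<9;i+=2), we get
// lo=2, up=8, st=2, so this dimension contributes (8 - 2) / 2 + 1 = 4 iterations.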
3932 if (dims[0].st == 1) { // most common case 3933 trace_count = dims[0].up - dims[0].lo + 1; 3934 } else if (dims[0].st > 0) { 3935 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3936 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3937 } else { // negative increment 3938 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3939 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3940 } 3941 for (j = 1; j < num_dims; ++j) { 3942 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3943 } 3944 KMP_DEBUG_ASSERT(trace_count > 0); 3945 3946 // Check if shared buffer is not occupied by other loop (idx - 3947 // __kmp_dispatch_num_buffers) 3948 if (idx != sh_buf->doacross_buf_idx) { 3949 // Shared buffer is occupied, wait for it to be free 3950 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3951 __kmp_eq_4, NULL); 3952 } 3953 #if KMP_32_BIT_ARCH 3954 // Check if we are the first thread. After the CAS the first thread gets 0, 3955 // others get 1 if initialization is in progress, allocated pointer otherwise. 3956 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3957 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3958 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3959 #else 3960 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3961 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3962 #endif 3963 if (flags == NULL) { 3964 // we are the first thread, allocate the array of flags 3965 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3966 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3967 KMP_MB(); 3968 sh_buf->doacross_flags = flags; 3969 } else if (flags == (kmp_uint32 *)1) { 3970 #if KMP_32_BIT_ARCH 3971 // initialization is still in progress, need to wait 3972 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 3973 #else 3974 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 3975 #endif 3976 KMP_YIELD(TRUE); 3977 KMP_MB(); 3978 } else { 3979 KMP_MB(); 3980 } 3981 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 3982 pr_buf->th_doacross_flags = 3983 sh_buf->doacross_flags; // save private copy in order to not 3984 // touch shared buffer on each iteration 3985 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 3986 } 3987 3988 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 3989 __kmp_assert_valid_gtid(gtid); 3990 kmp_int32 shft, num_dims, i; 3991 kmp_uint32 flag; 3992 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 3993 kmp_info_t *th = __kmp_threads[gtid]; 3994 kmp_team_t *team = th->th.th_team; 3995 kmp_disp_t *pr_buf; 3996 kmp_int64 lo, up, st; 3997 3998 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 3999 if (team->t.t_serialized) { 4000 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4001 return; // no dependencies if team is serialized 4002 } 4003 4004 // calculate sequential iteration number and check out-of-bounds condition 4005 pr_buf = th->th.th_dispatch; 4006 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4007 num_dims = pr_buf->th_doacross_info[0]; 4008 lo = pr_buf->th_doacross_info[2]; 4009 up = pr_buf->th_doacross_info[3]; 4010 st = pr_buf->th_doacross_info[4]; 4011 #if OMPT_SUPPORT && OMPT_OPTIONAL 4012 ompt_dependence_t deps[num_dims]; 4013 #endif 4014 if (st == 1) { // most common case 4015 if (vec[0] < lo || vec[0] > up) { 4016 KA_TRACE(20, ("__kmpc_doacross_wait() 
exit: T#%d iter %lld is out of " 4017 "bounds [%lld,%lld]\n", 4018 gtid, vec[0], lo, up)); 4019 return; 4020 } 4021 iter_number = vec[0] - lo; 4022 } else if (st > 0) { 4023 if (vec[0] < lo || vec[0] > up) { 4024 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4025 "bounds [%lld,%lld]\n", 4026 gtid, vec[0], lo, up)); 4027 return; 4028 } 4029 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4030 } else { // negative increment 4031 if (vec[0] > lo || vec[0] < up) { 4032 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4033 "bounds [%lld,%lld]\n", 4034 gtid, vec[0], lo, up)); 4035 return; 4036 } 4037 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4038 } 4039 #if OMPT_SUPPORT && OMPT_OPTIONAL 4040 deps[0].variable.value = iter_number; 4041 deps[0].dependence_type = ompt_dependence_type_sink; 4042 #endif 4043 for (i = 1; i < num_dims; ++i) { 4044 kmp_int64 iter, ln; 4045 kmp_int32 j = i * 4; 4046 ln = pr_buf->th_doacross_info[j + 1]; 4047 lo = pr_buf->th_doacross_info[j + 2]; 4048 up = pr_buf->th_doacross_info[j + 3]; 4049 st = pr_buf->th_doacross_info[j + 4]; 4050 if (st == 1) { 4051 if (vec[i] < lo || vec[i] > up) { 4052 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4053 "bounds [%lld,%lld]\n", 4054 gtid, vec[i], lo, up)); 4055 return; 4056 } 4057 iter = vec[i] - lo; 4058 } else if (st > 0) { 4059 if (vec[i] < lo || vec[i] > up) { 4060 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4061 "bounds [%lld,%lld]\n", 4062 gtid, vec[i], lo, up)); 4063 return; 4064 } 4065 iter = (kmp_uint64)(vec[i] - lo) / st; 4066 } else { // st < 0 4067 if (vec[i] > lo || vec[i] < up) { 4068 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4069 "bounds [%lld,%lld]\n", 4070 gtid, vec[i], lo, up)); 4071 return; 4072 } 4073 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4074 } 4075 iter_number = iter + ln * iter_number; 4076 #if OMPT_SUPPORT && OMPT_OPTIONAL 4077 deps[i].variable.value = iter; 4078 deps[i].dependence_type = ompt_dependence_type_sink; 4079 #endif 4080 } 4081 shft = iter_number % 32; // use 32-bit granularity 4082 iter_number >>= 5; // divided by 32 4083 flag = 1 << shft; 4084 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4085 KMP_YIELD(TRUE); 4086 } 4087 KMP_MB(); 4088 #if OMPT_SUPPORT && OMPT_OPTIONAL 4089 if (ompt_enabled.ompt_callback_dependences) { 4090 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4091 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4092 } 4093 #endif 4094 KA_TRACE(20, 4095 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4096 gtid, (iter_number << 5) + shft)); 4097 } 4098 4099 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4100 __kmp_assert_valid_gtid(gtid); 4101 kmp_int32 shft, num_dims, i; 4102 kmp_uint32 flag; 4103 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4104 kmp_info_t *th = __kmp_threads[gtid]; 4105 kmp_team_t *team = th->th.th_team; 4106 kmp_disp_t *pr_buf; 4107 kmp_int64 lo, st; 4108 4109 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4110 if (team->t.t_serialized) { 4111 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4112 return; // no dependencies if team is serialized 4113 } 4114 4115 // calculate sequential iteration number (same as in "wait" but no 4116 // out-of-bounds checks) 4117 pr_buf = th->th.th_dispatch; 4118 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4119 num_dims = pr_buf->th_doacross_info[0]; 
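// The iteration vector is linearized below exactly as in __kmpc_doacross_wait():
// iter_number = (...((iter_0 * ln_1 + iter_1) * ln_2 + iter_2)...), so both
// sides agree on which bit of the shared flags array represents a given iteration.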
4120 lo = pr_buf->th_doacross_info[2]; 4121 st = pr_buf->th_doacross_info[4]; 4122 #if OMPT_SUPPORT && OMPT_OPTIONAL 4123 ompt_dependence_t deps[num_dims]; 4124 #endif 4125 if (st == 1) { // most common case 4126 iter_number = vec[0] - lo; 4127 } else if (st > 0) { 4128 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4129 } else { // negative increment 4130 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4131 } 4132 #if OMPT_SUPPORT && OMPT_OPTIONAL 4133 deps[0].variable.value = iter_number; 4134 deps[0].dependence_type = ompt_dependence_type_source; 4135 #endif 4136 for (i = 1; i < num_dims; ++i) { 4137 kmp_int64 iter, ln; 4138 kmp_int32 j = i * 4; 4139 ln = pr_buf->th_doacross_info[j + 1]; 4140 lo = pr_buf->th_doacross_info[j + 2]; 4141 st = pr_buf->th_doacross_info[j + 4]; 4142 if (st == 1) { 4143 iter = vec[i] - lo; 4144 } else if (st > 0) { 4145 iter = (kmp_uint64)(vec[i] - lo) / st; 4146 } else { // st < 0 4147 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4148 } 4149 iter_number = iter + ln * iter_number; 4150 #if OMPT_SUPPORT && OMPT_OPTIONAL 4151 deps[i].variable.value = iter; 4152 deps[i].dependence_type = ompt_dependence_type_source; 4153 #endif 4154 } 4155 #if OMPT_SUPPORT && OMPT_OPTIONAL 4156 if (ompt_enabled.ompt_callback_dependences) { 4157 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4158 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4159 } 4160 #endif 4161 shft = iter_number % 32; // use 32-bit granularity 4162 iter_number >>= 5; // divided by 32 4163 flag = 1 << shft; 4164 KMP_MB(); 4165 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4166 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4167 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4168 (iter_number << 5) + shft)); 4169 } 4170 4171 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4172 __kmp_assert_valid_gtid(gtid); 4173 kmp_int32 num_done; 4174 kmp_info_t *th = __kmp_threads[gtid]; 4175 kmp_team_t *team = th->th.th_team; 4176 kmp_disp_t *pr_buf = th->th.th_dispatch; 4177 4178 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4179 if (team->t.t_serialized) { 4180 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4181 return; // nothing to do 4182 } 4183 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4184 if (num_done == th->th.th_team_nproc) { 4185 // we are the last thread, need to free shared resources 4186 int idx = pr_buf->th_doacross_buf_idx - 1; 4187 dispatch_shared_info_t *sh_buf = 4188 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4189 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4190 (kmp_int64)&sh_buf->doacross_num_done); 4191 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4192 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4193 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4194 sh_buf->doacross_flags = NULL; 4195 sh_buf->doacross_num_done = 0; 4196 sh_buf->doacross_buf_idx += 4197 __kmp_dispatch_num_buffers; // free buffer for future re-use 4198 } 4199 // free private resources (need to keep buffer index forever) 4200 pr_buf->th_doacross_flags = NULL; 4201 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4202 pr_buf->th_doacross_info = NULL; 4203 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4204 } 4205 4206 /* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */ 4207 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4208 return 
__kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4209 } 4210 4211 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4212 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); 4213 } 4214 4215 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4216 omp_allocator_handle_t free_allocator) { 4217 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4218 free_allocator); 4219 } 4220 4221 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4222 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4223 } 4224 4225 int __kmpc_get_target_offload(void) { 4226 if (!__kmp_init_serial) { 4227 __kmp_serial_initialize(); 4228 } 4229 return __kmp_target_offload; 4230 } 4231 4232 int __kmpc_pause_resource(kmp_pause_status_t level) { 4233 if (!__kmp_init_serial) { 4234 return 1; // Can't pause if runtime is not initialized 4235 } 4236 return __kmp_pause_resource(level); 4237 } 4238
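// Illustrative user-level usage (not part of the runtime itself): when an
// application is linked against this library, a call such as the following
// resolves to the omp_alloc()/omp_free() entry points defined above;
// omp_default_mem_alloc is one of the predefined allocators declared in <omp.h>.
//
//   double *buf = (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
//   if (buf != NULL) {
//     // ... use buf ...
//     omp_free(buf, omp_default_mem_alloc);
//   }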