/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).
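
A minimal illustration only (not normative; @p loc is unused by this routine
and is passed as NULL purely for brevity):
@code
kmp_int32 gtid = __kmpc_global_thread_num(NULL); // 0 for the initial thread
// omp_get_thread_num() would likewise return 0 here, since there is no
// active parallel construct.
@endcode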

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
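 *
 * Note (descriptive, see the code below): in a KMP_DEBUG build the result is
 * additionally filtered by the __kmp_par_range settings; the semicolon-separated
 * @c psource string in @p loc is parsed and its file name, routine name and
 * line number are compared against the configured range. Release builds simply
 * return TRUE.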
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

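// For illustration only: a compiler targeting this interface might lower
//   #pragma omp parallel num_threads(4) proc_bind(spread)
// roughly as
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_push_proc_bind(&loc, gtid, proc_bind_spread);
//   __kmpc_fork_call(&loc, ...);
// where proc_bind_spread stands for the corresponding kmp_proc_bind_t
// enumerator; the exact encoding is defined in kmp.h, not here.
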
/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...)
{
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// OpenMP code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
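
For illustration only, one possible lowering of such a region; the identifier
names (outlined_fn, a, b) are placeholders, not part of this interface:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
if (condition) {
  __kmpc_fork_call(&loc, 2, outlined_fn, &a, &b); // argc == 2 shared pointers
} else {
  __kmpc_serialized_parallel(&loc, gtid);
  kmp_int32 btid = 0;
  outlined_fn(&gtid, &btid, &a, &b); // body runs on the encountering thread
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode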
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t
        *disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed here (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is an SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
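
For illustration only, a <tt>master</tt> region might be emitted as a guarded
call pair (names are placeholders):
@code
if (__kmpc_master(&loc, gtid)) {
  // ... code of the master region, executed by the master thread only ...
  __kmpc_end_master(&loc, gtid);
}
@endcode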
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
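
For illustration only, the body of an <tt>ordered</tt> block inside a loop chunk
might be bracketed as (names are placeholders):
@code
__kmpc_ordered(&loc, gtid);
// ... body of the ordered region, executed in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode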
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val | KMP_LOCK_BUSY(1, futex))) {\
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
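
For illustration only, a named critical construct might be lowered as (the
cache-variable name is a placeholder; the runtime only requires that the same
@p crit address is used for the matching end call):
@code
static kmp_critical_name crit_cache = {0}; // one zero-initialized variable per critical name
__kmpc_critical(&loc, gtid, &crit_cache);
// ... protected code ...
__kmpc_end_critical(&loc, gtid, &crit_cache);
@endcode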
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
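
For illustration only, a typical usage pattern (names are placeholders):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... work done by the master thread while the other threads wait ...
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode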
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the necessary bookkeeping
(normally done by __kmpc_end_master()) is performed inside this call.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
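
For illustration only, a <tt>single</tt> construct without a nowait clause
might be lowered as (names are placeholders):
@code
if (__kmpc_single(&loc, gtid)) {
  // ... body executed by exactly one thread ...
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // explicit barrier, since the single calls add none
@endcode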
1766 */ 1767 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1768 __kmp_assert_valid_gtid(global_tid); 1769 __kmp_exit_single(global_tid); 1770 KMP_POP_PARTITIONED_TIMER(); 1771 1772 #if OMPT_SUPPORT && OMPT_OPTIONAL 1773 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1774 kmp_team_t *team = this_thr->th.th_team; 1775 int tid = __kmp_tid_from_gtid(global_tid); 1776 1777 if (ompt_enabled.ompt_callback_work) { 1778 ompt_callbacks.ompt_callback(ompt_callback_work)( 1779 ompt_work_single_executor, ompt_scope_end, 1780 &(team->t.ompt_team_info.parallel_data), 1781 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1782 OMPT_GET_RETURN_ADDRESS(0)); 1783 } 1784 #endif 1785 } 1786 1787 /*! 1788 @ingroup WORK_SHARING 1789 @param loc Source location 1790 @param global_tid Global thread id 1791 1792 Mark the end of a statically scheduled loop. 1793 */ 1794 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1795 KMP_POP_PARTITIONED_TIMER(); 1796 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1797 1798 #if OMPT_SUPPORT && OMPT_OPTIONAL 1799 if (ompt_enabled.ompt_callback_work) { 1800 ompt_work_t ompt_work_type = ompt_work_loop; 1801 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1802 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1803 // Determine workshare type 1804 if (loc != NULL) { 1805 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1806 ompt_work_type = ompt_work_loop; 1807 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1808 ompt_work_type = ompt_work_sections; 1809 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1810 ompt_work_type = ompt_work_distribute; 1811 } else { 1812 // use default set above. 1813 // a warning about this case is provided in __kmpc_for_static_init 1814 } 1815 KMP_DEBUG_ASSERT(ompt_work_type); 1816 } 1817 ompt_callbacks.ompt_callback(ompt_callback_work)( 1818 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1819 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1820 } 1821 #endif 1822 if (__kmp_env_consistency_check) 1823 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1824 } 1825 1826 // User routines which take C-style arguments (call by value) 1827 // different from the Fortran equivalent routines 1828 1829 void ompc_set_num_threads(int arg) { 1830 // !!!!! TODO: check the per-task binding 1831 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1832 } 1833 1834 void ompc_set_dynamic(int flag) { 1835 kmp_info_t *thread; 1836 1837 /* For the thread-private implementation of the internal controls */ 1838 thread = __kmp_entry_thread(); 1839 1840 __kmp_save_internal_controls(thread); 1841 1842 set__dynamic(thread, flag ? TRUE : FALSE); 1843 } 1844 1845 void ompc_set_nested(int flag) { 1846 kmp_info_t *thread; 1847 1848 /* For the thread-private internal controls implementation */ 1849 thread = __kmp_entry_thread(); 1850 1851 __kmp_save_internal_controls(thread); 1852 1853 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1854 } 1855 1856 void ompc_set_max_active_levels(int max_active_levels) { 1857 /* TO DO */ 1858 /* we want per-task implementation of this internal control */ 1859 1860 /* For the per-thread internal controls implementation */ 1861 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1862 } 1863 1864 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1865 // !!!!! 
TODO: check the per-task binding 1866 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1867 } 1868 1869 int ompc_get_ancestor_thread_num(int level) { 1870 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1871 } 1872 1873 int ompc_get_team_size(int level) { 1874 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1875 } 1876 1877 /* OpenMP 5.0 Affinity Format API */ 1878 1879 void ompc_set_affinity_format(char const *format) { 1880 if (!__kmp_init_serial) { 1881 __kmp_serial_initialize(); 1882 } 1883 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1884 format, KMP_STRLEN(format) + 1); 1885 } 1886 1887 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1888 size_t format_size; 1889 if (!__kmp_init_serial) { 1890 __kmp_serial_initialize(); 1891 } 1892 format_size = KMP_STRLEN(__kmp_affinity_format); 1893 if (buffer && size) { 1894 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1895 format_size + 1); 1896 } 1897 return format_size; 1898 } 1899 1900 void ompc_display_affinity(char const *format) { 1901 int gtid; 1902 if (!TCR_4(__kmp_init_middle)) { 1903 __kmp_middle_initialize(); 1904 } 1905 gtid = __kmp_get_gtid(); 1906 __kmp_aux_display_affinity(gtid, format); 1907 } 1908 1909 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1910 char const *format) { 1911 int gtid; 1912 size_t num_required; 1913 kmp_str_buf_t capture_buf; 1914 if (!TCR_4(__kmp_init_middle)) { 1915 __kmp_middle_initialize(); 1916 } 1917 gtid = __kmp_get_gtid(); 1918 __kmp_str_buf_init(&capture_buf); 1919 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1920 if (buffer && buf_size) { 1921 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1922 capture_buf.used + 1); 1923 } 1924 __kmp_str_buf_free(&capture_buf); 1925 return num_required; 1926 } 1927 1928 void kmpc_set_stacksize(int arg) { 1929 // __kmp_aux_set_stacksize initializes the library if needed 1930 __kmp_aux_set_stacksize(arg); 1931 } 1932 1933 void kmpc_set_stacksize_s(size_t arg) { 1934 // __kmp_aux_set_stacksize initializes the library if needed 1935 __kmp_aux_set_stacksize(arg); 1936 } 1937 1938 void kmpc_set_blocktime(int arg) { 1939 int gtid, tid; 1940 kmp_info_t *thread; 1941 1942 gtid = __kmp_entry_gtid(); 1943 tid = __kmp_tid_from_gtid(gtid); 1944 thread = __kmp_thread_from_gtid(gtid); 1945 1946 __kmp_aux_set_blocktime(arg, thread, tid); 1947 } 1948 1949 void kmpc_set_library(int arg) { 1950 // __kmp_user_set_library initializes the library if needed 1951 __kmp_user_set_library((enum library_type)arg); 1952 } 1953 1954 void kmpc_set_defaults(char const *str) { 1955 // __kmp_aux_set_defaults initializes the library if needed 1956 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1957 } 1958 1959 void kmpc_set_disp_num_buffers(int arg) { 1960 // ignore after initialization because some teams have already 1961 // allocated dispatch buffers 1962 if (__kmp_init_serial == 0 && arg > 0) 1963 __kmp_dispatch_num_buffers = arg; 1964 } 1965 1966 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1967 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1968 return -1; 1969 #else 1970 if (!TCR_4(__kmp_init_middle)) { 1971 __kmp_middle_initialize(); 1972 } 1973 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1974 #endif 1975 } 1976 1977 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 1978 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1979 return -1; 1980 #else 1981 if (!TCR_4(__kmp_init_middle)) { 1982 __kmp_middle_initialize(); 
1983 } 1984 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 1985 #endif 1986 } 1987 1988 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 1989 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1990 return -1; 1991 #else 1992 if (!TCR_4(__kmp_init_middle)) { 1993 __kmp_middle_initialize(); 1994 } 1995 return __kmp_aux_get_affinity_mask_proc(proc, mask); 1996 #endif 1997 } 1998 1999 /* -------------------------------------------------------------------------- */ 2000 /*! 2001 @ingroup THREADPRIVATE 2002 @param loc source location information 2003 @param gtid global thread number 2004 @param cpy_size size of the cpy_data buffer 2005 @param cpy_data pointer to data to be copied 2006 @param cpy_func helper function to call for copying data 2007 @param didit flag variable: 1=single thread; 0=not single thread 2008 2009 __kmpc_copyprivate implements the interface for the private data broadcast 2010 needed for the copyprivate clause associated with a single region in an 2011 OpenMP<sup>*</sup> program (both C and Fortran). 2012 All threads participating in the parallel region call this routine. 2013 One of the threads (called the single thread) should have the <tt>didit</tt> 2014 variable set to 1 and all other threads should have that variable set to 0. 2015 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2016 2017 The OpenMP specification forbids the use of nowait on the single region when a 2018 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2019 barrier internally to avoid race conditions, so the code generation for the 2020 single region should avoid generating a barrier after the call to @ref 2021 __kmpc_copyprivate. 2022 2023 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2024 The <tt>loc</tt> parameter is a pointer to source location information. 2025 2026 Internal implementation: The single thread will first copy its descriptor 2027 address (cpy_data) to a team-private location, then the other threads will each 2028 call the function pointed to by the parameter cpy_func, which carries out the 2029 copy by copying the data using the cpy_data buffer. 2030 2031 The cpy_func routine used for the copy and the contents of the data area defined 2032 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2033 to be done. For instance, the cpy_data buffer can hold the actual data to be 2034 copied or it may hold a list of pointers to the data. The cpy_func routine must 2035 interpret the cpy_data buffer appropriately. 2036 2037 The interface to cpy_func is as follows: 2038 @code 2039 void cpy_func( void *destination, void *source ) 2040 @endcode 2041 where void *destination is the cpy_data pointer for the thread being copied to 2042 and void *source is the cpy_data pointer for the thread being copied from. 
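For example (an illustrative sketch only; the helper name and data layout are
hypothetical and are chosen by the compiler, not by the runtime), if each thread's
cpy_data is an array holding the address of one private <tt>int</tt>, cpy_func could
be written as:
@code
static void copy_one_int(void *destination, void *source) {
  int **dst = (int **)destination; // this thread's pointer list
  int **src = (int **)source;      // the single thread's pointer list
  *dst[0] = *src[0];               // copy the value, not the pointer
}
@endcode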
2043 */ 2044 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2045 void *cpy_data, void (*cpy_func)(void *, void *), 2046 kmp_int32 didit) { 2047 void **data_ptr; 2048 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2049 __kmp_assert_valid_gtid(gtid); 2050 2051 KMP_MB(); 2052 2053 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2054 2055 if (__kmp_env_consistency_check) { 2056 if (loc == 0) { 2057 KMP_WARNING(ConstructIdentInvalid); 2058 } 2059 } 2060 2061 // ToDo: Optimize the following two barriers into some kind of split barrier 2062 2063 if (didit) 2064 *data_ptr = cpy_data; 2065 2066 #if OMPT_SUPPORT 2067 ompt_frame_t *ompt_frame; 2068 if (ompt_enabled.enabled) { 2069 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2070 if (ompt_frame->enter_frame.ptr == NULL) 2071 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2072 OMPT_STORE_RETURN_ADDRESS(gtid); 2073 } 2074 #endif 2075 /* This barrier is not a barrier region boundary */ 2076 #if USE_ITT_NOTIFY 2077 __kmp_threads[gtid]->th.th_ident = loc; 2078 #endif 2079 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2080 2081 if (!didit) 2082 (*cpy_func)(cpy_data, *data_ptr); 2083 2084 // Consider next barrier a user-visible barrier for barrier region boundaries 2085 // Nesting checks are already handled by the single construct checks 2086 2087 #if OMPT_SUPPORT 2088 if (ompt_enabled.enabled) { 2089 OMPT_STORE_RETURN_ADDRESS(gtid); 2090 } 2091 #endif 2092 #if USE_ITT_NOTIFY 2093 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2094 // tasks can overwrite the location) 2095 #endif 2096 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2097 #if OMPT_SUPPORT && OMPT_OPTIONAL 2098 if (ompt_enabled.enabled) { 2099 ompt_frame->enter_frame = ompt_data_none; 2100 } 2101 #endif 2102 } 2103 2104 /* -------------------------------------------------------------------------- */ 2105 2106 #define INIT_LOCK __kmp_init_user_lock_with_checks 2107 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2108 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2109 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2110 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2111 #define ACQUIRE_NESTED_LOCK_TIMED \ 2112 __kmp_acquire_nested_user_lock_with_checks_timed 2113 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2114 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2115 #define TEST_LOCK __kmp_test_user_lock_with_checks 2116 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2117 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2118 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2119 2120 // TODO: Make check abort messages use location info & pass it into 2121 // with_checks routines 2122 2123 #if KMP_USE_DYNAMIC_LOCK 2124 2125 // internal lock initializer 2126 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2127 kmp_dyna_lockseq_t seq) { 2128 if (KMP_IS_D_LOCK(seq)) { 2129 KMP_INIT_D_LOCK(lock, seq); 2130 #if USE_ITT_BUILD 2131 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2132 #endif 2133 } else { 2134 KMP_INIT_I_LOCK(lock, seq); 2135 #if USE_ITT_BUILD 2136 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2137 __kmp_itt_lock_creating(ilk->lock, loc); 2138 #endif 2139 } 2140 } 2141 2142 // internal nest lock initializer 2143 static __forceinline void 2144 
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2145 kmp_dyna_lockseq_t seq) { 2146 #if KMP_USE_TSX 2147 // Don't have nested lock implementation for speculative locks 2148 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2149 seq = __kmp_user_lock_seq; 2150 #endif 2151 switch (seq) { 2152 case lockseq_tas: 2153 seq = lockseq_nested_tas; 2154 break; 2155 #if KMP_USE_FUTEX 2156 case lockseq_futex: 2157 seq = lockseq_nested_futex; 2158 break; 2159 #endif 2160 case lockseq_ticket: 2161 seq = lockseq_nested_ticket; 2162 break; 2163 case lockseq_queuing: 2164 seq = lockseq_nested_queuing; 2165 break; 2166 case lockseq_drdpa: 2167 seq = lockseq_nested_drdpa; 2168 break; 2169 default: 2170 seq = lockseq_nested_queuing; 2171 } 2172 KMP_INIT_I_LOCK(lock, seq); 2173 #if USE_ITT_BUILD 2174 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2175 __kmp_itt_lock_creating(ilk->lock, loc); 2176 #endif 2177 } 2178 2179 /* initialize the lock with a hint */ 2180 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2181 uintptr_t hint) { 2182 KMP_DEBUG_ASSERT(__kmp_init_serial); 2183 if (__kmp_env_consistency_check && user_lock == NULL) { 2184 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2185 } 2186 2187 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2188 2189 #if OMPT_SUPPORT && OMPT_OPTIONAL 2190 // This is the case, if called from omp_init_lock_with_hint: 2191 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2192 if (!codeptr) 2193 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2194 if (ompt_enabled.ompt_callback_lock_init) { 2195 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2196 ompt_mutex_lock, (omp_lock_hint_t)hint, 2197 __ompt_get_mutex_impl_type(user_lock), 2198 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2199 } 2200 #endif 2201 } 2202 2203 /* initialize the lock with a hint */ 2204 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2205 void **user_lock, uintptr_t hint) { 2206 KMP_DEBUG_ASSERT(__kmp_init_serial); 2207 if (__kmp_env_consistency_check && user_lock == NULL) { 2208 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2209 } 2210 2211 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2212 2213 #if OMPT_SUPPORT && OMPT_OPTIONAL 2214 // This is the case, if called from omp_init_lock_with_hint: 2215 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2216 if (!codeptr) 2217 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2218 if (ompt_enabled.ompt_callback_lock_init) { 2219 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2220 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2221 __ompt_get_mutex_impl_type(user_lock), 2222 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2223 } 2224 #endif 2225 } 2226 2227 #endif // KMP_USE_DYNAMIC_LOCK 2228 2229 /* initialize the lock */ 2230 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2231 #if KMP_USE_DYNAMIC_LOCK 2232 2233 KMP_DEBUG_ASSERT(__kmp_init_serial); 2234 if (__kmp_env_consistency_check && user_lock == NULL) { 2235 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2236 } 2237 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2238 2239 #if OMPT_SUPPORT && OMPT_OPTIONAL 2240 // This is the case, if called from omp_init_lock_with_hint: 2241 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2242 if (!codeptr) 2243 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2244 if (ompt_enabled.ompt_callback_lock_init) { 2245 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2246 
ompt_mutex_lock, omp_lock_hint_none, 2247 __ompt_get_mutex_impl_type(user_lock), 2248 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2249 } 2250 #endif 2251 2252 #else // KMP_USE_DYNAMIC_LOCK 2253 2254 static char const *const func = "omp_init_lock"; 2255 kmp_user_lock_p lck; 2256 KMP_DEBUG_ASSERT(__kmp_init_serial); 2257 2258 if (__kmp_env_consistency_check) { 2259 if (user_lock == NULL) { 2260 KMP_FATAL(LockIsUninitialized, func); 2261 } 2262 } 2263 2264 KMP_CHECK_USER_LOCK_INIT(); 2265 2266 if ((__kmp_user_lock_kind == lk_tas) && 2267 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2268 lck = (kmp_user_lock_p)user_lock; 2269 } 2270 #if KMP_USE_FUTEX 2271 else if ((__kmp_user_lock_kind == lk_futex) && 2272 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2273 lck = (kmp_user_lock_p)user_lock; 2274 } 2275 #endif 2276 else { 2277 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2278 } 2279 INIT_LOCK(lck); 2280 __kmp_set_user_lock_location(lck, loc); 2281 2282 #if OMPT_SUPPORT && OMPT_OPTIONAL 2283 // This is the case, if called from omp_init_lock_with_hint: 2284 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2285 if (!codeptr) 2286 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2287 if (ompt_enabled.ompt_callback_lock_init) { 2288 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2289 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2290 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2291 } 2292 #endif 2293 2294 #if USE_ITT_BUILD 2295 __kmp_itt_lock_creating(lck); 2296 #endif /* USE_ITT_BUILD */ 2297 2298 #endif // KMP_USE_DYNAMIC_LOCK 2299 } // __kmpc_init_lock 2300 2301 /* initialize the lock */ 2302 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2303 #if KMP_USE_DYNAMIC_LOCK 2304 2305 KMP_DEBUG_ASSERT(__kmp_init_serial); 2306 if (__kmp_env_consistency_check && user_lock == NULL) { 2307 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2308 } 2309 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2310 2311 #if OMPT_SUPPORT && OMPT_OPTIONAL 2312 // This is the case, if called from omp_init_lock_with_hint: 2313 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2314 if (!codeptr) 2315 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2316 if (ompt_enabled.ompt_callback_lock_init) { 2317 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2318 ompt_mutex_nest_lock, omp_lock_hint_none, 2319 __ompt_get_mutex_impl_type(user_lock), 2320 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2321 } 2322 #endif 2323 2324 #else // KMP_USE_DYNAMIC_LOCK 2325 2326 static char const *const func = "omp_init_nest_lock"; 2327 kmp_user_lock_p lck; 2328 KMP_DEBUG_ASSERT(__kmp_init_serial); 2329 2330 if (__kmp_env_consistency_check) { 2331 if (user_lock == NULL) { 2332 KMP_FATAL(LockIsUninitialized, func); 2333 } 2334 } 2335 2336 KMP_CHECK_USER_LOCK_INIT(); 2337 2338 if ((__kmp_user_lock_kind == lk_tas) && 2339 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2340 OMP_NEST_LOCK_T_SIZE)) { 2341 lck = (kmp_user_lock_p)user_lock; 2342 } 2343 #if KMP_USE_FUTEX 2344 else if ((__kmp_user_lock_kind == lk_futex) && 2345 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2346 OMP_NEST_LOCK_T_SIZE)) { 2347 lck = (kmp_user_lock_p)user_lock; 2348 } 2349 #endif 2350 else { 2351 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2352 } 2353 2354 INIT_NESTED_LOCK(lck); 2355 __kmp_set_user_lock_location(lck, loc); 2356 2357 #if OMPT_SUPPORT && OMPT_OPTIONAL 2358 // This is the case, if called from omp_init_lock_with_hint: 2359 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2360 if (!codeptr) 2361 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2362 if (ompt_enabled.ompt_callback_lock_init) { 2363 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2364 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2365 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2366 } 2367 #endif 2368 2369 #if USE_ITT_BUILD 2370 __kmp_itt_lock_creating(lck); 2371 #endif /* USE_ITT_BUILD */ 2372 2373 #endif // KMP_USE_DYNAMIC_LOCK 2374 } // __kmpc_init_nest_lock 2375 2376 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2377 #if KMP_USE_DYNAMIC_LOCK 2378 2379 #if USE_ITT_BUILD 2380 kmp_user_lock_p lck; 2381 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2382 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2383 } else { 2384 lck = (kmp_user_lock_p)user_lock; 2385 } 2386 __kmp_itt_lock_destroyed(lck); 2387 #endif 2388 #if OMPT_SUPPORT && OMPT_OPTIONAL 2389 // This is the case, if called from omp_init_lock_with_hint: 2390 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2391 if (!codeptr) 2392 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2393 if (ompt_enabled.ompt_callback_lock_destroy) { 2394 kmp_user_lock_p lck; 2395 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2396 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2397 } else { 2398 lck = (kmp_user_lock_p)user_lock; 2399 } 2400 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2401 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2402 } 2403 #endif 2404 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2405 #else 2406 kmp_user_lock_p lck; 2407 2408 if ((__kmp_user_lock_kind == lk_tas) && 2409 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2410 lck = (kmp_user_lock_p)user_lock; 2411 } 2412 #if KMP_USE_FUTEX 2413 else if ((__kmp_user_lock_kind == lk_futex) && 2414 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2415 lck = (kmp_user_lock_p)user_lock; 2416 } 2417 #endif 2418 else { 2419 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2420 } 2421 2422 #if OMPT_SUPPORT && OMPT_OPTIONAL 2423 // This is the case, if called from omp_init_lock_with_hint: 2424 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2425 if (!codeptr) 2426 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2427 if (ompt_enabled.ompt_callback_lock_destroy) { 2428 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2429 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2430 } 2431 #endif 2432 2433 #if USE_ITT_BUILD 2434 __kmp_itt_lock_destroyed(lck); 2435 #endif /* USE_ITT_BUILD */ 2436 DESTROY_LOCK(lck); 2437 2438 if ((__kmp_user_lock_kind == lk_tas) && 2439 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2440 ; 2441 } 2442 #if KMP_USE_FUTEX 2443 else if ((__kmp_user_lock_kind == lk_futex) && 2444 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2445 ; 2446 } 2447 #endif 2448 else { 2449 __kmp_user_lock_free(user_lock, gtid, lck); 2450 } 2451 #endif // KMP_USE_DYNAMIC_LOCK 2452 } // __kmpc_destroy_lock 2453 2454 /* destroy the lock */ 2455 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2456 #if KMP_USE_DYNAMIC_LOCK 2457 2458 #if USE_ITT_BUILD 2459 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2460 __kmp_itt_lock_destroyed(ilk->lock); 2461 #endif 2462 #if OMPT_SUPPORT && OMPT_OPTIONAL 2463 // This is the case, if called from omp_init_lock_with_hint: 2464 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2465 if (!codeptr) 2466 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2467 if (ompt_enabled.ompt_callback_lock_destroy) { 2468 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2469 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2470 } 2471 #endif 2472 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2473 2474 #else // KMP_USE_DYNAMIC_LOCK 2475 2476 kmp_user_lock_p lck; 2477 2478 if ((__kmp_user_lock_kind == lk_tas) && 2479 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2480 OMP_NEST_LOCK_T_SIZE)) { 2481 lck = (kmp_user_lock_p)user_lock; 2482 } 2483 #if KMP_USE_FUTEX 2484 else if ((__kmp_user_lock_kind == lk_futex) && 2485 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2486 OMP_NEST_LOCK_T_SIZE)) { 2487 lck = (kmp_user_lock_p)user_lock; 2488 } 2489 #endif 2490 else { 2491 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2492 } 2493 2494 #if OMPT_SUPPORT && OMPT_OPTIONAL 2495 // This is the case, if called from omp_init_lock_with_hint: 2496 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2497 if (!codeptr) 2498 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2499 if (ompt_enabled.ompt_callback_lock_destroy) { 2500 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2501 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2502 } 2503 #endif 2504 2505 #if USE_ITT_BUILD 2506 __kmp_itt_lock_destroyed(lck); 2507 #endif /* USE_ITT_BUILD */ 2508 2509 DESTROY_NESTED_LOCK(lck); 2510 2511 if ((__kmp_user_lock_kind == lk_tas) && 2512 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2513 OMP_NEST_LOCK_T_SIZE)) { 2514 ; 2515 } 2516 #if KMP_USE_FUTEX 2517 else if ((__kmp_user_lock_kind == lk_futex) && 2518 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2519 OMP_NEST_LOCK_T_SIZE)) { 2520 ; 2521 } 2522 #endif 2523 else { 2524 __kmp_user_lock_free(user_lock, gtid, lck); 2525 } 2526 #endif // KMP_USE_DYNAMIC_LOCK 2527 } // __kmpc_destroy_nest_lock 2528 2529 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2530 KMP_COUNT_BLOCK(OMP_set_lock); 2531 #if KMP_USE_DYNAMIC_LOCK 2532 int tag = KMP_EXTRACT_D_TAG(user_lock); 2533 #if USE_ITT_BUILD 2534 __kmp_itt_lock_acquiring( 2535 (kmp_user_lock_p) 2536 user_lock); // itt function will get to the right lock object. 
2537 #endif 2538 #if OMPT_SUPPORT && OMPT_OPTIONAL 2539 // This is the case, if called from omp_init_lock_with_hint: 2540 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2541 if (!codeptr) 2542 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2543 if (ompt_enabled.ompt_callback_mutex_acquire) { 2544 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2545 ompt_mutex_lock, omp_lock_hint_none, 2546 __ompt_get_mutex_impl_type(user_lock), 2547 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2548 } 2549 #endif 2550 #if KMP_USE_INLINED_TAS 2551 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2552 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2553 } else 2554 #elif KMP_USE_INLINED_FUTEX 2555 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2556 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2557 } else 2558 #endif 2559 { 2560 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2561 } 2562 #if USE_ITT_BUILD 2563 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2564 #endif 2565 #if OMPT_SUPPORT && OMPT_OPTIONAL 2566 if (ompt_enabled.ompt_callback_mutex_acquired) { 2567 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2568 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2569 } 2570 #endif 2571 2572 #else // KMP_USE_DYNAMIC_LOCK 2573 2574 kmp_user_lock_p lck; 2575 2576 if ((__kmp_user_lock_kind == lk_tas) && 2577 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2578 lck = (kmp_user_lock_p)user_lock; 2579 } 2580 #if KMP_USE_FUTEX 2581 else if ((__kmp_user_lock_kind == lk_futex) && 2582 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2583 lck = (kmp_user_lock_p)user_lock; 2584 } 2585 #endif 2586 else { 2587 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2588 } 2589 2590 #if USE_ITT_BUILD 2591 __kmp_itt_lock_acquiring(lck); 2592 #endif /* USE_ITT_BUILD */ 2593 #if OMPT_SUPPORT && OMPT_OPTIONAL 2594 // This is the case, if called from omp_init_lock_with_hint: 2595 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2596 if (!codeptr) 2597 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2598 if (ompt_enabled.ompt_callback_mutex_acquire) { 2599 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2600 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2601 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2602 } 2603 #endif 2604 2605 ACQUIRE_LOCK(lck, gtid); 2606 2607 #if USE_ITT_BUILD 2608 __kmp_itt_lock_acquired(lck); 2609 #endif /* USE_ITT_BUILD */ 2610 2611 #if OMPT_SUPPORT && OMPT_OPTIONAL 2612 if (ompt_enabled.ompt_callback_mutex_acquired) { 2613 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2614 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2615 } 2616 #endif 2617 2618 #endif // KMP_USE_DYNAMIC_LOCK 2619 } 2620 2621 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2622 #if KMP_USE_DYNAMIC_LOCK 2623 2624 #if USE_ITT_BUILD 2625 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2626 #endif 2627 #if OMPT_SUPPORT && OMPT_OPTIONAL 2628 // This is the case, if called from omp_init_lock_with_hint: 2629 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2630 if (!codeptr) 2631 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2632 if (ompt_enabled.enabled) { 2633 if (ompt_enabled.ompt_callback_mutex_acquire) { 2634 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2635 ompt_mutex_nest_lock, omp_lock_hint_none, 2636 __ompt_get_mutex_impl_type(user_lock), 2637 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2638 } 2639 } 2640 #endif 2641 int acquire_status = 2642 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2643 (void) acquire_status; 2644 #if USE_ITT_BUILD 2645 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2646 #endif 2647 2648 #if OMPT_SUPPORT && OMPT_OPTIONAL 2649 if (ompt_enabled.enabled) { 2650 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2651 if (ompt_enabled.ompt_callback_mutex_acquired) { 2652 // lock_first 2653 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2654 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2655 codeptr); 2656 } 2657 } else { 2658 if (ompt_enabled.ompt_callback_nest_lock) { 2659 // lock_next 2660 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2661 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2662 } 2663 } 2664 } 2665 #endif 2666 2667 #else // KMP_USE_DYNAMIC_LOCK 2668 int acquire_status; 2669 kmp_user_lock_p lck; 2670 2671 if ((__kmp_user_lock_kind == lk_tas) && 2672 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2673 OMP_NEST_LOCK_T_SIZE)) { 2674 lck = (kmp_user_lock_p)user_lock; 2675 } 2676 #if KMP_USE_FUTEX 2677 else if ((__kmp_user_lock_kind == lk_futex) && 2678 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2679 OMP_NEST_LOCK_T_SIZE)) { 2680 lck = (kmp_user_lock_p)user_lock; 2681 } 2682 #endif 2683 else { 2684 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2685 } 2686 2687 #if USE_ITT_BUILD 2688 __kmp_itt_lock_acquiring(lck); 2689 #endif /* USE_ITT_BUILD */ 2690 #if OMPT_SUPPORT && OMPT_OPTIONAL 2691 // This is the case, if called from omp_init_lock_with_hint: 2692 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2693 if (!codeptr) 2694 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2695 if (ompt_enabled.enabled) { 2696 if (ompt_enabled.ompt_callback_mutex_acquire) { 2697 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2698 ompt_mutex_nest_lock, omp_lock_hint_none, 2699 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2700 codeptr); 2701 } 2702 } 2703 #endif 2704 2705 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2706 2707 #if USE_ITT_BUILD 2708 __kmp_itt_lock_acquired(lck); 2709 #endif /* USE_ITT_BUILD */ 2710 2711 #if OMPT_SUPPORT && OMPT_OPTIONAL 2712 if (ompt_enabled.enabled) { 2713 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2714 if (ompt_enabled.ompt_callback_mutex_acquired) { 2715 // lock_first 2716 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2717 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2718 } 2719 } else { 2720 if (ompt_enabled.ompt_callback_nest_lock) { 2721 // lock_next 2722 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2723 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2724 } 2725 } 2726 } 2727 #endif 2728 2729 #endif // KMP_USE_DYNAMIC_LOCK 2730 } 2731 2732 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2733 #if KMP_USE_DYNAMIC_LOCK 2734 2735 int tag = KMP_EXTRACT_D_TAG(user_lock); 2736 #if USE_ITT_BUILD 2737 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2738 #endif 2739 #if KMP_USE_INLINED_TAS 2740 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2741 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2742 } else 2743 #elif KMP_USE_INLINED_FUTEX 2744 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2745 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2746 } else 2747 #endif 2748 { 2749 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2750 } 2751 2752 #if OMPT_SUPPORT && OMPT_OPTIONAL 2753 // This is the case, if called from omp_init_lock_with_hint: 2754 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2755 if (!codeptr) 2756 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2757 if (ompt_enabled.ompt_callback_mutex_released) { 2758 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2759 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2760 } 2761 #endif 2762 2763 #else // KMP_USE_DYNAMIC_LOCK 2764 2765 kmp_user_lock_p lck; 2766 2767 /* Can't use serial interval since not block structured */ 2768 /* release the lock */ 2769 2770 if ((__kmp_user_lock_kind == lk_tas) && 2771 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2772 #if KMP_OS_LINUX && \ 2773 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2774 // "fast" path implemented to fix customer performance issue 2775 #if USE_ITT_BUILD 2776 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2777 #endif /* USE_ITT_BUILD */ 2778 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2779 KMP_MB(); 2780 2781 #if OMPT_SUPPORT && OMPT_OPTIONAL 2782 // This is the case, if called from omp_init_lock_with_hint: 2783 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2784 if (!codeptr) 2785 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2786 if (ompt_enabled.ompt_callback_mutex_released) { 2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2788 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2789 } 2790 #endif 2791 2792 return; 2793 #else 2794 lck = (kmp_user_lock_p)user_lock; 2795 #endif 2796 } 2797 #if KMP_USE_FUTEX 2798 else if ((__kmp_user_lock_kind == lk_futex) && 2799 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2800 lck = (kmp_user_lock_p)user_lock; 2801 } 2802 #endif 2803 else { 2804 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2805 } 2806 2807 #if USE_ITT_BUILD 2808 __kmp_itt_lock_releasing(lck); 2809 #endif /* USE_ITT_BUILD */ 2810 2811 RELEASE_LOCK(lck, gtid); 2812 2813 #if OMPT_SUPPORT && OMPT_OPTIONAL 2814 // This is the case, if called from omp_init_lock_with_hint: 2815 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2816 if (!codeptr) 2817 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2818 if (ompt_enabled.ompt_callback_mutex_released) { 2819 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2820 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2821 } 2822 #endif 2823 2824 #endif // KMP_USE_DYNAMIC_LOCK 2825 } 2826 2827 /* release the lock */ 2828 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2829 #if KMP_USE_DYNAMIC_LOCK 2830 2831 #if USE_ITT_BUILD 2832 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2833 #endif 2834 int release_status = 2835 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2836 (void) release_status; 2837 2838 #if OMPT_SUPPORT && OMPT_OPTIONAL 2839 // This is the case, if called from omp_init_lock_with_hint: 2840 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2841 if (!codeptr) 2842 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2843 if (ompt_enabled.enabled) { 2844 if (release_status == KMP_LOCK_RELEASED) { 2845 if (ompt_enabled.ompt_callback_mutex_released) { 2846 // release_lock_last 2847 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2848 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2849 codeptr); 2850 } 2851 } else if (ompt_enabled.ompt_callback_nest_lock) { 2852 // release_lock_prev 2853 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2854 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2855 } 2856 } 2857 #endif 2858 2859 #else // KMP_USE_DYNAMIC_LOCK 2860 2861 kmp_user_lock_p lck; 
2862 2863 /* Can't use serial interval since not block structured */ 2864 2865 if ((__kmp_user_lock_kind == lk_tas) && 2866 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2867 OMP_NEST_LOCK_T_SIZE)) { 2868 #if KMP_OS_LINUX && \ 2869 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2870 // "fast" path implemented to fix customer performance issue 2871 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2872 #if USE_ITT_BUILD 2873 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2874 #endif /* USE_ITT_BUILD */ 2875 2876 #if OMPT_SUPPORT && OMPT_OPTIONAL 2877 int release_status = KMP_LOCK_STILL_HELD; 2878 #endif 2879 2880 if (--(tl->lk.depth_locked) == 0) { 2881 TCW_4(tl->lk.poll, 0); 2882 #if OMPT_SUPPORT && OMPT_OPTIONAL 2883 release_status = KMP_LOCK_RELEASED; 2884 #endif 2885 } 2886 KMP_MB(); 2887 2888 #if OMPT_SUPPORT && OMPT_OPTIONAL 2889 // This is the case, if called from omp_init_lock_with_hint: 2890 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2891 if (!codeptr) 2892 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2893 if (ompt_enabled.enabled) { 2894 if (release_status == KMP_LOCK_RELEASED) { 2895 if (ompt_enabled.ompt_callback_mutex_released) { 2896 // release_lock_last 2897 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2898 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2899 } 2900 } else if (ompt_enabled.ompt_callback_nest_lock) { 2901 // release_lock_previous 2902 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2903 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2904 } 2905 } 2906 #endif 2907 2908 return; 2909 #else 2910 lck = (kmp_user_lock_p)user_lock; 2911 #endif 2912 } 2913 #if KMP_USE_FUTEX 2914 else if ((__kmp_user_lock_kind == lk_futex) && 2915 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2916 OMP_NEST_LOCK_T_SIZE)) { 2917 lck = (kmp_user_lock_p)user_lock; 2918 } 2919 #endif 2920 else { 2921 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2922 } 2923 2924 #if USE_ITT_BUILD 2925 __kmp_itt_lock_releasing(lck); 2926 #endif /* USE_ITT_BUILD */ 2927 2928 int release_status; 2929 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2930 #if OMPT_SUPPORT && OMPT_OPTIONAL 2931 // This is the case, if called from omp_init_lock_with_hint: 2932 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2933 if (!codeptr) 2934 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2935 if (ompt_enabled.enabled) { 2936 if (release_status == KMP_LOCK_RELEASED) { 2937 if (ompt_enabled.ompt_callback_mutex_released) { 2938 // release_lock_last 2939 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2940 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2941 } 2942 } else if (ompt_enabled.ompt_callback_nest_lock) { 2943 // release_lock_previous 2944 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2945 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2946 } 2947 } 2948 #endif 2949 2950 #endif // KMP_USE_DYNAMIC_LOCK 2951 } 2952 2953 /* try to acquire the lock */ 2954 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2955 KMP_COUNT_BLOCK(OMP_test_lock); 2956 2957 #if KMP_USE_DYNAMIC_LOCK 2958 int rc; 2959 int tag = KMP_EXTRACT_D_TAG(user_lock); 2960 #if USE_ITT_BUILD 2961 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2962 #endif 2963 #if OMPT_SUPPORT && OMPT_OPTIONAL 2964 // This is the case, if called from omp_init_lock_with_hint: 2965 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2966 if (!codeptr) 2967 codeptr =
OMPT_GET_RETURN_ADDRESS(0); 2968 if (ompt_enabled.ompt_callback_mutex_acquire) { 2969 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2970 ompt_mutex_lock, omp_lock_hint_none, 2971 __ompt_get_mutex_impl_type(user_lock), 2972 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2973 } 2974 #endif 2975 #if KMP_USE_INLINED_TAS 2976 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2977 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 2978 } else 2979 #elif KMP_USE_INLINED_FUTEX 2980 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2981 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 2982 } else 2983 #endif 2984 { 2985 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2986 } 2987 if (rc) { 2988 #if USE_ITT_BUILD 2989 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2990 #endif 2991 #if OMPT_SUPPORT && OMPT_OPTIONAL 2992 if (ompt_enabled.ompt_callback_mutex_acquired) { 2993 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2994 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2995 } 2996 #endif 2997 return FTN_TRUE; 2998 } else { 2999 #if USE_ITT_BUILD 3000 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3001 #endif 3002 return FTN_FALSE; 3003 } 3004 3005 #else // KMP_USE_DYNAMIC_LOCK 3006 3007 kmp_user_lock_p lck; 3008 int rc; 3009 3010 if ((__kmp_user_lock_kind == lk_tas) && 3011 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3012 lck = (kmp_user_lock_p)user_lock; 3013 } 3014 #if KMP_USE_FUTEX 3015 else if ((__kmp_user_lock_kind == lk_futex) && 3016 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3017 lck = (kmp_user_lock_p)user_lock; 3018 } 3019 #endif 3020 else { 3021 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3022 } 3023 3024 #if USE_ITT_BUILD 3025 __kmp_itt_lock_acquiring(lck); 3026 #endif /* USE_ITT_BUILD */ 3027 #if OMPT_SUPPORT && OMPT_OPTIONAL 3028 // This is the case, if called from omp_init_lock_with_hint: 3029 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3030 if (!codeptr) 3031 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3032 if (ompt_enabled.ompt_callback_mutex_acquire) { 3033 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3034 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3035 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3036 } 3037 #endif 3038 3039 rc = TEST_LOCK(lck, gtid); 3040 #if USE_ITT_BUILD 3041 if (rc) { 3042 __kmp_itt_lock_acquired(lck); 3043 } else { 3044 __kmp_itt_lock_cancelled(lck); 3045 } 3046 #endif /* USE_ITT_BUILD */ 3047 #if OMPT_SUPPORT && OMPT_OPTIONAL 3048 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3049 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3050 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3051 } 3052 #endif 3053 3054 return (rc ? 
FTN_TRUE : FTN_FALSE); 3055 3056 /* Can't use serial interval since not block structured */ 3057 3058 #endif // KMP_USE_DYNAMIC_LOCK 3059 } 3060 3061 /* try to acquire the lock */ 3062 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3063 #if KMP_USE_DYNAMIC_LOCK 3064 int rc; 3065 #if USE_ITT_BUILD 3066 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3067 #endif 3068 #if OMPT_SUPPORT && OMPT_OPTIONAL 3069 // This is the case, if called from omp_init_lock_with_hint: 3070 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3071 if (!codeptr) 3072 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3073 if (ompt_enabled.ompt_callback_mutex_acquire) { 3074 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3075 ompt_mutex_nest_lock, omp_lock_hint_none, 3076 __ompt_get_mutex_impl_type(user_lock), 3077 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3078 } 3079 #endif 3080 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3081 #if USE_ITT_BUILD 3082 if (rc) { 3083 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3084 } else { 3085 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3086 } 3087 #endif 3088 #if OMPT_SUPPORT && OMPT_OPTIONAL 3089 if (ompt_enabled.enabled && rc) { 3090 if (rc == 1) { 3091 if (ompt_enabled.ompt_callback_mutex_acquired) { 3092 // lock_first 3093 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3094 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3095 codeptr); 3096 } 3097 } else { 3098 if (ompt_enabled.ompt_callback_nest_lock) { 3099 // lock_next 3100 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3101 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3102 } 3103 } 3104 } 3105 #endif 3106 return rc; 3107 3108 #else // KMP_USE_DYNAMIC_LOCK 3109 3110 kmp_user_lock_p lck; 3111 int rc; 3112 3113 if ((__kmp_user_lock_kind == lk_tas) && 3114 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3115 OMP_NEST_LOCK_T_SIZE)) { 3116 lck = (kmp_user_lock_p)user_lock; 3117 } 3118 #if KMP_USE_FUTEX 3119 else if ((__kmp_user_lock_kind == lk_futex) && 3120 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3121 OMP_NEST_LOCK_T_SIZE)) { 3122 lck = (kmp_user_lock_p)user_lock; 3123 } 3124 #endif 3125 else { 3126 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3127 } 3128 3129 #if USE_ITT_BUILD 3130 __kmp_itt_lock_acquiring(lck); 3131 #endif /* USE_ITT_BUILD */ 3132 3133 #if OMPT_SUPPORT && OMPT_OPTIONAL 3134 // This is the case, if called from omp_init_lock_with_hint: 3135 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3136 if (!codeptr) 3137 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3138 if (ompt_enabled.enabled && 3139 ompt_enabled.ompt_callback_mutex_acquire) { 3140 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3141 ompt_mutex_nest_lock, omp_lock_hint_none, 3142 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3143 codeptr); 3144 } 3145 #endif 3146 3147 rc = TEST_NESTED_LOCK(lck, gtid); 3148 #if USE_ITT_BUILD 3149 if (rc) { 3150 __kmp_itt_lock_acquired(lck); 3151 } else { 3152 __kmp_itt_lock_cancelled(lck); 3153 } 3154 #endif /* USE_ITT_BUILD */ 3155 #if OMPT_SUPPORT && OMPT_OPTIONAL 3156 if (ompt_enabled.enabled && rc) { 3157 if (rc == 1) { 3158 if (ompt_enabled.ompt_callback_mutex_acquired) { 3159 // lock_first 3160 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3161 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3162 } 3163 } else { 3164 if (ompt_enabled.ompt_callback_nest_lock) { 3165
// lock_next 3166 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3167 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3168 } 3169 } 3170 } 3171 #endif 3172 return rc; 3173 3174 /* Can't use serial interval since not block structured */ 3175 3176 #endif // KMP_USE_DYNAMIC_LOCK 3177 } 3178 3179 // Interface to fast scalable reduce methods routines 3180 3181 // keep the selected method in a thread local structure for cross-function 3182 // usage: will be used in __kmpc_end_reduce* functions; 3183 // another solution: to re-determine the method one more time in 3184 // __kmpc_end_reduce* functions (new prototype required then) 3185 // AT: which solution is better? 3186 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3187 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3188 3189 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3190 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3191 3192 // description of the packed_reduction_method variable: look at the macros in 3193 // kmp.h 3194 3195 // used in a critical section reduce block 3196 static __forceinline void 3197 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3198 kmp_critical_name *crit) { 3199 3200 // this lock was visible to a customer and to the threading profile tool as a 3201 // serial overhead span (although it's used for an internal purpose only) 3202 // why was it visible in previous implementation? 3203 // should we keep it visible in new reduce block? 3204 kmp_user_lock_p lck; 3205 3206 #if KMP_USE_DYNAMIC_LOCK 3207 3208 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3209 // Check if it is initialized. 3210 if (*lk == 0) { 3211 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3212 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3213 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3214 } else { 3215 __kmp_init_indirect_csptr(crit, loc, global_tid, 3216 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3217 } 3218 } 3219 // Branch for accessing the actual lock object and set operation. This 3220 // branching is inevitable since this lock initialization does not follow the 3221 // normal dispatch path (lock table is not used). 3222 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3223 lck = (kmp_user_lock_p)lk; 3224 KMP_DEBUG_ASSERT(lck != NULL); 3225 if (__kmp_env_consistency_check) { 3226 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3227 } 3228 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3229 } else { 3230 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3231 lck = ilk->lock; 3232 KMP_DEBUG_ASSERT(lck != NULL); 3233 if (__kmp_env_consistency_check) { 3234 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3235 } 3236 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3237 } 3238 3239 #else // KMP_USE_DYNAMIC_LOCK 3240 3241 // We know that the fast reduction code is only emitted by Intel compilers 3242 // with 32 byte critical sections. If there isn't enough space, then we 3243 // have to use a pointer.
3244 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3245 lck = (kmp_user_lock_p)crit; 3246 } else { 3247 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3248 } 3249 KMP_DEBUG_ASSERT(lck != NULL); 3250 3251 if (__kmp_env_consistency_check) 3252 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3253 3254 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3255 3256 #endif // KMP_USE_DYNAMIC_LOCK 3257 } 3258 3259 // used in a critical section reduce block 3260 static __forceinline void 3261 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3262 kmp_critical_name *crit) { 3263 3264 kmp_user_lock_p lck; 3265 3266 #if KMP_USE_DYNAMIC_LOCK 3267 3268 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3269 lck = (kmp_user_lock_p)crit; 3270 if (__kmp_env_consistency_check) 3271 __kmp_pop_sync(global_tid, ct_critical, loc); 3272 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3273 } else { 3274 kmp_indirect_lock_t *ilk = 3275 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3276 if (__kmp_env_consistency_check) 3277 __kmp_pop_sync(global_tid, ct_critical, loc); 3278 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3279 } 3280 3281 #else // KMP_USE_DYNAMIC_LOCK 3282 3283 // We know that the fast reduction code is only emitted by Intel compilers 3284 // with 32 byte critical sections. If there isn't enough space, then we have 3285 // to use a pointer. 3286 if (__kmp_base_user_lock_size > 32) { 3287 lck = *((kmp_user_lock_p *)crit); 3288 KMP_ASSERT(lck != NULL); 3289 } else { 3290 lck = (kmp_user_lock_p)crit; 3291 } 3292 3293 if (__kmp_env_consistency_check) 3294 __kmp_pop_sync(global_tid, ct_critical, loc); 3295 3296 __kmp_release_user_lock_with_checks(lck, global_tid); 3297 3298 #endif // KMP_USE_DYNAMIC_LOCK 3299 } // __kmp_end_critical_section_reduce_block 3300 3301 static __forceinline int 3302 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3303 int *task_state) { 3304 kmp_team_t *team; 3305 3306 // Check if we are inside the teams construct? 3307 if (th->th.th_teams_microtask) { 3308 *team_p = team = th->th.th_team; 3309 if (team->t.t_level == th->th.th_teams_level) { 3310 // This is reduction at teams construct. 3311 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3312 // Let's swap teams temporarily for the reduction. 3313 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3314 th->th.th_team = team->t.t_parent; 3315 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3316 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3317 *task_state = th->th.th_task_state; 3318 th->th.th_task_state = 0; 3319 3320 return 1; 3321 } 3322 } 3323 return 0; 3324 } 3325 3326 static __forceinline void 3327 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3328 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3329 th->th.th_info.ds.ds_tid = 0; 3330 th->th.th_team = team; 3331 th->th.th_team_nproc = team->t.t_nproc; 3332 th->th.th_task_team = team->t.t_task_team[task_state]; 3333 th->th.th_task_state = task_state; 3334 } 3335 3336 /* 2.a.i. Reduce Block without a terminating barrier */ 3337 /*! 
3338 @ingroup SYNCHRONIZATION 3339 @param loc source location information 3340 @param global_tid global thread number 3341 @param num_vars number of items (variables) to be reduced 3342 @param reduce_size size of data in bytes to be reduced 3343 @param reduce_data pointer to data to be reduced 3344 @param reduce_func callback function providing reduction operation on two 3345 operands and returning result of reduction in lhs_data 3346 @param lck pointer to the unique lock data structure 3347 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3348 threads if atomic reduction needed 3349 3350 The nowait version is used for a reduce clause with the nowait argument. 3351 */ 3352 kmp_int32 3353 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3354 size_t reduce_size, void *reduce_data, 3355 void (*reduce_func)(void *lhs_data, void *rhs_data), 3356 kmp_critical_name *lck) { 3357 3358 KMP_COUNT_BLOCK(REDUCE_nowait); 3359 int retval = 0; 3360 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3361 kmp_info_t *th; 3362 kmp_team_t *team; 3363 int teams_swapped = 0, task_state; 3364 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3365 __kmp_assert_valid_gtid(global_tid); 3366 3367 // why do we need this initialization here at all? 3368 // Reduction clause can not be used as a stand-alone directive. 3369 3370 // do not call __kmp_serial_initialize(), it will be called by 3371 // __kmp_parallel_initialize() if needed 3372 // possible detection of false-positive race by the threadchecker ??? 3373 if (!TCR_4(__kmp_init_parallel)) 3374 __kmp_parallel_initialize(); 3375 3376 __kmp_resume_if_soft_paused(); 3377 3378 // check correctness of reduce block nesting 3379 #if KMP_USE_DYNAMIC_LOCK 3380 if (__kmp_env_consistency_check) 3381 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3382 #else 3383 if (__kmp_env_consistency_check) 3384 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3385 #endif 3386 3387 th = __kmp_thread_from_gtid(global_tid); 3388 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3389 3390 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3391 // the value should be kept in a variable 3392 // the variable should be either a construct-specific or thread-specific 3393 // property, not a team specific property 3394 // (a thread can reach the next reduce block on the next construct, reduce 3395 // method may differ on the next construct) 3396 // an ident_t "loc" parameter could be used as a construct-specific property 3397 // (what if loc == 0?) 
3398 // (if both construct-specific and team-specific variables were shared, 3399 // then unnecessary extra syncs would be needed) 3400 // a thread-specific variable is better regarding the two issues above (next 3401 // construct and extra syncs) 3402 // a thread-specific "th_local.reduction_method" variable is used currently 3403 // each thread executes the 'determine' and 'set' lines (no need to have only one 3404 // thread execute them, which avoids unnecessary extra syncs) 3405 3406 packed_reduction_method = __kmp_determine_reduction_method( 3407 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3408 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3409 3410 OMPT_REDUCTION_DECL(th, global_tid); 3411 if (packed_reduction_method == critical_reduce_block) { 3412 3413 OMPT_REDUCTION_BEGIN; 3414 3415 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3416 retval = 1; 3417 3418 } else if (packed_reduction_method == empty_reduce_block) { 3419 3420 OMPT_REDUCTION_BEGIN; 3421 3422 // usage: if team size == 1, no synchronization is required ( Intel 3423 // platforms only ) 3424 retval = 1; 3425 3426 } else if (packed_reduction_method == atomic_reduce_block) { 3427 3428 retval = 2; 3429 3430 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3431 // won't be called by the code gen) 3432 // (this is not ideal, because the checking block has been closed by 3433 // this 'pop', 3434 // but the atomic operation has not been executed yet; it will be executed 3435 // slightly later, literally on the next instruction) 3436 if (__kmp_env_consistency_check) 3437 __kmp_pop_sync(global_tid, ct_reduce, loc); 3438 3439 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3440 tree_reduce_block)) { 3441 3442 // AT: performance issue: a real barrier here 3443 // AT: (if master goes slow, other threads are blocked here waiting for the 3444 // master to come and release them) 3445 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3446 // AT: (specifying NOWAIT won't result in an improvement of performance; it'll 3447 // just be confusing to a customer) 3448 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3449 // might go faster and be more in line with the sense of NOWAIT 3450 // AT: TO DO: do epcc test and compare times 3451 3452 // this barrier should be invisible to a customer and to the threading profile 3453 // tool (it's neither a terminating barrier nor customer's code, it's 3454 // used for an internal purpose) 3455 #if OMPT_SUPPORT 3456 // JP: can this barrier potentially lead to task scheduling? 3457 // JP: as long as there is a barrier in the implementation, OMPT should and 3458 // will provide the barrier events 3459 // so we set up the necessary frame/return addresses. 3460 ompt_frame_t *ompt_frame; 3461 if (ompt_enabled.enabled) { 3462 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3463 if (ompt_frame->enter_frame.ptr == NULL) 3464 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3465 OMPT_STORE_RETURN_ADDRESS(global_tid); 3466 } 3467 #endif 3468 #if USE_ITT_NOTIFY 3469 __kmp_threads[global_tid]->th.th_ident = loc; 3470 #endif 3471 retval = 3472 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3473 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3474 retval = (retval != 0) ?
(0) : (1);
3475 #if OMPT_SUPPORT && OMPT_OPTIONAL
3476     if (ompt_enabled.enabled) {
3477       ompt_frame->enter_frame = ompt_data_none;
3478     }
3479 #endif
3480
3481     // all other workers except master should do this pop here
3482     // (none of the other workers will get to __kmpc_end_reduce_nowait())
3483     if (__kmp_env_consistency_check) {
3484       if (retval == 0) {
3485         __kmp_pop_sync(global_tid, ct_reduce, loc);
3486       }
3487     }
3488
3489   } else {
3490
3491     // should never reach this block
3492     KMP_ASSERT(0); // "unexpected method"
3493   }
3494   if (teams_swapped) {
3495     __kmp_restore_swapped_teams(th, team, task_state);
3496   }
3497   KA_TRACE(
3498       10,
3499       ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3500        global_tid, packed_reduction_method, retval));
3501
3502   return retval;
3503 }
3504
3505 /*!
3506 @ingroup SYNCHRONIZATION
3507 @param loc source location information
3508 @param global_tid global thread id.
3509 @param lck pointer to the unique lock data structure
3510
3511 Finish the execution of a reduce nowait.
3512 */
3513 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3514                               kmp_critical_name *lck) {
3515
3516   PACKED_REDUCTION_METHOD_T packed_reduction_method;
3517
3518   KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3519   __kmp_assert_valid_gtid(global_tid);
3520
3521   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3522
3523   OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3524
3525   if (packed_reduction_method == critical_reduce_block) {
3526
3527     __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3528     OMPT_REDUCTION_END;
3529
3530   } else if (packed_reduction_method == empty_reduce_block) {
3531
3532     // usage: if team size == 1, no synchronization is required (on Intel
3533     // platforms only)
3534
3535     OMPT_REDUCTION_END;
3536
3537   } else if (packed_reduction_method == atomic_reduce_block) {
3538
3539     // neither master nor other workers should get here
3540     // (code gen does not generate this call in case 2: atomic reduce block)
3541     // actually it would be better to remove this else-if branch entirely;
3542     // after removal this value would be checked by the 'else' and would assert
3543
3544   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3545                                    tree_reduce_block)) {
3546
3547     // only master gets here
3548     // OMPT: tree reduction is annotated in the barrier code
3549
3550   } else {
3551
3552     // should never reach this block
3553     KMP_ASSERT(0); // "unexpected method"
3554   }
3555
3556   if (__kmp_env_consistency_check)
3557     __kmp_pop_sync(global_tid, ct_reduce, loc);
3558
3559   KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3560                 global_tid, packed_reduction_method));
3561
3562   return;
3563 }
3564
3565 /* 2.a.ii. Reduce Block with a terminating barrier */
3566
3567 /*!
3568 @ingroup SYNCHRONIZATION
3569 @param loc source location information
3570 @param global_tid global thread number
3571 @param num_vars number of items (variables) to be reduced
3572 @param reduce_size size of data in bytes to be reduced
3573 @param reduce_data pointer to data to be reduced
3574 @param reduce_func callback function providing reduction operation on two
3575 operands and returning result of reduction in lhs_data
3576 @param lck pointer to the unique lock data structure
3577 @result 1 for the master thread, 0 for all other team threads, 2 for all team
3578 threads if atomic reduction needed
3579
3580 A blocking reduce that includes an implicit barrier.
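
As an illustration only (an assumed lowering, not code generated by this file):
a compiler translating `reduction(+ : sum)` on a construct without `nowait`
might emit calls of roughly the following shape. The names `loc`, `gtid`,
`local_sum`, `crit` and `red_add` are hypothetical.

@code
static void red_add(void *lhs_data, void *rhs_data) {
  *(int *)lhs_data += *(int *)rhs_data; // combine rhs into lhs
}
...
switch (__kmpc_reduce(&loc, gtid, 1, sizeof(int), &local_sum, red_add, &crit)) {
case 1: // this thread combines its partial value under the chosen method
  sum += local_sum;
  __kmpc_end_reduce(&loc, gtid, &crit);
  break;
case 2: // atomic method: each thread updates the result atomically
  // ... atomic update of sum with local_sum ...
  __kmpc_end_reduce(&loc, gtid, &crit);
  break;
default: // 0: the other workers when a tree reduction was used
  break;
}
@endcode

__kmpc_reduce_nowait() and __kmpc_end_reduce_nowait() follow the same calling
pattern for the `nowait` case (with no end call after an atomic update).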
3581 */ 3582 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3583 size_t reduce_size, void *reduce_data, 3584 void (*reduce_func)(void *lhs_data, void *rhs_data), 3585 kmp_critical_name *lck) { 3586 KMP_COUNT_BLOCK(REDUCE_wait); 3587 int retval = 0; 3588 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3589 kmp_info_t *th; 3590 kmp_team_t *team; 3591 int teams_swapped = 0, task_state; 3592 3593 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3594 __kmp_assert_valid_gtid(global_tid); 3595 3596 // why do we need this initialization here at all? 3597 // Reduction clause can not be a stand-alone directive. 3598 3599 // do not call __kmp_serial_initialize(), it will be called by 3600 // __kmp_parallel_initialize() if needed 3601 // possible detection of false-positive race by the threadchecker ??? 3602 if (!TCR_4(__kmp_init_parallel)) 3603 __kmp_parallel_initialize(); 3604 3605 __kmp_resume_if_soft_paused(); 3606 3607 // check correctness of reduce block nesting 3608 #if KMP_USE_DYNAMIC_LOCK 3609 if (__kmp_env_consistency_check) 3610 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3611 #else 3612 if (__kmp_env_consistency_check) 3613 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3614 #endif 3615 3616 th = __kmp_thread_from_gtid(global_tid); 3617 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3618 3619 packed_reduction_method = __kmp_determine_reduction_method( 3620 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3621 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3622 3623 OMPT_REDUCTION_DECL(th, global_tid); 3624 3625 if (packed_reduction_method == critical_reduce_block) { 3626 3627 OMPT_REDUCTION_BEGIN; 3628 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3629 retval = 1; 3630 3631 } else if (packed_reduction_method == empty_reduce_block) { 3632 3633 OMPT_REDUCTION_BEGIN; 3634 // usage: if team size == 1, no synchronization is required ( Intel 3635 // platforms only ) 3636 retval = 1; 3637 3638 } else if (packed_reduction_method == atomic_reduce_block) { 3639 3640 retval = 2; 3641 3642 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3643 tree_reduce_block)) { 3644 3645 // case tree_reduce_block: 3646 // this barrier should be visible to a customer and to the threading profile 3647 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3648 #if OMPT_SUPPORT 3649 ompt_frame_t *ompt_frame; 3650 if (ompt_enabled.enabled) { 3651 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3652 if (ompt_frame->enter_frame.ptr == NULL) 3653 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3654 OMPT_STORE_RETURN_ADDRESS(global_tid); 3655 } 3656 #endif 3657 #if USE_ITT_NOTIFY 3658 __kmp_threads[global_tid]->th.th_ident = 3659 loc; // needed for correct notification of frames 3660 #endif 3661 retval = 3662 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3663 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3664 retval = (retval != 0) ? 
(0) : (1); 3665 #if OMPT_SUPPORT && OMPT_OPTIONAL 3666 if (ompt_enabled.enabled) { 3667 ompt_frame->enter_frame = ompt_data_none; 3668 } 3669 #endif 3670 3671 // all other workers except master should do this pop here 3672 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3673 if (__kmp_env_consistency_check) { 3674 if (retval == 0) { // 0: all other workers; 1: master 3675 __kmp_pop_sync(global_tid, ct_reduce, loc); 3676 } 3677 } 3678 3679 } else { 3680 3681 // should never reach this block 3682 KMP_ASSERT(0); // "unexpected method" 3683 } 3684 if (teams_swapped) { 3685 __kmp_restore_swapped_teams(th, team, task_state); 3686 } 3687 3688 KA_TRACE(10, 3689 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3690 global_tid, packed_reduction_method, retval)); 3691 return retval; 3692 } 3693 3694 /*! 3695 @ingroup SYNCHRONIZATION 3696 @param loc source location information 3697 @param global_tid global thread id. 3698 @param lck pointer to the unique lock data structure 3699 3700 Finish the execution of a blocking reduce. 3701 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3702 start function. 3703 */ 3704 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3705 kmp_critical_name *lck) { 3706 3707 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3708 kmp_info_t *th; 3709 kmp_team_t *team; 3710 int teams_swapped = 0, task_state; 3711 3712 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3713 __kmp_assert_valid_gtid(global_tid); 3714 3715 th = __kmp_thread_from_gtid(global_tid); 3716 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3717 3718 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3719 3720 // this barrier should be visible to a customer and to the threading profile 3721 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3722 OMPT_REDUCTION_DECL(th, global_tid); 3723 3724 if (packed_reduction_method == critical_reduce_block) { 3725 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3726 3727 OMPT_REDUCTION_END; 3728 3729 // TODO: implicit barrier: should be exposed 3730 #if OMPT_SUPPORT 3731 ompt_frame_t *ompt_frame; 3732 if (ompt_enabled.enabled) { 3733 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3734 if (ompt_frame->enter_frame.ptr == NULL) 3735 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3736 OMPT_STORE_RETURN_ADDRESS(global_tid); 3737 } 3738 #endif 3739 #if USE_ITT_NOTIFY 3740 __kmp_threads[global_tid]->th.th_ident = loc; 3741 #endif 3742 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3743 #if OMPT_SUPPORT && OMPT_OPTIONAL 3744 if (ompt_enabled.enabled) { 3745 ompt_frame->enter_frame = ompt_data_none; 3746 } 3747 #endif 3748 3749 } else if (packed_reduction_method == empty_reduce_block) { 3750 3751 OMPT_REDUCTION_END; 3752 3753 // usage: if team size==1, no synchronization is required (Intel platforms only) 3754 3755 // TODO: implicit barrier: should be exposed 3756 #if OMPT_SUPPORT 3757 ompt_frame_t *ompt_frame; 3758 if (ompt_enabled.enabled) { 3759 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3760 if (ompt_frame->enter_frame.ptr == NULL) 3761 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3762 OMPT_STORE_RETURN_ADDRESS(global_tid); 3763 } 3764 #endif 3765 #if USE_ITT_NOTIFY 3766 __kmp_threads[global_tid]->th.th_ident = loc; 3767 #endif 3768 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 
3769 #if OMPT_SUPPORT && OMPT_OPTIONAL
3770     if (ompt_enabled.enabled) {
3771       ompt_frame->enter_frame = ompt_data_none;
3772     }
3773 #endif
3774
3775   } else if (packed_reduction_method == atomic_reduce_block) {
3776
3777 #if OMPT_SUPPORT
3778     ompt_frame_t *ompt_frame;
3779     if (ompt_enabled.enabled) {
3780       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3781       if (ompt_frame->enter_frame.ptr == NULL)
3782         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3783       OMPT_STORE_RETURN_ADDRESS(global_tid);
3784     }
3785 #endif
3786     // TODO: implicit barrier: should be exposed
3787 #if USE_ITT_NOTIFY
3788     __kmp_threads[global_tid]->th.th_ident = loc;
3789 #endif
3790     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3791 #if OMPT_SUPPORT && OMPT_OPTIONAL
3792     if (ompt_enabled.enabled) {
3793       ompt_frame->enter_frame = ompt_data_none;
3794     }
3795 #endif
3796
3797   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3798                                    tree_reduce_block)) {
3799
3800     // only master executes here (master releases all other workers)
3801     __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3802                             global_tid);
3803
3804   } else {
3805
3806     // should never reach this block
3807     KMP_ASSERT(0); // "unexpected method"
3808   }
3809   if (teams_swapped) {
3810     __kmp_restore_swapped_teams(th, team, task_state);
3811   }
3812
3813   if (__kmp_env_consistency_check)
3814     __kmp_pop_sync(global_tid, ct_reduce, loc);
3815
3816   KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3817                 global_tid, packed_reduction_method));
3818
3819   return;
3820 }
3821
3822 #undef __KMP_GET_REDUCTION_METHOD
3823 #undef __KMP_SET_REDUCTION_METHOD
3824
3825 /* end of interface to fast scalable reduce routines */
3826
3827 kmp_uint64 __kmpc_get_taskid() {
3828
3829   kmp_int32 gtid;
3830   kmp_info_t *thread;
3831
3832   gtid = __kmp_get_gtid();
3833   if (gtid < 0) {
3834     return 0;
3835   }
3836   thread = __kmp_thread_from_gtid(gtid);
3837   return thread->th.th_current_task->td_task_id;
3838
3839 } // __kmpc_get_taskid
3840
3841 kmp_uint64 __kmpc_get_parent_taskid() {
3842
3843   kmp_int32 gtid;
3844   kmp_info_t *thread;
3845   kmp_taskdata_t *parent_task;
3846
3847   gtid = __kmp_get_gtid();
3848   if (gtid < 0) {
3849     return 0;
3850   }
3851   thread = __kmp_thread_from_gtid(gtid);
3852   parent_task = thread->th.th_current_task->td_parent;
3853   return (parent_task == NULL ? 0 : parent_task->td_task_id);
3854
3855 } // __kmpc_get_parent_taskid
3856
3857 /*!
3858 @ingroup WORK_SHARING
3859 @param loc source location information.
3860 @param gtid global thread number.
3861 @param num_dims number of associated doacross loops.
3862 @param dims info on loop bounds.
3863
3864 Initialize doacross loop information.
3865 The compiler is expected to send us inclusive bounds,
3866 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
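
As an illustration only (an assumed lowering, not code generated by this file):
for a doacross loop such as

@code
#pragma omp for ordered(1)
for (i = 2; i < 9; i += 2) {
  #pragma omp ordered depend(sink : i - 2)
  // ... use the result of iteration i - 2 ...
  #pragma omp ordered depend(source)
}
@endcode

a compiler might emit, in each thread, something along these lines (the names
`loc`, `gtid`, `dim`, `vec`, `lb` and `ub` are hypothetical):

@code
struct kmp_dim dim;
dim.lo = 2; dim.up = 8; dim.st = 2;        // inclusive bounds and stride
__kmpc_doacross_init(&loc, gtid, 1, &dim);
for (i = lb; i <= ub; i += 2) {            // iterations assigned to this thread
  kmp_int64 vec = i - 2;
  __kmpc_doacross_wait(&loc, gtid, &vec);  // depend(sink : i - 2)
  // ... loop body ...
  vec = i;
  __kmpc_doacross_post(&loc, gtid, &vec);  // depend(source)
}
__kmpc_doacross_fini(&loc, gtid);
@endcode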
3867 */ 3868 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3869 const struct kmp_dim *dims) { 3870 __kmp_assert_valid_gtid(gtid); 3871 int j, idx; 3872 kmp_int64 last, trace_count; 3873 kmp_info_t *th = __kmp_threads[gtid]; 3874 kmp_team_t *team = th->th.th_team; 3875 kmp_uint32 *flags; 3876 kmp_disp_t *pr_buf = th->th.th_dispatch; 3877 dispatch_shared_info_t *sh_buf; 3878 3879 KA_TRACE( 3880 20, 3881 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3882 gtid, num_dims, !team->t.t_serialized)); 3883 KMP_DEBUG_ASSERT(dims != NULL); 3884 KMP_DEBUG_ASSERT(num_dims > 0); 3885 3886 if (team->t.t_serialized) { 3887 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3888 return; // no dependencies if team is serialized 3889 } 3890 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3891 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3892 // the next loop 3893 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3894 3895 // Save bounds info into allocated private buffer 3896 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3897 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3898 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3899 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3900 pr_buf->th_doacross_info[0] = 3901 (kmp_int64)num_dims; // first element is number of dimensions 3902 // Save also address of num_done in order to access it later without knowing 3903 // the buffer index 3904 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3905 pr_buf->th_doacross_info[2] = dims[0].lo; 3906 pr_buf->th_doacross_info[3] = dims[0].up; 3907 pr_buf->th_doacross_info[4] = dims[0].st; 3908 last = 5; 3909 for (j = 1; j < num_dims; ++j) { 3910 kmp_int64 3911 range_length; // To keep ranges of all dimensions but the first dims[0] 3912 if (dims[j].st == 1) { // most common case 3913 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3914 range_length = dims[j].up - dims[j].lo + 1; 3915 } else { 3916 if (dims[j].st > 0) { 3917 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3918 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3919 } else { // negative increment 3920 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3921 range_length = 3922 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3923 } 3924 } 3925 pr_buf->th_doacross_info[last++] = range_length; 3926 pr_buf->th_doacross_info[last++] = dims[j].lo; 3927 pr_buf->th_doacross_info[last++] = dims[j].up; 3928 pr_buf->th_doacross_info[last++] = dims[j].st; 3929 } 3930 3931 // Compute total trip count. 3932 // Start with range of dims[0] which we don't need to keep in the buffer. 
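  // (Illustration, using the inclusive-bounds example from the doxygen comment
  //  above: for dims[0] with lo=2, up=8, st=2 the first-dimension range is
  //  (8 - 2) / 2 + 1 = 4, so for a single-dimension loop trace_count == 4.)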
3933 if (dims[0].st == 1) { // most common case 3934 trace_count = dims[0].up - dims[0].lo + 1; 3935 } else if (dims[0].st > 0) { 3936 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3937 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3938 } else { // negative increment 3939 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3940 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3941 } 3942 for (j = 1; j < num_dims; ++j) { 3943 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3944 } 3945 KMP_DEBUG_ASSERT(trace_count > 0); 3946 3947 // Check if shared buffer is not occupied by other loop (idx - 3948 // __kmp_dispatch_num_buffers) 3949 if (idx != sh_buf->doacross_buf_idx) { 3950 // Shared buffer is occupied, wait for it to be free 3951 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3952 __kmp_eq_4, NULL); 3953 } 3954 #if KMP_32_BIT_ARCH 3955 // Check if we are the first thread. After the CAS the first thread gets 0, 3956 // others get 1 if initialization is in progress, allocated pointer otherwise. 3957 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3958 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3959 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3960 #else 3961 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3962 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3963 #endif 3964 if (flags == NULL) { 3965 // we are the first thread, allocate the array of flags 3966 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3967 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3968 KMP_MB(); 3969 sh_buf->doacross_flags = flags; 3970 } else if (flags == (kmp_uint32 *)1) { 3971 #if KMP_32_BIT_ARCH 3972 // initialization is still in progress, need to wait 3973 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 3974 #else 3975 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 3976 #endif 3977 KMP_YIELD(TRUE); 3978 KMP_MB(); 3979 } else { 3980 KMP_MB(); 3981 } 3982 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 3983 pr_buf->th_doacross_flags = 3984 sh_buf->doacross_flags; // save private copy in order to not 3985 // touch shared buffer on each iteration 3986 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 3987 } 3988 3989 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 3990 __kmp_assert_valid_gtid(gtid); 3991 kmp_int32 shft, num_dims, i; 3992 kmp_uint32 flag; 3993 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 3994 kmp_info_t *th = __kmp_threads[gtid]; 3995 kmp_team_t *team = th->th.th_team; 3996 kmp_disp_t *pr_buf; 3997 kmp_int64 lo, up, st; 3998 3999 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4000 if (team->t.t_serialized) { 4001 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4002 return; // no dependencies if team is serialized 4003 } 4004 4005 // calculate sequential iteration number and check out-of-bounds condition 4006 pr_buf = th->th.th_dispatch; 4007 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4008 num_dims = pr_buf->th_doacross_info[0]; 4009 lo = pr_buf->th_doacross_info[2]; 4010 up = pr_buf->th_doacross_info[3]; 4011 st = pr_buf->th_doacross_info[4]; 4012 #if OMPT_SUPPORT && OMPT_OPTIONAL 4013 ompt_dependence_t deps[num_dims]; 4014 #endif 4015 if (st == 1) { // most common case 4016 if (vec[0] < lo || vec[0] > up) { 4017 KA_TRACE(20, ("__kmpc_doacross_wait() 
exit: T#%d iter %lld is out of " 4018 "bounds [%lld,%lld]\n", 4019 gtid, vec[0], lo, up)); 4020 return; 4021 } 4022 iter_number = vec[0] - lo; 4023 } else if (st > 0) { 4024 if (vec[0] < lo || vec[0] > up) { 4025 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4026 "bounds [%lld,%lld]\n", 4027 gtid, vec[0], lo, up)); 4028 return; 4029 } 4030 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4031 } else { // negative increment 4032 if (vec[0] > lo || vec[0] < up) { 4033 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4034 "bounds [%lld,%lld]\n", 4035 gtid, vec[0], lo, up)); 4036 return; 4037 } 4038 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4039 } 4040 #if OMPT_SUPPORT && OMPT_OPTIONAL 4041 deps[0].variable.value = iter_number; 4042 deps[0].dependence_type = ompt_dependence_type_sink; 4043 #endif 4044 for (i = 1; i < num_dims; ++i) { 4045 kmp_int64 iter, ln; 4046 kmp_int32 j = i * 4; 4047 ln = pr_buf->th_doacross_info[j + 1]; 4048 lo = pr_buf->th_doacross_info[j + 2]; 4049 up = pr_buf->th_doacross_info[j + 3]; 4050 st = pr_buf->th_doacross_info[j + 4]; 4051 if (st == 1) { 4052 if (vec[i] < lo || vec[i] > up) { 4053 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4054 "bounds [%lld,%lld]\n", 4055 gtid, vec[i], lo, up)); 4056 return; 4057 } 4058 iter = vec[i] - lo; 4059 } else if (st > 0) { 4060 if (vec[i] < lo || vec[i] > up) { 4061 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4062 "bounds [%lld,%lld]\n", 4063 gtid, vec[i], lo, up)); 4064 return; 4065 } 4066 iter = (kmp_uint64)(vec[i] - lo) / st; 4067 } else { // st < 0 4068 if (vec[i] > lo || vec[i] < up) { 4069 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4070 "bounds [%lld,%lld]\n", 4071 gtid, vec[i], lo, up)); 4072 return; 4073 } 4074 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4075 } 4076 iter_number = iter + ln * iter_number; 4077 #if OMPT_SUPPORT && OMPT_OPTIONAL 4078 deps[i].variable.value = iter; 4079 deps[i].dependence_type = ompt_dependence_type_sink; 4080 #endif 4081 } 4082 shft = iter_number % 32; // use 32-bit granularity 4083 iter_number >>= 5; // divided by 32 4084 flag = 1 << shft; 4085 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4086 KMP_YIELD(TRUE); 4087 } 4088 KMP_MB(); 4089 #if OMPT_SUPPORT && OMPT_OPTIONAL 4090 if (ompt_enabled.ompt_callback_dependences) { 4091 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4092 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4093 } 4094 #endif 4095 KA_TRACE(20, 4096 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4097 gtid, (iter_number << 5) + shft)); 4098 } 4099 4100 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4101 __kmp_assert_valid_gtid(gtid); 4102 kmp_int32 shft, num_dims, i; 4103 kmp_uint32 flag; 4104 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4105 kmp_info_t *th = __kmp_threads[gtid]; 4106 kmp_team_t *team = th->th.th_team; 4107 kmp_disp_t *pr_buf; 4108 kmp_int64 lo, st; 4109 4110 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4111 if (team->t.t_serialized) { 4112 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4113 return; // no dependencies if team is serialized 4114 } 4115 4116 // calculate sequential iteration number (same as in "wait" but no 4117 // out-of-bounds checks) 4118 pr_buf = th->th.th_dispatch; 4119 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4120 num_dims = pr_buf->th_doacross_info[0]; 
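  // Layout of th_doacross_info, as filled in by __kmpc_doacross_init():
  //   [0] = number of dimensions, [1] = address of doacross_num_done,
  //   [2], [3], [4] = lo, up, st of dims[0],
  //   then four entries (range_length, lo, up, st) for each further dimension.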
4121 lo = pr_buf->th_doacross_info[2]; 4122 st = pr_buf->th_doacross_info[4]; 4123 #if OMPT_SUPPORT && OMPT_OPTIONAL 4124 ompt_dependence_t deps[num_dims]; 4125 #endif 4126 if (st == 1) { // most common case 4127 iter_number = vec[0] - lo; 4128 } else if (st > 0) { 4129 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4130 } else { // negative increment 4131 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4132 } 4133 #if OMPT_SUPPORT && OMPT_OPTIONAL 4134 deps[0].variable.value = iter_number; 4135 deps[0].dependence_type = ompt_dependence_type_source; 4136 #endif 4137 for (i = 1; i < num_dims; ++i) { 4138 kmp_int64 iter, ln; 4139 kmp_int32 j = i * 4; 4140 ln = pr_buf->th_doacross_info[j + 1]; 4141 lo = pr_buf->th_doacross_info[j + 2]; 4142 st = pr_buf->th_doacross_info[j + 4]; 4143 if (st == 1) { 4144 iter = vec[i] - lo; 4145 } else if (st > 0) { 4146 iter = (kmp_uint64)(vec[i] - lo) / st; 4147 } else { // st < 0 4148 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4149 } 4150 iter_number = iter + ln * iter_number; 4151 #if OMPT_SUPPORT && OMPT_OPTIONAL 4152 deps[i].variable.value = iter; 4153 deps[i].dependence_type = ompt_dependence_type_source; 4154 #endif 4155 } 4156 #if OMPT_SUPPORT && OMPT_OPTIONAL 4157 if (ompt_enabled.ompt_callback_dependences) { 4158 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4159 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4160 } 4161 #endif 4162 shft = iter_number % 32; // use 32-bit granularity 4163 iter_number >>= 5; // divided by 32 4164 flag = 1 << shft; 4165 KMP_MB(); 4166 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4167 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4168 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4169 (iter_number << 5) + shft)); 4170 } 4171 4172 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4173 __kmp_assert_valid_gtid(gtid); 4174 kmp_int32 num_done; 4175 kmp_info_t *th = __kmp_threads[gtid]; 4176 kmp_team_t *team = th->th.th_team; 4177 kmp_disp_t *pr_buf = th->th.th_dispatch; 4178 4179 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4180 if (team->t.t_serialized) { 4181 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4182 return; // nothing to do 4183 } 4184 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4185 if (num_done == th->th.th_team_nproc) { 4186 // we are the last thread, need to free shared resources 4187 int idx = pr_buf->th_doacross_buf_idx - 1; 4188 dispatch_shared_info_t *sh_buf = 4189 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4190 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4191 (kmp_int64)&sh_buf->doacross_num_done); 4192 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4193 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4194 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4195 sh_buf->doacross_flags = NULL; 4196 sh_buf->doacross_num_done = 0; 4197 sh_buf->doacross_buf_idx += 4198 __kmp_dispatch_num_buffers; // free buffer for future re-use 4199 } 4200 // free private resources (need to keep buffer index forever) 4201 pr_buf->th_doacross_flags = NULL; 4202 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4203 pr_buf->th_doacross_info = NULL; 4204 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4205 } 4206 4207 /* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */ 4208 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4209 return 
__kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4210 } 4211 4212 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4213 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); 4214 } 4215 4216 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4217 omp_allocator_handle_t free_allocator) { 4218 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4219 free_allocator); 4220 } 4221 4222 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4223 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4224 } 4225 4226 int __kmpc_get_target_offload(void) { 4227 if (!__kmp_init_serial) { 4228 __kmp_serial_initialize(); 4229 } 4230 return __kmp_target_offload; 4231 } 4232 4233 int __kmpc_pause_resource(kmp_pause_status_t level) { 4234 if (!__kmp_init_serial) { 4235 return 1; // Can't pause if runtime is not initialized 4236 } 4237 return __kmp_pause_resource(level); 4238 } 4239
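
/* Illustration only (user-side usage, not code from this file): the
   omp_alloc/omp_calloc/omp_realloc/omp_free entry points above simply forward
   to the __kmpc_* allocator routines with the caller's gtid, e.g.

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }

   where omp_default_mem_alloc is the predefined OpenMP allocator handle
   declared in omp.h. */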