/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).
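As an illustrative sketch only (not a normative usage contract), compiler
generated code typically fetches this value once and then passes it to the
other __kmpc_* entry points that require a global thread id, e.g.
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc); // cache the global thread id
__kmpc_barrier(&loc, gtid); // reuse it for subsequent runtime calls
@endcode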
92 93 If multiple non-OpenMP threads all enter an OpenMP construct then this 94 will be a unique thread identifier among all the threads created by 95 the OpenMP runtime (but the value cannot be defined in terms of 96 OpenMP thread ids returned by omp_get_thread_num()). 97 */ 98 kmp_int32 __kmpc_global_thread_num(ident_t *loc) { 99 kmp_int32 gtid = __kmp_entry_gtid(); 100 101 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); 102 103 return gtid; 104 } 105 106 /*! 107 @ingroup THREAD_STATES 108 @param loc Source location information. 109 @return The number of threads under control of the OpenMP<sup>*</sup> runtime 110 111 This function can be called in any context. 112 It returns the total number of threads under the control of the OpenMP runtime. 113 That is not a number that can be determined by any OpenMP standard calls, since 114 the library may be called from more than one non-OpenMP thread, and this 115 reflects the total over all such calls. Similarly the runtime maintains 116 underlying threads even when they are not active (since the cost of creating 117 and destroying OS threads is high), this call counts all such threads even if 118 they are not waiting for work. 119 */ 120 kmp_int32 __kmpc_global_num_threads(ident_t *loc) { 121 KC_TRACE(10, 122 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); 123 124 return TCR_4(__kmp_all_nth); 125 } 126 127 /*! 128 @ingroup THREAD_STATES 129 @param loc Source location information. 130 @return The thread number of the calling thread in the innermost active parallel 131 construct. 132 */ 133 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { 134 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); 135 return __kmp_tid_from_gtid(__kmp_entry_gtid()); 136 } 137 138 /*! 139 @ingroup THREAD_STATES 140 @param loc Source location information. 141 @return The number of threads in the innermost active parallel construct. 142 */ 143 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { 144 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); 145 146 return __kmp_entry_thread()->th.th_team->t.t_nproc; 147 } 148 149 /*! 150 * @ingroup DEPRECATED 151 * @param loc location description 152 * 153 * This function need not be called. It always returns TRUE. 
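 *
 * In debug builds the KMP_PAR_RANGE filtering below parses loc->psource as the
 * usual semicolon-separated location string. A rough, assumed example of the
 * expected layout (exact trailing fields may vary) is
 * @code
 * ";file.c;routine;42;0;;"  // ;file;routine;line;... as scanned below
 * @endcode
 * where the text between the first two semicolons is the file name, the next
 * field is the routine name, and the following field is the line number that
 * is matched against the configured range.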
154 */ 155 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 156 #ifndef KMP_DEBUG 157 158 return TRUE; 159 160 #else 161 162 const char *semi2; 163 const char *semi3; 164 int line_no; 165 166 if (__kmp_par_range == 0) { 167 return TRUE; 168 } 169 semi2 = loc->psource; 170 if (semi2 == NULL) { 171 return TRUE; 172 } 173 semi2 = strchr(semi2, ';'); 174 if (semi2 == NULL) { 175 return TRUE; 176 } 177 semi2 = strchr(semi2 + 1, ';'); 178 if (semi2 == NULL) { 179 return TRUE; 180 } 181 if (__kmp_par_range_filename[0]) { 182 const char *name = semi2 - 1; 183 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 184 name--; 185 } 186 if ((*name == '/') || (*name == ';')) { 187 name++; 188 } 189 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 190 return __kmp_par_range < 0; 191 } 192 } 193 semi3 = strchr(semi2 + 1, ';'); 194 if (__kmp_par_range_routine[0]) { 195 if ((semi3 != NULL) && (semi3 > semi2) && 196 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 197 return __kmp_par_range < 0; 198 } 199 } 200 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 201 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 202 return __kmp_par_range > 0; 203 } 204 return __kmp_par_range < 0; 205 } 206 return TRUE; 207 208 #endif /* KMP_DEBUG */ 209 } 210 211 /*! 212 @ingroup THREAD_STATES 213 @param loc Source location information. 214 @return 1 if this thread is executing inside an active parallel region, zero if 215 not. 216 */ 217 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 218 return __kmp_entry_thread()->th.th_root->r.r_active; 219 } 220 221 /*! 222 @ingroup PARALLEL 223 @param loc source location information 224 @param global_tid global thread number 225 @param num_threads number of threads requested for this parallel construct 226 227 Set the number of threads to be used by the next fork spawned by this thread. 228 This call is only required if the parallel construct has a `num_threads` clause. 229 */ 230 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 231 kmp_int32 num_threads) { 232 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 233 global_tid, num_threads)); 234 __kmp_assert_valid_gtid(global_tid); 235 __kmp_push_num_threads(loc, global_tid, num_threads); 236 } 237 238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 239 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 240 /* the num_threads are automatically popped */ 241 } 242 243 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 244 kmp_int32 proc_bind) { 245 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 246 proc_bind)); 247 __kmp_assert_valid_gtid(global_tid); 248 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 249 } 250 251 /*! 252 @ingroup PARALLEL 253 @param loc source location information 254 @param argc total number of arguments in the ellipsis 255 @param microtask pointer to callback routine consisting of outlined parallel 256 construct 257 @param ... pointers to shared variables that aren't global 258 259 Do the actual fork and call the microtask in the relevant number of threads. 260 */ 261 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
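// Illustrative sketch (an assumption about typical compiler lowering, not part
// of this file's contract): a directive such as
//   #pragma omp parallel
//   { body(); }
// is usually outlined into a routine with the kmpc_micro shape, roughly
//   void outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid, void *shared);
// and invoked through
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined, &shared);
// where the trailing varargs carry the addresses of the shared variables.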
{ 262 int gtid = __kmp_entry_gtid(); 263 264 #if (KMP_STATS_ENABLED) 265 // If we were in a serial region, then stop the serial timer, record 266 // the event, and start parallel region timer 267 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 268 if (previous_state == stats_state_e::SERIAL_REGION) { 269 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 270 } else { 271 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 272 } 273 int inParallel = __kmpc_in_parallel(loc); 274 if (inParallel) { 275 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 276 } else { 277 KMP_COUNT_BLOCK(OMP_PARALLEL); 278 } 279 #endif 280 281 // maybe to save thr_state is enough here 282 { 283 va_list ap; 284 va_start(ap, microtask); 285 286 #if OMPT_SUPPORT 287 ompt_frame_t *ompt_frame; 288 if (ompt_enabled.enabled) { 289 kmp_info_t *master_th = __kmp_threads[gtid]; 290 kmp_team_t *parent_team = master_th->th.th_team; 291 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 292 if (lwt) 293 ompt_frame = &(lwt->ompt_task_info.frame); 294 else { 295 int tid = __kmp_tid_from_gtid(gtid); 296 ompt_frame = &( 297 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 298 } 299 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 300 OMPT_STORE_RETURN_ADDRESS(gtid); 301 } 302 #endif 303 304 #if INCLUDE_SSC_MARKS 305 SSC_MARK_FORKING(); 306 #endif 307 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 308 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 309 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 310 kmp_va_addr_of(ap)); 311 #if INCLUDE_SSC_MARKS 312 SSC_MARK_JOINING(); 313 #endif 314 __kmp_join_call(loc, gtid 315 #if OMPT_SUPPORT 316 , 317 fork_context_intel 318 #endif 319 ); 320 321 va_end(ap); 322 } 323 324 #if KMP_STATS_ENABLED 325 if (previous_state == stats_state_e::SERIAL_REGION) { 326 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 327 } else { 328 KMP_POP_PARTITIONED_TIMER(); 329 } 330 #endif // KMP_STATS_ENABLED 331 } 332 333 /*! 334 @ingroup PARALLEL 335 @param loc source location information 336 @param global_tid global thread number 337 @param num_teams number of teams requested for the teams construct 338 @param num_threads number of threads per team requested for the teams construct 339 340 Set the number of teams to be used by the teams construct. 341 This call is only required if the teams construct has a `num_teams` clause 342 or a `thread_limit` clause (or both). 343 */ 344 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 345 kmp_int32 num_teams, kmp_int32 num_threads) { 346 KA_TRACE(20, 347 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 348 global_tid, num_teams, num_threads)); 349 __kmp_assert_valid_gtid(global_tid); 350 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 351 } 352 353 /*! 354 @ingroup PARALLEL 355 @param loc source location information 356 @param argc total number of arguments in the ellipsis 357 @param microtask pointer to callback routine consisting of outlined teams 358 construct 359 @param ... pointers to shared variables that aren't global 360 361 Do the actual fork and call the microtask in the relevant number of threads. 362 */ 363 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 364 ...) 
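// Illustrative sketch (assumption, mirroring the parallel lowering above): for
//   #pragma omp teams num_teams(N) thread_limit(M)
// a compiler would typically emit
//   __kmpc_push_num_teams(&loc, gtid, N, M);
//   __kmpc_fork_teams(&loc, /*argc=*/1, (kmpc_micro)outlined_teams, &shared);
// omitting the __kmpc_push_num_teams() call when neither clause is present, in
// which case the default team sizing below is used.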
{ 365 int gtid = __kmp_entry_gtid(); 366 kmp_info_t *this_thr = __kmp_threads[gtid]; 367 va_list ap; 368 va_start(ap, microtask); 369 370 #if KMP_STATS_ENABLED 371 KMP_COUNT_BLOCK(OMP_TEAMS); 372 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 373 if (previous_state == stats_state_e::SERIAL_REGION) { 374 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 375 } else { 376 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 377 } 378 #endif 379 380 // remember teams entry point and nesting level 381 this_thr->th.th_teams_microtask = microtask; 382 this_thr->th.th_teams_level = 383 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 384 385 #if OMPT_SUPPORT 386 kmp_team_t *parent_team = this_thr->th.th_team; 387 int tid = __kmp_tid_from_gtid(gtid); 388 if (ompt_enabled.enabled) { 389 parent_team->t.t_implicit_task_taskdata[tid] 390 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 391 } 392 OMPT_STORE_RETURN_ADDRESS(gtid); 393 #endif 394 395 // check if __kmpc_push_num_teams called, set default number of teams 396 // otherwise 397 if (this_thr->th.th_teams_size.nteams == 0) { 398 __kmp_push_num_teams(loc, gtid, 0, 0); 399 } 400 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 401 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 402 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 403 404 __kmp_fork_call( 405 loc, gtid, fork_context_intel, argc, 406 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task 407 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap)); 408 __kmp_join_call(loc, gtid 409 #if OMPT_SUPPORT 410 , 411 fork_context_intel 412 #endif 413 ); 414 415 // Pop current CG root off list 416 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 417 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 418 this_thr->th.th_cg_roots = tmp->up; 419 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 420 " to node %p. cg_nthreads was %d\n", 421 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); 422 KMP_DEBUG_ASSERT(tmp->cg_nthreads); 423 int i = tmp->cg_nthreads--; 424 if (i == 1) { // check is we are the last thread in CG (not always the case) 425 __kmp_free(tmp); 426 } 427 // Restore current task's thread_limit from CG root 428 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 429 this_thr->th.th_current_task->td_icvs.thread_limit = 430 this_thr->th.th_cg_roots->cg_thread_limit; 431 432 this_thr->th.th_teams_microtask = NULL; 433 this_thr->th.th_teams_level = 0; 434 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; 435 va_end(ap); 436 #if KMP_STATS_ENABLED 437 if (previous_state == stats_state_e::SERIAL_REGION) { 438 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 439 } else { 440 KMP_POP_PARTITIONED_TIMER(); 441 } 442 #endif // KMP_STATS_ENABLED 443 } 444 445 // I don't think this function should ever have been exported. 446 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated 447 // openmp code ever called it, but it's been exported from the RTL for so 448 // long that I'm afraid to remove the definition. 449 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } 450 451 /*! 452 @ingroup PARALLEL 453 @param loc source location information 454 @param global_tid global thread number 455 456 Enter a serialized parallel construct. This interface is used to handle a 457 conditional parallel region, like this, 458 @code 459 #pragma omp parallel if (condition) 460 @endcode 461 when the condition is false. 
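A rough sketch of the lowering a compiler might use for that case (illustrative
only, with hypothetical names):
@code
if (condition) {
  __kmpc_fork_call(&loc, argc, outlined, ...);
} else {
  __kmpc_serialized_parallel(&loc, gtid);
  outlined(&gtid, &zero, ...); // run the region body on the encountering thread
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode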
462 */ 463 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 464 // The implementation is now in kmp_runtime.cpp so that it can share static 465 // functions with kmp_fork_call since the tasks to be done are similar in 466 // each case. 467 __kmp_assert_valid_gtid(global_tid); 468 #if OMPT_SUPPORT 469 OMPT_STORE_RETURN_ADDRESS(global_tid); 470 #endif 471 __kmp_serialized_parallel(loc, global_tid); 472 } 473 474 /*! 475 @ingroup PARALLEL 476 @param loc source location information 477 @param global_tid global thread number 478 479 Leave a serialized parallel construct. 480 */ 481 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 482 kmp_internal_control_t *top; 483 kmp_info_t *this_thr; 484 kmp_team_t *serial_team; 485 486 KC_TRACE(10, 487 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 488 489 /* skip all this code for autopar serialized loops since it results in 490 unacceptable overhead */ 491 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 492 return; 493 494 // Not autopar code 495 __kmp_assert_valid_gtid(global_tid); 496 if (!TCR_4(__kmp_init_parallel)) 497 __kmp_parallel_initialize(); 498 499 __kmp_resume_if_soft_paused(); 500 501 this_thr = __kmp_threads[global_tid]; 502 serial_team = this_thr->th.th_serial_team; 503 504 kmp_task_team_t *task_team = this_thr->th.th_task_team; 505 // we need to wait for the proxy tasks before finishing the thread 506 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 507 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 508 509 KMP_MB(); 510 KMP_DEBUG_ASSERT(serial_team); 511 KMP_ASSERT(serial_team->t.t_serialized); 512 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 513 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 514 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 515 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 516 517 #if OMPT_SUPPORT 518 if (ompt_enabled.enabled && 519 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 520 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 521 if (ompt_enabled.ompt_callback_implicit_task) { 522 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 523 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 524 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 525 } 526 527 // reset clear the task id only after unlinking the task 528 ompt_data_t *parent_task_data; 529 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 530 531 if (ompt_enabled.ompt_callback_parallel_end) { 532 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 533 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 534 ompt_parallel_invoker_program | ompt_parallel_team, 535 OMPT_LOAD_RETURN_ADDRESS(global_tid)); 536 } 537 __ompt_lw_taskteam_unlink(this_thr); 538 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 539 } 540 #endif 541 542 /* If necessary, pop the internal control stack values and replace the team 543 * values */ 544 top = serial_team->t.t_control_stack_top; 545 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 546 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 547 serial_team->t.t_control_stack_top = top->next; 548 __kmp_free(top); 549 } 550 551 // if( serial_team -> t.t_serialized > 1 ) 552 serial_team->t.t_level--; 553 554 /* pop dispatch buffers stack */ 555 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 556 { 557 dispatch_private_info_t 
*disp_buffer = 558 serial_team->t.t_dispatch->th_disp_buffer; 559 serial_team->t.t_dispatch->th_disp_buffer = 560 serial_team->t.t_dispatch->th_disp_buffer->next; 561 __kmp_free(disp_buffer); 562 } 563 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 564 565 --serial_team->t.t_serialized; 566 if (serial_team->t.t_serialized == 0) { 567 568 /* return to the parallel section */ 569 570 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 571 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 572 __kmp_clear_x87_fpu_status_word(); 573 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 574 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 575 } 576 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 577 578 this_thr->th.th_team = serial_team->t.t_parent; 579 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 580 581 /* restore values cached in the thread */ 582 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 583 this_thr->th.th_team_master = 584 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 585 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 586 587 /* TODO the below shouldn't need to be adjusted for serialized teams */ 588 this_thr->th.th_dispatch = 589 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 590 591 __kmp_pop_current_task_from_thread(this_thr); 592 593 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 594 this_thr->th.th_current_task->td_flags.executing = 1; 595 596 if (__kmp_tasking_mode != tskm_immediate_exec) { 597 // Copy the task team from the new child / old parent team to the thread. 598 this_thr->th.th_task_team = 599 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 600 KA_TRACE(20, 601 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 602 "team %p\n", 603 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 604 } 605 } else { 606 if (__kmp_tasking_mode != tskm_immediate_exec) { 607 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 608 "depth of serial team %p to %d\n", 609 global_tid, serial_team, serial_team->t.t_serialized)); 610 } 611 } 612 613 if (__kmp_env_consistency_check) 614 __kmp_pop_parallel(global_tid, NULL); 615 #if OMPT_SUPPORT 616 if (ompt_enabled.enabled) 617 this_thr->th.ompt_thread_info.state = 618 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 619 : ompt_state_work_parallel); 620 #endif 621 } 622 623 /*! 624 @ingroup SYNCHRONIZATION 625 @param loc source location information. 626 627 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 628 depending on the memory ordering convention obeyed by the compiler 629 even that may not be necessary). 630 */ 631 void __kmpc_flush(ident_t *loc) { 632 KC_TRACE(10, ("__kmpc_flush: called\n")); 633 634 /* need explicit __mf() here since use volatile instead in library */ 635 KMP_MB(); /* Flush all pending memory write invalidates. */ 636 637 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 638 #if KMP_MIC 639 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 640 // We shouldn't need it, though, since the ABI rules require that 641 // * If the compiler generates NGO stores it also generates the fence 642 // * If users hand-code NGO stores they should insert the fence 643 // therefore no incomplete unordered stores should be visible. 644 #else 645 // C74404 646 // This is to address non-temporal store instructions (sfence needed). 
647 // The clflush instruction is addressed either (mfence needed). 648 // Probably the non-temporal load monvtdqa instruction should also be 649 // addressed. 650 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2. 651 if (!__kmp_cpuinfo.initialized) { 652 __kmp_query_cpuid(&__kmp_cpuinfo); 653 } 654 if (!__kmp_cpuinfo.sse2) { 655 // CPU cannot execute SSE2 instructions. 656 } else { 657 #if KMP_COMPILER_ICC 658 _mm_mfence(); 659 #elif KMP_COMPILER_MSVC 660 MemoryBarrier(); 661 #else 662 __sync_synchronize(); 663 #endif // KMP_COMPILER_ICC 664 } 665 #endif // KMP_MIC 666 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ 667 KMP_ARCH_RISCV64) 668 // Nothing to see here move along 669 #elif KMP_ARCH_PPC64 670 // Nothing needed here (we have a real MB above). 671 #else 672 #error Unknown or unsupported architecture 673 #endif 674 675 #if OMPT_SUPPORT && OMPT_OPTIONAL 676 if (ompt_enabled.ompt_callback_flush) { 677 ompt_callbacks.ompt_callback(ompt_callback_flush)( 678 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 679 } 680 #endif 681 } 682 683 /* -------------------------------------------------------------------------- */ 684 /*! 685 @ingroup SYNCHRONIZATION 686 @param loc source location information 687 @param global_tid thread id. 688 689 Execute a barrier. 690 */ 691 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 692 KMP_COUNT_BLOCK(OMP_BARRIER); 693 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 694 __kmp_assert_valid_gtid(global_tid); 695 696 if (!TCR_4(__kmp_init_parallel)) 697 __kmp_parallel_initialize(); 698 699 __kmp_resume_if_soft_paused(); 700 701 if (__kmp_env_consistency_check) { 702 if (loc == 0) { 703 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 704 } 705 __kmp_check_barrier(global_tid, ct_barrier, loc); 706 } 707 708 #if OMPT_SUPPORT 709 ompt_frame_t *ompt_frame; 710 if (ompt_enabled.enabled) { 711 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 712 if (ompt_frame->enter_frame.ptr == NULL) 713 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 714 OMPT_STORE_RETURN_ADDRESS(global_tid); 715 } 716 #endif 717 __kmp_threads[global_tid]->th.th_ident = loc; 718 // TODO: explicit barrier_wait_id: 719 // this function is called when 'barrier' directive is present or 720 // implicit barrier at the end of a worksharing construct. 721 // 1) better to add a per-thread barrier counter to a thread data structure 722 // 2) set to 0 when a new team is created 723 // 4) no sync is required 724 725 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 726 #if OMPT_SUPPORT && OMPT_OPTIONAL 727 if (ompt_enabled.enabled) { 728 ompt_frame->enter_frame = ompt_data_none; 729 } 730 #endif 731 } 732 733 /* The BARRIER for a MASTER section is always explicit */ 734 /*! 735 @ingroup WORK_SHARING 736 @param loc source location information. 737 @param global_tid global thread number . 738 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
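Illustrative sketch (assumed typical lowering) of the expected pairing:
@code
if (__kmpc_master(&loc, gtid)) {
  // body of the master region, executed only by the master thread
  __kmpc_end_master(&loc, gtid);
}
@endcode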
739 */ 740 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 741 int status = 0; 742 743 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 744 __kmp_assert_valid_gtid(global_tid); 745 746 if (!TCR_4(__kmp_init_parallel)) 747 __kmp_parallel_initialize(); 748 749 __kmp_resume_if_soft_paused(); 750 751 if (KMP_MASTER_GTID(global_tid)) { 752 KMP_COUNT_BLOCK(OMP_MASTER); 753 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 754 status = 1; 755 } 756 757 #if OMPT_SUPPORT && OMPT_OPTIONAL 758 if (status) { 759 if (ompt_enabled.ompt_callback_master) { 760 kmp_info_t *this_thr = __kmp_threads[global_tid]; 761 kmp_team_t *team = this_thr->th.th_team; 762 763 int tid = __kmp_tid_from_gtid(global_tid); 764 ompt_callbacks.ompt_callback(ompt_callback_master)( 765 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 766 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 767 OMPT_GET_RETURN_ADDRESS(0)); 768 } 769 } 770 #endif 771 772 if (__kmp_env_consistency_check) { 773 #if KMP_USE_DYNAMIC_LOCK 774 if (status) 775 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 776 else 777 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 778 #else 779 if (status) 780 __kmp_push_sync(global_tid, ct_master, loc, NULL); 781 else 782 __kmp_check_sync(global_tid, ct_master, loc, NULL); 783 #endif 784 } 785 786 return status; 787 } 788 789 /*! 790 @ingroup WORK_SHARING 791 @param loc source location information. 792 @param global_tid global thread number . 793 794 Mark the end of a <tt>master</tt> region. This should only be called by the 795 thread that executes the <tt>master</tt> region. 796 */ 797 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 798 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 799 __kmp_assert_valid_gtid(global_tid); 800 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 801 KMP_POP_PARTITIONED_TIMER(); 802 803 #if OMPT_SUPPORT && OMPT_OPTIONAL 804 kmp_info_t *this_thr = __kmp_threads[global_tid]; 805 kmp_team_t *team = this_thr->th.th_team; 806 if (ompt_enabled.ompt_callback_master) { 807 int tid = __kmp_tid_from_gtid(global_tid); 808 ompt_callbacks.ompt_callback(ompt_callback_master)( 809 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 810 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 811 OMPT_GET_RETURN_ADDRESS(0)); 812 } 813 #endif 814 815 if (__kmp_env_consistency_check) { 816 if (KMP_MASTER_GTID(global_tid)) 817 __kmp_pop_sync(global_tid, ct_master, loc); 818 } 819 } 820 821 /*! 822 @ingroup WORK_SHARING 823 @param loc source location information. 824 @param gtid global thread number. 825 826 Start execution of an <tt>ordered</tt> construct. 
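Illustrative sketch (assumption) of the pairing emitted inside the body of an
ordered loop iteration:
@code
__kmpc_ordered(&loc, gtid);
// statements of the ordered block, executed in iteration order
__kmpc_end_ordered(&loc, gtid);
@endcode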
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
939 void *idx; 940 kmp_indirect_lock_t **lck; 941 lck = (kmp_indirect_lock_t **)crit; 942 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 943 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 944 KMP_SET_I_LOCK_LOCATION(ilk, loc); 945 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 946 KA_TRACE(20, 947 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 948 #if USE_ITT_BUILD 949 __kmp_itt_critical_creating(ilk->lock, loc); 950 #endif 951 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 952 if (status == 0) { 953 #if USE_ITT_BUILD 954 __kmp_itt_critical_destroyed(ilk->lock); 955 #endif 956 // We don't really need to destroy the unclaimed lock here since it will be 957 // cleaned up at program exit. 958 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 959 } 960 KMP_DEBUG_ASSERT(*lck != NULL); 961 } 962 963 // Fast-path acquire tas lock 964 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 965 { \ 966 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 967 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 968 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 969 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 970 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 971 kmp_uint32 spins; \ 972 KMP_FSYNC_PREPARE(l); \ 973 KMP_INIT_YIELD(spins); \ 974 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 975 do { \ 976 if (TCR_4(__kmp_nth) > \ 977 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 978 KMP_YIELD(TRUE); \ 979 } else { \ 980 KMP_YIELD_SPIN(spins); \ 981 } \ 982 __kmp_spin_backoff(&backoff); \ 983 } while ( \ 984 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 985 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 986 } \ 987 KMP_FSYNC_ACQUIRED(l); \ 988 } 989 990 // Fast-path test tas lock 991 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 992 { \ 993 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 994 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 995 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 996 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 997 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 998 } 999 1000 // Fast-path release tas lock 1001 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1002 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1003 1004 #if KMP_USE_FUTEX 1005 1006 #include <sys/syscall.h> 1007 #include <unistd.h> 1008 #ifndef FUTEX_WAIT 1009 #define FUTEX_WAIT 0 1010 #endif 1011 #ifndef FUTEX_WAKE 1012 #define FUTEX_WAKE 1 1013 #endif 1014 1015 // Fast-path acquire futex lock 1016 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1017 { \ 1018 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1019 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1020 KMP_MB(); \ 1021 KMP_FSYNC_PREPARE(ftx); \ 1022 kmp_int32 poll_val; \ 1023 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1024 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1025 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1026 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1027 if (!cond) { \ 1028 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1029 poll_val | \ 1030 KMP_LOCK_BUSY(1, futex))) { \ 1031 continue; \ 1032 } \ 1033 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1034 } \ 1035 kmp_int32 rc; \ 1036 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1037 NULL, NULL, 0)) != 0) { \ 1038 continue; \ 1039 } \ 1040 gtid_code |= 1; \ 1041 } \ 1042 KMP_FSYNC_ACQUIRED(ftx); \ 1043 } 1044 1045 // Fast-path test futex lock 1046 #define 
KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1047 { \ 1048 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1049 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1050 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1051 KMP_FSYNC_ACQUIRED(ftx); \ 1052 rc = TRUE; \ 1053 } else { \ 1054 rc = FALSE; \ 1055 } \ 1056 } 1057 1058 // Fast-path release futex lock 1059 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1060 { \ 1061 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1062 KMP_MB(); \ 1063 KMP_FSYNC_RELEASING(ftx); \ 1064 kmp_int32 poll_val = \ 1065 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1066 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1067 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1068 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1069 } \ 1070 KMP_MB(); \ 1071 KMP_YIELD_OVERSUB(); \ 1072 } 1073 1074 #endif // KMP_USE_FUTEX 1075 1076 #else // KMP_USE_DYNAMIC_LOCK 1077 1078 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1079 ident_t const *loc, 1080 kmp_int32 gtid) { 1081 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1082 1083 // Because of the double-check, the following load doesn't need to be volatile 1084 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1085 1086 if (lck == NULL) { 1087 void *idx; 1088 1089 // Allocate & initialize the lock. 1090 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1091 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1092 __kmp_init_user_lock_with_checks(lck); 1093 __kmp_set_user_lock_location(lck, loc); 1094 #if USE_ITT_BUILD 1095 __kmp_itt_critical_creating(lck); 1096 // __kmp_itt_critical_creating() should be called *before* the first usage 1097 // of underlying lock. It is the only place where we can guarantee it. There 1098 // are chances the lock will destroyed with no usage, but it is not a 1099 // problem, because this is not real event seen by user but rather setting 1100 // name for object (lock). See more details in kmp_itt.h. 1101 #endif /* USE_ITT_BUILD */ 1102 1103 // Use a cmpxchg instruction to slam the start of the critical section with 1104 // the lock pointer. If another thread beat us to it, deallocate the lock, 1105 // and use the lock that the other thread allocated. 1106 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1107 1108 if (status == 0) { 1109 // Deallocate the lock and reload the value. 1110 #if USE_ITT_BUILD 1111 __kmp_itt_critical_destroyed(lck); 1112 // Let ITT know the lock is destroyed and the same memory location may be reused 1113 // for another purpose. 1114 #endif /* USE_ITT_BUILD */ 1115 __kmp_destroy_user_lock_with_checks(lck); 1116 __kmp_user_lock_free(&idx, gtid, lck); 1117 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1118 KMP_DEBUG_ASSERT(lck != NULL); 1119 } 1120 } 1121 return lck; 1122 } 1123 1124 #endif // KMP_USE_DYNAMIC_LOCK 1125 1126 /*! 1127 @ingroup WORK_SHARING 1128 @param loc source location information. 1129 @param global_tid global thread number. 1130 @param crit identity of the critical section. This could be a pointer to a lock 1131 associated with the critical section, or some other suitably unique value. 1132 1133 Enter code protected by a `critical` construct. 1134 This function blocks until the executing thread can enter the critical section. 
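Illustrative sketch (assumed typical lowering) of a named critical section; the
zero-initialized name object provides the per-construct lock storage:
@code
static kmp_critical_name crit_name; // one per critical section name
__kmpc_critical(&loc, gtid, &crit_name);
// body of the critical section
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode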
1135 */ 1136 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1137 kmp_critical_name *crit) { 1138 #if KMP_USE_DYNAMIC_LOCK 1139 #if OMPT_SUPPORT && OMPT_OPTIONAL 1140 OMPT_STORE_RETURN_ADDRESS(global_tid); 1141 #endif // OMPT_SUPPORT 1142 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1143 #else 1144 KMP_COUNT_BLOCK(OMP_CRITICAL); 1145 #if OMPT_SUPPORT && OMPT_OPTIONAL 1146 ompt_state_t prev_state = ompt_state_undefined; 1147 ompt_thread_info_t ti; 1148 #endif 1149 kmp_user_lock_p lck; 1150 1151 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1152 __kmp_assert_valid_gtid(global_tid); 1153 1154 // TODO: add THR_OVHD_STATE 1155 1156 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1157 KMP_CHECK_USER_LOCK_INIT(); 1158 1159 if ((__kmp_user_lock_kind == lk_tas) && 1160 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1161 lck = (kmp_user_lock_p)crit; 1162 } 1163 #if KMP_USE_FUTEX 1164 else if ((__kmp_user_lock_kind == lk_futex) && 1165 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1166 lck = (kmp_user_lock_p)crit; 1167 } 1168 #endif 1169 else { // ticket, queuing or drdpa 1170 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1171 } 1172 1173 if (__kmp_env_consistency_check) 1174 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1175 1176 // since the critical directive binds to all threads, not just the current 1177 // team we have to check this even if we are in a serialized team. 1178 // also, even if we are the uber thread, we still have to conduct the lock, 1179 // as we have to contend with sibling threads. 1180 1181 #if USE_ITT_BUILD 1182 __kmp_itt_critical_acquiring(lck); 1183 #endif /* USE_ITT_BUILD */ 1184 #if OMPT_SUPPORT && OMPT_OPTIONAL 1185 OMPT_STORE_RETURN_ADDRESS(gtid); 1186 void *codeptr_ra = NULL; 1187 if (ompt_enabled.enabled) { 1188 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1189 /* OMPT state update */ 1190 prev_state = ti.state; 1191 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1192 ti.state = ompt_state_wait_critical; 1193 1194 /* OMPT event callback */ 1195 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1196 if (ompt_enabled.ompt_callback_mutex_acquire) { 1197 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1198 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1199 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1200 } 1201 } 1202 #endif 1203 // Value of 'crit' should be good for using as a critical_id of the critical 1204 // section directive. 
1205 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1206 1207 #if USE_ITT_BUILD 1208 __kmp_itt_critical_acquired(lck); 1209 #endif /* USE_ITT_BUILD */ 1210 #if OMPT_SUPPORT && OMPT_OPTIONAL 1211 if (ompt_enabled.enabled) { 1212 /* OMPT state update */ 1213 ti.state = prev_state; 1214 ti.wait_id = 0; 1215 1216 /* OMPT event callback */ 1217 if (ompt_enabled.ompt_callback_mutex_acquired) { 1218 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1219 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1220 } 1221 } 1222 #endif 1223 KMP_POP_PARTITIONED_TIMER(); 1224 1225 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1226 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1227 #endif // KMP_USE_DYNAMIC_LOCK 1228 } 1229 1230 #if KMP_USE_DYNAMIC_LOCK 1231 1232 // Converts the given hint to an internal lock implementation 1233 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1234 #if KMP_USE_TSX 1235 #define KMP_TSX_LOCK(seq) lockseq_##seq 1236 #else 1237 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1238 #endif 1239 1240 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1241 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1242 #else 1243 #define KMP_CPUINFO_RTM 0 1244 #endif 1245 1246 // Hints that do not require further logic 1247 if (hint & kmp_lock_hint_hle) 1248 return KMP_TSX_LOCK(hle); 1249 if (hint & kmp_lock_hint_rtm) 1250 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; 1251 if (hint & kmp_lock_hint_adaptive) 1252 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1253 1254 // Rule out conflicting hints first by returning the default lock 1255 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1256 return __kmp_user_lock_seq; 1257 if ((hint & omp_lock_hint_speculative) && 1258 (hint & omp_lock_hint_nonspeculative)) 1259 return __kmp_user_lock_seq; 1260 1261 // Do not even consider speculation when it appears to be contended 1262 if (hint & omp_lock_hint_contended) 1263 return lockseq_queuing; 1264 1265 // Uncontended lock without speculation 1266 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1267 return lockseq_tas; 1268 1269 // HLE lock for speculation 1270 if (hint & omp_lock_hint_speculative) 1271 return KMP_TSX_LOCK(hle); 1272 1273 return __kmp_user_lock_seq; 1274 } 1275 1276 #if OMPT_SUPPORT && OMPT_OPTIONAL 1277 #if KMP_USE_DYNAMIC_LOCK 1278 static kmp_mutex_impl_t 1279 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1280 if (user_lock) { 1281 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1282 case 0: 1283 break; 1284 #if KMP_USE_FUTEX 1285 case locktag_futex: 1286 return kmp_mutex_impl_queuing; 1287 #endif 1288 case locktag_tas: 1289 return kmp_mutex_impl_spin; 1290 #if KMP_USE_TSX 1291 case locktag_hle: 1292 return kmp_mutex_impl_speculative; 1293 #endif 1294 default: 1295 return kmp_mutex_impl_none; 1296 } 1297 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1298 } 1299 KMP_ASSERT(ilock); 1300 switch (ilock->type) { 1301 #if KMP_USE_TSX 1302 case locktag_adaptive: 1303 case locktag_rtm: 1304 return kmp_mutex_impl_speculative; 1305 #endif 1306 case locktag_nested_tas: 1307 return kmp_mutex_impl_spin; 1308 #if KMP_USE_FUTEX 1309 case locktag_nested_futex: 1310 #endif 1311 case locktag_ticket: 1312 case locktag_queuing: 1313 case locktag_drdpa: 1314 case locktag_nested_ticket: 1315 case locktag_nested_queuing: 1316 case locktag_nested_drdpa: 1317 return kmp_mutex_impl_queuing; 1318 default: 1319 return kmp_mutex_impl_none; 
1320 } 1321 } 1322 #else 1323 // For locks without dynamic binding 1324 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1325 switch (__kmp_user_lock_kind) { 1326 case lk_tas: 1327 return kmp_mutex_impl_spin; 1328 #if KMP_USE_FUTEX 1329 case lk_futex: 1330 #endif 1331 case lk_ticket: 1332 case lk_queuing: 1333 case lk_drdpa: 1334 return kmp_mutex_impl_queuing; 1335 #if KMP_USE_TSX 1336 case lk_hle: 1337 case lk_rtm: 1338 case lk_adaptive: 1339 return kmp_mutex_impl_speculative; 1340 #endif 1341 default: 1342 return kmp_mutex_impl_none; 1343 } 1344 } 1345 #endif // KMP_USE_DYNAMIC_LOCK 1346 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1347 1348 /*! 1349 @ingroup WORK_SHARING 1350 @param loc source location information. 1351 @param global_tid global thread number. 1352 @param crit identity of the critical section. This could be a pointer to a lock 1353 associated with the critical section, or some other suitably unique value. 1354 @param hint the lock hint. 1355 1356 Enter code protected by a `critical` construct with a hint. The hint value is 1357 used to suggest a lock implementation. This function blocks until the executing 1358 thread can enter the critical section unless the hint suggests use of 1359 speculative execution and the hardware supports it. 1360 */ 1361 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1362 kmp_critical_name *crit, uint32_t hint) { 1363 KMP_COUNT_BLOCK(OMP_CRITICAL); 1364 kmp_user_lock_p lck; 1365 #if OMPT_SUPPORT && OMPT_OPTIONAL 1366 ompt_state_t prev_state = ompt_state_undefined; 1367 ompt_thread_info_t ti; 1368 // This is the case, if called from __kmpc_critical: 1369 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1370 if (!codeptr) 1371 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1372 #endif 1373 1374 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1375 __kmp_assert_valid_gtid(global_tid); 1376 1377 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1378 // Check if it is initialized. 1379 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1380 if (*lk == 0) { 1381 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1382 if (KMP_IS_D_LOCK(lckseq)) { 1383 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1384 KMP_GET_D_TAG(lckseq)); 1385 } else { 1386 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1387 } 1388 } 1389 // Branch for accessing the actual lock object and set operation. This 1390 // branching is inevitable since this lock initialization does not follow the 1391 // normal dispatch path (lock table is not used). 
1392 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1393 lck = (kmp_user_lock_p)lk; 1394 if (__kmp_env_consistency_check) { 1395 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1396 __kmp_map_hint_to_lock(hint)); 1397 } 1398 #if USE_ITT_BUILD 1399 __kmp_itt_critical_acquiring(lck); 1400 #endif 1401 #if OMPT_SUPPORT && OMPT_OPTIONAL 1402 if (ompt_enabled.enabled) { 1403 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1404 /* OMPT state update */ 1405 prev_state = ti.state; 1406 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1407 ti.state = ompt_state_wait_critical; 1408 1409 /* OMPT event callback */ 1410 if (ompt_enabled.ompt_callback_mutex_acquire) { 1411 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1412 ompt_mutex_critical, (unsigned int)hint, 1413 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1414 codeptr); 1415 } 1416 } 1417 #endif 1418 #if KMP_USE_INLINED_TAS 1419 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1420 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1421 } else 1422 #elif KMP_USE_INLINED_FUTEX 1423 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1424 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1425 } else 1426 #endif 1427 { 1428 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1429 } 1430 } else { 1431 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1432 lck = ilk->lock; 1433 if (__kmp_env_consistency_check) { 1434 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1435 __kmp_map_hint_to_lock(hint)); 1436 } 1437 #if USE_ITT_BUILD 1438 __kmp_itt_critical_acquiring(lck); 1439 #endif 1440 #if OMPT_SUPPORT && OMPT_OPTIONAL 1441 if (ompt_enabled.enabled) { 1442 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1443 /* OMPT state update */ 1444 prev_state = ti.state; 1445 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1446 ti.state = ompt_state_wait_critical; 1447 1448 /* OMPT event callback */ 1449 if (ompt_enabled.ompt_callback_mutex_acquire) { 1450 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1451 ompt_mutex_critical, (unsigned int)hint, 1452 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1453 codeptr); 1454 } 1455 } 1456 #endif 1457 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1458 } 1459 KMP_POP_PARTITIONED_TIMER(); 1460 1461 #if USE_ITT_BUILD 1462 __kmp_itt_critical_acquired(lck); 1463 #endif /* USE_ITT_BUILD */ 1464 #if OMPT_SUPPORT && OMPT_OPTIONAL 1465 if (ompt_enabled.enabled) { 1466 /* OMPT state update */ 1467 ti.state = prev_state; 1468 ti.wait_id = 0; 1469 1470 /* OMPT event callback */ 1471 if (ompt_enabled.ompt_callback_mutex_acquired) { 1472 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1473 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1474 } 1475 } 1476 #endif 1477 1478 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1479 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1480 } // __kmpc_critical_with_hint 1481 1482 #endif // KMP_USE_DYNAMIC_LOCK 1483 1484 /*! 1485 @ingroup WORK_SHARING 1486 @param loc source location information. 1487 @param global_tid global thread number . 1488 @param crit identity of the critical section. This could be a pointer to a lock 1489 associated with the critical section, or some other suitably unique value. 1490 1491 Leave a critical section, releasing any lock that was held during its execution. 
1492 */ 1493 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1494 kmp_critical_name *crit) { 1495 kmp_user_lock_p lck; 1496 1497 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1498 1499 #if KMP_USE_DYNAMIC_LOCK 1500 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1501 lck = (kmp_user_lock_p)crit; 1502 KMP_ASSERT(lck != NULL); 1503 if (__kmp_env_consistency_check) { 1504 __kmp_pop_sync(global_tid, ct_critical, loc); 1505 } 1506 #if USE_ITT_BUILD 1507 __kmp_itt_critical_releasing(lck); 1508 #endif 1509 #if KMP_USE_INLINED_TAS 1510 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1511 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1512 } else 1513 #elif KMP_USE_INLINED_FUTEX 1514 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1515 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1516 } else 1517 #endif 1518 { 1519 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1520 } 1521 } else { 1522 kmp_indirect_lock_t *ilk = 1523 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1524 KMP_ASSERT(ilk != NULL); 1525 lck = ilk->lock; 1526 if (__kmp_env_consistency_check) { 1527 __kmp_pop_sync(global_tid, ct_critical, loc); 1528 } 1529 #if USE_ITT_BUILD 1530 __kmp_itt_critical_releasing(lck); 1531 #endif 1532 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1533 } 1534 1535 #else // KMP_USE_DYNAMIC_LOCK 1536 1537 if ((__kmp_user_lock_kind == lk_tas) && 1538 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1539 lck = (kmp_user_lock_p)crit; 1540 } 1541 #if KMP_USE_FUTEX 1542 else if ((__kmp_user_lock_kind == lk_futex) && 1543 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1544 lck = (kmp_user_lock_p)crit; 1545 } 1546 #endif 1547 else { // ticket, queuing or drdpa 1548 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1549 } 1550 1551 KMP_ASSERT(lck != NULL); 1552 1553 if (__kmp_env_consistency_check) 1554 __kmp_pop_sync(global_tid, ct_critical, loc); 1555 1556 #if USE_ITT_BUILD 1557 __kmp_itt_critical_releasing(lck); 1558 #endif /* USE_ITT_BUILD */ 1559 // Value of 'crit' should be good for using as a critical_id of the critical 1560 // section directive. 1561 __kmp_release_user_lock_with_checks(lck, global_tid); 1562 1563 #endif // KMP_USE_DYNAMIC_LOCK 1564 1565 #if OMPT_SUPPORT && OMPT_OPTIONAL 1566 /* OMPT release event triggers after lock is released; place here to trigger 1567 * for all #if branches */ 1568 OMPT_STORE_RETURN_ADDRESS(global_tid); 1569 if (ompt_enabled.ompt_callback_mutex_released) { 1570 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1571 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1572 OMPT_LOAD_RETURN_ADDRESS(0)); 1573 } 1574 #endif 1575 1576 KMP_POP_PARTITIONED_TIMER(); 1577 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1578 } 1579 1580 /*! 1581 @ingroup SYNCHRONIZATION 1582 @param loc source location information 1583 @param global_tid thread id. 1584 @return one if the thread should execute the master block, zero otherwise 1585 1586 Start execution of a combined barrier and master. The barrier is executed inside 1587 this function. 
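Illustrative sketch (assumption) of the intended call pattern:
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // master-only work, performed after all threads reached the barrier
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode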
1588 */ 1589 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1590 int status; 1591 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1592 __kmp_assert_valid_gtid(global_tid); 1593 1594 if (!TCR_4(__kmp_init_parallel)) 1595 __kmp_parallel_initialize(); 1596 1597 __kmp_resume_if_soft_paused(); 1598 1599 if (__kmp_env_consistency_check) 1600 __kmp_check_barrier(global_tid, ct_barrier, loc); 1601 1602 #if OMPT_SUPPORT 1603 ompt_frame_t *ompt_frame; 1604 if (ompt_enabled.enabled) { 1605 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1606 if (ompt_frame->enter_frame.ptr == NULL) 1607 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1608 OMPT_STORE_RETURN_ADDRESS(global_tid); 1609 } 1610 #endif 1611 #if USE_ITT_NOTIFY 1612 __kmp_threads[global_tid]->th.th_ident = loc; 1613 #endif 1614 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1615 #if OMPT_SUPPORT && OMPT_OPTIONAL 1616 if (ompt_enabled.enabled) { 1617 ompt_frame->enter_frame = ompt_data_none; 1618 } 1619 #endif 1620 1621 return (status != 0) ? 0 : 1; 1622 } 1623 1624 /*! 1625 @ingroup SYNCHRONIZATION 1626 @param loc source location information 1627 @param global_tid thread id. 1628 1629 Complete the execution of a combined barrier and master. This function should 1630 only be called at the completion of the <tt>master</tt> code. Other threads will 1631 still be waiting at the barrier and this call releases them. 1632 */ 1633 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1634 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1635 __kmp_assert_valid_gtid(global_tid); 1636 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1637 } 1638 1639 /*! 1640 @ingroup SYNCHRONIZATION 1641 @param loc source location information 1642 @param global_tid thread id. 1643 @return one if the thread should execute the master block, zero otherwise 1644 1645 Start execution of a combined barrier and master(nowait) construct. 1646 The barrier is executed inside this function. 1647 There is no equivalent "end" function, since the 1648 */ 1649 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1650 kmp_int32 ret; 1651 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1652 __kmp_assert_valid_gtid(global_tid); 1653 1654 if (!TCR_4(__kmp_init_parallel)) 1655 __kmp_parallel_initialize(); 1656 1657 __kmp_resume_if_soft_paused(); 1658 1659 if (__kmp_env_consistency_check) { 1660 if (loc == 0) { 1661 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 
1662 } 1663 __kmp_check_barrier(global_tid, ct_barrier, loc); 1664 } 1665 1666 #if OMPT_SUPPORT 1667 ompt_frame_t *ompt_frame; 1668 if (ompt_enabled.enabled) { 1669 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1670 if (ompt_frame->enter_frame.ptr == NULL) 1671 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1672 OMPT_STORE_RETURN_ADDRESS(global_tid); 1673 } 1674 #endif 1675 #if USE_ITT_NOTIFY 1676 __kmp_threads[global_tid]->th.th_ident = loc; 1677 #endif 1678 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1679 #if OMPT_SUPPORT && OMPT_OPTIONAL 1680 if (ompt_enabled.enabled) { 1681 ompt_frame->enter_frame = ompt_data_none; 1682 } 1683 #endif 1684 1685 ret = __kmpc_master(loc, global_tid); 1686 1687 if (__kmp_env_consistency_check) { 1688 /* there's no __kmpc_end_master called; so the (stats) */ 1689 /* actions of __kmpc_end_master are done here */ 1690 if (ret) { 1691 /* only one thread should do the pop since only */ 1692 /* one did the push (see __kmpc_master()) */ 1693 __kmp_pop_sync(global_tid, ct_master, loc); 1694 } 1695 } 1696 1697 return (ret); 1698 } 1699 1700 /* The BARRIER for a SINGLE process section is always explicit */ 1701 /*! 1702 @ingroup WORK_SHARING 1703 @param loc source location information 1704 @param global_tid global thread number 1705 @return One if this thread should execute the single construct, zero otherwise. 1706 1707 Test whether to execute a <tt>single</tt> construct. 1708 There are no implicit barriers in the two "single" calls, rather the compiler 1709 should introduce an explicit barrier if it is required. 1710 */ 1711 1712 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1713 __kmp_assert_valid_gtid(global_tid); 1714 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1715 1716 if (rc) { 1717 // We are going to execute the single statement, so we should count it. 1718 KMP_COUNT_BLOCK(OMP_SINGLE); 1719 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1720 } 1721 1722 #if OMPT_SUPPORT && OMPT_OPTIONAL 1723 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1724 kmp_team_t *team = this_thr->th.th_team; 1725 int tid = __kmp_tid_from_gtid(global_tid); 1726 1727 if (ompt_enabled.enabled) { 1728 if (rc) { 1729 if (ompt_enabled.ompt_callback_work) { 1730 ompt_callbacks.ompt_callback(ompt_callback_work)( 1731 ompt_work_single_executor, ompt_scope_begin, 1732 &(team->t.ompt_team_info.parallel_data), 1733 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1734 1, OMPT_GET_RETURN_ADDRESS(0)); 1735 } 1736 } else { 1737 if (ompt_enabled.ompt_callback_work) { 1738 ompt_callbacks.ompt_callback(ompt_callback_work)( 1739 ompt_work_single_other, ompt_scope_begin, 1740 &(team->t.ompt_team_info.parallel_data), 1741 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1742 1, OMPT_GET_RETURN_ADDRESS(0)); 1743 ompt_callbacks.ompt_callback(ompt_callback_work)( 1744 ompt_work_single_other, ompt_scope_end, 1745 &(team->t.ompt_team_info.parallel_data), 1746 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1747 1, OMPT_GET_RETURN_ADDRESS(0)); 1748 } 1749 } 1750 } 1751 #endif 1752 1753 return rc; 1754 } 1755 1756 /*! 1757 @ingroup WORK_SHARING 1758 @param loc source location information 1759 @param global_tid global thread number 1760 1761 Mark the end of a <tt>single</tt> construct. This function should 1762 only be called by the thread that executed the block of code protected 1763 by the `single` construct. 
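Illustrative sketch (assumption) of the pairing with __kmpc_single():
@code
if (__kmpc_single(&loc, gtid)) {
  // block of code executed by exactly one thread
  __kmpc_end_single(&loc, gtid);
}
// an explicit __kmpc_barrier() follows here unless nowait was specified
@endcode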
1764 */ 1765 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1766 __kmp_assert_valid_gtid(global_tid); 1767 __kmp_exit_single(global_tid); 1768 KMP_POP_PARTITIONED_TIMER(); 1769 1770 #if OMPT_SUPPORT && OMPT_OPTIONAL 1771 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1772 kmp_team_t *team = this_thr->th.th_team; 1773 int tid = __kmp_tid_from_gtid(global_tid); 1774 1775 if (ompt_enabled.ompt_callback_work) { 1776 ompt_callbacks.ompt_callback(ompt_callback_work)( 1777 ompt_work_single_executor, ompt_scope_end, 1778 &(team->t.ompt_team_info.parallel_data), 1779 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1780 OMPT_GET_RETURN_ADDRESS(0)); 1781 } 1782 #endif 1783 } 1784 1785 /*! 1786 @ingroup WORK_SHARING 1787 @param loc Source location 1788 @param global_tid Global thread id 1789 1790 Mark the end of a statically scheduled loop. 1791 */ 1792 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1793 KMP_POP_PARTITIONED_TIMER(); 1794 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1795 1796 #if OMPT_SUPPORT && OMPT_OPTIONAL 1797 if (ompt_enabled.ompt_callback_work) { 1798 ompt_work_t ompt_work_type = ompt_work_loop; 1799 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1800 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1801 // Determine workshare type 1802 if (loc != NULL) { 1803 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1804 ompt_work_type = ompt_work_loop; 1805 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1806 ompt_work_type = ompt_work_sections; 1807 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1808 ompt_work_type = ompt_work_distribute; 1809 } else { 1810 // use default set above. 1811 // a warning about this case is provided in __kmpc_for_static_init 1812 } 1813 KMP_DEBUG_ASSERT(ompt_work_type); 1814 } 1815 ompt_callbacks.ompt_callback(ompt_callback_work)( 1816 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1817 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1818 } 1819 #endif 1820 if (__kmp_env_consistency_check) 1821 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1822 } 1823 1824 // User routines which take C-style arguments (call by value) 1825 // different from the Fortran equivalent routines 1826 1827 void ompc_set_num_threads(int arg) { 1828 // !!!!! TODO: check the per-task binding 1829 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1830 } 1831 1832 void ompc_set_dynamic(int flag) { 1833 kmp_info_t *thread; 1834 1835 /* For the thread-private implementation of the internal controls */ 1836 thread = __kmp_entry_thread(); 1837 1838 __kmp_save_internal_controls(thread); 1839 1840 set__dynamic(thread, flag ? TRUE : FALSE); 1841 } 1842 1843 void ompc_set_nested(int flag) { 1844 kmp_info_t *thread; 1845 1846 /* For the thread-private internal controls implementation */ 1847 thread = __kmp_entry_thread(); 1848 1849 __kmp_save_internal_controls(thread); 1850 1851 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1852 } 1853 1854 void ompc_set_max_active_levels(int max_active_levels) { 1855 /* TO DO */ 1856 /* we want per-task implementation of this internal control */ 1857 1858 /* For the per-thread internal controls implementation */ 1859 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1860 } 1861 1862 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1863 // !!!!! 
TODO: check the per-task binding 1864 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1865 } 1866 1867 int ompc_get_ancestor_thread_num(int level) { 1868 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1869 } 1870 1871 int ompc_get_team_size(int level) { 1872 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1873 } 1874 1875 /* OpenMP 5.0 Affinity Format API */ 1876 1877 void ompc_set_affinity_format(char const *format) { 1878 if (!__kmp_init_serial) { 1879 __kmp_serial_initialize(); 1880 } 1881 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1882 format, KMP_STRLEN(format) + 1); 1883 } 1884 1885 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1886 size_t format_size; 1887 if (!__kmp_init_serial) { 1888 __kmp_serial_initialize(); 1889 } 1890 format_size = KMP_STRLEN(__kmp_affinity_format); 1891 if (buffer && size) { 1892 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1893 format_size + 1); 1894 } 1895 return format_size; 1896 } 1897 1898 void ompc_display_affinity(char const *format) { 1899 int gtid; 1900 if (!TCR_4(__kmp_init_middle)) { 1901 __kmp_middle_initialize(); 1902 } 1903 gtid = __kmp_get_gtid(); 1904 __kmp_aux_display_affinity(gtid, format); 1905 } 1906 1907 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1908 char const *format) { 1909 int gtid; 1910 size_t num_required; 1911 kmp_str_buf_t capture_buf; 1912 if (!TCR_4(__kmp_init_middle)) { 1913 __kmp_middle_initialize(); 1914 } 1915 gtid = __kmp_get_gtid(); 1916 __kmp_str_buf_init(&capture_buf); 1917 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1918 if (buffer && buf_size) { 1919 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1920 capture_buf.used + 1); 1921 } 1922 __kmp_str_buf_free(&capture_buf); 1923 return num_required; 1924 } 1925 1926 void kmpc_set_stacksize(int arg) { 1927 // __kmp_aux_set_stacksize initializes the library if needed 1928 __kmp_aux_set_stacksize(arg); 1929 } 1930 1931 void kmpc_set_stacksize_s(size_t arg) { 1932 // __kmp_aux_set_stacksize initializes the library if needed 1933 __kmp_aux_set_stacksize(arg); 1934 } 1935 1936 void kmpc_set_blocktime(int arg) { 1937 int gtid, tid; 1938 kmp_info_t *thread; 1939 1940 gtid = __kmp_entry_gtid(); 1941 tid = __kmp_tid_from_gtid(gtid); 1942 thread = __kmp_thread_from_gtid(gtid); 1943 1944 __kmp_aux_set_blocktime(arg, thread, tid); 1945 } 1946 1947 void kmpc_set_library(int arg) { 1948 // __kmp_user_set_library initializes the library if needed 1949 __kmp_user_set_library((enum library_type)arg); 1950 } 1951 1952 void kmpc_set_defaults(char const *str) { 1953 // __kmp_aux_set_defaults initializes the library if needed 1954 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1955 } 1956 1957 void kmpc_set_disp_num_buffers(int arg) { 1958 // ignore after initialization because some teams have already 1959 // allocated dispatch buffers 1960 if (__kmp_init_serial == 0 && arg > 0) 1961 __kmp_dispatch_num_buffers = arg; 1962 } 1963 1964 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1965 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1966 return -1; 1967 #else 1968 if (!TCR_4(__kmp_init_middle)) { 1969 __kmp_middle_initialize(); 1970 } 1971 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1972 #endif 1973 } 1974 1975 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 1976 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1977 return -1; 1978 #else 1979 if (!TCR_4(__kmp_init_middle)) { 1980 __kmp_middle_initialize(); 
1981 } 1982 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 1983 #endif 1984 } 1985 1986 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 1987 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1988 return -1; 1989 #else 1990 if (!TCR_4(__kmp_init_middle)) { 1991 __kmp_middle_initialize(); 1992 } 1993 return __kmp_aux_get_affinity_mask_proc(proc, mask); 1994 #endif 1995 } 1996 1997 /* -------------------------------------------------------------------------- */ 1998 /*! 1999 @ingroup THREADPRIVATE 2000 @param loc source location information 2001 @param gtid global thread number 2002 @param cpy_size size of the cpy_data buffer 2003 @param cpy_data pointer to data to be copied 2004 @param cpy_func helper function to call for copying data 2005 @param didit flag variable: 1=single thread; 0=not single thread 2006 2007 __kmpc_copyprivate implements the interface for the private data broadcast 2008 needed for the copyprivate clause associated with a single region in an 2009 OpenMP<sup>*</sup> program (both C and Fortran). 2010 All threads participating in the parallel region call this routine. 2011 One of the threads (called the single thread) should have the <tt>didit</tt> 2012 variable set to 1 and all other threads should have that variable set to 0. 2013 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2014 2015 The OpenMP specification forbids the use of nowait on the single region when a 2016 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2017 barrier internally to avoid race conditions, so the code generation for the 2018 single region should avoid generating a barrier after the call to @ref 2019 __kmpc_copyprivate. 2020 2021 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2022 The <tt>loc</tt> parameter is a pointer to source location information. 2023 2024 Internal implementation: The single thread will first copy its descriptor 2025 address (cpy_data) to a team-private location, then the other threads will each 2026 call the function pointed to by the parameter cpy_func, which carries out the 2027 copy by copying the data using the cpy_data buffer. 2028 2029 The cpy_func routine used for the copy and the contents of the data area defined 2030 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2031 to be done. For instance, the cpy_data buffer can hold the actual data to be 2032 copied or it may hold a list of pointers to the data. The cpy_func routine must 2033 interpret the cpy_data buffer appropriately. 2034 2035 The interface to cpy_func is as follows: 2036 @code 2037 void cpy_func( void *destination, void *source ) 2038 @endcode 2039 where void *destination is the cpy_data pointer for the thread being copied to 2040 and void *source is the cpy_data pointer for the thread being copied from. 
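
As a purely illustrative sketch (the helper <tt>copy_int</tt>, the variable names and the constant value are placeholders, not the code any particular compiler emits), broadcasting one scalar from the single thread could look like:
@code
static void copy_int(void *destination, void *source) {
  *(int *)destination = *(int *)source;       // copy the broadcast value into this thread's copy
}

// inside the parallel region, every thread executes:
int x;                                        // each thread's private copy
kmp_int32 didit = __kmpc_single(&loc, gtid);  // 1 on the single thread, 0 on all others
if (didit) {
  x = 42;                                     // the single thread produces the value
  __kmpc_end_single(&loc, gtid);
}
__kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_int, didit);
// no barrier is generated here; __kmpc_copyprivate has already synchronized
@endcode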
2041 */ 2042 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2043 void *cpy_data, void (*cpy_func)(void *, void *), 2044 kmp_int32 didit) { 2045 void **data_ptr; 2046 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2047 __kmp_assert_valid_gtid(gtid); 2048 2049 KMP_MB(); 2050 2051 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2052 2053 if (__kmp_env_consistency_check) { 2054 if (loc == 0) { 2055 KMP_WARNING(ConstructIdentInvalid); 2056 } 2057 } 2058 2059 // ToDo: Optimize the following two barriers into some kind of split barrier 2060 2061 if (didit) 2062 *data_ptr = cpy_data; 2063 2064 #if OMPT_SUPPORT 2065 ompt_frame_t *ompt_frame; 2066 if (ompt_enabled.enabled) { 2067 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2068 if (ompt_frame->enter_frame.ptr == NULL) 2069 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2070 OMPT_STORE_RETURN_ADDRESS(gtid); 2071 } 2072 #endif 2073 /* This barrier is not a barrier region boundary */ 2074 #if USE_ITT_NOTIFY 2075 __kmp_threads[gtid]->th.th_ident = loc; 2076 #endif 2077 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2078 2079 if (!didit) 2080 (*cpy_func)(cpy_data, *data_ptr); 2081 2082 // Consider next barrier a user-visible barrier for barrier region boundaries 2083 // Nesting checks are already handled by the single construct checks 2084 2085 #if OMPT_SUPPORT 2086 if (ompt_enabled.enabled) { 2087 OMPT_STORE_RETURN_ADDRESS(gtid); 2088 } 2089 #endif 2090 #if USE_ITT_NOTIFY 2091 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2092 // tasks can overwrite the location) 2093 #endif 2094 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2095 #if OMPT_SUPPORT && OMPT_OPTIONAL 2096 if (ompt_enabled.enabled) { 2097 ompt_frame->enter_frame = ompt_data_none; 2098 } 2099 #endif 2100 } 2101 2102 /* -------------------------------------------------------------------------- */ 2103 2104 #define INIT_LOCK __kmp_init_user_lock_with_checks 2105 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2106 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2107 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2108 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2109 #define ACQUIRE_NESTED_LOCK_TIMED \ 2110 __kmp_acquire_nested_user_lock_with_checks_timed 2111 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2112 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2113 #define TEST_LOCK __kmp_test_user_lock_with_checks 2114 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2115 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2116 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2117 2118 // TODO: Make check abort messages use location info & pass it into 2119 // with_checks routines 2120 2121 #if KMP_USE_DYNAMIC_LOCK 2122 2123 // internal lock initializer 2124 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2125 kmp_dyna_lockseq_t seq) { 2126 if (KMP_IS_D_LOCK(seq)) { 2127 KMP_INIT_D_LOCK(lock, seq); 2128 #if USE_ITT_BUILD 2129 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2130 #endif 2131 } else { 2132 KMP_INIT_I_LOCK(lock, seq); 2133 #if USE_ITT_BUILD 2134 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2135 __kmp_itt_lock_creating(ilk->lock, loc); 2136 #endif 2137 } 2138 } 2139 2140 // internal nest lock initializer 2141 static __forceinline void 2142 
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2143 kmp_dyna_lockseq_t seq) { 2144 #if KMP_USE_TSX 2145 // Don't have nested lock implementation for speculative locks 2146 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2147 seq = __kmp_user_lock_seq; 2148 #endif 2149 switch (seq) { 2150 case lockseq_tas: 2151 seq = lockseq_nested_tas; 2152 break; 2153 #if KMP_USE_FUTEX 2154 case lockseq_futex: 2155 seq = lockseq_nested_futex; 2156 break; 2157 #endif 2158 case lockseq_ticket: 2159 seq = lockseq_nested_ticket; 2160 break; 2161 case lockseq_queuing: 2162 seq = lockseq_nested_queuing; 2163 break; 2164 case lockseq_drdpa: 2165 seq = lockseq_nested_drdpa; 2166 break; 2167 default: 2168 seq = lockseq_nested_queuing; 2169 } 2170 KMP_INIT_I_LOCK(lock, seq); 2171 #if USE_ITT_BUILD 2172 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2173 __kmp_itt_lock_creating(ilk->lock, loc); 2174 #endif 2175 } 2176 2177 /* initialize the lock with a hint */ 2178 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2179 uintptr_t hint) { 2180 KMP_DEBUG_ASSERT(__kmp_init_serial); 2181 if (__kmp_env_consistency_check && user_lock == NULL) { 2182 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2183 } 2184 2185 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2186 2187 #if OMPT_SUPPORT && OMPT_OPTIONAL 2188 // This is the case, if called from omp_init_lock_with_hint: 2189 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2190 if (!codeptr) 2191 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2192 if (ompt_enabled.ompt_callback_lock_init) { 2193 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2194 ompt_mutex_lock, (omp_lock_hint_t)hint, 2195 __ompt_get_mutex_impl_type(user_lock), 2196 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2197 } 2198 #endif 2199 } 2200 2201 /* initialize the lock with a hint */ 2202 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2203 void **user_lock, uintptr_t hint) { 2204 KMP_DEBUG_ASSERT(__kmp_init_serial); 2205 if (__kmp_env_consistency_check && user_lock == NULL) { 2206 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2207 } 2208 2209 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2210 2211 #if OMPT_SUPPORT && OMPT_OPTIONAL 2212 // This is the case, if called from omp_init_lock_with_hint: 2213 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2214 if (!codeptr) 2215 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2216 if (ompt_enabled.ompt_callback_lock_init) { 2217 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2218 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2219 __ompt_get_mutex_impl_type(user_lock), 2220 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2221 } 2222 #endif 2223 } 2224 2225 #endif // KMP_USE_DYNAMIC_LOCK 2226 2227 /* initialize the lock */ 2228 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2229 #if KMP_USE_DYNAMIC_LOCK 2230 2231 KMP_DEBUG_ASSERT(__kmp_init_serial); 2232 if (__kmp_env_consistency_check && user_lock == NULL) { 2233 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2234 } 2235 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2236 2237 #if OMPT_SUPPORT && OMPT_OPTIONAL 2238 // This is the case, if called from omp_init_lock_with_hint: 2239 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2240 if (!codeptr) 2241 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2242 if (ompt_enabled.ompt_callback_lock_init) { 2243 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2244 
ompt_mutex_lock, omp_lock_hint_none, 2245 __ompt_get_mutex_impl_type(user_lock), 2246 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2247 } 2248 #endif 2249 2250 #else // KMP_USE_DYNAMIC_LOCK 2251 2252 static char const *const func = "omp_init_lock"; 2253 kmp_user_lock_p lck; 2254 KMP_DEBUG_ASSERT(__kmp_init_serial); 2255 2256 if (__kmp_env_consistency_check) { 2257 if (user_lock == NULL) { 2258 KMP_FATAL(LockIsUninitialized, func); 2259 } 2260 } 2261 2262 KMP_CHECK_USER_LOCK_INIT(); 2263 2264 if ((__kmp_user_lock_kind == lk_tas) && 2265 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2266 lck = (kmp_user_lock_p)user_lock; 2267 } 2268 #if KMP_USE_FUTEX 2269 else if ((__kmp_user_lock_kind == lk_futex) && 2270 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2271 lck = (kmp_user_lock_p)user_lock; 2272 } 2273 #endif 2274 else { 2275 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2276 } 2277 INIT_LOCK(lck); 2278 __kmp_set_user_lock_location(lck, loc); 2279 2280 #if OMPT_SUPPORT && OMPT_OPTIONAL 2281 // This is the case, if called from omp_init_lock_with_hint: 2282 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2283 if (!codeptr) 2284 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2285 if (ompt_enabled.ompt_callback_lock_init) { 2286 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2287 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2288 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2289 } 2290 #endif 2291 2292 #if USE_ITT_BUILD 2293 __kmp_itt_lock_creating(lck); 2294 #endif /* USE_ITT_BUILD */ 2295 2296 #endif // KMP_USE_DYNAMIC_LOCK 2297 } // __kmpc_init_lock 2298 2299 /* initialize the lock */ 2300 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2301 #if KMP_USE_DYNAMIC_LOCK 2302 2303 KMP_DEBUG_ASSERT(__kmp_init_serial); 2304 if (__kmp_env_consistency_check && user_lock == NULL) { 2305 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2306 } 2307 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2308 2309 #if OMPT_SUPPORT && OMPT_OPTIONAL 2310 // This is the case, if called from omp_init_lock_with_hint: 2311 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2312 if (!codeptr) 2313 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2314 if (ompt_enabled.ompt_callback_lock_init) { 2315 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2316 ompt_mutex_nest_lock, omp_lock_hint_none, 2317 __ompt_get_mutex_impl_type(user_lock), 2318 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2319 } 2320 #endif 2321 2322 #else // KMP_USE_DYNAMIC_LOCK 2323 2324 static char const *const func = "omp_init_nest_lock"; 2325 kmp_user_lock_p lck; 2326 KMP_DEBUG_ASSERT(__kmp_init_serial); 2327 2328 if (__kmp_env_consistency_check) { 2329 if (user_lock == NULL) { 2330 KMP_FATAL(LockIsUninitialized, func); 2331 } 2332 } 2333 2334 KMP_CHECK_USER_LOCK_INIT(); 2335 2336 if ((__kmp_user_lock_kind == lk_tas) && 2337 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2338 OMP_NEST_LOCK_T_SIZE)) { 2339 lck = (kmp_user_lock_p)user_lock; 2340 } 2341 #if KMP_USE_FUTEX 2342 else if ((__kmp_user_lock_kind == lk_futex) && 2343 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2344 OMP_NEST_LOCK_T_SIZE)) { 2345 lck = (kmp_user_lock_p)user_lock; 2346 } 2347 #endif 2348 else { 2349 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2350 } 2351 2352 INIT_NESTED_LOCK(lck); 2353 __kmp_set_user_lock_location(lck, loc); 2354 2355 #if OMPT_SUPPORT && OMPT_OPTIONAL 2356 // This is the case, if called from omp_init_lock_with_hint: 2357 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2358 if (!codeptr) 2359 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2360 if (ompt_enabled.ompt_callback_lock_init) { 2361 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2362 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2363 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2364 } 2365 #endif 2366 2367 #if USE_ITT_BUILD 2368 __kmp_itt_lock_creating(lck); 2369 #endif /* USE_ITT_BUILD */ 2370 2371 #endif // KMP_USE_DYNAMIC_LOCK 2372 } // __kmpc_init_nest_lock 2373 2374 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2375 #if KMP_USE_DYNAMIC_LOCK 2376 2377 #if USE_ITT_BUILD 2378 kmp_user_lock_p lck; 2379 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2380 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2381 } else { 2382 lck = (kmp_user_lock_p)user_lock; 2383 } 2384 __kmp_itt_lock_destroyed(lck); 2385 #endif 2386 #if OMPT_SUPPORT && OMPT_OPTIONAL 2387 // This is the case, if called from omp_init_lock_with_hint: 2388 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2389 if (!codeptr) 2390 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2391 if (ompt_enabled.ompt_callback_lock_destroy) { 2392 kmp_user_lock_p lck; 2393 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2394 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2395 } else { 2396 lck = (kmp_user_lock_p)user_lock; 2397 } 2398 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2399 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2400 } 2401 #endif 2402 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2403 #else 2404 kmp_user_lock_p lck; 2405 2406 if ((__kmp_user_lock_kind == lk_tas) && 2407 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2408 lck = (kmp_user_lock_p)user_lock; 2409 } 2410 #if KMP_USE_FUTEX 2411 else if ((__kmp_user_lock_kind == lk_futex) && 2412 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2413 lck = (kmp_user_lock_p)user_lock; 2414 } 2415 #endif 2416 else { 2417 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2418 } 2419 2420 #if OMPT_SUPPORT && OMPT_OPTIONAL 2421 // This is the case, if called from omp_init_lock_with_hint: 2422 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2423 if (!codeptr) 2424 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2425 if (ompt_enabled.ompt_callback_lock_destroy) { 2426 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2427 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2428 } 2429 #endif 2430 2431 #if USE_ITT_BUILD 2432 __kmp_itt_lock_destroyed(lck); 2433 #endif /* USE_ITT_BUILD */ 2434 DESTROY_LOCK(lck); 2435 2436 if ((__kmp_user_lock_kind == lk_tas) && 2437 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2438 ; 2439 } 2440 #if KMP_USE_FUTEX 2441 else if ((__kmp_user_lock_kind == lk_futex) && 2442 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2443 ; 2444 } 2445 #endif 2446 else { 2447 __kmp_user_lock_free(user_lock, gtid, lck); 2448 } 2449 #endif // KMP_USE_DYNAMIC_LOCK 2450 } // __kmpc_destroy_lock 2451 2452 /* destroy the lock */ 2453 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2454 #if KMP_USE_DYNAMIC_LOCK 2455 2456 #if USE_ITT_BUILD 2457 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2458 __kmp_itt_lock_destroyed(ilk->lock); 2459 #endif 2460 #if OMPT_SUPPORT && OMPT_OPTIONAL 2461 // This is the case, if called from omp_init_lock_with_hint: 2462 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2463 if (!codeptr) 2464 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2465 if (ompt_enabled.ompt_callback_lock_destroy) { 2466 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2467 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2468 } 2469 #endif 2470 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2471 2472 #else // KMP_USE_DYNAMIC_LOCK 2473 2474 kmp_user_lock_p lck; 2475 2476 if ((__kmp_user_lock_kind == lk_tas) && 2477 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2478 OMP_NEST_LOCK_T_SIZE)) { 2479 lck = (kmp_user_lock_p)user_lock; 2480 } 2481 #if KMP_USE_FUTEX 2482 else if ((__kmp_user_lock_kind == lk_futex) && 2483 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2484 OMP_NEST_LOCK_T_SIZE)) { 2485 lck = (kmp_user_lock_p)user_lock; 2486 } 2487 #endif 2488 else { 2489 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2490 } 2491 2492 #if OMPT_SUPPORT && OMPT_OPTIONAL 2493 // This is the case, if called from omp_init_lock_with_hint: 2494 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2495 if (!codeptr) 2496 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2497 if (ompt_enabled.ompt_callback_lock_destroy) { 2498 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2499 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2500 } 2501 #endif 2502 2503 #if USE_ITT_BUILD 2504 __kmp_itt_lock_destroyed(lck); 2505 #endif /* USE_ITT_BUILD */ 2506 2507 DESTROY_NESTED_LOCK(lck); 2508 2509 if ((__kmp_user_lock_kind == lk_tas) && 2510 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2511 OMP_NEST_LOCK_T_SIZE)) { 2512 ; 2513 } 2514 #if KMP_USE_FUTEX 2515 else if ((__kmp_user_lock_kind == lk_futex) && 2516 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2517 OMP_NEST_LOCK_T_SIZE)) { 2518 ; 2519 } 2520 #endif 2521 else { 2522 __kmp_user_lock_free(user_lock, gtid, lck); 2523 } 2524 #endif // KMP_USE_DYNAMIC_LOCK 2525 } // __kmpc_destroy_nest_lock 2526 2527 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2528 KMP_COUNT_BLOCK(OMP_set_lock); 2529 #if KMP_USE_DYNAMIC_LOCK 2530 int tag = KMP_EXTRACT_D_TAG(user_lock); 2531 #if USE_ITT_BUILD 2532 __kmp_itt_lock_acquiring( 2533 (kmp_user_lock_p) 2534 user_lock); // itt function will get to the right lock object. 
2535 #endif 2536 #if OMPT_SUPPORT && OMPT_OPTIONAL 2537 // This is the case, if called from omp_init_lock_with_hint: 2538 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2539 if (!codeptr) 2540 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2541 if (ompt_enabled.ompt_callback_mutex_acquire) { 2542 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2543 ompt_mutex_lock, omp_lock_hint_none, 2544 __ompt_get_mutex_impl_type(user_lock), 2545 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2546 } 2547 #endif 2548 #if KMP_USE_INLINED_TAS 2549 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2550 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2551 } else 2552 #elif KMP_USE_INLINED_FUTEX 2553 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2554 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2555 } else 2556 #endif 2557 { 2558 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2559 } 2560 #if USE_ITT_BUILD 2561 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2562 #endif 2563 #if OMPT_SUPPORT && OMPT_OPTIONAL 2564 if (ompt_enabled.ompt_callback_mutex_acquired) { 2565 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2566 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2567 } 2568 #endif 2569 2570 #else // KMP_USE_DYNAMIC_LOCK 2571 2572 kmp_user_lock_p lck; 2573 2574 if ((__kmp_user_lock_kind == lk_tas) && 2575 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2576 lck = (kmp_user_lock_p)user_lock; 2577 } 2578 #if KMP_USE_FUTEX 2579 else if ((__kmp_user_lock_kind == lk_futex) && 2580 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2581 lck = (kmp_user_lock_p)user_lock; 2582 } 2583 #endif 2584 else { 2585 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2586 } 2587 2588 #if USE_ITT_BUILD 2589 __kmp_itt_lock_acquiring(lck); 2590 #endif /* USE_ITT_BUILD */ 2591 #if OMPT_SUPPORT && OMPT_OPTIONAL 2592 // This is the case, if called from omp_init_lock_with_hint: 2593 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2594 if (!codeptr) 2595 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2596 if (ompt_enabled.ompt_callback_mutex_acquire) { 2597 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2598 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2599 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2600 } 2601 #endif 2602 2603 ACQUIRE_LOCK(lck, gtid); 2604 2605 #if USE_ITT_BUILD 2606 __kmp_itt_lock_acquired(lck); 2607 #endif /* USE_ITT_BUILD */ 2608 2609 #if OMPT_SUPPORT && OMPT_OPTIONAL 2610 if (ompt_enabled.ompt_callback_mutex_acquired) { 2611 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2612 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2613 } 2614 #endif 2615 2616 #endif // KMP_USE_DYNAMIC_LOCK 2617 } 2618 2619 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2620 #if KMP_USE_DYNAMIC_LOCK 2621 2622 #if USE_ITT_BUILD 2623 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2624 #endif 2625 #if OMPT_SUPPORT && OMPT_OPTIONAL 2626 // This is the case, if called from omp_init_lock_with_hint: 2627 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2628 if (!codeptr) 2629 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2630 if (ompt_enabled.enabled) { 2631 if (ompt_enabled.ompt_callback_mutex_acquire) { 2632 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2633 ompt_mutex_nest_lock, omp_lock_hint_none, 2634 __ompt_get_mutex_impl_type(user_lock), 2635 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2636 } 2637 } 2638 #endif 2639 int acquire_status = 2640 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2641 (void) acquire_status; 2642 #if USE_ITT_BUILD 2643 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2644 #endif 2645 2646 #if OMPT_SUPPORT && OMPT_OPTIONAL 2647 if (ompt_enabled.enabled) { 2648 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2649 if (ompt_enabled.ompt_callback_mutex_acquired) { 2650 // lock_first 2651 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2652 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2653 codeptr); 2654 } 2655 } else { 2656 if (ompt_enabled.ompt_callback_nest_lock) { 2657 // lock_next 2658 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2659 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2660 } 2661 } 2662 } 2663 #endif 2664 2665 #else // KMP_USE_DYNAMIC_LOCK 2666 int acquire_status; 2667 kmp_user_lock_p lck; 2668 2669 if ((__kmp_user_lock_kind == lk_tas) && 2670 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2671 OMP_NEST_LOCK_T_SIZE)) { 2672 lck = (kmp_user_lock_p)user_lock; 2673 } 2674 #if KMP_USE_FUTEX 2675 else if ((__kmp_user_lock_kind == lk_futex) && 2676 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2677 OMP_NEST_LOCK_T_SIZE)) { 2678 lck = (kmp_user_lock_p)user_lock; 2679 } 2680 #endif 2681 else { 2682 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2683 } 2684 2685 #if USE_ITT_BUILD 2686 __kmp_itt_lock_acquiring(lck); 2687 #endif /* USE_ITT_BUILD */ 2688 #if OMPT_SUPPORT && OMPT_OPTIONAL 2689 // This is the case, if called from omp_init_lock_with_hint: 2690 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2691 if (!codeptr) 2692 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2693 if (ompt_enabled.enabled) { 2694 if (ompt_enabled.ompt_callback_mutex_acquire) { 2695 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2696 ompt_mutex_nest_lock, omp_lock_hint_none, 2697 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2698 codeptr); 2699 } 2700 } 2701 #endif 2702 2703 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2704 2705 #if USE_ITT_BUILD 2706 __kmp_itt_lock_acquired(lck); 2707 #endif /* USE_ITT_BUILD */ 2708 2709 #if OMPT_SUPPORT && OMPT_OPTIONAL 2710 if (ompt_enabled.enabled) { 2711 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2712 if (ompt_enabled.ompt_callback_mutex_acquired) { 2713 // lock_first 2714 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2715 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2716 } 2717 } else { 2718 if (ompt_enabled.ompt_callback_nest_lock) { 2719 // lock_next 2720 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2721 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2722 } 2723 } 2724 } 2725 #endif 2726 2727 #endif // KMP_USE_DYNAMIC_LOCK 2728 } 2729 2730 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2731 #if KMP_USE_DYNAMIC_LOCK 2732 2733 int tag = KMP_EXTRACT_D_TAG(user_lock); 2734 #if USE_ITT_BUILD 2735 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2736 #endif 2737 #if KMP_USE_INLINED_TAS 2738 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2739 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2740 } else 2741 #elif KMP_USE_INLINED_FUTEX 2742 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2743 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2744 } else 2745 #endif 2746 { 2747 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2748 } 2749 2750 #if OMPT_SUPPORT && OMPT_OPTIONAL 2751 // This is the case, if called from omp_init_lock_with_hint: 2752 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2753 if (!codeptr) 2754 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2755 if (ompt_enabled.ompt_callback_mutex_released) { 2756 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2757 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2758 } 2759 #endif 2760 2761 #else // KMP_USE_DYNAMIC_LOCK 2762 2763 kmp_user_lock_p lck; 2764 2765 /* Can't use serial interval since not block structured */ 2766 /* release the lock */ 2767 2768 if ((__kmp_user_lock_kind == lk_tas) && 2769 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2770 #if KMP_OS_LINUX && \ 2771 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2772 // "fast" path implemented to fix customer performance issue 2773 #if USE_ITT_BUILD 2774 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2775 #endif /* USE_ITT_BUILD */ 2776 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2777 KMP_MB(); 2778 2779 #if OMPT_SUPPORT && OMPT_OPTIONAL 2780 // This is the case, if called from omp_init_lock_with_hint: 2781 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2782 if (!codeptr) 2783 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2784 if (ompt_enabled.ompt_callback_mutex_released) { 2785 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2786 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2787 } 2788 #endif 2789 2790 return; 2791 #else 2792 lck = (kmp_user_lock_p)user_lock; 2793 #endif 2794 } 2795 #if KMP_USE_FUTEX 2796 else if ((__kmp_user_lock_kind == lk_futex) && 2797 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2798 lck = (kmp_user_lock_p)user_lock; 2799 } 2800 #endif 2801 else { 2802 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2803 } 2804 2805 #if USE_ITT_BUILD 2806 __kmp_itt_lock_releasing(lck); 2807 #endif /* USE_ITT_BUILD */ 2808 2809 RELEASE_LOCK(lck, gtid); 2810 2811 #if OMPT_SUPPORT && OMPT_OPTIONAL 2812 // This is the case, if called from omp_init_lock_with_hint: 2813 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2814 if (!codeptr) 2815 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2816 if (ompt_enabled.ompt_callback_mutex_released) { 2817 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2818 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2819 } 2820 #endif 2821 2822 #endif // KMP_USE_DYNAMIC_LOCK 2823 } 2824 2825 /* release the lock */ 2826 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2827 #if KMP_USE_DYNAMIC_LOCK 2828 2829 #if USE_ITT_BUILD 2830 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2831 #endif 2832 int release_status = 2833 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2834 (void) release_status; 2835 2836 #if OMPT_SUPPORT && OMPT_OPTIONAL 2837 // This is the case, if called from omp_init_lock_with_hint: 2838 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2839 if (!codeptr) 2840 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2841 if (ompt_enabled.enabled) { 2842 if (release_status == KMP_LOCK_RELEASED) { 2843 if (ompt_enabled.ompt_callback_mutex_released) { 2844 // release_lock_last 2845 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2846 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2847 codeptr); 2848 } 2849 } else if (ompt_enabled.ompt_callback_nest_lock) { 2850 // release_lock_prev 2851 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2852 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2853 } 2854 } 2855 #endif 2856 2857 #else // KMP_USE_DYNAMIC_LOCK 2858 2859 kmp_user_lock_p lck; 
2860 2861 /* Can't use serial interval since not block structured */ 2862 2863 if ((__kmp_user_lock_kind == lk_tas) && 2864 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2865 OMP_NEST_LOCK_T_SIZE)) { 2866 #if KMP_OS_LINUX && \ 2867 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2868 // "fast" path implemented to fix customer performance issue 2869 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2870 #if USE_ITT_BUILD 2871 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2872 #endif /* USE_ITT_BUILD */ 2873 2874 #if OMPT_SUPPORT && OMPT_OPTIONAL 2875 int release_status = KMP_LOCK_STILL_HELD; 2876 #endif 2877 2878 if (--(tl->lk.depth_locked) == 0) { 2879 TCW_4(tl->lk.poll, 0); 2880 #if OMPT_SUPPORT && OMPT_OPTIONAL 2881 release_status = KMP_LOCK_RELEASED; 2882 #endif 2883 } 2884 KMP_MB(); 2885 2886 #if OMPT_SUPPORT && OMPT_OPTIONAL 2887 // This is the case, if called from omp_init_lock_with_hint: 2888 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2889 if (!codeptr) 2890 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2891 if (ompt_enabled.enabled) { 2892 if (release_status == KMP_LOCK_RELEASED) { 2893 if (ompt_enabled.ompt_callback_mutex_released) { 2894 // release_lock_last 2895 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2896 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2897 } 2898 } else if (ompt_enabled.ompt_callback_nest_lock) { 2899 // release_lock_previous 2900 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2901 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2902 } 2903 } 2904 #endif 2905 2906 return; 2907 #else 2908 lck = (kmp_user_lock_p)user_lock; 2909 #endif 2910 } 2911 #if KMP_USE_FUTEX 2912 else if ((__kmp_user_lock_kind == lk_futex) && 2913 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2914 OMP_NEST_LOCK_T_SIZE)) { 2915 lck = (kmp_user_lock_p)user_lock; 2916 } 2917 #endif 2918 else { 2919 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2920 } 2921 2922 #if USE_ITT_BUILD 2923 __kmp_itt_lock_releasing(lck); 2924 #endif /* USE_ITT_BUILD */ 2925 2926 int release_status; 2927 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2928 #if OMPT_SUPPORT && OMPT_OPTIONAL 2929 // This is the case, if called from omp_init_lock_with_hint: 2930 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2931 if (!codeptr) 2932 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2933 if (ompt_enabled.enabled) { 2934 if (release_status == KMP_LOCK_RELEASED) { 2935 if (ompt_enabled.ompt_callback_mutex_released) { 2936 // release_lock_last 2937 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2938 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2939 } 2940 } else if (ompt_enabled.ompt_callback_nest_lock) { 2941 // release_lock_previous 2942 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2943 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2944 } 2945 } 2946 #endif 2947 2948 #endif // KMP_USE_DYNAMIC_LOCK 2949 } 2950 2951 /* try to acquire the lock */ 2952 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2953 KMP_COUNT_BLOCK(OMP_test_lock); 2954 2955 #if KMP_USE_DYNAMIC_LOCK 2956 int rc; 2957 int tag = KMP_EXTRACT_D_TAG(user_lock); 2958 #if USE_ITT_BUILD 2959 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2960 #endif 2961 #if OMPT_SUPPORT && OMPT_OPTIONAL 2962 // This is the case, if called from omp_init_lock_with_hint: 2963 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2964 if (!codeptr) 2965 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2966 if (ompt_enabled.ompt_callback_mutex_acquire) { 2967 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2968 ompt_mutex_lock, omp_lock_hint_none, 2969 __ompt_get_mutex_impl_type(user_lock), 2970 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2971 } 2972 #endif 2973 #if KMP_USE_INLINED_TAS 2974 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2975 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 2976 } else 2977 #elif KMP_USE_INLINED_FUTEX 2978 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2979 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 2980 } else 2981 #endif 2982 { 2983 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2984 } 2985 if (rc) { 2986 #if USE_ITT_BUILD 2987 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2988 #endif 2989 #if OMPT_SUPPORT && OMPT_OPTIONAL 2990 if (ompt_enabled.ompt_callback_mutex_acquired) { 2991 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2992 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2993 } 2994 #endif 2995 return FTN_TRUE; 2996 } else { 2997 #if USE_ITT_BUILD 2998 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 2999 #endif 3000 return FTN_FALSE; 3001 } 3002 3003 #else // KMP_USE_DYNAMIC_LOCK 3004 3005 kmp_user_lock_p lck; 3006 int rc; 3007 3008 if ((__kmp_user_lock_kind == lk_tas) && 3009 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3010 lck = (kmp_user_lock_p)user_lock; 3011 } 3012 #if KMP_USE_FUTEX 3013 else if ((__kmp_user_lock_kind == lk_futex) && 3014 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3015 lck = (kmp_user_lock_p)user_lock; 3016 } 3017 #endif 3018 else { 3019 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3020 } 3021 3022 #if USE_ITT_BUILD 3023 __kmp_itt_lock_acquiring(lck); 3024 #endif /* USE_ITT_BUILD */ 3025 #if OMPT_SUPPORT && OMPT_OPTIONAL 3026 // This is the case, if called from omp_init_lock_with_hint: 3027 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3028 if (!codeptr) 3029 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3030 if (ompt_enabled.ompt_callback_mutex_acquire) { 3031 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3032 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3033 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3034 } 3035 #endif 3036 3037 rc = TEST_LOCK(lck, gtid); 3038 #if USE_ITT_BUILD 3039 if (rc) { 3040 __kmp_itt_lock_acquired(lck); 3041 } else { 3042 __kmp_itt_lock_cancelled(lck); 3043 } 3044 #endif /* USE_ITT_BUILD */ 3045 #if OMPT_SUPPORT && OMPT_OPTIONAL 3046 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3047 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3048 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3049 } 3050 #endif 3051 3052 return (rc ? 
FTN_TRUE : FTN_FALSE); 3053 3054 /* Can't use serial interval since not block structured */ 3055 3056 #endif // KMP_USE_DYNAMIC_LOCK 3057 } 3058 3059 /* try to acquire the lock */ 3060 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3061 #if KMP_USE_DYNAMIC_LOCK 3062 int rc; 3063 #if USE_ITT_BUILD 3064 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3065 #endif 3066 #if OMPT_SUPPORT && OMPT_OPTIONAL 3067 // This is the case, if called from omp_init_lock_with_hint: 3068 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3069 if (!codeptr) 3070 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3071 if (ompt_enabled.ompt_callback_mutex_acquire) { 3072 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3073 ompt_mutex_nest_lock, omp_lock_hint_none, 3074 __ompt_get_mutex_impl_type(user_lock), 3075 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3076 } 3077 #endif 3078 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3079 #if USE_ITT_BUILD 3080 if (rc) { 3081 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3082 } else { 3083 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3084 } 3085 #endif 3086 #if OMPT_SUPPORT && OMPT_OPTIONAL 3087 if (ompt_enabled.enabled && rc) { 3088 if (rc == 1) { 3089 if (ompt_enabled.ompt_callback_mutex_acquired) { 3090 // lock_first 3091 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3092 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3093 codeptr); 3094 } 3095 } else { 3096 if (ompt_enabled.ompt_callback_nest_lock) { 3097 // lock_next 3098 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3099 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3100 } 3101 } 3102 } 3103 #endif 3104 return rc; 3105 3106 #else // KMP_USE_DYNAMIC_LOCK 3107 3108 kmp_user_lock_p lck; 3109 int rc; 3110 3111 if ((__kmp_user_lock_kind == lk_tas) && 3112 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3113 OMP_NEST_LOCK_T_SIZE)) { 3114 lck = (kmp_user_lock_p)user_lock; 3115 } 3116 #if KMP_USE_FUTEX 3117 else if ((__kmp_user_lock_kind == lk_futex) && 3118 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3119 OMP_NEST_LOCK_T_SIZE)) { 3120 lck = (kmp_user_lock_p)user_lock; 3121 } 3122 #endif 3123 else { 3124 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3125 } 3126 3127 #if USE_ITT_BUILD 3128 __kmp_itt_lock_acquiring(lck); 3129 #endif /* USE_ITT_BUILD */ 3130 3131 #if OMPT_SUPPORT && OMPT_OPTIONAL 3132 // This is the case, if called from omp_init_lock_with_hint: 3133 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3134 if (!codeptr) 3135 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3136 if (ompt_enabled.enabled && 3137 ompt_enabled.ompt_callback_mutex_acquire) { 3138 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3139 ompt_mutex_nest_lock, omp_lock_hint_none, 3140 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3141 codeptr); 3142 } 3143 #endif 3144 3145 rc = TEST_NESTED_LOCK(lck, gtid); 3146 #if USE_ITT_BUILD 3147 if (rc) { 3148 __kmp_itt_lock_acquired(lck); 3149 } else { 3150 __kmp_itt_lock_cancelled(lck); 3151 } 3152 #endif /* USE_ITT_BUILD */ 3153 #if OMPT_SUPPORT && OMPT_OPTIONAL 3154 if (ompt_enabled.enabled && rc) { 3155 if (rc == 1) { 3156 if (ompt_enabled.ompt_callback_mutex_acquired) { 3157 // lock_first 3158 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3159 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3160 } 3161 } else { 3162 if (ompt_enabled.ompt_callback_nest_lock) { 3163
// lock_next 3164 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3165 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3166 } 3167 } 3168 } 3169 #endif 3170 return rc; 3171 3172 /* Can't use serial interval since not block structured */ 3173 3174 #endif // KMP_USE_DYNAMIC_LOCK 3175 } 3176 3177 // Interface to fast scalable reduce methods routines 3178 3179 // keep the selected method in a thread local structure for cross-function 3180 // usage: will be used in __kmpc_end_reduce* functions; 3181 // another solution: to re-determine the method one more time in 3182 // __kmpc_end_reduce* functions (new prototype required then) 3183 // AT: which solution is better? 3184 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3185 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3186 3187 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3188 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3189 3190 // description of the packed_reduction_method variable: look at the macros in 3191 // kmp.h 3192 3193 // used in a critical section reduce block 3194 static __forceinline void 3195 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3196 kmp_critical_name *crit) { 3197 3198 // this lock was visible to a customer and to the threading profile tool as a 3199 // serial overhead span (although it's used for an internal purpose only) 3200 // why was it visible in previous implementation? 3201 // should we keep it visible in new reduce block? 3202 kmp_user_lock_p lck; 3203 3204 #if KMP_USE_DYNAMIC_LOCK 3205 3206 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3207 // Check if it is initialized. 3208 if (*lk == 0) { 3209 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3210 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3211 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3212 } else { 3213 __kmp_init_indirect_csptr(crit, loc, global_tid, 3214 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3215 } 3216 } 3217 // Branch for accessing the actual lock object and set operation. This 3218 // branching is inevitable since this lock initialization does not follow the 3219 // normal dispatch path (lock table is not used). 3220 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3221 lck = (kmp_user_lock_p)lk; 3222 KMP_DEBUG_ASSERT(lck != NULL); 3223 if (__kmp_env_consistency_check) { 3224 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3225 } 3226 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3227 } else { 3228 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3229 lck = ilk->lock; 3230 KMP_DEBUG_ASSERT(lck != NULL); 3231 if (__kmp_env_consistency_check) { 3232 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3233 } 3234 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3235 } 3236 3237 #else // KMP_USE_DYNAMIC_LOCK 3238 3239 // We know that the fast reduction code is only emitted by Intel compilers 3240 // with 32 byte critical sections. If there isn't enough space, then we 3241 // have to use a pointer. 
3242 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3243 lck = (kmp_user_lock_p)crit; 3244 } else { 3245 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3246 } 3247 KMP_DEBUG_ASSERT(lck != NULL); 3248 3249 if (__kmp_env_consistency_check) 3250 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3251 3252 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3253 3254 #endif // KMP_USE_DYNAMIC_LOCK 3255 } 3256 3257 // used in a critical section reduce block 3258 static __forceinline void 3259 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3260 kmp_critical_name *crit) { 3261 3262 kmp_user_lock_p lck; 3263 3264 #if KMP_USE_DYNAMIC_LOCK 3265 3266 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3267 lck = (kmp_user_lock_p)crit; 3268 if (__kmp_env_consistency_check) 3269 __kmp_pop_sync(global_tid, ct_critical, loc); 3270 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3271 } else { 3272 kmp_indirect_lock_t *ilk = 3273 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3274 if (__kmp_env_consistency_check) 3275 __kmp_pop_sync(global_tid, ct_critical, loc); 3276 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3277 } 3278 3279 #else // KMP_USE_DYNAMIC_LOCK 3280 3281 // We know that the fast reduction code is only emitted by Intel compilers 3282 // with 32 byte critical sections. If there isn't enough space, then we have 3283 // to use a pointer. 3284 if (__kmp_base_user_lock_size > 32) { 3285 lck = *((kmp_user_lock_p *)crit); 3286 KMP_ASSERT(lck != NULL); 3287 } else { 3288 lck = (kmp_user_lock_p)crit; 3289 } 3290 3291 if (__kmp_env_consistency_check) 3292 __kmp_pop_sync(global_tid, ct_critical, loc); 3293 3294 __kmp_release_user_lock_with_checks(lck, global_tid); 3295 3296 #endif // KMP_USE_DYNAMIC_LOCK 3297 } // __kmp_end_critical_section_reduce_block 3298 3299 static __forceinline int 3300 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3301 int *task_state) { 3302 kmp_team_t *team; 3303 3304 // Check if we are inside the teams construct? 3305 if (th->th.th_teams_microtask) { 3306 *team_p = team = th->th.th_team; 3307 if (team->t.t_level == th->th.th_teams_level) { 3308 // This is reduction at teams construct. 3309 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3310 // Let's swap teams temporarily for the reduction. 3311 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3312 th->th.th_team = team->t.t_parent; 3313 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3314 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3315 *task_state = th->th.th_task_state; 3316 th->th.th_task_state = 0; 3317 3318 return 1; 3319 } 3320 } 3321 return 0; 3322 } 3323 3324 static __forceinline void 3325 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3326 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3327 th->th.th_info.ds.ds_tid = 0; 3328 th->th.th_team = team; 3329 th->th.th_team_nproc = team->t.t_nproc; 3330 th->th.th_task_team = team->t.t_task_team[task_state]; 3331 th->th.th_task_state = task_state; 3332 } 3333 3334 /* 2.a.i. Reduce Block without a terminating barrier */ 3335 /*! 
3336 @ingroup SYNCHRONIZATION 3337 @param loc source location information 3338 @param global_tid global thread number 3339 @param num_vars number of items (variables) to be reduced 3340 @param reduce_size size of data in bytes to be reduced 3341 @param reduce_data pointer to data to be reduced 3342 @param reduce_func callback function providing reduction operation on two 3343 operands and returning result of reduction in lhs_data 3344 @param lck pointer to the unique lock data structure 3345 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3346 threads if atomic reduction needed 3347 3348 The nowait version is used for a reduce clause with the nowait argument. 3349 */ 3350 kmp_int32 3351 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3352 size_t reduce_size, void *reduce_data, 3353 void (*reduce_func)(void *lhs_data, void *rhs_data), 3354 kmp_critical_name *lck) { 3355 3356 KMP_COUNT_BLOCK(REDUCE_nowait); 3357 int retval = 0; 3358 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3359 kmp_info_t *th; 3360 kmp_team_t *team; 3361 int teams_swapped = 0, task_state; 3362 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3363 __kmp_assert_valid_gtid(global_tid); 3364 3365 // why do we need this initialization here at all? 3366 // Reduction clause can not be used as a stand-alone directive. 3367 3368 // do not call __kmp_serial_initialize(), it will be called by 3369 // __kmp_parallel_initialize() if needed 3370 // possible detection of false-positive race by the threadchecker ??? 3371 if (!TCR_4(__kmp_init_parallel)) 3372 __kmp_parallel_initialize(); 3373 3374 __kmp_resume_if_soft_paused(); 3375 3376 // check correctness of reduce block nesting 3377 #if KMP_USE_DYNAMIC_LOCK 3378 if (__kmp_env_consistency_check) 3379 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3380 #else 3381 if (__kmp_env_consistency_check) 3382 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3383 #endif 3384 3385 th = __kmp_thread_from_gtid(global_tid); 3386 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3387 3388 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3389 // the value should be kept in a variable 3390 // the variable should be either a construct-specific or thread-specific 3391 // property, not a team specific property 3392 // (a thread can reach the next reduce block on the next construct, reduce 3393 // method may differ on the next construct) 3394 // an ident_t "loc" parameter could be used as a construct-specific property 3395 // (what if loc == 0?) 
// (if both construct-specific and team-specific variables were shared, 3397 // then unnecessary extra syncs would be needed) 3398 // a thread-specific variable is better regarding two issues above (next 3399 // construct and extra syncs) 3400 // a thread-specific "th_local.reduction_method" variable is used currently 3401 // each thread executes 'determine' and 'set' lines (no need to execute by one 3402 // thread, to avoid unnecessary extra syncs) 3403 3404 packed_reduction_method = __kmp_determine_reduction_method( 3405 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3406 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3407 3408 OMPT_REDUCTION_DECL(th, global_tid); 3409 if (packed_reduction_method == critical_reduce_block) { 3410 3411 OMPT_REDUCTION_BEGIN; 3412 3413 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3414 retval = 1; 3415 3416 } else if (packed_reduction_method == empty_reduce_block) { 3417 3418 OMPT_REDUCTION_BEGIN; 3419 3420 // usage: if team size == 1, no synchronization is required ( Intel 3421 // platforms only ) 3422 retval = 1; 3423 3424 } else if (packed_reduction_method == atomic_reduce_block) { 3425 3426 retval = 2; 3427 3428 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3429 // won't be called by the code gen) 3430 // (it's not quite good, because the checking block has been closed by 3431 // this 'pop', 3432 // but atomic operation has not been executed yet, will be executed 3433 // slightly later, literally on next instruction) 3434 if (__kmp_env_consistency_check) 3435 __kmp_pop_sync(global_tid, ct_reduce, loc); 3436 3437 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3438 tree_reduce_block)) { 3439 3440 // AT: performance issue: a real barrier here 3441 // AT: (if master goes slow, other threads are blocked here waiting for the 3442 // master to come and release them) 3443 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3444 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3445 // be confusing to a customer) 3446 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3447 // might go faster and be more in line with sense of NOWAIT 3448 // AT: TO DO: do epcc test and compare times 3449 3450 // this barrier should be invisible to a customer and to the threading profile 3451 // tool (it's neither a terminating barrier nor customer's code, it's 3452 // used for an internal purpose) 3453 #if OMPT_SUPPORT 3454 // JP: can this barrier potentially lead to task scheduling? 3455 // JP: as long as there is a barrier in the implementation, OMPT should and 3456 // will provide the barrier events 3457 // so we set up the necessary frame/return addresses. 3458 ompt_frame_t *ompt_frame; 3459 if (ompt_enabled.enabled) { 3460 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3461 if (ompt_frame->enter_frame.ptr == NULL) 3462 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3463 OMPT_STORE_RETURN_ADDRESS(global_tid); 3464 } 3465 #endif 3466 #if USE_ITT_NOTIFY 3467 __kmp_threads[global_tid]->th.th_ident = loc; 3468 #endif 3469 retval = 3470 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3471 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3472 retval = (retval != 0) ?
        (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither the master nor the other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // it would actually be better to remove this else-if entirely;
    // after removal this value would be caught by the 'else' and would assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the master gets here
    // OMPT: tree reduction is annotated in the barrier code

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
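
A sketch of how a compiler might lower <tt>reduction(+:sum)</tt> to this entry
point and to __kmpc_end_reduce(). This is illustrative only: the names
(<tt>loc</tt>, <tt>gtid</tt>, <tt>sum</tt>, <tt>priv_sum</tt>) and the combiner
<tt>add_ints</tt> are placeholders, and real code generation is
compiler-specific.

@code
// Combiner emitted by the compiler; the result is returned in lhs_data.
static void add_ints(void *lhs_data, void *rhs_data) {
  *(int *)lhs_data += *(int *)rhs_data;
}
static kmp_critical_name reduce_lock; // unique per reduction construct

// ... in the outlined region, after the thread has accumulated its partial
// result in priv_sum ...
switch (__kmpc_reduce(&loc, gtid, 1, sizeof(int), &priv_sum, add_ints,
                      &reduce_lock)) {
case 1: // this thread folds its partial result into the shared variable
  sum += priv_sum;
  __kmpc_end_reduce(&loc, gtid, &reduce_lock);
  break;
case 2: // combine using an atomic update instead
  // atomically add priv_sum into sum (e.g. via an atomic RMW), then
  __kmpc_end_reduce(&loc, gtid, &reduce_lock);
  break;
case 0: // nothing to do on this thread
  break;
}
@endcode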
3579 */ 3580 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3581 size_t reduce_size, void *reduce_data, 3582 void (*reduce_func)(void *lhs_data, void *rhs_data), 3583 kmp_critical_name *lck) { 3584 KMP_COUNT_BLOCK(REDUCE_wait); 3585 int retval = 0; 3586 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3587 kmp_info_t *th; 3588 kmp_team_t *team; 3589 int teams_swapped = 0, task_state; 3590 3591 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3592 __kmp_assert_valid_gtid(global_tid); 3593 3594 // why do we need this initialization here at all? 3595 // Reduction clause can not be a stand-alone directive. 3596 3597 // do not call __kmp_serial_initialize(), it will be called by 3598 // __kmp_parallel_initialize() if needed 3599 // possible detection of false-positive race by the threadchecker ??? 3600 if (!TCR_4(__kmp_init_parallel)) 3601 __kmp_parallel_initialize(); 3602 3603 __kmp_resume_if_soft_paused(); 3604 3605 // check correctness of reduce block nesting 3606 #if KMP_USE_DYNAMIC_LOCK 3607 if (__kmp_env_consistency_check) 3608 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3609 #else 3610 if (__kmp_env_consistency_check) 3611 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3612 #endif 3613 3614 th = __kmp_thread_from_gtid(global_tid); 3615 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3616 3617 packed_reduction_method = __kmp_determine_reduction_method( 3618 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3619 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3620 3621 OMPT_REDUCTION_DECL(th, global_tid); 3622 3623 if (packed_reduction_method == critical_reduce_block) { 3624 3625 OMPT_REDUCTION_BEGIN; 3626 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3627 retval = 1; 3628 3629 } else if (packed_reduction_method == empty_reduce_block) { 3630 3631 OMPT_REDUCTION_BEGIN; 3632 // usage: if team size == 1, no synchronization is required ( Intel 3633 // platforms only ) 3634 retval = 1; 3635 3636 } else if (packed_reduction_method == atomic_reduce_block) { 3637 3638 retval = 2; 3639 3640 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3641 tree_reduce_block)) { 3642 3643 // case tree_reduce_block: 3644 // this barrier should be visible to a customer and to the threading profile 3645 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3646 #if OMPT_SUPPORT 3647 ompt_frame_t *ompt_frame; 3648 if (ompt_enabled.enabled) { 3649 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3650 if (ompt_frame->enter_frame.ptr == NULL) 3651 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3652 OMPT_STORE_RETURN_ADDRESS(global_tid); 3653 } 3654 #endif 3655 #if USE_ITT_NOTIFY 3656 __kmp_threads[global_tid]->th.th_ident = 3657 loc; // needed for correct notification of frames 3658 #endif 3659 retval = 3660 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3661 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3662 retval = (retval != 0) ? 
(0) : (1); 3663 #if OMPT_SUPPORT && OMPT_OPTIONAL 3664 if (ompt_enabled.enabled) { 3665 ompt_frame->enter_frame = ompt_data_none; 3666 } 3667 #endif 3668 3669 // all other workers except master should do this pop here 3670 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3671 if (__kmp_env_consistency_check) { 3672 if (retval == 0) { // 0: all other workers; 1: master 3673 __kmp_pop_sync(global_tid, ct_reduce, loc); 3674 } 3675 } 3676 3677 } else { 3678 3679 // should never reach this block 3680 KMP_ASSERT(0); // "unexpected method" 3681 } 3682 if (teams_swapped) { 3683 __kmp_restore_swapped_teams(th, team, task_state); 3684 } 3685 3686 KA_TRACE(10, 3687 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3688 global_tid, packed_reduction_method, retval)); 3689 return retval; 3690 } 3691 3692 /*! 3693 @ingroup SYNCHRONIZATION 3694 @param loc source location information 3695 @param global_tid global thread id. 3696 @param lck pointer to the unique lock data structure 3697 3698 Finish the execution of a blocking reduce. 3699 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3700 start function. 3701 */ 3702 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3703 kmp_critical_name *lck) { 3704 3705 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3706 kmp_info_t *th; 3707 kmp_team_t *team; 3708 int teams_swapped = 0, task_state; 3709 3710 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3711 __kmp_assert_valid_gtid(global_tid); 3712 3713 th = __kmp_thread_from_gtid(global_tid); 3714 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3715 3716 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3717 3718 // this barrier should be visible to a customer and to the threading profile 3719 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3720 OMPT_REDUCTION_DECL(th, global_tid); 3721 3722 if (packed_reduction_method == critical_reduce_block) { 3723 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3724 3725 OMPT_REDUCTION_END; 3726 3727 // TODO: implicit barrier: should be exposed 3728 #if OMPT_SUPPORT 3729 ompt_frame_t *ompt_frame; 3730 if (ompt_enabled.enabled) { 3731 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3732 if (ompt_frame->enter_frame.ptr == NULL) 3733 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3734 OMPT_STORE_RETURN_ADDRESS(global_tid); 3735 } 3736 #endif 3737 #if USE_ITT_NOTIFY 3738 __kmp_threads[global_tid]->th.th_ident = loc; 3739 #endif 3740 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3741 #if OMPT_SUPPORT && OMPT_OPTIONAL 3742 if (ompt_enabled.enabled) { 3743 ompt_frame->enter_frame = ompt_data_none; 3744 } 3745 #endif 3746 3747 } else if (packed_reduction_method == empty_reduce_block) { 3748 3749 OMPT_REDUCTION_END; 3750 3751 // usage: if team size==1, no synchronization is required (Intel platforms only) 3752 3753 // TODO: implicit barrier: should be exposed 3754 #if OMPT_SUPPORT 3755 ompt_frame_t *ompt_frame; 3756 if (ompt_enabled.enabled) { 3757 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3758 if (ompt_frame->enter_frame.ptr == NULL) 3759 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3760 OMPT_STORE_RETURN_ADDRESS(global_tid); 3761 } 3762 #endif 3763 #if USE_ITT_NOTIFY 3764 __kmp_threads[global_tid]->th.th_ident = loc; 3765 #endif 3766 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 
3767 #if OMPT_SUPPORT && OMPT_OPTIONAL 3768 if (ompt_enabled.enabled) { 3769 ompt_frame->enter_frame = ompt_data_none; 3770 } 3771 #endif 3772 3773 } else if (packed_reduction_method == atomic_reduce_block) { 3774 3775 #if OMPT_SUPPORT 3776 ompt_frame_t *ompt_frame; 3777 if (ompt_enabled.enabled) { 3778 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3779 if (ompt_frame->enter_frame.ptr == NULL) 3780 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3781 OMPT_STORE_RETURN_ADDRESS(global_tid); 3782 } 3783 #endif 3784 // TODO: implicit barrier: should be exposed 3785 #if USE_ITT_NOTIFY 3786 __kmp_threads[global_tid]->th.th_ident = loc; 3787 #endif 3788 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3789 #if OMPT_SUPPORT && OMPT_OPTIONAL 3790 if (ompt_enabled.enabled) { 3791 ompt_frame->enter_frame = ompt_data_none; 3792 } 3793 #endif 3794 3795 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3796 tree_reduce_block)) { 3797 3798 // only master executes here (master releases all other workers) 3799 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3800 global_tid); 3801 3802 } else { 3803 3804 // should never reach this block 3805 KMP_ASSERT(0); // "unexpected method" 3806 } 3807 if (teams_swapped) { 3808 __kmp_restore_swapped_teams(th, team, task_state); 3809 } 3810 3811 if (__kmp_env_consistency_check) 3812 __kmp_pop_sync(global_tid, ct_reduce, loc); 3813 3814 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3815 global_tid, packed_reduction_method)); 3816 3817 return; 3818 } 3819 3820 #undef __KMP_GET_REDUCTION_METHOD 3821 #undef __KMP_SET_REDUCTION_METHOD 3822 3823 /* end of interface to fast scalable reduce routines */ 3824 3825 kmp_uint64 __kmpc_get_taskid() { 3826 3827 kmp_int32 gtid; 3828 kmp_info_t *thread; 3829 3830 gtid = __kmp_get_gtid(); 3831 if (gtid < 0) { 3832 return 0; 3833 } 3834 thread = __kmp_thread_from_gtid(gtid); 3835 return thread->th.th_current_task->td_task_id; 3836 3837 } // __kmpc_get_taskid 3838 3839 kmp_uint64 __kmpc_get_parent_taskid() { 3840 3841 kmp_int32 gtid; 3842 kmp_info_t *thread; 3843 kmp_taskdata_t *parent_task; 3844 3845 gtid = __kmp_get_gtid(); 3846 if (gtid < 0) { 3847 return 0; 3848 } 3849 thread = __kmp_thread_from_gtid(gtid); 3850 parent_task = thread->th.th_current_task->td_parent; 3851 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3852 3853 } // __kmpc_get_parent_taskid 3854 3855 /*! 3856 @ingroup WORK_SHARING 3857 @param loc source location information. 3858 @param gtid global thread number. 3859 @param num_dims number of associated doacross loops. 3860 @param dims info on loops bounds. 3861 3862 Initialize doacross loop information. 3863 Expect compiler send us inclusive bounds, 3864 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
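
A sketch of how a compiler might lower a one-dimensional doacross loop to the
__kmpc_doacross_* entry points. This is illustrative only: the names
(<tt>loc</tt>, <tt>gtid</tt>, <tt>lb</tt>, <tt>ub</tt>) are placeholders and
the exact lowering, including how iterations are distributed among threads,
is compiler-specific.

@code
// Source:
//   #pragma omp for ordered(1)
//   for (i = 2; i < 9; i += 2) {
//     #pragma omp ordered depend(sink : i - 2)
//     ... use the result produced by iteration i - 2 ...
//     #pragma omp ordered depend(source)
//   }
struct kmp_dim dims;
dims.lo = 2; // lower bound (inclusive)
dims.up = 8; // upper bound (inclusive)
dims.st = 2; // stride
__kmpc_doacross_init(&loc, gtid, 1, &dims);
for (kmp_int64 i = lb; i <= ub; i += 2) { // iterations assigned to this thread
  kmp_int64 sink = i - 2;
  __kmpc_doacross_wait(&loc, gtid, &sink); // block until iteration i-2 posted
  // ... ordered body ...
  kmp_int64 src = i;
  __kmpc_doacross_post(&loc, gtid, &src); // mark iteration i as completed
}
__kmpc_doacross_fini(&loc, gtid);
@endcode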
3865 */ 3866 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3867 const struct kmp_dim *dims) { 3868 __kmp_assert_valid_gtid(gtid); 3869 int j, idx; 3870 kmp_int64 last, trace_count; 3871 kmp_info_t *th = __kmp_threads[gtid]; 3872 kmp_team_t *team = th->th.th_team; 3873 kmp_uint32 *flags; 3874 kmp_disp_t *pr_buf = th->th.th_dispatch; 3875 dispatch_shared_info_t *sh_buf; 3876 3877 KA_TRACE( 3878 20, 3879 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3880 gtid, num_dims, !team->t.t_serialized)); 3881 KMP_DEBUG_ASSERT(dims != NULL); 3882 KMP_DEBUG_ASSERT(num_dims > 0); 3883 3884 if (team->t.t_serialized) { 3885 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3886 return; // no dependencies if team is serialized 3887 } 3888 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3889 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3890 // the next loop 3891 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3892 3893 // Save bounds info into allocated private buffer 3894 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3895 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3896 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3897 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3898 pr_buf->th_doacross_info[0] = 3899 (kmp_int64)num_dims; // first element is number of dimensions 3900 // Save also address of num_done in order to access it later without knowing 3901 // the buffer index 3902 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3903 pr_buf->th_doacross_info[2] = dims[0].lo; 3904 pr_buf->th_doacross_info[3] = dims[0].up; 3905 pr_buf->th_doacross_info[4] = dims[0].st; 3906 last = 5; 3907 for (j = 1; j < num_dims; ++j) { 3908 kmp_int64 3909 range_length; // To keep ranges of all dimensions but the first dims[0] 3910 if (dims[j].st == 1) { // most common case 3911 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3912 range_length = dims[j].up - dims[j].lo + 1; 3913 } else { 3914 if (dims[j].st > 0) { 3915 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3916 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3917 } else { // negative increment 3918 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3919 range_length = 3920 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3921 } 3922 } 3923 pr_buf->th_doacross_info[last++] = range_length; 3924 pr_buf->th_doacross_info[last++] = dims[j].lo; 3925 pr_buf->th_doacross_info[last++] = dims[j].up; 3926 pr_buf->th_doacross_info[last++] = dims[j].st; 3927 } 3928 3929 // Compute total trip count. 3930 // Start with range of dims[0] which we don't need to keep in the buffer. 
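  // E.g. with the example from the doc comment (lo=2, up=8, st=2), the first
  // dimension contributes (8 - 2) / 2 + 1 = 4 iterations; the loop below then
  // multiplies in the range lengths already saved for dims[1..num_dims-1].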
3931 if (dims[0].st == 1) { // most common case 3932 trace_count = dims[0].up - dims[0].lo + 1; 3933 } else if (dims[0].st > 0) { 3934 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3935 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3936 } else { // negative increment 3937 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3938 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3939 } 3940 for (j = 1; j < num_dims; ++j) { 3941 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3942 } 3943 KMP_DEBUG_ASSERT(trace_count > 0); 3944 3945 // Check if shared buffer is not occupied by other loop (idx - 3946 // __kmp_dispatch_num_buffers) 3947 if (idx != sh_buf->doacross_buf_idx) { 3948 // Shared buffer is occupied, wait for it to be free 3949 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3950 __kmp_eq_4, NULL); 3951 } 3952 #if KMP_32_BIT_ARCH 3953 // Check if we are the first thread. After the CAS the first thread gets 0, 3954 // others get 1 if initialization is in progress, allocated pointer otherwise. 3955 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3956 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3957 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3958 #else 3959 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3960 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3961 #endif 3962 if (flags == NULL) { 3963 // we are the first thread, allocate the array of flags 3964 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3965 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3966 KMP_MB(); 3967 sh_buf->doacross_flags = flags; 3968 } else if (flags == (kmp_uint32 *)1) { 3969 #if KMP_32_BIT_ARCH 3970 // initialization is still in progress, need to wait 3971 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 3972 #else 3973 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 3974 #endif 3975 KMP_YIELD(TRUE); 3976 KMP_MB(); 3977 } else { 3978 KMP_MB(); 3979 } 3980 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 3981 pr_buf->th_doacross_flags = 3982 sh_buf->doacross_flags; // save private copy in order to not 3983 // touch shared buffer on each iteration 3984 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 3985 } 3986 3987 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 3988 __kmp_assert_valid_gtid(gtid); 3989 kmp_int32 shft, num_dims, i; 3990 kmp_uint32 flag; 3991 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 3992 kmp_info_t *th = __kmp_threads[gtid]; 3993 kmp_team_t *team = th->th.th_team; 3994 kmp_disp_t *pr_buf; 3995 kmp_int64 lo, up, st; 3996 3997 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 3998 if (team->t.t_serialized) { 3999 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4000 return; // no dependencies if team is serialized 4001 } 4002 4003 // calculate sequential iteration number and check out-of-bounds condition 4004 pr_buf = th->th.th_dispatch; 4005 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4006 num_dims = pr_buf->th_doacross_info[0]; 4007 lo = pr_buf->th_doacross_info[2]; 4008 up = pr_buf->th_doacross_info[3]; 4009 st = pr_buf->th_doacross_info[4]; 4010 #if OMPT_SUPPORT && OMPT_OPTIONAL 4011 ompt_dependence_t deps[num_dims]; 4012 #endif 4013 if (st == 1) { // most common case 4014 if (vec[0] < lo || vec[0] > up) { 4015 KA_TRACE(20, ("__kmpc_doacross_wait() 
exit: T#%d iter %lld is out of " 4016 "bounds [%lld,%lld]\n", 4017 gtid, vec[0], lo, up)); 4018 return; 4019 } 4020 iter_number = vec[0] - lo; 4021 } else if (st > 0) { 4022 if (vec[0] < lo || vec[0] > up) { 4023 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4024 "bounds [%lld,%lld]\n", 4025 gtid, vec[0], lo, up)); 4026 return; 4027 } 4028 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4029 } else { // negative increment 4030 if (vec[0] > lo || vec[0] < up) { 4031 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4032 "bounds [%lld,%lld]\n", 4033 gtid, vec[0], lo, up)); 4034 return; 4035 } 4036 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4037 } 4038 #if OMPT_SUPPORT && OMPT_OPTIONAL 4039 deps[0].variable.value = iter_number; 4040 deps[0].dependence_type = ompt_dependence_type_sink; 4041 #endif 4042 for (i = 1; i < num_dims; ++i) { 4043 kmp_int64 iter, ln; 4044 kmp_int32 j = i * 4; 4045 ln = pr_buf->th_doacross_info[j + 1]; 4046 lo = pr_buf->th_doacross_info[j + 2]; 4047 up = pr_buf->th_doacross_info[j + 3]; 4048 st = pr_buf->th_doacross_info[j + 4]; 4049 if (st == 1) { 4050 if (vec[i] < lo || vec[i] > up) { 4051 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4052 "bounds [%lld,%lld]\n", 4053 gtid, vec[i], lo, up)); 4054 return; 4055 } 4056 iter = vec[i] - lo; 4057 } else if (st > 0) { 4058 if (vec[i] < lo || vec[i] > up) { 4059 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4060 "bounds [%lld,%lld]\n", 4061 gtid, vec[i], lo, up)); 4062 return; 4063 } 4064 iter = (kmp_uint64)(vec[i] - lo) / st; 4065 } else { // st < 0 4066 if (vec[i] > lo || vec[i] < up) { 4067 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4068 "bounds [%lld,%lld]\n", 4069 gtid, vec[i], lo, up)); 4070 return; 4071 } 4072 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4073 } 4074 iter_number = iter + ln * iter_number; 4075 #if OMPT_SUPPORT && OMPT_OPTIONAL 4076 deps[i].variable.value = iter; 4077 deps[i].dependence_type = ompt_dependence_type_sink; 4078 #endif 4079 } 4080 shft = iter_number % 32; // use 32-bit granularity 4081 iter_number >>= 5; // divided by 32 4082 flag = 1 << shft; 4083 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4084 KMP_YIELD(TRUE); 4085 } 4086 KMP_MB(); 4087 #if OMPT_SUPPORT && OMPT_OPTIONAL 4088 if (ompt_enabled.ompt_callback_dependences) { 4089 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4090 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4091 } 4092 #endif 4093 KA_TRACE(20, 4094 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4095 gtid, (iter_number << 5) + shft)); 4096 } 4097 4098 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4099 __kmp_assert_valid_gtid(gtid); 4100 kmp_int32 shft, num_dims, i; 4101 kmp_uint32 flag; 4102 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4103 kmp_info_t *th = __kmp_threads[gtid]; 4104 kmp_team_t *team = th->th.th_team; 4105 kmp_disp_t *pr_buf; 4106 kmp_int64 lo, st; 4107 4108 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4109 if (team->t.t_serialized) { 4110 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4111 return; // no dependencies if team is serialized 4112 } 4113 4114 // calculate sequential iteration number (same as in "wait" but no 4115 // out-of-bounds checks) 4116 pr_buf = th->th.th_dispatch; 4117 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4118 num_dims = pr_buf->th_doacross_info[0]; 
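  // The code below folds the multi-dimensional iteration vector "vec" into a
  // single linear index: the running index is multiplied by each dimension's
  // saved range length before that dimension's normalized iteration is added,
  // matching the linearization done in __kmpc_doacross_wait().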
4119 lo = pr_buf->th_doacross_info[2]; 4120 st = pr_buf->th_doacross_info[4]; 4121 #if OMPT_SUPPORT && OMPT_OPTIONAL 4122 ompt_dependence_t deps[num_dims]; 4123 #endif 4124 if (st == 1) { // most common case 4125 iter_number = vec[0] - lo; 4126 } else if (st > 0) { 4127 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4128 } else { // negative increment 4129 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4130 } 4131 #if OMPT_SUPPORT && OMPT_OPTIONAL 4132 deps[0].variable.value = iter_number; 4133 deps[0].dependence_type = ompt_dependence_type_source; 4134 #endif 4135 for (i = 1; i < num_dims; ++i) { 4136 kmp_int64 iter, ln; 4137 kmp_int32 j = i * 4; 4138 ln = pr_buf->th_doacross_info[j + 1]; 4139 lo = pr_buf->th_doacross_info[j + 2]; 4140 st = pr_buf->th_doacross_info[j + 4]; 4141 if (st == 1) { 4142 iter = vec[i] - lo; 4143 } else if (st > 0) { 4144 iter = (kmp_uint64)(vec[i] - lo) / st; 4145 } else { // st < 0 4146 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4147 } 4148 iter_number = iter + ln * iter_number; 4149 #if OMPT_SUPPORT && OMPT_OPTIONAL 4150 deps[i].variable.value = iter; 4151 deps[i].dependence_type = ompt_dependence_type_source; 4152 #endif 4153 } 4154 #if OMPT_SUPPORT && OMPT_OPTIONAL 4155 if (ompt_enabled.ompt_callback_dependences) { 4156 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4157 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); 4158 } 4159 #endif 4160 shft = iter_number % 32; // use 32-bit granularity 4161 iter_number >>= 5; // divided by 32 4162 flag = 1 << shft; 4163 KMP_MB(); 4164 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4165 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4166 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4167 (iter_number << 5) + shft)); 4168 } 4169 4170 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4171 __kmp_assert_valid_gtid(gtid); 4172 kmp_int32 num_done; 4173 kmp_info_t *th = __kmp_threads[gtid]; 4174 kmp_team_t *team = th->th.th_team; 4175 kmp_disp_t *pr_buf = th->th.th_dispatch; 4176 4177 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4178 if (team->t.t_serialized) { 4179 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4180 return; // nothing to do 4181 } 4182 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4183 if (num_done == th->th.th_team_nproc) { 4184 // we are the last thread, need to free shared resources 4185 int idx = pr_buf->th_doacross_buf_idx - 1; 4186 dispatch_shared_info_t *sh_buf = 4187 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4188 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4189 (kmp_int64)&sh_buf->doacross_num_done); 4190 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4191 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4192 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4193 sh_buf->doacross_flags = NULL; 4194 sh_buf->doacross_num_done = 0; 4195 sh_buf->doacross_buf_idx += 4196 __kmp_dispatch_num_buffers; // free buffer for future re-use 4197 } 4198 // free private resources (need to keep buffer index forever) 4199 pr_buf->th_doacross_flags = NULL; 4200 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4201 pr_buf->th_doacross_info = NULL; 4202 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4203 } 4204 4205 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */ 4206 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4207 return 
__kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4208 } 4209 4210 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4211 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4212 } 4213 4214 int __kmpc_get_target_offload(void) { 4215 if (!__kmp_init_serial) { 4216 __kmp_serial_initialize(); 4217 } 4218 return __kmp_target_offload; 4219 } 4220 4221 int __kmpc_pause_resource(kmp_pause_status_t level) { 4222 if (!__kmp_init_serial) { 4223 return 1; // Can't pause if runtime is not initialized 4224 } 4225 return __kmp_pause_resource(level); 4226 } 4227
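
/* Usage sketch (user code, not part of the runtime): omp_alloc/omp_free pair
   an allocation with an allocator handle declared in omp.h. This is
   illustrative only; whether a failed allocation falls back or returns NULL
   depends on the allocator's fallback trait.

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/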