/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
  // root (it can cause library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
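
/* Illustrative note (not part of the runtime, and not how user code should be
   written): inside an active parallel region the two "bound" entry points
   above agree with the user-level API, while the global thread number is
   unique across all runtime threads. A minimal sketch, where loc stands for a
   hypothetical compiler-generated location descriptor:
   @code
   #pragma omp parallel
   {
     assert(__kmpc_bound_thread_num(&loc) == omp_get_thread_num());
     assert(__kmpc_bound_num_threads(&loc) == omp_get_num_threads());
   }
   @endcode
*/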

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
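
/* Illustrative sketch (not part of the runtime): for a directive such as
   @code
   #pragma omp parallel num_threads(4) shared(a)
   { body(a); }
   @endcode
   a compiler typically outlines the body into a microtask and emits roughly
   @code
   void body_outlined(kmp_int32 *gtid, kmp_int32 *btid, int *a) { body(*a); }
   ...
   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
   __kmpc_push_num_threads(&loc, gtid, 4); // only when num_threads() is present
   __kmpc_fork_call(&loc, 1, (kmpc_micro)body_outlined, &a);
   @endcode
   body_outlined/loc/a are hypothetical names; the exact calling sequence is
   compiler-dependent. */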

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
#endif /* OMP_40_ENABLED */
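
/* Illustrative sketch (not part of the runtime): for
   @code
   #pragma omp teams num_teams(2) thread_limit(8)
   { teams_body(); }
   @endcode
   a compiler might emit roughly
   @code
   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
   __kmpc_push_num_teams(&loc, gtid, 2, 8);
   __kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_body_outlined);
   @endcode
   teams_body_outlined/loc are hypothetical names and the exact sequence is
   compiler-dependent; without the clauses the push call is omitted and the
   defaults are chosen in __kmpc_fork_teams() above. */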

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// The implementation is now in kmp_runtime.cpp so that it can share static
// functions with kmp_fork_call since the tasks to be done are similar in
// each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset/clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

/* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}
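
/* Illustrative sketch (not part of the runtime): for
   @code
   #pragma omp parallel if (cond)
   { body(); }
   @endcode
   a compiler may emit a runtime test and, on the false branch, run the
   outlined body serially between the two calls above, roughly
   @code
   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
   if (cond) {
     __kmpc_fork_call(&loc, 0, (kmpc_micro)body_outlined);
   } else {
     __kmpc_serialized_parallel(&loc, gtid);
     kmp_int32 zero = 0;
     body_outlined(&gtid, &zero); // executed by the encountering thread only
     __kmpc_end_serialized_parallel(&loc, gtid);
   }
   @endcode
   body_outlined/loc/cond are hypothetical names; the exact sequence is
   compiler-dependent. */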

/*!
@ingroup SYNCHRONIZATION
@param loc  source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
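
/* Illustrative sketch (not part of the runtime): a compiler typically lowers
   @code
   #pragma omp flush
   @endcode
   to a single call such as
   @code
   __kmpc_flush(&loc);
   @endcode
   where loc is the hypothetical compiler-generated location descriptor; a
   flush-set, if present, does not normally change the call. */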

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  //   this function is called when 'barrier' directive is present or
  //   implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
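
/* Illustrative sketch (not part of the runtime): for
   @code
   #pragma omp master
   { only_master(); }
   #pragma omp barrier
   @endcode
   a compiler might emit roughly
   @code
   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
   if (__kmpc_master(&loc, gtid)) {
     only_master();
     __kmpc_end_master(&loc, gtid);
   }
   __kmpc_barrier(&loc, gtid); // 'master' itself implies no barrier
   @endcode
   only_master/loc are hypothetical names; the exact sequence is
   compiler-dependent. */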

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param gtid  global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param gtid  global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
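
/* Illustrative sketch (not part of the runtime): inside an ordered loop such as
   @code
   #pragma omp for ordered
   for (int i = lb; i <= ub; ++i) {
     compute(i);
   #pragma omp ordered
     { emit(i); }
   }
   @endcode
   the body of the 'ordered' region is typically bracketed by the two calls
   above, roughly
   @code
   __kmpc_ordered(&loc, gtid);
   emit(i);
   __kmpc_end_ordered(&loc, gtid);
   @endcode
   with the enclosing loop lowered through the dispatch/for-static entry points
   defined elsewhere in the runtime. compute/emit/loc are hypothetical names. */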

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
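
/* Illustrative sketch (not part of the runtime): the test-and-set fast path
   above amounts to a relaxed "test" load followed by an acquire
   compare-and-swap, with yield/backoff while contended. A standalone C++
   analogue using std::atomic (hypothetical types, not the runtime's):
   @code
   #include <atomic>
   #include <thread>
   struct tas_lock { std::atomic<int> poll{0}; }; // 0 == free
   inline void tas_acquire(tas_lock &l, int owner) {
     int free_val = 0;
     while (l.poll.load(std::memory_order_relaxed) != 0 ||
            !l.poll.compare_exchange_weak(free_val, owner + 1,
                                          std::memory_order_acquire,
                                          std::memory_order_relaxed)) {
       free_val = 0;              // CAS may have overwritten the expected value
       std::this_thread::yield(); // stands in for KMP_YIELD / backoff
     }
   }
   inline void tas_release(tas_lock &l) {
     l.poll.store(0, std::memory_order_release);
   }
   @endcode
*/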

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by user but rather setting
// name for object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param global_tid  global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

// since the critical directive binds to all threads, not just the current
// team we have to check this even if we are in a serialized team.
// also, even if we are the uber thread, we still have to conduct the lock,
// as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param global_tid  global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK
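
/* Illustrative sketch (not part of the runtime): OpenMP 4.5 allows
   @code
   #pragma omp critical (name) hint(omp_lock_hint_speculative)
   { update(); }
   @endcode
   which a compiler may lower to the hinted entry point above, roughly
   @code
   static kmp_critical_name crit_name; // zero-initialized, shared per name
   __kmpc_critical_with_hint(&loc, gtid, &crit_name, omp_lock_hint_speculative);
   update();
   __kmpc_end_critical(&loc, gtid, &crit_name);
   @endcode
   crit_name/update/loc are hypothetical names; without a hint clause the
   plain __kmpc_critical() entry point is used instead. */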

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param global_tid  global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
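
/* Illustrative sketch (not part of the runtime): for an unnamed critical
   @code
   #pragma omp critical
   { counter++; }
   @endcode
   a compiler typically shares one zero-initialized kmp_critical_name per
   construct and emits roughly
   @code
   static kmp_critical_name crit_unnamed; // hypothetical name
   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
   __kmpc_critical(&loc, gtid, &crit_unnamed);
   counter++;
   __kmpc_end_critical(&loc, gtid, &crit_unnamed);
   @endcode
   The runtime lazily installs a lock in crit_unnamed on first use, as shown
   in __kmpc_critical() / __kmp_get_critical_section_ptr() above. */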

/*!
@ingroup SYNCHRONIZATION
@param loc  source location information
@param global_tid  thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc  source location information
@param global_tid  thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc  source location information
@param global_tid  thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the bookkeeping otherwise done by
__kmpc_end_master() is performed here and no end-of-construct synchronization
is required.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1706 } 1707 __kmp_check_barrier(global_tid, ct_barrier, loc); 1708 } 1709 1710 #if OMPT_SUPPORT 1711 ompt_frame_t *ompt_frame; 1712 if (ompt_enabled.enabled) { 1713 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1714 if (ompt_frame->enter_frame.ptr == NULL) 1715 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1716 OMPT_STORE_RETURN_ADDRESS(global_tid); 1717 } 1718 #endif 1719 #if USE_ITT_NOTIFY 1720 __kmp_threads[global_tid]->th.th_ident = loc; 1721 #endif 1722 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1723 #if OMPT_SUPPORT && OMPT_OPTIONAL 1724 if (ompt_enabled.enabled) { 1725 ompt_frame->enter_frame = ompt_data_none; 1726 } 1727 #endif 1728 1729 ret = __kmpc_master(loc, global_tid); 1730 1731 if (__kmp_env_consistency_check) { 1732 /* there's no __kmpc_end_master called; so the (stats) */ 1733 /* actions of __kmpc_end_master are done here */ 1734 1735 if (global_tid < 0) { 1736 KMP_WARNING(ThreadIdentInvalid); 1737 } 1738 if (ret) { 1739 /* only one thread should do the pop since only */ 1740 /* one did the push (see __kmpc_master()) */ 1741 1742 __kmp_pop_sync(global_tid, ct_master, loc); 1743 } 1744 } 1745 1746 return (ret); 1747 } 1748 1749 /* The BARRIER for a SINGLE process section is always explicit */ 1750 /*! 1751 @ingroup WORK_SHARING 1752 @param loc source location information 1753 @param global_tid global thread number 1754 @return One if this thread should execute the single construct, zero otherwise. 1755 1756 Test whether to execute a <tt>single</tt> construct. 1757 There are no implicit barriers in the two "single" calls, rather the compiler 1758 should introduce an explicit barrier if it is required. 1759 */ 1760 1761 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1762 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1763 1764 if (rc) { 1765 // We are going to execute the single statement, so we should count it. 1766 KMP_COUNT_BLOCK(OMP_SINGLE); 1767 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1768 } 1769 1770 #if OMPT_SUPPORT && OMPT_OPTIONAL 1771 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1772 kmp_team_t *team = this_thr->th.th_team; 1773 int tid = __kmp_tid_from_gtid(global_tid); 1774 1775 if (ompt_enabled.enabled) { 1776 if (rc) { 1777 if (ompt_enabled.ompt_callback_work) { 1778 ompt_callbacks.ompt_callback(ompt_callback_work)( 1779 ompt_work_single_executor, ompt_scope_begin, 1780 &(team->t.ompt_team_info.parallel_data), 1781 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1782 1, OMPT_GET_RETURN_ADDRESS(0)); 1783 } 1784 } else { 1785 if (ompt_enabled.ompt_callback_work) { 1786 ompt_callbacks.ompt_callback(ompt_callback_work)( 1787 ompt_work_single_other, ompt_scope_begin, 1788 &(team->t.ompt_team_info.parallel_data), 1789 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1790 1, OMPT_GET_RETURN_ADDRESS(0)); 1791 ompt_callbacks.ompt_callback(ompt_callback_work)( 1792 ompt_work_single_other, ompt_scope_end, 1793 &(team->t.ompt_team_info.parallel_data), 1794 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1795 1, OMPT_GET_RETURN_ADDRESS(0)); 1796 } 1797 } 1798 } 1799 #endif 1800 1801 return rc; 1802 } 1803 1804 /*! 1805 @ingroup WORK_SHARING 1806 @param loc source location information 1807 @param global_tid global thread number 1808 1809 Mark the end of a <tt>single</tt> construct. This function should 1810 only be called by the thread that executed the block of code protected 1811 by the `single` construct. 
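For illustration, a hedged sketch of how the pair __kmpc_single / __kmpc_end_single might be driven by generated code; <tt>loc</tt> and <tt>gtid</tt> are assumed to be available as usual, <tt>do_single_work()</tt> is a hypothetical placeholder, and the trailing __kmpc_barrier call is the explicit barrier mentioned above, which would be omitted when <tt>nowait</tt> is specified:
@code
if (__kmpc_single(loc, gtid)) {
  do_single_work();             // hypothetical body of the single region
  __kmpc_end_single(loc, gtid); // called only by the thread that executed the block
}
__kmpc_barrier(loc, gtid);      // explicit barrier; omitted for single nowait
@endcode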
1812 */ 1813 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1814 __kmp_exit_single(global_tid); 1815 KMP_POP_PARTITIONED_TIMER(); 1816 1817 #if OMPT_SUPPORT && OMPT_OPTIONAL 1818 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1819 kmp_team_t *team = this_thr->th.th_team; 1820 int tid = __kmp_tid_from_gtid(global_tid); 1821 1822 if (ompt_enabled.ompt_callback_work) { 1823 ompt_callbacks.ompt_callback(ompt_callback_work)( 1824 ompt_work_single_executor, ompt_scope_end, 1825 &(team->t.ompt_team_info.parallel_data), 1826 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1827 OMPT_GET_RETURN_ADDRESS(0)); 1828 } 1829 #endif 1830 } 1831 1832 /*! 1833 @ingroup WORK_SHARING 1834 @param loc Source location 1835 @param global_tid Global thread id 1836 1837 Mark the end of a statically scheduled loop. 1838 */ 1839 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1840 KMP_POP_PARTITIONED_TIMER(); 1841 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1842 1843 #if OMPT_SUPPORT && OMPT_OPTIONAL 1844 if (ompt_enabled.ompt_callback_work) { 1845 ompt_work_t ompt_work_type = ompt_work_loop; 1846 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1847 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1848 // Determine workshare type 1849 if (loc != NULL) { 1850 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1851 ompt_work_type = ompt_work_loop; 1852 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1853 ompt_work_type = ompt_work_sections; 1854 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1855 ompt_work_type = ompt_work_distribute; 1856 } else { 1857 // use default set above. 1858 // a warning about this case is provided in __kmpc_for_static_init 1859 } 1860 KMP_DEBUG_ASSERT(ompt_work_type); 1861 } 1862 ompt_callbacks.ompt_callback(ompt_callback_work)( 1863 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1864 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1865 } 1866 #endif 1867 if (__kmp_env_consistency_check) 1868 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1869 } 1870 1871 // User routines which take C-style arguments (call by value) 1872 // different from the Fortran equivalent routines 1873 1874 void ompc_set_num_threads(int arg) { 1875 // !!!!! TODO: check the per-task binding 1876 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1877 } 1878 1879 void ompc_set_dynamic(int flag) { 1880 kmp_info_t *thread; 1881 1882 /* For the thread-private implementation of the internal controls */ 1883 thread = __kmp_entry_thread(); 1884 1885 __kmp_save_internal_controls(thread); 1886 1887 set__dynamic(thread, flag ? TRUE : FALSE); 1888 } 1889 1890 void ompc_set_nested(int flag) { 1891 kmp_info_t *thread; 1892 1893 /* For the thread-private internal controls implementation */ 1894 thread = __kmp_entry_thread(); 1895 1896 __kmp_save_internal_controls(thread); 1897 1898 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1899 } 1900 1901 void ompc_set_max_active_levels(int max_active_levels) { 1902 /* TO DO */ 1903 /* we want per-task implementation of this internal control */ 1904 1905 /* For the per-thread internal controls implementation */ 1906 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1907 } 1908 1909 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1910 // !!!!! 
TODO: check the per-task binding 1911 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1912 } 1913 1914 int ompc_get_ancestor_thread_num(int level) { 1915 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1916 } 1917 1918 int ompc_get_team_size(int level) { 1919 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1920 } 1921 1922 #if OMP_50_ENABLED 1923 /* OpenMP 5.0 Affinity Format API */ 1924 1925 void ompc_set_affinity_format(char const *format) { 1926 if (!__kmp_init_serial) { 1927 __kmp_serial_initialize(); 1928 } 1929 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1930 format, KMP_STRLEN(format) + 1); 1931 } 1932 1933 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1934 size_t format_size; 1935 if (!__kmp_init_serial) { 1936 __kmp_serial_initialize(); 1937 } 1938 format_size = KMP_STRLEN(__kmp_affinity_format); 1939 if (buffer && size) { 1940 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1941 format_size + 1); 1942 } 1943 return format_size; 1944 } 1945 1946 void ompc_display_affinity(char const *format) { 1947 int gtid; 1948 if (!TCR_4(__kmp_init_middle)) { 1949 __kmp_middle_initialize(); 1950 } 1951 gtid = __kmp_get_gtid(); 1952 __kmp_aux_display_affinity(gtid, format); 1953 } 1954 1955 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1956 char const *format) { 1957 int gtid; 1958 size_t num_required; 1959 kmp_str_buf_t capture_buf; 1960 if (!TCR_4(__kmp_init_middle)) { 1961 __kmp_middle_initialize(); 1962 } 1963 gtid = __kmp_get_gtid(); 1964 __kmp_str_buf_init(&capture_buf); 1965 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1966 if (buffer && buf_size) { 1967 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1968 capture_buf.used + 1); 1969 } 1970 __kmp_str_buf_free(&capture_buf); 1971 return num_required; 1972 } 1973 #endif /* OMP_50_ENABLED */ 1974 1975 void kmpc_set_stacksize(int arg) { 1976 // __kmp_aux_set_stacksize initializes the library if needed 1977 __kmp_aux_set_stacksize(arg); 1978 } 1979 1980 void kmpc_set_stacksize_s(size_t arg) { 1981 // __kmp_aux_set_stacksize initializes the library if needed 1982 __kmp_aux_set_stacksize(arg); 1983 } 1984 1985 void kmpc_set_blocktime(int arg) { 1986 int gtid, tid; 1987 kmp_info_t *thread; 1988 1989 gtid = __kmp_entry_gtid(); 1990 tid = __kmp_tid_from_gtid(gtid); 1991 thread = __kmp_thread_from_gtid(gtid); 1992 1993 __kmp_aux_set_blocktime(arg, thread, tid); 1994 } 1995 1996 void kmpc_set_library(int arg) { 1997 // __kmp_user_set_library initializes the library if needed 1998 __kmp_user_set_library((enum library_type)arg); 1999 } 2000 2001 void kmpc_set_defaults(char const *str) { 2002 // __kmp_aux_set_defaults initializes the library if needed 2003 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2004 } 2005 2006 void kmpc_set_disp_num_buffers(int arg) { 2007 // ignore after initialization because some teams have already 2008 // allocated dispatch buffers 2009 if (__kmp_init_serial == 0 && arg > 0) 2010 __kmp_dispatch_num_buffers = arg; 2011 } 2012 2013 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2014 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2015 return -1; 2016 #else 2017 if (!TCR_4(__kmp_init_middle)) { 2018 __kmp_middle_initialize(); 2019 } 2020 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2021 #endif 2022 } 2023 2024 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2025 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2026 return -1; 2027 #else 2028 if 
(!TCR_4(__kmp_init_middle)) { 2029 __kmp_middle_initialize(); 2030 } 2031 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2032 #endif 2033 } 2034 2035 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2036 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2037 return -1; 2038 #else 2039 if (!TCR_4(__kmp_init_middle)) { 2040 __kmp_middle_initialize(); 2041 } 2042 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2043 #endif 2044 } 2045 2046 /* -------------------------------------------------------------------------- */ 2047 /*! 2048 @ingroup THREADPRIVATE 2049 @param loc source location information 2050 @param gtid global thread number 2051 @param cpy_size size of the cpy_data buffer 2052 @param cpy_data pointer to data to be copied 2053 @param cpy_func helper function to call for copying data 2054 @param didit flag variable: 1=single thread; 0=not single thread 2055 2056 __kmpc_copyprivate implements the interface for the private data broadcast 2057 needed for the copyprivate clause associated with a single region in an 2058 OpenMP<sup>*</sup> program (both C and Fortran). 2059 All threads participating in the parallel region call this routine. 2060 One of the threads (called the single thread) should have the <tt>didit</tt> 2061 variable set to 1 and all other threads should have that variable set to 0. 2062 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2063 2064 The OpenMP specification forbids the use of nowait on the single region when a 2065 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2066 barrier internally to avoid race conditions, so the code generation for the 2067 single region should avoid generating a barrier after the call to @ref 2068 __kmpc_copyprivate. 2069 2070 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2071 The <tt>loc</tt> parameter is a pointer to source location information. 2072 2073 Internal implementation: The single thread will first copy its descriptor 2074 address (cpy_data) to a team-private location, then the other threads will each 2075 call the function pointed to by the parameter cpy_func, which carries out the 2076 copy by copying the data using the cpy_data buffer. 2077 2078 The cpy_func routine used for the copy and the contents of the data area defined 2079 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2080 to be done. For instance, the cpy_data buffer can hold the actual data to be 2081 copied or it may hold a list of pointers to the data. The cpy_func routine must 2082 interpret the cpy_data buffer appropriately. 2083 2084 The interface to cpy_func is as follows: 2085 @code 2086 void cpy_func( void *destination, void *source ) 2087 @endcode 2088 where void *destination is the cpy_data pointer for the thread being copied to 2089 and void *source is the cpy_data pointer for the thread being copied from. 
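For illustration, a hedged sketch of broadcasting a single <tt>int</tt> through this interface; <tt>copy_int</tt>, <tt>my_value</tt> and <tt>compute_value</tt> are hypothetical names, and <tt>loc</tt>/<tt>gtid</tt> are assumed to be available in the outlined region:
@code
// Copies the single thread's value into this thread's private copy.
static void copy_int(void *destination, void *source) {
  *(int *)destination = *(int *)source;
}

int my_value = 0;
kmp_int32 didit = __kmpc_single(loc, gtid);
if (didit) {
  my_value = compute_value();   // work done by the single thread only
  __kmpc_end_single(loc, gtid);
}
// No barrier is generated after this call; __kmpc_copyprivate synchronizes internally.
__kmpc_copyprivate(loc, gtid, sizeof(my_value), &my_value, copy_int, didit);
@endcode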
2090 */ 2091 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2092 void *cpy_data, void (*cpy_func)(void *, void *), 2093 kmp_int32 didit) { 2094 void **data_ptr; 2095 2096 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2097 2098 KMP_MB(); 2099 2100 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2101 2102 if (__kmp_env_consistency_check) { 2103 if (loc == 0) { 2104 KMP_WARNING(ConstructIdentInvalid); 2105 } 2106 } 2107 2108 // ToDo: Optimize the following two barriers into some kind of split barrier 2109 2110 if (didit) 2111 *data_ptr = cpy_data; 2112 2113 #if OMPT_SUPPORT 2114 ompt_frame_t *ompt_frame; 2115 if (ompt_enabled.enabled) { 2116 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2117 if (ompt_frame->enter_frame.ptr == NULL) 2118 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2119 OMPT_STORE_RETURN_ADDRESS(gtid); 2120 } 2121 #endif 2122 /* This barrier is not a barrier region boundary */ 2123 #if USE_ITT_NOTIFY 2124 __kmp_threads[gtid]->th.th_ident = loc; 2125 #endif 2126 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2127 2128 if (!didit) 2129 (*cpy_func)(cpy_data, *data_ptr); 2130 2131 // Consider next barrier a user-visible barrier for barrier region boundaries 2132 // Nesting checks are already handled by the single construct checks 2133 2134 #if OMPT_SUPPORT 2135 if (ompt_enabled.enabled) { 2136 OMPT_STORE_RETURN_ADDRESS(gtid); 2137 } 2138 #endif 2139 #if USE_ITT_NOTIFY 2140 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2141 // tasks can overwrite the location) 2142 #endif 2143 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2144 #if OMPT_SUPPORT && OMPT_OPTIONAL 2145 if (ompt_enabled.enabled) { 2146 ompt_frame->enter_frame = ompt_data_none; 2147 } 2148 #endif 2149 } 2150 2151 /* -------------------------------------------------------------------------- */ 2152 2153 #define INIT_LOCK __kmp_init_user_lock_with_checks 2154 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2155 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2156 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2157 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2158 #define ACQUIRE_NESTED_LOCK_TIMED \ 2159 __kmp_acquire_nested_user_lock_with_checks_timed 2160 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2161 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2162 #define TEST_LOCK __kmp_test_user_lock_with_checks 2163 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2164 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2165 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2166 2167 // TODO: Make check abort messages use location info & pass it into 2168 // with_checks routines 2169 2170 #if KMP_USE_DYNAMIC_LOCK 2171 2172 // internal lock initializer 2173 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2174 kmp_dyna_lockseq_t seq) { 2175 if (KMP_IS_D_LOCK(seq)) { 2176 KMP_INIT_D_LOCK(lock, seq); 2177 #if USE_ITT_BUILD 2178 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2179 #endif 2180 } else { 2181 KMP_INIT_I_LOCK(lock, seq); 2182 #if USE_ITT_BUILD 2183 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2184 __kmp_itt_lock_creating(ilk->lock, loc); 2185 #endif 2186 } 2187 } 2188 2189 // internal nest lock initializer 2190 static __forceinline void 2191 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2192 kmp_dyna_lockseq_t seq) { 2193 #if KMP_USE_TSX 2194 // Don't have nested lock implementation for speculative locks 2195 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2196 seq = __kmp_user_lock_seq; 2197 #endif 2198 switch (seq) { 2199 case lockseq_tas: 2200 seq = lockseq_nested_tas; 2201 break; 2202 #if KMP_USE_FUTEX 2203 case lockseq_futex: 2204 seq = lockseq_nested_futex; 2205 break; 2206 #endif 2207 case lockseq_ticket: 2208 seq = lockseq_nested_ticket; 2209 break; 2210 case lockseq_queuing: 2211 seq = lockseq_nested_queuing; 2212 break; 2213 case lockseq_drdpa: 2214 seq = lockseq_nested_drdpa; 2215 break; 2216 default: 2217 seq = lockseq_nested_queuing; 2218 } 2219 KMP_INIT_I_LOCK(lock, seq); 2220 #if USE_ITT_BUILD 2221 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2222 __kmp_itt_lock_creating(ilk->lock, loc); 2223 #endif 2224 } 2225 2226 /* initialize the lock with a hint */ 2227 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2228 uintptr_t hint) { 2229 KMP_DEBUG_ASSERT(__kmp_init_serial); 2230 if (__kmp_env_consistency_check && user_lock == NULL) { 2231 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2232 } 2233 2234 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2235 2236 #if OMPT_SUPPORT && OMPT_OPTIONAL 2237 // This is the case, if called from omp_init_lock_with_hint: 2238 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2239 if (!codeptr) 2240 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2241 if (ompt_enabled.ompt_callback_lock_init) { 2242 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2243 ompt_mutex_lock, (omp_lock_hint_t)hint, 2244 __ompt_get_mutex_impl_type(user_lock), 2245 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2246 } 2247 #endif 2248 } 2249 2250 /* initialize the lock with a hint */ 2251 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2252 void **user_lock, uintptr_t hint) { 2253 KMP_DEBUG_ASSERT(__kmp_init_serial); 2254 if (__kmp_env_consistency_check && user_lock == NULL) { 2255 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2256 } 2257 2258 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2259 2260 #if OMPT_SUPPORT && OMPT_OPTIONAL 2261 // This is the case, if called from omp_init_lock_with_hint: 2262 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2263 if (!codeptr) 2264 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2265 if (ompt_enabled.ompt_callback_lock_init) { 2266 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2267 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2268 __ompt_get_mutex_impl_type(user_lock), 2269 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2270 } 2271 #endif 2272 } 2273 2274 #endif // KMP_USE_DYNAMIC_LOCK 2275 2276 /* initialize the lock */ 2277 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2278 #if KMP_USE_DYNAMIC_LOCK 2279 2280 KMP_DEBUG_ASSERT(__kmp_init_serial); 2281 if (__kmp_env_consistency_check && user_lock == NULL) { 2282 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2283 } 2284 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2285 2286 #if OMPT_SUPPORT && OMPT_OPTIONAL 2287 // This is the case, if called from omp_init_lock_with_hint: 2288 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2289 if (!codeptr) 2290 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2291 if (ompt_enabled.ompt_callback_lock_init) { 2292 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2293 ompt_mutex_lock, omp_lock_hint_none, 2294 
__ompt_get_mutex_impl_type(user_lock), 2295 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2296 } 2297 #endif 2298 2299 #else // KMP_USE_DYNAMIC_LOCK 2300 2301 static char const *const func = "omp_init_lock"; 2302 kmp_user_lock_p lck; 2303 KMP_DEBUG_ASSERT(__kmp_init_serial); 2304 2305 if (__kmp_env_consistency_check) { 2306 if (user_lock == NULL) { 2307 KMP_FATAL(LockIsUninitialized, func); 2308 } 2309 } 2310 2311 KMP_CHECK_USER_LOCK_INIT(); 2312 2313 if ((__kmp_user_lock_kind == lk_tas) && 2314 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2315 lck = (kmp_user_lock_p)user_lock; 2316 } 2317 #if KMP_USE_FUTEX 2318 else if ((__kmp_user_lock_kind == lk_futex) && 2319 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2320 lck = (kmp_user_lock_p)user_lock; 2321 } 2322 #endif 2323 else { 2324 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2325 } 2326 INIT_LOCK(lck); 2327 __kmp_set_user_lock_location(lck, loc); 2328 2329 #if OMPT_SUPPORT && OMPT_OPTIONAL 2330 // This is the case, if called from omp_init_lock_with_hint: 2331 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2332 if (!codeptr) 2333 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2334 if (ompt_enabled.ompt_callback_lock_init) { 2335 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2336 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2337 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2338 } 2339 #endif 2340 2341 #if USE_ITT_BUILD 2342 __kmp_itt_lock_creating(lck); 2343 #endif /* USE_ITT_BUILD */ 2344 2345 #endif // KMP_USE_DYNAMIC_LOCK 2346 } // __kmpc_init_lock 2347 2348 /* initialize the lock */ 2349 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2350 #if KMP_USE_DYNAMIC_LOCK 2351 2352 KMP_DEBUG_ASSERT(__kmp_init_serial); 2353 if (__kmp_env_consistency_check && user_lock == NULL) { 2354 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2355 } 2356 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2357 2358 #if OMPT_SUPPORT && OMPT_OPTIONAL 2359 // This is the case, if called from omp_init_lock_with_hint: 2360 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2361 if (!codeptr) 2362 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2363 if (ompt_enabled.ompt_callback_lock_init) { 2364 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2365 ompt_mutex_nest_lock, omp_lock_hint_none, 2366 __ompt_get_mutex_impl_type(user_lock), 2367 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2368 } 2369 #endif 2370 2371 #else // KMP_USE_DYNAMIC_LOCK 2372 2373 static char const *const func = "omp_init_nest_lock"; 2374 kmp_user_lock_p lck; 2375 KMP_DEBUG_ASSERT(__kmp_init_serial); 2376 2377 if (__kmp_env_consistency_check) { 2378 if (user_lock == NULL) { 2379 KMP_FATAL(LockIsUninitialized, func); 2380 } 2381 } 2382 2383 KMP_CHECK_USER_LOCK_INIT(); 2384 2385 if ((__kmp_user_lock_kind == lk_tas) && 2386 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2387 OMP_NEST_LOCK_T_SIZE)) { 2388 lck = (kmp_user_lock_p)user_lock; 2389 } 2390 #if KMP_USE_FUTEX 2391 else if ((__kmp_user_lock_kind == lk_futex) && 2392 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2393 OMP_NEST_LOCK_T_SIZE)) { 2394 lck = (kmp_user_lock_p)user_lock; 2395 } 2396 #endif 2397 else { 2398 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2399 } 2400 2401 INIT_NESTED_LOCK(lck); 2402 __kmp_set_user_lock_location(lck, loc); 2403 2404 #if OMPT_SUPPORT && OMPT_OPTIONAL 2405 // This is the case, if called from omp_init_lock_with_hint: 2406 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 
2407 if (!codeptr) 2408 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2409 if (ompt_enabled.ompt_callback_lock_init) { 2410 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2411 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2412 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2413 } 2414 #endif 2415 2416 #if USE_ITT_BUILD 2417 __kmp_itt_lock_creating(lck); 2418 #endif /* USE_ITT_BUILD */ 2419 2420 #endif // KMP_USE_DYNAMIC_LOCK 2421 } // __kmpc_init_nest_lock 2422 2423 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2424 #if KMP_USE_DYNAMIC_LOCK 2425 2426 #if USE_ITT_BUILD 2427 kmp_user_lock_p lck; 2428 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2429 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2430 } else { 2431 lck = (kmp_user_lock_p)user_lock; 2432 } 2433 __kmp_itt_lock_destroyed(lck); 2434 #endif 2435 #if OMPT_SUPPORT && OMPT_OPTIONAL 2436 // This is the case, if called from omp_init_lock_with_hint: 2437 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2438 if (!codeptr) 2439 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2440 if (ompt_enabled.ompt_callback_lock_destroy) { 2441 kmp_user_lock_p lck; 2442 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2443 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2444 } else { 2445 lck = (kmp_user_lock_p)user_lock; 2446 } 2447 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2448 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2449 } 2450 #endif 2451 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2452 #else 2453 kmp_user_lock_p lck; 2454 2455 if ((__kmp_user_lock_kind == lk_tas) && 2456 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2457 lck = (kmp_user_lock_p)user_lock; 2458 } 2459 #if KMP_USE_FUTEX 2460 else if ((__kmp_user_lock_kind == lk_futex) && 2461 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2462 lck = (kmp_user_lock_p)user_lock; 2463 } 2464 #endif 2465 else { 2466 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2467 } 2468 2469 #if OMPT_SUPPORT && OMPT_OPTIONAL 2470 // This is the case, if called from omp_init_lock_with_hint: 2471 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2472 if (!codeptr) 2473 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2474 if (ompt_enabled.ompt_callback_lock_destroy) { 2475 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2476 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2477 } 2478 #endif 2479 2480 #if USE_ITT_BUILD 2481 __kmp_itt_lock_destroyed(lck); 2482 #endif /* USE_ITT_BUILD */ 2483 DESTROY_LOCK(lck); 2484 2485 if ((__kmp_user_lock_kind == lk_tas) && 2486 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2487 ; 2488 } 2489 #if KMP_USE_FUTEX 2490 else if ((__kmp_user_lock_kind == lk_futex) && 2491 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2492 ; 2493 } 2494 #endif 2495 else { 2496 __kmp_user_lock_free(user_lock, gtid, lck); 2497 } 2498 #endif // KMP_USE_DYNAMIC_LOCK 2499 } // __kmpc_destroy_lock 2500 2501 /* destroy the lock */ 2502 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2503 #if KMP_USE_DYNAMIC_LOCK 2504 2505 #if USE_ITT_BUILD 2506 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2507 __kmp_itt_lock_destroyed(ilk->lock); 2508 #endif 2509 #if OMPT_SUPPORT && OMPT_OPTIONAL 2510 // This is the case, if called from omp_init_lock_with_hint: 2511 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2512 if (!codeptr) 2513 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2514 if 
(ompt_enabled.ompt_callback_lock_destroy) { 2515 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2516 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2517 } 2518 #endif 2519 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2520 2521 #else // KMP_USE_DYNAMIC_LOCK 2522 2523 kmp_user_lock_p lck; 2524 2525 if ((__kmp_user_lock_kind == lk_tas) && 2526 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2527 OMP_NEST_LOCK_T_SIZE)) { 2528 lck = (kmp_user_lock_p)user_lock; 2529 } 2530 #if KMP_USE_FUTEX 2531 else if ((__kmp_user_lock_kind == lk_futex) && 2532 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2533 OMP_NEST_LOCK_T_SIZE)) { 2534 lck = (kmp_user_lock_p)user_lock; 2535 } 2536 #endif 2537 else { 2538 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2539 } 2540 2541 #if OMPT_SUPPORT && OMPT_OPTIONAL 2542 // This is the case, if called from omp_init_lock_with_hint: 2543 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2544 if (!codeptr) 2545 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2546 if (ompt_enabled.ompt_callback_lock_destroy) { 2547 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2548 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2549 } 2550 #endif 2551 2552 #if USE_ITT_BUILD 2553 __kmp_itt_lock_destroyed(lck); 2554 #endif /* USE_ITT_BUILD */ 2555 2556 DESTROY_NESTED_LOCK(lck); 2557 2558 if ((__kmp_user_lock_kind == lk_tas) && 2559 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2560 OMP_NEST_LOCK_T_SIZE)) { 2561 ; 2562 } 2563 #if KMP_USE_FUTEX 2564 else if ((__kmp_user_lock_kind == lk_futex) && 2565 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2566 OMP_NEST_LOCK_T_SIZE)) { 2567 ; 2568 } 2569 #endif 2570 else { 2571 __kmp_user_lock_free(user_lock, gtid, lck); 2572 } 2573 #endif // KMP_USE_DYNAMIC_LOCK 2574 } // __kmpc_destroy_nest_lock 2575 2576 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2577 KMP_COUNT_BLOCK(OMP_set_lock); 2578 #if KMP_USE_DYNAMIC_LOCK 2579 int tag = KMP_EXTRACT_D_TAG(user_lock); 2580 #if USE_ITT_BUILD 2581 __kmp_itt_lock_acquiring( 2582 (kmp_user_lock_p) 2583 user_lock); // itt function will get to the right lock object. 
2584 #endif 2585 #if OMPT_SUPPORT && OMPT_OPTIONAL 2586 // This is the case, if called from omp_init_lock_with_hint: 2587 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2588 if (!codeptr) 2589 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2590 if (ompt_enabled.ompt_callback_mutex_acquire) { 2591 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2592 ompt_mutex_lock, omp_lock_hint_none, 2593 __ompt_get_mutex_impl_type(user_lock), 2594 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2595 } 2596 #endif 2597 #if KMP_USE_INLINED_TAS 2598 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2599 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2600 } else 2601 #elif KMP_USE_INLINED_FUTEX 2602 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2603 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2604 } else 2605 #endif 2606 { 2607 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2608 } 2609 #if USE_ITT_BUILD 2610 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2611 #endif 2612 #if OMPT_SUPPORT && OMPT_OPTIONAL 2613 if (ompt_enabled.ompt_callback_mutex_acquired) { 2614 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2615 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2616 } 2617 #endif 2618 2619 #else // KMP_USE_DYNAMIC_LOCK 2620 2621 kmp_user_lock_p lck; 2622 2623 if ((__kmp_user_lock_kind == lk_tas) && 2624 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2625 lck = (kmp_user_lock_p)user_lock; 2626 } 2627 #if KMP_USE_FUTEX 2628 else if ((__kmp_user_lock_kind == lk_futex) && 2629 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2630 lck = (kmp_user_lock_p)user_lock; 2631 } 2632 #endif 2633 else { 2634 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2635 } 2636 2637 #if USE_ITT_BUILD 2638 __kmp_itt_lock_acquiring(lck); 2639 #endif /* USE_ITT_BUILD */ 2640 #if OMPT_SUPPORT && OMPT_OPTIONAL 2641 // This is the case, if called from omp_init_lock_with_hint: 2642 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2643 if (!codeptr) 2644 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2645 if (ompt_enabled.ompt_callback_mutex_acquire) { 2646 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2647 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2648 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2649 } 2650 #endif 2651 2652 ACQUIRE_LOCK(lck, gtid); 2653 2654 #if USE_ITT_BUILD 2655 __kmp_itt_lock_acquired(lck); 2656 #endif /* USE_ITT_BUILD */ 2657 2658 #if OMPT_SUPPORT && OMPT_OPTIONAL 2659 if (ompt_enabled.ompt_callback_mutex_acquired) { 2660 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2661 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2662 } 2663 #endif 2664 2665 #endif // KMP_USE_DYNAMIC_LOCK 2666 } 2667 2668 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2669 #if KMP_USE_DYNAMIC_LOCK 2670 2671 #if USE_ITT_BUILD 2672 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2673 #endif 2674 #if OMPT_SUPPORT && OMPT_OPTIONAL 2675 // This is the case, if called from omp_init_lock_with_hint: 2676 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2677 if (!codeptr) 2678 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2679 if (ompt_enabled.enabled) { 2680 if (ompt_enabled.ompt_callback_mutex_acquire) { 2681 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2682 ompt_mutex_nest_lock, omp_lock_hint_none, 2683 __ompt_get_mutex_impl_type(user_lock), 2684 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2685 } 2686 } 2687 #endif 2688 int acquire_status = 2689 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2690 (void) acquire_status; 2691 #if USE_ITT_BUILD 2692 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2693 #endif 2694 2695 #if OMPT_SUPPORT && OMPT_OPTIONAL 2696 if (ompt_enabled.enabled) { 2697 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2698 if (ompt_enabled.ompt_callback_mutex_acquired) { 2699 // lock_first 2700 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2701 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2702 codeptr); 2703 } 2704 } else { 2705 if (ompt_enabled.ompt_callback_nest_lock) { 2706 // lock_next 2707 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2708 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2709 } 2710 } 2711 } 2712 #endif 2713 2714 #else // KMP_USE_DYNAMIC_LOCK 2715 int acquire_status; 2716 kmp_user_lock_p lck; 2717 2718 if ((__kmp_user_lock_kind == lk_tas) && 2719 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2720 OMP_NEST_LOCK_T_SIZE)) { 2721 lck = (kmp_user_lock_p)user_lock; 2722 } 2723 #if KMP_USE_FUTEX 2724 else if ((__kmp_user_lock_kind == lk_futex) && 2725 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2726 OMP_NEST_LOCK_T_SIZE)) { 2727 lck = (kmp_user_lock_p)user_lock; 2728 } 2729 #endif 2730 else { 2731 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2732 } 2733 2734 #if USE_ITT_BUILD 2735 __kmp_itt_lock_acquiring(lck); 2736 #endif /* USE_ITT_BUILD */ 2737 #if OMPT_SUPPORT && OMPT_OPTIONAL 2738 // This is the case, if called from omp_init_lock_with_hint: 2739 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2740 if (!codeptr) 2741 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2742 if (ompt_enabled.enabled) { 2743 if (ompt_enabled.ompt_callback_mutex_acquire) { 2744 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2745 ompt_mutex_nest_lock, omp_lock_hint_none, 2746 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2747 codeptr); 2748 } 2749 } 2750 #endif 2751 2752 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2753 2754 #if USE_ITT_BUILD 2755 __kmp_itt_lock_acquired(lck); 2756 #endif /* USE_ITT_BUILD */ 2757 2758 #if OMPT_SUPPORT && OMPT_OPTIONAL 2759 if (ompt_enabled.enabled) { 2760 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2761 if (ompt_enabled.ompt_callback_mutex_acquired) { 2762 // lock_first 2763 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2764 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2765 } 2766 } else { 2767 if (ompt_enabled.ompt_callback_nest_lock) { 2768 // lock_next 2769 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2770 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2771 } 2772 } 2773 } 2774 #endif 2775 2776 #endif // KMP_USE_DYNAMIC_LOCK 2777 } 2778 2779 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2780 #if KMP_USE_DYNAMIC_LOCK 2781 2782 int tag = KMP_EXTRACT_D_TAG(user_lock); 2783 #if USE_ITT_BUILD 2784 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2785 #endif 2786 #if KMP_USE_INLINED_TAS 2787 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2788 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2789 } else 2790 #elif KMP_USE_INLINED_FUTEX 2791 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2792 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2793 } else 2794 #endif 2795 { 2796 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2797 } 2798 2799 #if OMPT_SUPPORT && OMPT_OPTIONAL 2800 // This is the case, if called from omp_init_lock_with_hint: 2801 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2802 if (!codeptr) 2803 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2804 if (ompt_enabled.ompt_callback_mutex_released) { 2805 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2806 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2807 } 2808 #endif 2809 2810 #else // KMP_USE_DYNAMIC_LOCK 2811 2812 kmp_user_lock_p lck; 2813 2814 /* Can't use serial interval since not block structured */ 2815 /* release the lock */ 2816 2817 if ((__kmp_user_lock_kind == lk_tas) && 2818 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2819 #if KMP_OS_LINUX && \ 2820 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2821 // "fast" path implemented to fix customer performance issue 2822 #if USE_ITT_BUILD 2823 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2824 #endif /* USE_ITT_BUILD */ 2825 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2826 KMP_MB(); 2827 2828 #if OMPT_SUPPORT && OMPT_OPTIONAL 2829 // This is the case, if called from omp_init_lock_with_hint: 2830 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2831 if (!codeptr) 2832 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2833 if (ompt_enabled.ompt_callback_mutex_released) { 2834 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2835 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2836 } 2837 #endif 2838 2839 return; 2840 #else 2841 lck = (kmp_user_lock_p)user_lock; 2842 #endif 2843 } 2844 #if KMP_USE_FUTEX 2845 else if ((__kmp_user_lock_kind == lk_futex) && 2846 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2847 lck = (kmp_user_lock_p)user_lock; 2848 } 2849 #endif 2850 else { 2851 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2852 } 2853 2854 #if USE_ITT_BUILD 2855 __kmp_itt_lock_releasing(lck); 2856 #endif /* USE_ITT_BUILD */ 2857 2858 RELEASE_LOCK(lck, gtid); 2859 2860 #if OMPT_SUPPORT && OMPT_OPTIONAL 2861 // This is the case, if called from omp_init_lock_with_hint: 2862 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2863 if (!codeptr) 2864 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2865 if (ompt_enabled.ompt_callback_mutex_released) { 2866 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2867 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2868 } 2869 #endif 2870 2871 #endif // KMP_USE_DYNAMIC_LOCK 2872 } 2873 2874 /* release the lock */ 2875 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2876 #if KMP_USE_DYNAMIC_LOCK 2877 2878 #if USE_ITT_BUILD 2879 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2880 #endif 2881 int release_status = 2882 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2883 (void) release_status; 2884 2885 #if OMPT_SUPPORT && OMPT_OPTIONAL 2886 // This is the case, if called from omp_init_lock_with_hint: 2887 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2888 if (!codeptr) 2889 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2890 if (ompt_enabled.enabled) { 2891 if (release_status == KMP_LOCK_RELEASED) { 2892 if (ompt_enabled.ompt_callback_mutex_released) { 2893 // release_lock_last 2894 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2895 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2896 codeptr); 2897 } 2898 } else if (ompt_enabled.ompt_callback_nest_lock) { 2899 // release_lock_prev 2900 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2901 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2902 } 2903 } 2904 #endif 2905 2906 #else // KMP_USE_DYNAMIC_LOCK 2907 2908 kmp_user_lock_p lck; 
2909 2910 /* Can't use serial interval since not block structured */ 2911 2912 if ((__kmp_user_lock_kind == lk_tas) && 2913 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2914 OMP_NEST_LOCK_T_SIZE)) { 2915 #if KMP_OS_LINUX && \ 2916 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2917 // "fast" path implemented to fix customer performance issue 2918 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2919 #if USE_ITT_BUILD 2920 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2921 #endif /* USE_ITT_BUILD */ 2922 2923 #if OMPT_SUPPORT && OMPT_OPTIONAL 2924 int release_status = KMP_LOCK_STILL_HELD; 2925 #endif 2926 2927 if (--(tl->lk.depth_locked) == 0) { 2928 TCW_4(tl->lk.poll, 0); 2929 #if OMPT_SUPPORT && OMPT_OPTIONAL 2930 release_status = KMP_LOCK_RELEASED; 2931 #endif 2932 } 2933 KMP_MB(); 2934 2935 #if OMPT_SUPPORT && OMPT_OPTIONAL 2936 // This is the case, if called from omp_init_lock_with_hint: 2937 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2938 if (!codeptr) 2939 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2940 if (ompt_enabled.enabled) { 2941 if (release_status == KMP_LOCK_RELEASED) { 2942 if (ompt_enabled.ompt_callback_mutex_released) { 2943 // release_lock_last 2944 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2945 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2946 } 2947 } else if (ompt_enabled.ompt_callback_nest_lock) { 2948 // release_lock_previous 2949 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2950 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2951 } 2952 } 2953 #endif 2954 2955 return; 2956 #else 2957 lck = (kmp_user_lock_p)user_lock; 2958 #endif 2959 } 2960 #if KMP_USE_FUTEX 2961 else if ((__kmp_user_lock_kind == lk_futex) && 2962 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2963 OMP_NEST_LOCK_T_SIZE)) { 2964 lck = (kmp_user_lock_p)user_lock; 2965 } 2966 #endif 2967 else { 2968 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2969 } 2970 2971 #if USE_ITT_BUILD 2972 __kmp_itt_lock_releasing(lck); 2973 #endif /* USE_ITT_BUILD */ 2974 2975 int release_status; 2976 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2977 #if OMPT_SUPPORT && OMPT_OPTIONAL 2978 // This is the case, if called from omp_init_lock_with_hint: 2979 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2980 if (!codeptr) 2981 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2982 if (ompt_enabled.enabled) { 2983 if (release_status == KMP_LOCK_RELEASED) { 2984 if (ompt_enabled.ompt_callback_mutex_released) { 2985 // release_lock_last 2986 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2987 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2988 } 2989 } else if (ompt_enabled.ompt_callback_nest_lock) { 2990 // release_lock_previous 2991 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2992 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2993 } 2994 } 2995 #endif 2996 2997 #endif // KMP_USE_DYNAMIC_LOCK 2998 } 2999 3000 /* try to acquire the lock */ 3001 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3002 KMP_COUNT_BLOCK(OMP_test_lock); 3003 3004 #if KMP_USE_DYNAMIC_LOCK 3005 int rc; 3006 int tag = KMP_EXTRACT_D_TAG(user_lock); 3007 #if USE_ITT_BUILD 3008 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3009 #endif 3010 #if OMPT_SUPPORT && OMPT_OPTIONAL 3011 // This is the case, if called from omp_init_lock_with_hint: 3012 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3013 if (!codeptr) 3014 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 3015 if (ompt_enabled.ompt_callback_mutex_acquire) { 3016 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3017 ompt_mutex_lock, omp_lock_hint_none, 3018 __ompt_get_mutex_impl_type(user_lock), 3019 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3020 } 3021 #endif 3022 #if KMP_USE_INLINED_TAS 3023 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3024 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3025 } else 3026 #elif KMP_USE_INLINED_FUTEX 3027 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3028 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3029 } else 3030 #endif 3031 { 3032 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3033 } 3034 if (rc) { 3035 #if USE_ITT_BUILD 3036 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3037 #endif 3038 #if OMPT_SUPPORT && OMPT_OPTIONAL 3039 if (ompt_enabled.ompt_callback_mutex_acquired) { 3040 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3041 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3042 } 3043 #endif 3044 return FTN_TRUE; 3045 } else { 3046 #if USE_ITT_BUILD 3047 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3048 #endif 3049 return FTN_FALSE; 3050 } 3051 3052 #else // KMP_USE_DYNAMIC_LOCK 3053 3054 kmp_user_lock_p lck; 3055 int rc; 3056 3057 if ((__kmp_user_lock_kind == lk_tas) && 3058 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3059 lck = (kmp_user_lock_p)user_lock; 3060 } 3061 #if KMP_USE_FUTEX 3062 else if ((__kmp_user_lock_kind == lk_futex) && 3063 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3064 lck = (kmp_user_lock_p)user_lock; 3065 } 3066 #endif 3067 else { 3068 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3069 } 3070 3071 #if USE_ITT_BUILD 3072 __kmp_itt_lock_acquiring(lck); 3073 #endif /* USE_ITT_BUILD */ 3074 #if OMPT_SUPPORT && OMPT_OPTIONAL 3075 // This is the case, if called from omp_init_lock_with_hint: 3076 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3077 if (!codeptr) 3078 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3079 if (ompt_enabled.ompt_callback_mutex_acquire) { 3080 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3081 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3082 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3083 } 3084 #endif 3085 3086 rc = TEST_LOCK(lck, gtid); 3087 #if USE_ITT_BUILD 3088 if (rc) { 3089 __kmp_itt_lock_acquired(lck); 3090 } else { 3091 __kmp_itt_lock_cancelled(lck); 3092 } 3093 #endif /* USE_ITT_BUILD */ 3094 #if OMPT_SUPPORT && OMPT_OPTIONAL 3095 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3096 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3097 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3098 } 3099 #endif 3100 3101 return (rc ? 
FTN_TRUE : FTN_FALSE); 3102 3103 /* Can't use serial interval since not block structured */ 3104 3105 #endif // KMP_USE_DYNAMIC_LOCK 3106 } 3107 3108 /* try to acquire the lock */ 3109 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3110 #if KMP_USE_DYNAMIC_LOCK 3111 int rc; 3112 #if USE_ITT_BUILD 3113 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3114 #endif 3115 #if OMPT_SUPPORT && OMPT_OPTIONAL 3116 // This is the case, if called from omp_init_lock_with_hint: 3117 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3118 if (!codeptr) 3119 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3120 if (ompt_enabled.ompt_callback_mutex_acquire) { 3121 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3122 ompt_mutex_nest_lock, omp_lock_hint_none, 3123 __ompt_get_mutex_impl_type(user_lock), 3124 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3125 } 3126 #endif 3127 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3128 #if USE_ITT_BUILD 3129 if (rc) { 3130 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3131 } else { 3132 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3133 } 3134 #endif 3135 #if OMPT_SUPPORT && OMPT_OPTIONAL 3136 if (ompt_enabled.enabled && rc) { 3137 if (rc == 1) { 3138 if (ompt_enabled.ompt_callback_mutex_acquired) { 3139 // lock_first 3140 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3141 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3142 codeptr); 3143 } 3144 } else { 3145 if (ompt_enabled.ompt_callback_nest_lock) { 3146 // lock_next 3147 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3148 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3149 } 3150 } 3151 } 3152 #endif 3153 return rc; 3154 3155 #else // KMP_USE_DYNAMIC_LOCK 3156 3157 kmp_user_lock_p lck; 3158 int rc; 3159 3160 if ((__kmp_user_lock_kind == lk_tas) && 3161 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3162 OMP_NEST_LOCK_T_SIZE)) { 3163 lck = (kmp_user_lock_p)user_lock; 3164 } 3165 #if KMP_USE_FUTEX 3166 else if ((__kmp_user_lock_kind == lk_futex) && 3167 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3168 OMP_NEST_LOCK_T_SIZE)) { 3169 lck = (kmp_user_lock_p)user_lock; 3170 } 3171 #endif 3172 else { 3173 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3174 } 3175 3176 #if USE_ITT_BUILD 3177 __kmp_itt_lock_acquiring(lck); 3178 #endif /* USE_ITT_BUILD */ 3179 3180 #if OMPT_SUPPORT && OMPT_OPTIONAL 3181 // This is the case, if called from omp_init_lock_with_hint: 3182 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3183 if (!codeptr) 3184 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3185 if (ompt_enabled.enabled && 3186 ompt_enabled.ompt_callback_mutex_acquire) { 3187 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3188 ompt_mutex_nest_lock, omp_lock_hint_none, 3189 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3190 codeptr); 3191 } 3192 #endif 3193 3194 rc = TEST_NESTED_LOCK(lck, gtid); 3195 #if USE_ITT_BUILD 3196 if (rc) { 3197 __kmp_itt_lock_acquired(lck); 3198 } else { 3199 __kmp_itt_lock_cancelled(lck); 3200 } 3201 #endif /* USE_ITT_BUILD */ 3202 #if OMPT_SUPPORT && OMPT_OPTIONAL 3203 if (ompt_enabled.enabled && rc) { 3204 if (rc == 1) { 3205 if (ompt_enabled.ompt_callback_mutex_acquired) { 3206 // lock_first 3207 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3208 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3209 } 3210 } else { 3211 if (ompt_enabled.ompt_callback_nest_lock) { 3212
// lock_next 3213 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3214 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3215 } 3216 } 3217 } 3218 #endif 3219 return rc; 3220 3221 /* Can't use serial interval since not block structured */ 3222 3223 #endif // KMP_USE_DYNAMIC_LOCK 3224 } 3225 3226 // Interface to fast scalable reduce methods routines 3227 3228 // keep the selected method in a thread local structure for cross-function 3229 // usage: will be used in __kmpc_end_reduce* functions; 3230 // another solution: to re-determine the method one more time in 3231 // __kmpc_end_reduce* functions (new prototype required then) 3232 // AT: which solution is better? 3233 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3234 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3235 3236 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3237 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3238 3239 // description of the packed_reduction_method variable: look at the macros in 3240 // kmp.h 3241 3242 // used in a critical section reduce block 3243 static __forceinline void 3244 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3245 kmp_critical_name *crit) { 3246 3247 // this lock was visible to a customer and to the threading profile tool as a 3248 // serial overhead span (although it's used for an internal purpose only) 3249 // why was it visible in previous implementation? 3250 // should we keep it visible in new reduce block? 3251 kmp_user_lock_p lck; 3252 3253 #if KMP_USE_DYNAMIC_LOCK 3254 3255 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3256 // Check if it is initialized. 3257 if (*lk == 0) { 3258 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3259 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3260 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3261 } else { 3262 __kmp_init_indirect_csptr(crit, loc, global_tid, 3263 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3264 } 3265 } 3266 // Branch for accessing the actual lock object and set operation. This 3267 // branching is inevitable since this lock initialization does not follow the 3268 // normal dispatch path (lock table is not used). 3269 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3270 lck = (kmp_user_lock_p)lk; 3271 KMP_DEBUG_ASSERT(lck != NULL); 3272 if (__kmp_env_consistency_check) { 3273 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3274 } 3275 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3276 } else { 3277 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3278 lck = ilk->lock; 3279 KMP_DEBUG_ASSERT(lck != NULL); 3280 if (__kmp_env_consistency_check) { 3281 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3282 } 3283 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3284 } 3285 3286 #else // KMP_USE_DYNAMIC_LOCK 3287 3288 // We know that the fast reduction code is only emitted by Intel compilers 3289 // with 32 byte critical sections. If there isn't enough space, then we 3290 // have to use a pointer. 
3291 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3292 lck = (kmp_user_lock_p)crit; 3293 } else { 3294 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3295 } 3296 KMP_DEBUG_ASSERT(lck != NULL); 3297 3298 if (__kmp_env_consistency_check) 3299 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3300 3301 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3302 3303 #endif // KMP_USE_DYNAMIC_LOCK 3304 } 3305 3306 // used in a critical section reduce block 3307 static __forceinline void 3308 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3309 kmp_critical_name *crit) { 3310 3311 kmp_user_lock_p lck; 3312 3313 #if KMP_USE_DYNAMIC_LOCK 3314 3315 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3316 lck = (kmp_user_lock_p)crit; 3317 if (__kmp_env_consistency_check) 3318 __kmp_pop_sync(global_tid, ct_critical, loc); 3319 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3320 } else { 3321 kmp_indirect_lock_t *ilk = 3322 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3323 if (__kmp_env_consistency_check) 3324 __kmp_pop_sync(global_tid, ct_critical, loc); 3325 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3326 } 3327 3328 #else // KMP_USE_DYNAMIC_LOCK 3329 3330 // We know that the fast reduction code is only emitted by Intel compilers 3331 // with 32 byte critical sections. If there isn't enough space, then we have 3332 // to use a pointer. 3333 if (__kmp_base_user_lock_size > 32) { 3334 lck = *((kmp_user_lock_p *)crit); 3335 KMP_ASSERT(lck != NULL); 3336 } else { 3337 lck = (kmp_user_lock_p)crit; 3338 } 3339 3340 if (__kmp_env_consistency_check) 3341 __kmp_pop_sync(global_tid, ct_critical, loc); 3342 3343 __kmp_release_user_lock_with_checks(lck, global_tid); 3344 3345 #endif // KMP_USE_DYNAMIC_LOCK 3346 } // __kmp_end_critical_section_reduce_block 3347 3348 #if OMP_40_ENABLED 3349 static __forceinline int 3350 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3351 int *task_state) { 3352 kmp_team_t *team; 3353 3354 // Check if we are inside the teams construct? 3355 if (th->th.th_teams_microtask) { 3356 *team_p = team = th->th.th_team; 3357 if (team->t.t_level == th->th.th_teams_level) { 3358 // This is reduction at teams construct. 3359 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3360 // Let's swap teams temporarily for the reduction. 3361 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3362 th->th.th_team = team->t.t_parent; 3363 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3364 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3365 *task_state = th->th.th_task_state; 3366 th->th.th_task_state = 0; 3367 3368 return 1; 3369 } 3370 } 3371 return 0; 3372 } 3373 3374 static __forceinline void 3375 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3376 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3377 th->th.th_info.ds.ds_tid = 0; 3378 th->th.th_team = team; 3379 th->th.th_team_nproc = team->t.t_nproc; 3380 th->th.th_task_team = team->t.t_task_team[task_state]; 3381 th->th.th_task_state = task_state; 3382 } 3383 #endif 3384 3385 /* 2.a.i. Reduce Block without a terminating barrier */ 3386 /*! 
3387 @ingroup SYNCHRONIZATION 3388 @param loc source location information 3389 @param global_tid global thread number 3390 @param num_vars number of items (variables) to be reduced 3391 @param reduce_size size of data in bytes to be reduced 3392 @param reduce_data pointer to data to be reduced 3393 @param reduce_func callback function providing reduction operation on two 3394 operands and returning result of reduction in lhs_data 3395 @param lck pointer to the unique lock data structure 3396 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3397 threads if atomic reduction needed 3398 3399 The nowait version is used for a reduce clause with the nowait argument. 3400 */ 3401 kmp_int32 3402 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3403 size_t reduce_size, void *reduce_data, 3404 void (*reduce_func)(void *lhs_data, void *rhs_data), 3405 kmp_critical_name *lck) { 3406 3407 KMP_COUNT_BLOCK(REDUCE_nowait); 3408 int retval = 0; 3409 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3410 #if OMP_40_ENABLED 3411 kmp_info_t *th; 3412 kmp_team_t *team; 3413 int teams_swapped = 0, task_state; 3414 #endif 3415 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3416 3417 // why do we need this initialization here at all? 3418 // Reduction clause can not be used as a stand-alone directive. 3419 3420 // do not call __kmp_serial_initialize(), it will be called by 3421 // __kmp_parallel_initialize() if needed 3422 // possible detection of false-positive race by the threadchecker ??? 3423 if (!TCR_4(__kmp_init_parallel)) 3424 __kmp_parallel_initialize(); 3425 3426 #if OMP_50_ENABLED 3427 __kmp_resume_if_soft_paused(); 3428 #endif 3429 3430 // check correctness of reduce block nesting 3431 #if KMP_USE_DYNAMIC_LOCK 3432 if (__kmp_env_consistency_check) 3433 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3434 #else 3435 if (__kmp_env_consistency_check) 3436 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3437 #endif 3438 3439 #if OMP_40_ENABLED 3440 th = __kmp_thread_from_gtid(global_tid); 3441 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3442 #endif // OMP_40_ENABLED 3443 3444 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3445 // the value should be kept in a variable 3446 // the variable should be either a construct-specific or thread-specific 3447 // property, not a team specific property 3448 // (a thread can reach the next reduce block on the next construct, reduce 3449 // method may differ on the next construct) 3450 // an ident_t "loc" parameter could be used as a construct-specific property 3451 // (what if loc == 0?) 
3452 // (if both construct-specific and team-specific variables were shared,
3453 // then unnecessary extra syncs would be needed)
3454 // a thread-specific variable is better regarding the two issues above (next
3455 // construct and extra syncs)
3456 // a thread-specific "th_local.reduction_method" variable is used currently
3457 // each thread executes the 'determine' and 'set' lines (no need to execute by one
3458 // thread, to avoid unnecessary extra syncs)
3459
3460 packed_reduction_method = __kmp_determine_reduction_method(
3461 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3462 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3463
3464 if (packed_reduction_method == critical_reduce_block) {
3465
3466 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3467 retval = 1;
3468
3469 } else if (packed_reduction_method == empty_reduce_block) {
3470
3471 // usage: if team size == 1, no synchronization is required (Intel
3472 // platforms only)
3473 retval = 1;
3474
3475 } else if (packed_reduction_method == atomic_reduce_block) {
3476
3477 retval = 2;
3478
3479 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3480 // won't be called by the code gen)
3481 // (this is not ideal, because the checking block has been closed by
3482 // this 'pop',
3483 // but the atomic operation has not been executed yet; it will be executed
3484 // slightly later, literally on the next instruction)
3485 if (__kmp_env_consistency_check)
3486 __kmp_pop_sync(global_tid, ct_reduce, loc);
3487
3488 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3489 tree_reduce_block)) {
3490
3491 // AT: performance issue: a real barrier here
3492 // AT: (if the master goes slow, other threads are blocked here waiting for the
3493 // master to come and release them)
3494 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3495 // AT: (specifying NOWAIT won't result in improved performance; it'll
3496 // be confusing to a customer)
3497 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3498 // might go faster and be more in line with the sense of NOWAIT
3499 // AT: TO DO: run the EPCC test and compare times
3500
3501 // this barrier should be invisible to a customer and to the threading profile
3502 // tool (it's neither a terminating barrier nor customer's code; it's
3503 // used for an internal purpose)
3504 #if OMPT_SUPPORT
3505 // JP: can this barrier potentially lead to task scheduling?
3506 // JP: as long as there is a barrier in the implementation, OMPT should and
3507 // will provide the barrier events,
3508 // so we set up the necessary frame/return addresses.
3509 ompt_frame_t *ompt_frame;
3510 if (ompt_enabled.enabled) {
3511 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3512 if (ompt_frame->enter_frame.ptr == NULL)
3513 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3514 OMPT_STORE_RETURN_ADDRESS(global_tid);
3515 }
3516 #endif
3517 #if USE_ITT_NOTIFY
3518 __kmp_threads[global_tid]->th.th_ident = loc;
3519 #endif
3520 retval =
3521 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3522 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3523 retval = (retval != 0) ?
(0) : (1);
3524 #if OMPT_SUPPORT && OMPT_OPTIONAL
3525 if (ompt_enabled.enabled) {
3526 ompt_frame->enter_frame = ompt_data_none;
3527 }
3528 #endif
3529
3530 // all workers except the master should do this pop here
3531 // (none of the other workers will get to __kmpc_end_reduce_nowait())
3532 if (__kmp_env_consistency_check) {
3533 if (retval == 0) {
3534 __kmp_pop_sync(global_tid, ct_reduce, loc);
3535 }
3536 }
3537
3538 } else {
3539
3540 // should never reach this block
3541 KMP_ASSERT(0); // "unexpected method"
3542 }
3543 #if OMP_40_ENABLED
3544 if (teams_swapped) {
3545 __kmp_restore_swapped_teams(th, team, task_state);
3546 }
3547 #endif
3548 KA_TRACE(
3549 10,
3550 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3551 global_tid, packed_reduction_method, retval));
3552
3553 return retval;
3554 }
3555
3556 /*!
3557 @ingroup SYNCHRONIZATION
3558 @param loc source location information
3559 @param global_tid global thread id.
3560 @param lck pointer to the unique lock data structure
3561
3562 Finish the execution of a reduce nowait.
3563 */
3564 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3565 kmp_critical_name *lck) {
3566
3567 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3568
3569 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3570
3571 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3572
3573 if (packed_reduction_method == critical_reduce_block) {
3574
3575 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3576
3577 } else if (packed_reduction_method == empty_reduce_block) {
3578
3579 // usage: if team size == 1, no synchronization is required (on Intel
3580 // platforms only)
3581
3582 } else if (packed_reduction_method == atomic_reduce_block) {
3583
3584 // neither the master nor the other workers should get here
3585 // (the code gen does not generate this call in case 2: atomic reduce block)
3586 // actually it would be better to remove this else-if branch entirely;
3587 // after its removal this value would be checked by the 'else' and would assert
3588
3589 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3590 tree_reduce_block)) {
3591
3592 // only the master gets here
3593
3594 } else {
3595
3596 // should never reach this block
3597 KMP_ASSERT(0); // "unexpected method"
3598 }
3599
3600 if (__kmp_env_consistency_check)
3601 __kmp_pop_sync(global_tid, ct_reduce, loc);
3602
3603 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3604 global_tid, packed_reduction_method));
3605
3606 return;
3607 }
3608
3609 /* 2.a.ii. Reduce Block with a terminating barrier */
3610
3611 /*!
3612 @ingroup SYNCHRONIZATION
3613 @param loc source location information
3614 @param global_tid global thread number
3615 @param num_vars number of items (variables) to be reduced
3616 @param reduce_size size of data in bytes to be reduced
3617 @param reduce_data pointer to data to be reduced
3618 @param reduce_func callback function providing reduction operation on two
3619 operands and returning result of reduction in lhs_data
3620 @param lck pointer to the unique lock data structure
3621 @result 1 for the master thread, 0 for all other team threads, 2 for all team
3622 threads if atomic reduction needed
3623
3624 A blocking reduce that includes an implicit barrier.
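For illustration only: the following is an assumed sketch of how compiler-generated
code typically consumes the return value; it is not a normative part of this
interface. <tt>my_loc</tt>, <tt>reducer</tt>, <tt>reduce_lock</tt>, <tt>partial</tt>
and <tt>total</tt> are placeholder names.
@code
static kmp_critical_name reduce_lock;       // zero-initialized storage used by the runtime

static void reducer(void *lhs, void *rhs) { // combines two partial results into lhs_data
  *(int *)lhs += *(int *)rhs;
}

static void reduce_tail(ident_t *my_loc, kmp_int32 gtid, int *total, int partial) {
  switch (__kmpc_reduce(my_loc, gtid, 1, sizeof(int), &partial, reducer,
                        &reduce_lock)) {
  case 1: // critical-section or tree method: combine here, then close the construct
    *total += partial;
    __kmpc_end_reduce(my_loc, gtid, &reduce_lock);
    break;
  case 2: // atomic method: every thread combines atomically (stands in for a
          // __kmpc_atomic_* call), then still closes the construct so the
          // terminating barrier is executed
    __sync_fetch_and_add(total, partial);
    __kmpc_end_reduce(my_loc, gtid, &reduce_lock);
    break;
  default: // 0: tree-reduction worker; data already combined by the runtime,
           // __kmpc_end_reduce() is not called
    break;
  }
}
@endcode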
3625 */ 3626 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3627 size_t reduce_size, void *reduce_data, 3628 void (*reduce_func)(void *lhs_data, void *rhs_data), 3629 kmp_critical_name *lck) { 3630 KMP_COUNT_BLOCK(REDUCE_wait); 3631 int retval = 0; 3632 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3633 #if OMP_40_ENABLED 3634 kmp_info_t *th; 3635 kmp_team_t *team; 3636 int teams_swapped = 0, task_state; 3637 #endif 3638 3639 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3640 3641 // why do we need this initialization here at all? 3642 // Reduction clause can not be a stand-alone directive. 3643 3644 // do not call __kmp_serial_initialize(), it will be called by 3645 // __kmp_parallel_initialize() if needed 3646 // possible detection of false-positive race by the threadchecker ??? 3647 if (!TCR_4(__kmp_init_parallel)) 3648 __kmp_parallel_initialize(); 3649 3650 #if OMP_50_ENABLED 3651 __kmp_resume_if_soft_paused(); 3652 #endif 3653 3654 // check correctness of reduce block nesting 3655 #if KMP_USE_DYNAMIC_LOCK 3656 if (__kmp_env_consistency_check) 3657 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3658 #else 3659 if (__kmp_env_consistency_check) 3660 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3661 #endif 3662 3663 #if OMP_40_ENABLED 3664 th = __kmp_thread_from_gtid(global_tid); 3665 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3666 #endif // OMP_40_ENABLED 3667 3668 packed_reduction_method = __kmp_determine_reduction_method( 3669 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3670 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3671 3672 if (packed_reduction_method == critical_reduce_block) { 3673 3674 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3675 retval = 1; 3676 3677 } else if (packed_reduction_method == empty_reduce_block) { 3678 3679 // usage: if team size == 1, no synchronization is required ( Intel 3680 // platforms only ) 3681 retval = 1; 3682 3683 } else if (packed_reduction_method == atomic_reduce_block) { 3684 3685 retval = 2; 3686 3687 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3688 tree_reduce_block)) { 3689 3690 // case tree_reduce_block: 3691 // this barrier should be visible to a customer and to the threading profile 3692 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3693 #if OMPT_SUPPORT 3694 ompt_frame_t *ompt_frame; 3695 if (ompt_enabled.enabled) { 3696 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3697 if (ompt_frame->enter_frame.ptr == NULL) 3698 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3699 OMPT_STORE_RETURN_ADDRESS(global_tid); 3700 } 3701 #endif 3702 #if USE_ITT_NOTIFY 3703 __kmp_threads[global_tid]->th.th_ident = 3704 loc; // needed for correct notification of frames 3705 #endif 3706 retval = 3707 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3708 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3709 retval = (retval != 0) ? 
(0) : (1); 3710 #if OMPT_SUPPORT && OMPT_OPTIONAL 3711 if (ompt_enabled.enabled) { 3712 ompt_frame->enter_frame = ompt_data_none; 3713 } 3714 #endif 3715 3716 // all other workers except master should do this pop here 3717 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3718 if (__kmp_env_consistency_check) { 3719 if (retval == 0) { // 0: all other workers; 1: master 3720 __kmp_pop_sync(global_tid, ct_reduce, loc); 3721 } 3722 } 3723 3724 } else { 3725 3726 // should never reach this block 3727 KMP_ASSERT(0); // "unexpected method" 3728 } 3729 #if OMP_40_ENABLED 3730 if (teams_swapped) { 3731 __kmp_restore_swapped_teams(th, team, task_state); 3732 } 3733 #endif 3734 3735 KA_TRACE(10, 3736 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3737 global_tid, packed_reduction_method, retval)); 3738 3739 return retval; 3740 } 3741 3742 /*! 3743 @ingroup SYNCHRONIZATION 3744 @param loc source location information 3745 @param global_tid global thread id. 3746 @param lck pointer to the unique lock data structure 3747 3748 Finish the execution of a blocking reduce. 3749 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3750 start function. 3751 */ 3752 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3753 kmp_critical_name *lck) { 3754 3755 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3756 #if OMP_40_ENABLED 3757 kmp_info_t *th; 3758 kmp_team_t *team; 3759 int teams_swapped = 0, task_state; 3760 #endif 3761 3762 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3763 3764 #if OMP_40_ENABLED 3765 th = __kmp_thread_from_gtid(global_tid); 3766 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3767 #endif // OMP_40_ENABLED 3768 3769 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3770 3771 // this barrier should be visible to a customer and to the threading profile 3772 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3773 3774 if (packed_reduction_method == critical_reduce_block) { 3775 3776 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3777 3778 // TODO: implicit barrier: should be exposed 3779 #if OMPT_SUPPORT 3780 ompt_frame_t *ompt_frame; 3781 if (ompt_enabled.enabled) { 3782 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3783 if (ompt_frame->enter_frame.ptr == NULL) 3784 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3785 OMPT_STORE_RETURN_ADDRESS(global_tid); 3786 } 3787 #endif 3788 #if USE_ITT_NOTIFY 3789 __kmp_threads[global_tid]->th.th_ident = loc; 3790 #endif 3791 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3792 #if OMPT_SUPPORT && OMPT_OPTIONAL 3793 if (ompt_enabled.enabled) { 3794 ompt_frame->enter_frame = ompt_data_none; 3795 } 3796 #endif 3797 3798 } else if (packed_reduction_method == empty_reduce_block) { 3799 3800 // usage: if team size==1, no synchronization is required (Intel platforms only) 3801 3802 // TODO: implicit barrier: should be exposed 3803 #if OMPT_SUPPORT 3804 ompt_frame_t *ompt_frame; 3805 if (ompt_enabled.enabled) { 3806 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3807 if (ompt_frame->enter_frame.ptr == NULL) 3808 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3809 OMPT_STORE_RETURN_ADDRESS(global_tid); 3810 } 3811 #endif 3812 #if USE_ITT_NOTIFY 3813 __kmp_threads[global_tid]->th.th_ident = loc; 3814 #endif 3815 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3816 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 3817 if (ompt_enabled.enabled) { 3818 ompt_frame->enter_frame = ompt_data_none; 3819 } 3820 #endif 3821 3822 } else if (packed_reduction_method == atomic_reduce_block) { 3823 3824 #if OMPT_SUPPORT 3825 ompt_frame_t *ompt_frame; 3826 if (ompt_enabled.enabled) { 3827 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3828 if (ompt_frame->enter_frame.ptr == NULL) 3829 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3830 OMPT_STORE_RETURN_ADDRESS(global_tid); 3831 } 3832 #endif 3833 // TODO: implicit barrier: should be exposed 3834 #if USE_ITT_NOTIFY 3835 __kmp_threads[global_tid]->th.th_ident = loc; 3836 #endif 3837 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3838 #if OMPT_SUPPORT && OMPT_OPTIONAL 3839 if (ompt_enabled.enabled) { 3840 ompt_frame->enter_frame = ompt_data_none; 3841 } 3842 #endif 3843 3844 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3845 tree_reduce_block)) { 3846 3847 // only master executes here (master releases all other workers) 3848 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3849 global_tid); 3850 3851 } else { 3852 3853 // should never reach this block 3854 KMP_ASSERT(0); // "unexpected method" 3855 } 3856 #if OMP_40_ENABLED 3857 if (teams_swapped) { 3858 __kmp_restore_swapped_teams(th, team, task_state); 3859 } 3860 #endif 3861 3862 if (__kmp_env_consistency_check) 3863 __kmp_pop_sync(global_tid, ct_reduce, loc); 3864 3865 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3866 global_tid, packed_reduction_method)); 3867 3868 return; 3869 } 3870 3871 #undef __KMP_GET_REDUCTION_METHOD 3872 #undef __KMP_SET_REDUCTION_METHOD 3873 3874 /* end of interface to fast scalable reduce routines */ 3875 3876 kmp_uint64 __kmpc_get_taskid() { 3877 3878 kmp_int32 gtid; 3879 kmp_info_t *thread; 3880 3881 gtid = __kmp_get_gtid(); 3882 if (gtid < 0) { 3883 return 0; 3884 } 3885 thread = __kmp_thread_from_gtid(gtid); 3886 return thread->th.th_current_task->td_task_id; 3887 3888 } // __kmpc_get_taskid 3889 3890 kmp_uint64 __kmpc_get_parent_taskid() { 3891 3892 kmp_int32 gtid; 3893 kmp_info_t *thread; 3894 kmp_taskdata_t *parent_task; 3895 3896 gtid = __kmp_get_gtid(); 3897 if (gtid < 0) { 3898 return 0; 3899 } 3900 thread = __kmp_thread_from_gtid(gtid); 3901 parent_task = thread->th.th_current_task->td_parent; 3902 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3903 3904 } // __kmpc_get_parent_taskid 3905 3906 #if OMP_45_ENABLED 3907 /*! 3908 @ingroup WORK_SHARING 3909 @param loc source location information. 3910 @param gtid global thread number. 3911 @param num_dims number of associated doacross loops. 3912 @param dims info on loops bounds. 3913 3914 Initialize doacross loop information. 3915 Expect compiler send us inclusive bounds, 3916 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
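For illustration only, a sketch (assumed lowering, not normative) of how a compiler
might use the doacross entry points for a loop such as
<tt>#pragma omp for ordered(1)</tt> over <tt>for(i=2;i<9;i+=2)</tt> with a
<tt>depend(sink: i-2)</tt> / <tt>depend(source)</tt> pair; <tt>my_loc</tt>,
<tt>gtid</tt>, <tt>lb</tt> and <tt>ub</tt> (this thread's chunk bounds) are
placeholders.
@code
struct kmp_dim dim;
dim.lo = 2; // inclusive lower bound
dim.up = 8; // inclusive upper bound
dim.st = 2; // stride
__kmpc_doacross_init(my_loc, gtid, 1, &dim);
for (kmp_int64 i = lb; i <= ub; i += 2) {
  kmp_int64 vec = i - 2;
  __kmpc_doacross_wait(my_loc, gtid, &vec); // ordered depend(sink: i-2);
                                            // out-of-range sinks are ignored
  /* ... loop body ... */
  vec = i;
  __kmpc_doacross_post(my_loc, gtid, &vec); // ordered depend(source)
}
__kmpc_doacross_fini(my_loc, gtid);
@endcode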
3917 */ 3918 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3919 const struct kmp_dim *dims) { 3920 int j, idx; 3921 kmp_int64 last, trace_count; 3922 kmp_info_t *th = __kmp_threads[gtid]; 3923 kmp_team_t *team = th->th.th_team; 3924 kmp_uint32 *flags; 3925 kmp_disp_t *pr_buf = th->th.th_dispatch; 3926 dispatch_shared_info_t *sh_buf; 3927 3928 KA_TRACE( 3929 20, 3930 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3931 gtid, num_dims, !team->t.t_serialized)); 3932 KMP_DEBUG_ASSERT(dims != NULL); 3933 KMP_DEBUG_ASSERT(num_dims > 0); 3934 3935 if (team->t.t_serialized) { 3936 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3937 return; // no dependencies if team is serialized 3938 } 3939 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3940 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3941 // the next loop 3942 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3943 3944 // Save bounds info into allocated private buffer 3945 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3946 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3947 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3948 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3949 pr_buf->th_doacross_info[0] = 3950 (kmp_int64)num_dims; // first element is number of dimensions 3951 // Save also address of num_done in order to access it later without knowing 3952 // the buffer index 3953 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3954 pr_buf->th_doacross_info[2] = dims[0].lo; 3955 pr_buf->th_doacross_info[3] = dims[0].up; 3956 pr_buf->th_doacross_info[4] = dims[0].st; 3957 last = 5; 3958 for (j = 1; j < num_dims; ++j) { 3959 kmp_int64 3960 range_length; // To keep ranges of all dimensions but the first dims[0] 3961 if (dims[j].st == 1) { // most common case 3962 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3963 range_length = dims[j].up - dims[j].lo + 1; 3964 } else { 3965 if (dims[j].st > 0) { 3966 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3967 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3968 } else { // negative increment 3969 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3970 range_length = 3971 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3972 } 3973 } 3974 pr_buf->th_doacross_info[last++] = range_length; 3975 pr_buf->th_doacross_info[last++] = dims[j].lo; 3976 pr_buf->th_doacross_info[last++] = dims[j].up; 3977 pr_buf->th_doacross_info[last++] = dims[j].st; 3978 } 3979 3980 // Compute total trip count. 3981 // Start with range of dims[0] which we don't need to keep in the buffer. 
3982 if (dims[0].st == 1) { // most common case 3983 trace_count = dims[0].up - dims[0].lo + 1; 3984 } else if (dims[0].st > 0) { 3985 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3986 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3987 } else { // negative increment 3988 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3989 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3990 } 3991 for (j = 1; j < num_dims; ++j) { 3992 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3993 } 3994 KMP_DEBUG_ASSERT(trace_count > 0); 3995 3996 // Check if shared buffer is not occupied by other loop (idx - 3997 // __kmp_dispatch_num_buffers) 3998 if (idx != sh_buf->doacross_buf_idx) { 3999 // Shared buffer is occupied, wait for it to be free 4000 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4001 __kmp_eq_4, NULL); 4002 } 4003 #if KMP_32_BIT_ARCH 4004 // Check if we are the first thread. After the CAS the first thread gets 0, 4005 // others get 1 if initialization is in progress, allocated pointer otherwise. 4006 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4007 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4008 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4009 #else 4010 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4011 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4012 #endif 4013 if (flags == NULL) { 4014 // we are the first thread, allocate the array of flags 4015 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 4016 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4017 KMP_MB(); 4018 sh_buf->doacross_flags = flags; 4019 } else if (flags == (kmp_uint32 *)1) { 4020 #if KMP_32_BIT_ARCH 4021 // initialization is still in progress, need to wait 4022 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4023 #else 4024 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4025 #endif 4026 KMP_YIELD(TRUE); 4027 KMP_MB(); 4028 } else { 4029 KMP_MB(); 4030 } 4031 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4032 pr_buf->th_doacross_flags = 4033 sh_buf->doacross_flags; // save private copy in order to not 4034 // touch shared buffer on each iteration 4035 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4036 } 4037 4038 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4039 kmp_int32 shft, num_dims, i; 4040 kmp_uint32 flag; 4041 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4042 kmp_info_t *th = __kmp_threads[gtid]; 4043 kmp_team_t *team = th->th.th_team; 4044 kmp_disp_t *pr_buf; 4045 kmp_int64 lo, up, st; 4046 4047 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4048 if (team->t.t_serialized) { 4049 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4050 return; // no dependencies if team is serialized 4051 } 4052 4053 // calculate sequential iteration number and check out-of-bounds condition 4054 pr_buf = th->th.th_dispatch; 4055 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4056 num_dims = pr_buf->th_doacross_info[0]; 4057 lo = pr_buf->th_doacross_info[2]; 4058 up = pr_buf->th_doacross_info[3]; 4059 st = pr_buf->th_doacross_info[4]; 4060 if (st == 1) { // most common case 4061 if (vec[0] < lo || vec[0] > up) { 4062 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4063 "bounds [%lld,%lld]\n", 4064 gtid, vec[0], lo, up)); 4065 return; 4066 } 4067 iter_number = 
vec[0] - lo; 4068 } else if (st > 0) { 4069 if (vec[0] < lo || vec[0] > up) { 4070 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4071 "bounds [%lld,%lld]\n", 4072 gtid, vec[0], lo, up)); 4073 return; 4074 } 4075 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4076 } else { // negative increment 4077 if (vec[0] > lo || vec[0] < up) { 4078 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4079 "bounds [%lld,%lld]\n", 4080 gtid, vec[0], lo, up)); 4081 return; 4082 } 4083 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4084 } 4085 for (i = 1; i < num_dims; ++i) { 4086 kmp_int64 iter, ln; 4087 kmp_int32 j = i * 4; 4088 ln = pr_buf->th_doacross_info[j + 1]; 4089 lo = pr_buf->th_doacross_info[j + 2]; 4090 up = pr_buf->th_doacross_info[j + 3]; 4091 st = pr_buf->th_doacross_info[j + 4]; 4092 if (st == 1) { 4093 if (vec[i] < lo || vec[i] > up) { 4094 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4095 "bounds [%lld,%lld]\n", 4096 gtid, vec[i], lo, up)); 4097 return; 4098 } 4099 iter = vec[i] - lo; 4100 } else if (st > 0) { 4101 if (vec[i] < lo || vec[i] > up) { 4102 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4103 "bounds [%lld,%lld]\n", 4104 gtid, vec[i], lo, up)); 4105 return; 4106 } 4107 iter = (kmp_uint64)(vec[i] - lo) / st; 4108 } else { // st < 0 4109 if (vec[i] > lo || vec[i] < up) { 4110 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4111 "bounds [%lld,%lld]\n", 4112 gtid, vec[i], lo, up)); 4113 return; 4114 } 4115 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4116 } 4117 iter_number = iter + ln * iter_number; 4118 } 4119 shft = iter_number % 32; // use 32-bit granularity 4120 iter_number >>= 5; // divided by 32 4121 flag = 1 << shft; 4122 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4123 KMP_YIELD(TRUE); 4124 } 4125 KMP_MB(); 4126 KA_TRACE(20, 4127 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4128 gtid, (iter_number << 5) + shft)); 4129 } 4130 4131 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4132 kmp_int32 shft, num_dims, i; 4133 kmp_uint32 flag; 4134 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4135 kmp_info_t *th = __kmp_threads[gtid]; 4136 kmp_team_t *team = th->th.th_team; 4137 kmp_disp_t *pr_buf; 4138 kmp_int64 lo, st; 4139 4140 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4141 if (team->t.t_serialized) { 4142 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4143 return; // no dependencies if team is serialized 4144 } 4145 4146 // calculate sequential iteration number (same as in "wait" but no 4147 // out-of-bounds checks) 4148 pr_buf = th->th.th_dispatch; 4149 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4150 num_dims = pr_buf->th_doacross_info[0]; 4151 lo = pr_buf->th_doacross_info[2]; 4152 st = pr_buf->th_doacross_info[4]; 4153 if (st == 1) { // most common case 4154 iter_number = vec[0] - lo; 4155 } else if (st > 0) { 4156 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4157 } else { // negative increment 4158 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4159 } 4160 for (i = 1; i < num_dims; ++i) { 4161 kmp_int64 iter, ln; 4162 kmp_int32 j = i * 4; 4163 ln = pr_buf->th_doacross_info[j + 1]; 4164 lo = pr_buf->th_doacross_info[j + 2]; 4165 st = pr_buf->th_doacross_info[j + 4]; 4166 if (st == 1) { 4167 iter = vec[i] - lo; 4168 } else if (st > 0) { 4169 iter = (kmp_uint64)(vec[i] - lo) / st; 4170 } else { // st < 0 4171 iter = 
(kmp_uint64)(lo - vec[i]) / (-st); 4172 } 4173 iter_number = iter + ln * iter_number; 4174 } 4175 shft = iter_number % 32; // use 32-bit granularity 4176 iter_number >>= 5; // divided by 32 4177 flag = 1 << shft; 4178 KMP_MB(); 4179 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4180 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4181 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4182 (iter_number << 5) + shft)); 4183 } 4184 4185 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4186 kmp_int32 num_done; 4187 kmp_info_t *th = __kmp_threads[gtid]; 4188 kmp_team_t *team = th->th.th_team; 4189 kmp_disp_t *pr_buf = th->th.th_dispatch; 4190 4191 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4192 if (team->t.t_serialized) { 4193 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4194 return; // nothing to do 4195 } 4196 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4197 if (num_done == th->th.th_team_nproc) { 4198 // we are the last thread, need to free shared resources 4199 int idx = pr_buf->th_doacross_buf_idx - 1; 4200 dispatch_shared_info_t *sh_buf = 4201 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4202 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4203 (kmp_int64)&sh_buf->doacross_num_done); 4204 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4205 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4206 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4207 sh_buf->doacross_flags = NULL; 4208 sh_buf->doacross_num_done = 0; 4209 sh_buf->doacross_buf_idx += 4210 __kmp_dispatch_num_buffers; // free buffer for future re-use 4211 } 4212 // free private resources (need to keep buffer index forever) 4213 pr_buf->th_doacross_flags = NULL; 4214 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4215 pr_buf->th_doacross_info = NULL; 4216 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4217 } 4218 #endif 4219 4220 #if OMP_50_ENABLED 4221 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */ 4222 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4223 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4224 } 4225 4226 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4227 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4228 } 4229 4230 int __kmpc_get_target_offload(void) { 4231 if (!__kmp_init_serial) { 4232 __kmp_serial_initialize(); 4233 } 4234 return __kmp_target_offload; 4235 } 4236 4237 int __kmpc_pause_resource(kmp_pause_status_t level) { 4238 if (!__kmp_init_serial) { 4239 return 1; // Can't pause if runtime is not initialized 4240 } 4241 return __kmp_pause_resource(level); 4242 } 4243 #endif // OMP_50_ENABLED 4244 4245 // end of file // 4246