/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is a no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
  // __kmpc_end() call a no-op. However, this can be overridden with the
  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister this
  // root (it can cause library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, since the runtime maintains
underlying threads even when they are not active (the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
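 *
 * (In debug builds the KMP_PAR_RANGE filter below may instead return FALSE for
 * parallel regions that fall outside the requested range. For illustration
 * only: the parsing below expects loc->psource to be a semicolon-separated
 * location string of the general form "tag;file;routine;line;...".)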
157 */ 158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 159 #ifndef KMP_DEBUG 160 161 return TRUE; 162 163 #else 164 165 const char *semi2; 166 const char *semi3; 167 int line_no; 168 169 if (__kmp_par_range == 0) { 170 return TRUE; 171 } 172 semi2 = loc->psource; 173 if (semi2 == NULL) { 174 return TRUE; 175 } 176 semi2 = strchr(semi2, ';'); 177 if (semi2 == NULL) { 178 return TRUE; 179 } 180 semi2 = strchr(semi2 + 1, ';'); 181 if (semi2 == NULL) { 182 return TRUE; 183 } 184 if (__kmp_par_range_filename[0]) { 185 const char *name = semi2 - 1; 186 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 187 name--; 188 } 189 if ((*name == '/') || (*name == ';')) { 190 name++; 191 } 192 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 193 return __kmp_par_range < 0; 194 } 195 } 196 semi3 = strchr(semi2 + 1, ';'); 197 if (__kmp_par_range_routine[0]) { 198 if ((semi3 != NULL) && (semi3 > semi2) && 199 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 200 return __kmp_par_range < 0; 201 } 202 } 203 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 204 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 205 return __kmp_par_range > 0; 206 } 207 return __kmp_par_range < 0; 208 } 209 return TRUE; 210 211 #endif /* KMP_DEBUG */ 212 } 213 214 /*! 215 @ingroup THREAD_STATES 216 @param loc Source location information. 217 @return 1 if this thread is executing inside an active parallel region, zero if 218 not. 219 */ 220 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 221 return __kmp_entry_thread()->th.th_root->r.r_active; 222 } 223 224 /*! 225 @ingroup PARALLEL 226 @param loc source location information 227 @param global_tid global thread number 228 @param num_threads number of threads requested for this parallel construct 229 230 Set the number of threads to be used by the next fork spawned by this thread. 231 This call is only required if the parallel construct has a `num_threads` clause. 232 */ 233 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 234 kmp_int32 num_threads) { 235 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 236 global_tid, num_threads)); 237 238 __kmp_push_num_threads(loc, global_tid, num_threads); 239 } 240 241 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 242 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 243 244 /* the num_threads are automatically popped */ 245 } 246 247 #if OMP_40_ENABLED 248 249 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 250 kmp_int32 proc_bind) { 251 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 252 proc_bind)); 253 254 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 255 } 256 257 #endif /* OMP_40_ENABLED */ 258 259 /*! 260 @ingroup PARALLEL 261 @param loc source location information 262 @param argc total number of arguments in the ellipsis 263 @param microtask pointer to callback routine consisting of outlined parallel 264 construct 265 @param ... pointers to shared variables that aren't global 266 267 Do the actual fork and call the microtask in the relevant number of threads. 268 */ 269 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{ 270 int gtid = __kmp_entry_gtid(); 271 272 #if (KMP_STATS_ENABLED) 273 // If we were in a serial region, then stop the serial timer, record 274 // the event, and start parallel region timer 275 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 276 if (previous_state == stats_state_e::SERIAL_REGION) { 277 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 278 } else { 279 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 280 } 281 int inParallel = __kmpc_in_parallel(loc); 282 if (inParallel) { 283 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 284 } else { 285 KMP_COUNT_BLOCK(OMP_PARALLEL); 286 } 287 #endif 288 289 // maybe to save thr_state is enough here 290 { 291 va_list ap; 292 va_start(ap, microtask); 293 294 #if OMPT_SUPPORT 295 ompt_frame_t *ompt_frame; 296 if (ompt_enabled.enabled) { 297 kmp_info_t *master_th = __kmp_threads[gtid]; 298 kmp_team_t *parent_team = master_th->th.th_team; 299 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 300 if (lwt) 301 ompt_frame = &(lwt->ompt_task_info.frame); 302 else { 303 int tid = __kmp_tid_from_gtid(gtid); 304 ompt_frame = &( 305 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 306 } 307 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 308 OMPT_STORE_RETURN_ADDRESS(gtid); 309 } 310 #endif 311 312 #if INCLUDE_SSC_MARKS 313 SSC_MARK_FORKING(); 314 #endif 315 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 316 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 317 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 318 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 319 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 320 &ap 321 #else 322 ap 323 #endif 324 ); 325 #if INCLUDE_SSC_MARKS 326 SSC_MARK_JOINING(); 327 #endif 328 __kmp_join_call(loc, gtid 329 #if OMPT_SUPPORT 330 , 331 fork_context_intel 332 #endif 333 ); 334 335 va_end(ap); 336 } 337 338 #if KMP_STATS_ENABLED 339 if (previous_state == stats_state_e::SERIAL_REGION) { 340 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 341 } else { 342 KMP_POP_PARTITIONED_TIMER(); 343 } 344 #endif // KMP_STATS_ENABLED 345 } 346 347 #if OMP_40_ENABLED 348 /*! 349 @ingroup PARALLEL 350 @param loc source location information 351 @param global_tid global thread number 352 @param num_teams number of teams requested for the teams construct 353 @param num_threads number of threads per team requested for the teams construct 354 355 Set the number of teams to be used by the teams construct. 356 This call is only required if the teams construct has a `num_teams` clause 357 or a `thread_limit` clause (or both). 358 */ 359 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 360 kmp_int32 num_teams, kmp_int32 num_threads) { 361 KA_TRACE(20, 362 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 363 global_tid, num_teams, num_threads)); 364 365 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 366 } 367 368 /*! 369 @ingroup PARALLEL 370 @param loc source location information 371 @param argc total number of arguments in the ellipsis 372 @param microtask pointer to callback routine consisting of outlined teams 373 construct 374 @param ... pointers to shared variables that aren't global 375 376 Do the actual fork and call the microtask in the relevant number of threads. 377 */ 378 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 379 ...) 
{ 380 int gtid = __kmp_entry_gtid(); 381 kmp_info_t *this_thr = __kmp_threads[gtid]; 382 va_list ap; 383 va_start(ap, microtask); 384 385 #if KMP_STATS_ENABLED 386 KMP_COUNT_BLOCK(OMP_TEAMS); 387 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 388 if (previous_state == stats_state_e::SERIAL_REGION) { 389 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead); 390 } else { 391 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead); 392 } 393 #endif 394 395 // remember teams entry point and nesting level 396 this_thr->th.th_teams_microtask = microtask; 397 this_thr->th.th_teams_level = 398 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 399 400 #if OMPT_SUPPORT 401 kmp_team_t *parent_team = this_thr->th.th_team; 402 int tid = __kmp_tid_from_gtid(gtid); 403 if (ompt_enabled.enabled) { 404 parent_team->t.t_implicit_task_taskdata[tid] 405 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 406 } 407 OMPT_STORE_RETURN_ADDRESS(gtid); 408 #endif 409 410 // check if __kmpc_push_num_teams called, set default number of teams 411 // otherwise 412 if (this_thr->th.th_teams_size.nteams == 0) { 413 __kmp_push_num_teams(loc, gtid, 0, 0); 414 } 415 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 416 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 417 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 418 419 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 420 VOLATILE_CAST(microtask_t) 421 __kmp_teams_master, // "wrapped" task 422 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, 423 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 424 &ap 425 #else 426 ap 427 #endif 428 ); 429 __kmp_join_call(loc, gtid 430 #if OMPT_SUPPORT 431 , 432 fork_context_intel 433 #endif 434 ); 435 436 // Pop current CG root off list 437 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 438 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 439 this_thr->th.th_cg_roots = tmp->up; 440 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 441 " to node %p. cg_nthreads was %d\n", 442 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); 443 __kmp_free(tmp); 444 // Restore current task's thread_limit from CG root 445 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 446 this_thr->th.th_current_task->td_icvs.thread_limit = 447 this_thr->th.th_cg_roots->cg_thread_limit; 448 449 this_thr->th.th_teams_microtask = NULL; 450 this_thr->th.th_teams_level = 0; 451 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; 452 va_end(ap); 453 #if KMP_STATS_ENABLED 454 if (previous_state == stats_state_e::SERIAL_REGION) { 455 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 456 } else { 457 KMP_POP_PARTITIONED_TIMER(); 458 } 459 #endif // KMP_STATS_ENABLED 460 } 461 #endif /* OMP_40_ENABLED */ 462 463 // I don't think this function should ever have been exported. 464 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated 465 // openmp code ever called it, but it's been exported from the RTL for so 466 // long that I'm afraid to remove the definition. 467 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } 468 469 /*! 470 @ingroup PARALLEL 471 @param loc source location information 472 @param global_tid global thread number 473 474 Enter a serialized parallel construct. This interface is used to handle a 475 conditional parallel region, like this, 476 @code 477 #pragma omp parallel if (condition) 478 @endcode 479 when the condition is false. 
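
A rough sketch of the lowering of the serialized (condition false) branch; the
names `outlined` and `shared`, and the use of `loc`/`gtid`, are illustrative
only and not part of this interface:
@code
__kmpc_serialized_parallel(loc, gtid);
outlined(&gtid, &bound_tid, &shared); // run the outlined body on this thread
__kmpc_end_serialized_parallel(loc, gtid);
@endcode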
480 */ 481 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 482 // The implementation is now in kmp_runtime.cpp so that it can share static 483 // functions with kmp_fork_call since the tasks to be done are similar in 484 // each case. 485 #if OMPT_SUPPORT 486 OMPT_STORE_RETURN_ADDRESS(global_tid); 487 #endif 488 __kmp_serialized_parallel(loc, global_tid); 489 } 490 491 /*! 492 @ingroup PARALLEL 493 @param loc source location information 494 @param global_tid global thread number 495 496 Leave a serialized parallel construct. 497 */ 498 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 499 kmp_internal_control_t *top; 500 kmp_info_t *this_thr; 501 kmp_team_t *serial_team; 502 503 KC_TRACE(10, 504 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 505 506 /* skip all this code for autopar serialized loops since it results in 507 unacceptable overhead */ 508 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 509 return; 510 511 // Not autopar code 512 if (!TCR_4(__kmp_init_parallel)) 513 __kmp_parallel_initialize(); 514 515 #if OMP_50_ENABLED 516 __kmp_resume_if_soft_paused(); 517 #endif 518 519 this_thr = __kmp_threads[global_tid]; 520 serial_team = this_thr->th.th_serial_team; 521 522 #if OMP_45_ENABLED 523 kmp_task_team_t *task_team = this_thr->th.th_task_team; 524 525 // we need to wait for the proxy tasks before finishing the thread 526 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 527 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 528 #endif 529 530 KMP_MB(); 531 KMP_DEBUG_ASSERT(serial_team); 532 KMP_ASSERT(serial_team->t.t_serialized); 533 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 534 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 535 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 536 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 537 538 #if OMPT_SUPPORT 539 if (ompt_enabled.enabled && 540 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 541 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 542 if (ompt_enabled.ompt_callback_implicit_task) { 543 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 544 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 545 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 546 } 547 548 // reset clear the task id only after unlinking the task 549 ompt_data_t *parent_task_data; 550 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 551 552 if (ompt_enabled.ompt_callback_parallel_end) { 553 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 554 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 555 ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid)); 556 } 557 __ompt_lw_taskteam_unlink(this_thr); 558 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 559 } 560 #endif 561 562 /* If necessary, pop the internal control stack values and replace the team 563 * values */ 564 top = serial_team->t.t_control_stack_top; 565 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 566 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 567 serial_team->t.t_control_stack_top = top->next; 568 __kmp_free(top); 569 } 570 571 // if( serial_team -> t.t_serialized > 1 ) 572 serial_team->t.t_level--; 573 574 /* pop dispatch buffers stack */ 575 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 576 { 577 dispatch_private_info_t *disp_buffer = 578 
serial_team->t.t_dispatch->th_disp_buffer; 579 serial_team->t.t_dispatch->th_disp_buffer = 580 serial_team->t.t_dispatch->th_disp_buffer->next; 581 __kmp_free(disp_buffer); 582 } 583 #if OMP_50_ENABLED 584 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 585 #endif 586 587 --serial_team->t.t_serialized; 588 if (serial_team->t.t_serialized == 0) { 589 590 /* return to the parallel section */ 591 592 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 593 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 594 __kmp_clear_x87_fpu_status_word(); 595 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 596 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 597 } 598 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 599 600 this_thr->th.th_team = serial_team->t.t_parent; 601 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 602 603 /* restore values cached in the thread */ 604 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 605 this_thr->th.th_team_master = 606 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 607 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 608 609 /* TODO the below shouldn't need to be adjusted for serialized teams */ 610 this_thr->th.th_dispatch = 611 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 612 613 __kmp_pop_current_task_from_thread(this_thr); 614 615 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 616 this_thr->th.th_current_task->td_flags.executing = 1; 617 618 if (__kmp_tasking_mode != tskm_immediate_exec) { 619 // Copy the task team from the new child / old parent team to the thread. 620 this_thr->th.th_task_team = 621 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 622 KA_TRACE(20, 623 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 624 "team %p\n", 625 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 626 } 627 } else { 628 if (__kmp_tasking_mode != tskm_immediate_exec) { 629 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 630 "depth of serial team %p to %d\n", 631 global_tid, serial_team, serial_team->t.t_serialized)); 632 } 633 } 634 635 if (__kmp_env_consistency_check) 636 __kmp_pop_parallel(global_tid, NULL); 637 #if OMPT_SUPPORT 638 if (ompt_enabled.enabled) 639 this_thr->th.ompt_thread_info.state = 640 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 641 : ompt_state_work_parallel); 642 #endif 643 } 644 645 /*! 646 @ingroup SYNCHRONIZATION 647 @param loc source location information. 648 649 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 650 depending on the memory ordering convention obeyed by the compiler 651 even that may not be necessary). 652 */ 653 void __kmpc_flush(ident_t *loc) { 654 KC_TRACE(10, ("__kmpc_flush: called\n")); 655 656 /* need explicit __mf() here since use volatile instead in library */ 657 KMP_MB(); /* Flush all pending memory write invalidates. */ 658 659 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 660 #if KMP_MIC 661 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 662 // We shouldn't need it, though, since the ABI rules require that 663 // * If the compiler generates NGO stores it also generates the fence 664 // * If users hand-code NGO stores they should insert the fence 665 // therefore no incomplete unordered stores should be visible. 666 #else 667 // C74404 668 // This is to address non-temporal store instructions (sfence needed). 
  // The clflush instruction is also covered here (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
  // Nothing to see here, move along.
#elif KMP_ARCH_PPC64
  // Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when the 'barrier' directive is present or at the
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
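
A sketch of how a <tt>master</tt> construct is typically lowered (illustrative
only; `loc` and `gtid` stand for the compiler-generated location descriptor and
the global thread number):
@code
if (__kmpc_master(loc, gtid)) {
  // ... body of the master region ...
  __kmpc_end_master(loc, gtid);
}
@endcode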
773 */ 774 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 775 int status = 0; 776 777 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 778 779 if (!TCR_4(__kmp_init_parallel)) 780 __kmp_parallel_initialize(); 781 782 #if OMP_50_ENABLED 783 __kmp_resume_if_soft_paused(); 784 #endif 785 786 if (KMP_MASTER_GTID(global_tid)) { 787 KMP_COUNT_BLOCK(OMP_MASTER); 788 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 789 status = 1; 790 } 791 792 #if OMPT_SUPPORT && OMPT_OPTIONAL 793 if (status) { 794 if (ompt_enabled.ompt_callback_master) { 795 kmp_info_t *this_thr = __kmp_threads[global_tid]; 796 kmp_team_t *team = this_thr->th.th_team; 797 798 int tid = __kmp_tid_from_gtid(global_tid); 799 ompt_callbacks.ompt_callback(ompt_callback_master)( 800 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 801 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 802 OMPT_GET_RETURN_ADDRESS(0)); 803 } 804 } 805 #endif 806 807 if (__kmp_env_consistency_check) { 808 #if KMP_USE_DYNAMIC_LOCK 809 if (status) 810 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 811 else 812 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 813 #else 814 if (status) 815 __kmp_push_sync(global_tid, ct_master, loc, NULL); 816 else 817 __kmp_check_sync(global_tid, ct_master, loc, NULL); 818 #endif 819 } 820 821 return status; 822 } 823 824 /*! 825 @ingroup WORK_SHARING 826 @param loc source location information. 827 @param global_tid global thread number . 828 829 Mark the end of a <tt>master</tt> region. This should only be called by the 830 thread that executes the <tt>master</tt> region. 831 */ 832 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 833 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 834 835 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 836 KMP_POP_PARTITIONED_TIMER(); 837 838 #if OMPT_SUPPORT && OMPT_OPTIONAL 839 kmp_info_t *this_thr = __kmp_threads[global_tid]; 840 kmp_team_t *team = this_thr->th.th_team; 841 if (ompt_enabled.ompt_callback_master) { 842 int tid = __kmp_tid_from_gtid(global_tid); 843 ompt_callbacks.ompt_callback(ompt_callback_master)( 844 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 845 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 846 OMPT_GET_RETURN_ADDRESS(0)); 847 } 848 #endif 849 850 if (__kmp_env_consistency_check) { 851 if (global_tid < 0) 852 KMP_WARNING(ThreadIdentInvalid); 853 854 if (KMP_MASTER_GTID(global_tid)) 855 __kmp_pop_sync(global_tid, ct_master, loc); 856 } 857 } 858 859 /*! 860 @ingroup WORK_SHARING 861 @param loc source location information. 862 @param gtid global thread number. 863 864 Start execution of an <tt>ordered</tt> construct. 
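
For illustration only, an <tt>ordered</tt> region inside a loop chunk is
expected to be bracketed by this pair of calls:
@code
__kmpc_ordered(loc, gtid);
// ... body of the ordered region, executed in iteration order ...
__kmpc_end_ordered(loc, gtid);
@endcode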
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
  // TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
977 void *idx; 978 kmp_indirect_lock_t **lck; 979 lck = (kmp_indirect_lock_t **)crit; 980 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 981 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 982 KMP_SET_I_LOCK_LOCATION(ilk, loc); 983 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 984 KA_TRACE(20, 985 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 986 #if USE_ITT_BUILD 987 __kmp_itt_critical_creating(ilk->lock, loc); 988 #endif 989 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 990 if (status == 0) { 991 #if USE_ITT_BUILD 992 __kmp_itt_critical_destroyed(ilk->lock); 993 #endif 994 // We don't really need to destroy the unclaimed lock here since it will be 995 // cleaned up at program exit. 996 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 997 } 998 KMP_DEBUG_ASSERT(*lck != NULL); 999 } 1000 1001 // Fast-path acquire tas lock 1002 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 1003 { \ 1004 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1005 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1006 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1007 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1008 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 1009 kmp_uint32 spins; \ 1010 KMP_FSYNC_PREPARE(l); \ 1011 KMP_INIT_YIELD(spins); \ 1012 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1013 do { \ 1014 if (TCR_4(__kmp_nth) > \ 1015 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1016 KMP_YIELD(TRUE); \ 1017 } else { \ 1018 KMP_YIELD_SPIN(spins); \ 1019 } \ 1020 __kmp_spin_backoff(&backoff); \ 1021 } while ( \ 1022 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1023 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1024 } \ 1025 KMP_FSYNC_ACQUIRED(l); \ 1026 } 1027 1028 // Fast-path test tas lock 1029 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1030 { \ 1031 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1032 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1033 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1034 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1035 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1036 } 1037 1038 // Fast-path release tas lock 1039 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1040 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1041 1042 #if KMP_USE_FUTEX 1043 1044 #include <sys/syscall.h> 1045 #include <unistd.h> 1046 #ifndef FUTEX_WAIT 1047 #define FUTEX_WAIT 0 1048 #endif 1049 #ifndef FUTEX_WAKE 1050 #define FUTEX_WAKE 1 1051 #endif 1052 1053 // Fast-path acquire futex lock 1054 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1055 { \ 1056 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1057 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1058 KMP_MB(); \ 1059 KMP_FSYNC_PREPARE(ftx); \ 1060 kmp_int32 poll_val; \ 1061 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1062 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1063 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1064 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1065 if (!cond) { \ 1066 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1067 poll_val | \ 1068 KMP_LOCK_BUSY(1, futex))) { \ 1069 continue; \ 1070 } \ 1071 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1072 } \ 1073 kmp_int32 rc; \ 1074 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1075 NULL, NULL, 0)) != 0) { \ 1076 continue; \ 1077 } \ 1078 gtid_code |= 1; \ 1079 } \ 1080 KMP_FSYNC_ACQUIRED(ftx); \ 1081 } 1082 1083 // Fast-path test 
futex lock 1084 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1085 { \ 1086 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1087 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1088 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1089 KMP_FSYNC_ACQUIRED(ftx); \ 1090 rc = TRUE; \ 1091 } else { \ 1092 rc = FALSE; \ 1093 } \ 1094 } 1095 1096 // Fast-path release futex lock 1097 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1098 { \ 1099 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1100 KMP_MB(); \ 1101 KMP_FSYNC_RELEASING(ftx); \ 1102 kmp_int32 poll_val = \ 1103 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1104 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1105 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1106 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1107 } \ 1108 KMP_MB(); \ 1109 KMP_YIELD_OVERSUB(); \ 1110 } 1111 1112 #endif // KMP_USE_FUTEX 1113 1114 #else // KMP_USE_DYNAMIC_LOCK 1115 1116 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1117 ident_t const *loc, 1118 kmp_int32 gtid) { 1119 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1120 1121 // Because of the double-check, the following load doesn't need to be volatile 1122 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1123 1124 if (lck == NULL) { 1125 void *idx; 1126 1127 // Allocate & initialize the lock. 1128 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1129 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1130 __kmp_init_user_lock_with_checks(lck); 1131 __kmp_set_user_lock_location(lck, loc); 1132 #if USE_ITT_BUILD 1133 __kmp_itt_critical_creating(lck); 1134 // __kmp_itt_critical_creating() should be called *before* the first usage 1135 // of underlying lock. It is the only place where we can guarantee it. There 1136 // are chances the lock will destroyed with no usage, but it is not a 1137 // problem, because this is not real event seen by user but rather setting 1138 // name for object (lock). See more details in kmp_itt.h. 1139 #endif /* USE_ITT_BUILD */ 1140 1141 // Use a cmpxchg instruction to slam the start of the critical section with 1142 // the lock pointer. If another thread beat us to it, deallocate the lock, 1143 // and use the lock that the other thread allocated. 1144 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1145 1146 if (status == 0) { 1147 // Deallocate the lock and reload the value. 1148 #if USE_ITT_BUILD 1149 __kmp_itt_critical_destroyed(lck); 1150 // Let ITT know the lock is destroyed and the same memory location may be reused 1151 // for another purpose. 1152 #endif /* USE_ITT_BUILD */ 1153 __kmp_destroy_user_lock_with_checks(lck); 1154 __kmp_user_lock_free(&idx, gtid, lck); 1155 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1156 KMP_DEBUG_ASSERT(lck != NULL); 1157 } 1158 } 1159 return lck; 1160 } 1161 1162 #endif // KMP_USE_DYNAMIC_LOCK 1163 1164 /*! 1165 @ingroup WORK_SHARING 1166 @param loc source location information. 1167 @param global_tid global thread number . 1168 @param crit identity of the critical section. This could be a pointer to a lock 1169 associated with the critical section, or some other suitably unique value. 1170 1171 Enter code protected by a `critical` construct. 1172 This function blocks until the executing thread can enter the critical section. 
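
A sketch of the usual lowering of a named or unnamed <tt>critical</tt>
construct; `lck_storage` is an illustrative name for the compiler-generated
kmp_critical_name object, which starts out zero-filled and is shared by all
threads using the same critical name (`loc` and `gtid` are also illustrative):
@code
static kmp_critical_name lck_storage; // zero-initialized static storage
__kmpc_critical(loc, gtid, &lck_storage);
// ... body of the critical region ...
__kmpc_end_critical(loc, gtid, &lck_storage);
@endcode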
1173 */ 1174 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1175 kmp_critical_name *crit) { 1176 #if KMP_USE_DYNAMIC_LOCK 1177 #if OMPT_SUPPORT && OMPT_OPTIONAL 1178 OMPT_STORE_RETURN_ADDRESS(global_tid); 1179 #endif // OMPT_SUPPORT 1180 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1181 #else 1182 KMP_COUNT_BLOCK(OMP_CRITICAL); 1183 #if OMPT_SUPPORT && OMPT_OPTIONAL 1184 ompt_state_t prev_state = ompt_state_undefined; 1185 ompt_thread_info_t ti; 1186 #endif 1187 kmp_user_lock_p lck; 1188 1189 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1190 1191 // TODO: add THR_OVHD_STATE 1192 1193 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1194 KMP_CHECK_USER_LOCK_INIT(); 1195 1196 if ((__kmp_user_lock_kind == lk_tas) && 1197 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1198 lck = (kmp_user_lock_p)crit; 1199 } 1200 #if KMP_USE_FUTEX 1201 else if ((__kmp_user_lock_kind == lk_futex) && 1202 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1203 lck = (kmp_user_lock_p)crit; 1204 } 1205 #endif 1206 else { // ticket, queuing or drdpa 1207 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1208 } 1209 1210 if (__kmp_env_consistency_check) 1211 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1212 1213 // since the critical directive binds to all threads, not just the current 1214 // team we have to check this even if we are in a serialized team. 1215 // also, even if we are the uber thread, we still have to conduct the lock, 1216 // as we have to contend with sibling threads. 1217 1218 #if USE_ITT_BUILD 1219 __kmp_itt_critical_acquiring(lck); 1220 #endif /* USE_ITT_BUILD */ 1221 #if OMPT_SUPPORT && OMPT_OPTIONAL 1222 OMPT_STORE_RETURN_ADDRESS(gtid); 1223 void *codeptr_ra = NULL; 1224 if (ompt_enabled.enabled) { 1225 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1226 /* OMPT state update */ 1227 prev_state = ti.state; 1228 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1229 ti.state = ompt_state_wait_critical; 1230 1231 /* OMPT event callback */ 1232 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1233 if (ompt_enabled.ompt_callback_mutex_acquire) { 1234 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1235 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1236 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1237 } 1238 } 1239 #endif 1240 // Value of 'crit' should be good for using as a critical_id of the critical 1241 // section directive. 
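  // (The acquire below blocks until the lock is obtained; the OMPT
  // mutex_acquired callback further down is only invoked once the lock is
  // actually held.)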
1242 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1243 1244 #if USE_ITT_BUILD 1245 __kmp_itt_critical_acquired(lck); 1246 #endif /* USE_ITT_BUILD */ 1247 #if OMPT_SUPPORT && OMPT_OPTIONAL 1248 if (ompt_enabled.enabled) { 1249 /* OMPT state update */ 1250 ti.state = prev_state; 1251 ti.wait_id = 0; 1252 1253 /* OMPT event callback */ 1254 if (ompt_enabled.ompt_callback_mutex_acquired) { 1255 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1256 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1257 } 1258 } 1259 #endif 1260 KMP_POP_PARTITIONED_TIMER(); 1261 1262 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1263 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1264 #endif // KMP_USE_DYNAMIC_LOCK 1265 } 1266 1267 #if KMP_USE_DYNAMIC_LOCK 1268 1269 // Converts the given hint to an internal lock implementation 1270 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1271 #if KMP_USE_TSX 1272 #define KMP_TSX_LOCK(seq) lockseq_##seq 1273 #else 1274 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1275 #endif 1276 1277 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1278 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1279 #else 1280 #define KMP_CPUINFO_RTM 0 1281 #endif 1282 1283 // Hints that do not require further logic 1284 if (hint & kmp_lock_hint_hle) 1285 return KMP_TSX_LOCK(hle); 1286 if (hint & kmp_lock_hint_rtm) 1287 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; 1288 if (hint & kmp_lock_hint_adaptive) 1289 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1290 1291 // Rule out conflicting hints first by returning the default lock 1292 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1293 return __kmp_user_lock_seq; 1294 if ((hint & omp_lock_hint_speculative) && 1295 (hint & omp_lock_hint_nonspeculative)) 1296 return __kmp_user_lock_seq; 1297 1298 // Do not even consider speculation when it appears to be contended 1299 if (hint & omp_lock_hint_contended) 1300 return lockseq_queuing; 1301 1302 // Uncontended lock without speculation 1303 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1304 return lockseq_tas; 1305 1306 // HLE lock for speculation 1307 if (hint & omp_lock_hint_speculative) 1308 return KMP_TSX_LOCK(hle); 1309 1310 return __kmp_user_lock_seq; 1311 } 1312 1313 #if OMPT_SUPPORT && OMPT_OPTIONAL 1314 #if KMP_USE_DYNAMIC_LOCK 1315 static kmp_mutex_impl_t 1316 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1317 if (user_lock) { 1318 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1319 case 0: 1320 break; 1321 #if KMP_USE_FUTEX 1322 case locktag_futex: 1323 return kmp_mutex_impl_queuing; 1324 #endif 1325 case locktag_tas: 1326 return kmp_mutex_impl_spin; 1327 #if KMP_USE_TSX 1328 case locktag_hle: 1329 return kmp_mutex_impl_speculative; 1330 #endif 1331 default: 1332 return kmp_mutex_impl_none; 1333 } 1334 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1335 } 1336 KMP_ASSERT(ilock); 1337 switch (ilock->type) { 1338 #if KMP_USE_TSX 1339 case locktag_adaptive: 1340 case locktag_rtm: 1341 return kmp_mutex_impl_speculative; 1342 #endif 1343 case locktag_nested_tas: 1344 return kmp_mutex_impl_spin; 1345 #if KMP_USE_FUTEX 1346 case locktag_nested_futex: 1347 #endif 1348 case locktag_ticket: 1349 case locktag_queuing: 1350 case locktag_drdpa: 1351 case locktag_nested_ticket: 1352 case locktag_nested_queuing: 1353 case locktag_nested_drdpa: 1354 return kmp_mutex_impl_queuing; 1355 default: 1356 return kmp_mutex_impl_none; 
1357 } 1358 } 1359 #else 1360 // For locks without dynamic binding 1361 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1362 switch (__kmp_user_lock_kind) { 1363 case lk_tas: 1364 return kmp_mutex_impl_spin; 1365 #if KMP_USE_FUTEX 1366 case lk_futex: 1367 #endif 1368 case lk_ticket: 1369 case lk_queuing: 1370 case lk_drdpa: 1371 return kmp_mutex_impl_queuing; 1372 #if KMP_USE_TSX 1373 case lk_hle: 1374 case lk_rtm: 1375 case lk_adaptive: 1376 return kmp_mutex_impl_speculative; 1377 #endif 1378 default: 1379 return kmp_mutex_impl_none; 1380 } 1381 } 1382 #endif // KMP_USE_DYNAMIC_LOCK 1383 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1384 1385 /*! 1386 @ingroup WORK_SHARING 1387 @param loc source location information. 1388 @param global_tid global thread number. 1389 @param crit identity of the critical section. This could be a pointer to a lock 1390 associated with the critical section, or some other suitably unique value. 1391 @param hint the lock hint. 1392 1393 Enter code protected by a `critical` construct with a hint. The hint value is 1394 used to suggest a lock implementation. This function blocks until the executing 1395 thread can enter the critical section unless the hint suggests use of 1396 speculative execution and the hardware supports it. 1397 */ 1398 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1399 kmp_critical_name *crit, uint32_t hint) { 1400 KMP_COUNT_BLOCK(OMP_CRITICAL); 1401 kmp_user_lock_p lck; 1402 #if OMPT_SUPPORT && OMPT_OPTIONAL 1403 ompt_state_t prev_state = ompt_state_undefined; 1404 ompt_thread_info_t ti; 1405 // This is the case, if called from __kmpc_critical: 1406 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1407 if (!codeptr) 1408 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1409 #endif 1410 1411 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1412 1413 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1414 // Check if it is initialized. 1415 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1416 if (*lk == 0) { 1417 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1418 if (KMP_IS_D_LOCK(lckseq)) { 1419 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1420 KMP_GET_D_TAG(lckseq)); 1421 } else { 1422 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1423 } 1424 } 1425 // Branch for accessing the actual lock object and set operation. This 1426 // branching is inevitable since this lock initialization does not follow the 1427 // normal dispatch path (lock table is not used). 
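  // (A nonzero direct-lock tag in *lk means the lock word itself is the lock
  // object, e.g. a TAS or futex lock; a zero tag means the word holds a
  // pointer to a kmp_indirect_lock_t installed by __kmp_init_indirect_csptr()
  // above.)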
1428 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1429 lck = (kmp_user_lock_p)lk; 1430 if (__kmp_env_consistency_check) { 1431 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1432 __kmp_map_hint_to_lock(hint)); 1433 } 1434 #if USE_ITT_BUILD 1435 __kmp_itt_critical_acquiring(lck); 1436 #endif 1437 #if OMPT_SUPPORT && OMPT_OPTIONAL 1438 if (ompt_enabled.enabled) { 1439 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1440 /* OMPT state update */ 1441 prev_state = ti.state; 1442 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1443 ti.state = ompt_state_wait_critical; 1444 1445 /* OMPT event callback */ 1446 if (ompt_enabled.ompt_callback_mutex_acquire) { 1447 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1448 ompt_mutex_critical, (unsigned int)hint, 1449 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1450 codeptr); 1451 } 1452 } 1453 #endif 1454 #if KMP_USE_INLINED_TAS 1455 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1456 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1457 } else 1458 #elif KMP_USE_INLINED_FUTEX 1459 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1460 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1461 } else 1462 #endif 1463 { 1464 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1465 } 1466 } else { 1467 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1468 lck = ilk->lock; 1469 if (__kmp_env_consistency_check) { 1470 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1471 __kmp_map_hint_to_lock(hint)); 1472 } 1473 #if USE_ITT_BUILD 1474 __kmp_itt_critical_acquiring(lck); 1475 #endif 1476 #if OMPT_SUPPORT && OMPT_OPTIONAL 1477 if (ompt_enabled.enabled) { 1478 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1479 /* OMPT state update */ 1480 prev_state = ti.state; 1481 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1482 ti.state = ompt_state_wait_critical; 1483 1484 /* OMPT event callback */ 1485 if (ompt_enabled.ompt_callback_mutex_acquire) { 1486 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1487 ompt_mutex_critical, (unsigned int)hint, 1488 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1489 codeptr); 1490 } 1491 } 1492 #endif 1493 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1494 } 1495 KMP_POP_PARTITIONED_TIMER(); 1496 1497 #if USE_ITT_BUILD 1498 __kmp_itt_critical_acquired(lck); 1499 #endif /* USE_ITT_BUILD */ 1500 #if OMPT_SUPPORT && OMPT_OPTIONAL 1501 if (ompt_enabled.enabled) { 1502 /* OMPT state update */ 1503 ti.state = prev_state; 1504 ti.wait_id = 0; 1505 1506 /* OMPT event callback */ 1507 if (ompt_enabled.ompt_callback_mutex_acquired) { 1508 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1509 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1510 } 1511 } 1512 #endif 1513 1514 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1515 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1516 } // __kmpc_critical_with_hint 1517 1518 #endif // KMP_USE_DYNAMIC_LOCK 1519 1520 /*! 1521 @ingroup WORK_SHARING 1522 @param loc source location information. 1523 @param global_tid global thread number . 1524 @param crit identity of the critical section. This could be a pointer to a lock 1525 associated with the critical section, or some other suitably unique value. 1526 1527 Leave a critical section, releasing any lock that was held during its execution. 
1528 */ 1529 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1530 kmp_critical_name *crit) { 1531 kmp_user_lock_p lck; 1532 1533 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1534 1535 #if KMP_USE_DYNAMIC_LOCK 1536 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1537 lck = (kmp_user_lock_p)crit; 1538 KMP_ASSERT(lck != NULL); 1539 if (__kmp_env_consistency_check) { 1540 __kmp_pop_sync(global_tid, ct_critical, loc); 1541 } 1542 #if USE_ITT_BUILD 1543 __kmp_itt_critical_releasing(lck); 1544 #endif 1545 #if KMP_USE_INLINED_TAS 1546 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1547 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1548 } else 1549 #elif KMP_USE_INLINED_FUTEX 1550 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1551 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1552 } else 1553 #endif 1554 { 1555 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1556 } 1557 } else { 1558 kmp_indirect_lock_t *ilk = 1559 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1560 KMP_ASSERT(ilk != NULL); 1561 lck = ilk->lock; 1562 if (__kmp_env_consistency_check) { 1563 __kmp_pop_sync(global_tid, ct_critical, loc); 1564 } 1565 #if USE_ITT_BUILD 1566 __kmp_itt_critical_releasing(lck); 1567 #endif 1568 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1569 } 1570 1571 #else // KMP_USE_DYNAMIC_LOCK 1572 1573 if ((__kmp_user_lock_kind == lk_tas) && 1574 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1575 lck = (kmp_user_lock_p)crit; 1576 } 1577 #if KMP_USE_FUTEX 1578 else if ((__kmp_user_lock_kind == lk_futex) && 1579 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1580 lck = (kmp_user_lock_p)crit; 1581 } 1582 #endif 1583 else { // ticket, queuing or drdpa 1584 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1585 } 1586 1587 KMP_ASSERT(lck != NULL); 1588 1589 if (__kmp_env_consistency_check) 1590 __kmp_pop_sync(global_tid, ct_critical, loc); 1591 1592 #if USE_ITT_BUILD 1593 __kmp_itt_critical_releasing(lck); 1594 #endif /* USE_ITT_BUILD */ 1595 // Value of 'crit' should be good for using as a critical_id of the critical 1596 // section directive. 1597 __kmp_release_user_lock_with_checks(lck, global_tid); 1598 1599 #endif // KMP_USE_DYNAMIC_LOCK 1600 1601 #if OMPT_SUPPORT && OMPT_OPTIONAL 1602 /* OMPT release event triggers after lock is released; place here to trigger 1603 * for all #if branches */ 1604 OMPT_STORE_RETURN_ADDRESS(global_tid); 1605 if (ompt_enabled.ompt_callback_mutex_released) { 1606 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1607 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1608 OMPT_LOAD_RETURN_ADDRESS(0)); 1609 } 1610 #endif 1611 1612 KMP_POP_PARTITIONED_TIMER(); 1613 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1614 } 1615 1616 /*! 1617 @ingroup SYNCHRONIZATION 1618 @param loc source location information 1619 @param global_tid thread id. 1620 @return one if the thread should execute the master block, zero otherwise 1621 1622 Start execution of a combined barrier and master. The barrier is executed inside 1623 this function. 
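
A sketch of the intended usage (illustrative only; `loc` and `gtid` are the
location descriptor and global thread number): the barrier is executed by every
thread inside the call, and only the master thread enters the guarded block:
@code
if (__kmpc_barrier_master(loc, gtid)) {
  // ... code executed only by the master thread ...
  __kmpc_end_barrier_master(loc, gtid); // releases the waiting threads
}
@endcode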
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the other threads do not wait for
the master block to finish, so there is nothing for an "end" call to release.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1702 } 1703 __kmp_check_barrier(global_tid, ct_barrier, loc); 1704 } 1705 1706 #if OMPT_SUPPORT 1707 ompt_frame_t *ompt_frame; 1708 if (ompt_enabled.enabled) { 1709 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1710 if (ompt_frame->enter_frame.ptr == NULL) 1711 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1712 OMPT_STORE_RETURN_ADDRESS(global_tid); 1713 } 1714 #endif 1715 #if USE_ITT_NOTIFY 1716 __kmp_threads[global_tid]->th.th_ident = loc; 1717 #endif 1718 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1719 #if OMPT_SUPPORT && OMPT_OPTIONAL 1720 if (ompt_enabled.enabled) { 1721 ompt_frame->enter_frame = ompt_data_none; 1722 } 1723 #endif 1724 1725 ret = __kmpc_master(loc, global_tid); 1726 1727 if (__kmp_env_consistency_check) { 1728 /* there's no __kmpc_end_master called; so the (stats) */ 1729 /* actions of __kmpc_end_master are done here */ 1730 1731 if (global_tid < 0) { 1732 KMP_WARNING(ThreadIdentInvalid); 1733 } 1734 if (ret) { 1735 /* only one thread should do the pop since only */ 1736 /* one did the push (see __kmpc_master()) */ 1737 1738 __kmp_pop_sync(global_tid, ct_master, loc); 1739 } 1740 } 1741 1742 return (ret); 1743 } 1744 1745 /* The BARRIER for a SINGLE process section is always explicit */ 1746 /*! 1747 @ingroup WORK_SHARING 1748 @param loc source location information 1749 @param global_tid global thread number 1750 @return One if this thread should execute the single construct, zero otherwise. 1751 1752 Test whether to execute a <tt>single</tt> construct. 1753 There are no implicit barriers in the two "single" calls, rather the compiler 1754 should introduce an explicit barrier if it is required. 1755 */ 1756 1757 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1758 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1759 1760 if (rc) { 1761 // We are going to execute the single statement, so we should count it. 1762 KMP_COUNT_BLOCK(OMP_SINGLE); 1763 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1764 } 1765 1766 #if OMPT_SUPPORT && OMPT_OPTIONAL 1767 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1768 kmp_team_t *team = this_thr->th.th_team; 1769 int tid = __kmp_tid_from_gtid(global_tid); 1770 1771 if (ompt_enabled.enabled) { 1772 if (rc) { 1773 if (ompt_enabled.ompt_callback_work) { 1774 ompt_callbacks.ompt_callback(ompt_callback_work)( 1775 ompt_work_single_executor, ompt_scope_begin, 1776 &(team->t.ompt_team_info.parallel_data), 1777 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1778 1, OMPT_GET_RETURN_ADDRESS(0)); 1779 } 1780 } else { 1781 if (ompt_enabled.ompt_callback_work) { 1782 ompt_callbacks.ompt_callback(ompt_callback_work)( 1783 ompt_work_single_other, ompt_scope_begin, 1784 &(team->t.ompt_team_info.parallel_data), 1785 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1786 1, OMPT_GET_RETURN_ADDRESS(0)); 1787 ompt_callbacks.ompt_callback(ompt_callback_work)( 1788 ompt_work_single_other, ompt_scope_end, 1789 &(team->t.ompt_team_info.parallel_data), 1790 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1791 1, OMPT_GET_RETURN_ADDRESS(0)); 1792 } 1793 } 1794 } 1795 #endif 1796 1797 return rc; 1798 } 1799 1800 /*! 1801 @ingroup WORK_SHARING 1802 @param loc source location information 1803 @param global_tid global thread number 1804 1805 Mark the end of a <tt>single</tt> construct. This function should 1806 only be called by the thread that executed the block of code protected 1807 by the `single` construct. 
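
For illustration, a <tt>single</tt> construct is expected to be lowered along
these lines (`loc` and `gtid` are illustrative; the trailing barrier is emitted
by the compiler unless a <tt>nowait</tt> clause is present):
@code
if (__kmpc_single(loc, gtid)) {
  // ... body of the single region ...
  __kmpc_end_single(loc, gtid);
}
__kmpc_barrier(loc, gtid); // omitted for single ... nowait
@endcode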
1808 */ 1809 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1810 __kmp_exit_single(global_tid); 1811 KMP_POP_PARTITIONED_TIMER(); 1812 1813 #if OMPT_SUPPORT && OMPT_OPTIONAL 1814 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1815 kmp_team_t *team = this_thr->th.th_team; 1816 int tid = __kmp_tid_from_gtid(global_tid); 1817 1818 if (ompt_enabled.ompt_callback_work) { 1819 ompt_callbacks.ompt_callback(ompt_callback_work)( 1820 ompt_work_single_executor, ompt_scope_end, 1821 &(team->t.ompt_team_info.parallel_data), 1822 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1823 OMPT_GET_RETURN_ADDRESS(0)); 1824 } 1825 #endif 1826 } 1827 1828 /*! 1829 @ingroup WORK_SHARING 1830 @param loc Source location 1831 @param global_tid Global thread id 1832 1833 Mark the end of a statically scheduled loop. 1834 */ 1835 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1836 KMP_POP_PARTITIONED_TIMER(); 1837 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1838 1839 #if OMPT_SUPPORT && OMPT_OPTIONAL 1840 if (ompt_enabled.ompt_callback_work) { 1841 ompt_work_t ompt_work_type = ompt_work_loop; 1842 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1843 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1844 // Determine workshare type 1845 if (loc != NULL) { 1846 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1847 ompt_work_type = ompt_work_loop; 1848 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1849 ompt_work_type = ompt_work_sections; 1850 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1851 ompt_work_type = ompt_work_distribute; 1852 } else { 1853 // use default set above. 1854 // a warning about this case is provided in __kmpc_for_static_init 1855 } 1856 KMP_DEBUG_ASSERT(ompt_work_type); 1857 } 1858 ompt_callbacks.ompt_callback(ompt_callback_work)( 1859 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1860 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1861 } 1862 #endif 1863 if (__kmp_env_consistency_check) 1864 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1865 } 1866 1867 // User routines which take C-style arguments (call by value) 1868 // different from the Fortran equivalent routines 1869 1870 void ompc_set_num_threads(int arg) { 1871 // !!!!! TODO: check the per-task binding 1872 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1873 } 1874 1875 void ompc_set_dynamic(int flag) { 1876 kmp_info_t *thread; 1877 1878 /* For the thread-private implementation of the internal controls */ 1879 thread = __kmp_entry_thread(); 1880 1881 __kmp_save_internal_controls(thread); 1882 1883 set__dynamic(thread, flag ? TRUE : FALSE); 1884 } 1885 1886 void ompc_set_nested(int flag) { 1887 kmp_info_t *thread; 1888 1889 /* For the thread-private internal controls implementation */ 1890 thread = __kmp_entry_thread(); 1891 1892 __kmp_save_internal_controls(thread); 1893 1894 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1895 } 1896 1897 void ompc_set_max_active_levels(int max_active_levels) { 1898 /* TO DO */ 1899 /* we want per-task implementation of this internal control */ 1900 1901 /* For the per-thread internal controls implementation */ 1902 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1903 } 1904 1905 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1906 // !!!!! 
TODO: check the per-task binding 1907 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1908 } 1909 1910 int ompc_get_ancestor_thread_num(int level) { 1911 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1912 } 1913 1914 int ompc_get_team_size(int level) { 1915 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1916 } 1917 1918 #if OMP_50_ENABLED 1919 /* OpenMP 5.0 Affinity Format API */ 1920 1921 void ompc_set_affinity_format(char const *format) { 1922 if (!__kmp_init_serial) { 1923 __kmp_serial_initialize(); 1924 } 1925 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1926 format, KMP_STRLEN(format) + 1); 1927 } 1928 1929 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1930 size_t format_size; 1931 if (!__kmp_init_serial) { 1932 __kmp_serial_initialize(); 1933 } 1934 format_size = KMP_STRLEN(__kmp_affinity_format); 1935 if (buffer && size) { 1936 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1937 format_size + 1); 1938 } 1939 return format_size; 1940 } 1941 1942 void ompc_display_affinity(char const *format) { 1943 int gtid; 1944 if (!TCR_4(__kmp_init_middle)) { 1945 __kmp_middle_initialize(); 1946 } 1947 gtid = __kmp_get_gtid(); 1948 __kmp_aux_display_affinity(gtid, format); 1949 } 1950 1951 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1952 char const *format) { 1953 int gtid; 1954 size_t num_required; 1955 kmp_str_buf_t capture_buf; 1956 if (!TCR_4(__kmp_init_middle)) { 1957 __kmp_middle_initialize(); 1958 } 1959 gtid = __kmp_get_gtid(); 1960 __kmp_str_buf_init(&capture_buf); 1961 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1962 if (buffer && buf_size) { 1963 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1964 capture_buf.used + 1); 1965 } 1966 __kmp_str_buf_free(&capture_buf); 1967 return num_required; 1968 } 1969 #endif /* OMP_50_ENABLED */ 1970 1971 void kmpc_set_stacksize(int arg) { 1972 // __kmp_aux_set_stacksize initializes the library if needed 1973 __kmp_aux_set_stacksize(arg); 1974 } 1975 1976 void kmpc_set_stacksize_s(size_t arg) { 1977 // __kmp_aux_set_stacksize initializes the library if needed 1978 __kmp_aux_set_stacksize(arg); 1979 } 1980 1981 void kmpc_set_blocktime(int arg) { 1982 int gtid, tid; 1983 kmp_info_t *thread; 1984 1985 gtid = __kmp_entry_gtid(); 1986 tid = __kmp_tid_from_gtid(gtid); 1987 thread = __kmp_thread_from_gtid(gtid); 1988 1989 __kmp_aux_set_blocktime(arg, thread, tid); 1990 } 1991 1992 void kmpc_set_library(int arg) { 1993 // __kmp_user_set_library initializes the library if needed 1994 __kmp_user_set_library((enum library_type)arg); 1995 } 1996 1997 void kmpc_set_defaults(char const *str) { 1998 // __kmp_aux_set_defaults initializes the library if needed 1999 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2000 } 2001 2002 void kmpc_set_disp_num_buffers(int arg) { 2003 // ignore after initialization because some teams have already 2004 // allocated dispatch buffers 2005 if (__kmp_init_serial == 0 && arg > 0) 2006 __kmp_dispatch_num_buffers = arg; 2007 } 2008 2009 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2010 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2011 return -1; 2012 #else 2013 if (!TCR_4(__kmp_init_middle)) { 2014 __kmp_middle_initialize(); 2015 } 2016 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2017 #endif 2018 } 2019 2020 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2021 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2022 return -1; 2023 #else 2024 if 
(!TCR_4(__kmp_init_middle)) { 2025 __kmp_middle_initialize(); 2026 } 2027 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2028 #endif 2029 } 2030 2031 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2032 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2033 return -1; 2034 #else 2035 if (!TCR_4(__kmp_init_middle)) { 2036 __kmp_middle_initialize(); 2037 } 2038 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2039 #endif 2040 } 2041 2042 /* -------------------------------------------------------------------------- */ 2043 /*! 2044 @ingroup THREADPRIVATE 2045 @param loc source location information 2046 @param gtid global thread number 2047 @param cpy_size size of the cpy_data buffer 2048 @param cpy_data pointer to data to be copied 2049 @param cpy_func helper function to call for copying data 2050 @param didit flag variable: 1=single thread; 0=not single thread 2051 2052 __kmpc_copyprivate implements the interface for the private data broadcast 2053 needed for the copyprivate clause associated with a single region in an 2054 OpenMP<sup>*</sup> program (both C and Fortran). 2055 All threads participating in the parallel region call this routine. 2056 One of the threads (called the single thread) should have the <tt>didit</tt> 2057 variable set to 1 and all other threads should have that variable set to 0. 2058 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2059 2060 The OpenMP specification forbids the use of nowait on the single region when a 2061 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2062 barrier internally to avoid race conditions, so the code generation for the 2063 single region should avoid generating a barrier after the call to @ref 2064 __kmpc_copyprivate. 2065 2066 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2067 The <tt>loc</tt> parameter is a pointer to source location information. 2068 2069 Internal implementation: The single thread will first copy its descriptor 2070 address (cpy_data) to a team-private location, then the other threads will each 2071 call the function pointed to by the parameter cpy_func, which carries out the 2072 copy by copying the data using the cpy_data buffer. 2073 2074 The cpy_func routine used for the copy and the contents of the data area defined 2075 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2076 to be done. For instance, the cpy_data buffer can hold the actual data to be 2077 copied or it may hold a list of pointers to the data. The cpy_func routine must 2078 interpret the cpy_data buffer appropriately. 2079 2080 The interface to cpy_func is as follows: 2081 @code 2082 void cpy_func( void *destination, void *source ) 2083 @endcode 2084 where void *destination is the cpy_data pointer for the thread being copied to 2085 and void *source is the cpy_data pointer for the thread being copied from. 
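As a sketch only (the real layout of cpy_data is chosen by the compiler), a
copyprivate of one scalar variable could use a buffer holding a single pointer
to that variable, together with a copy routine such as:
@code
// Hypothetical helper for `copyprivate(x)` where x is an int private to each thread.
void cpy_func(void *destination, void *source) {
  // Each cpy_data buffer holds one pointer to the thread's private copy of x.
  int *dst_x = (int *)((void **)destination)[0];
  int *src_x = (int *)((void **)source)[0];
  *dst_x = *src_x; // broadcast the single thread's value
}
@endcode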
2086 */ 2087 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2088 void *cpy_data, void (*cpy_func)(void *, void *), 2089 kmp_int32 didit) { 2090 void **data_ptr; 2091 2092 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2093 2094 KMP_MB(); 2095 2096 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2097 2098 if (__kmp_env_consistency_check) { 2099 if (loc == 0) { 2100 KMP_WARNING(ConstructIdentInvalid); 2101 } 2102 } 2103 2104 // ToDo: Optimize the following two barriers into some kind of split barrier 2105 2106 if (didit) 2107 *data_ptr = cpy_data; 2108 2109 #if OMPT_SUPPORT 2110 ompt_frame_t *ompt_frame; 2111 if (ompt_enabled.enabled) { 2112 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2113 if (ompt_frame->enter_frame.ptr == NULL) 2114 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2115 OMPT_STORE_RETURN_ADDRESS(gtid); 2116 } 2117 #endif 2118 /* This barrier is not a barrier region boundary */ 2119 #if USE_ITT_NOTIFY 2120 __kmp_threads[gtid]->th.th_ident = loc; 2121 #endif 2122 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2123 2124 if (!didit) 2125 (*cpy_func)(cpy_data, *data_ptr); 2126 2127 // Consider next barrier a user-visible barrier for barrier region boundaries 2128 // Nesting checks are already handled by the single construct checks 2129 2130 #if OMPT_SUPPORT 2131 if (ompt_enabled.enabled) { 2132 OMPT_STORE_RETURN_ADDRESS(gtid); 2133 } 2134 #endif 2135 #if USE_ITT_NOTIFY 2136 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2137 // tasks can overwrite the location) 2138 #endif 2139 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2140 #if OMPT_SUPPORT && OMPT_OPTIONAL 2141 if (ompt_enabled.enabled) { 2142 ompt_frame->enter_frame = ompt_data_none; 2143 } 2144 #endif 2145 } 2146 2147 /* -------------------------------------------------------------------------- */ 2148 2149 #define INIT_LOCK __kmp_init_user_lock_with_checks 2150 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2151 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2152 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2153 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2154 #define ACQUIRE_NESTED_LOCK_TIMED \ 2155 __kmp_acquire_nested_user_lock_with_checks_timed 2156 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2157 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2158 #define TEST_LOCK __kmp_test_user_lock_with_checks 2159 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2160 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2161 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2162 2163 // TODO: Make check abort messages use location info & pass it into 2164 // with_checks routines 2165 2166 #if KMP_USE_DYNAMIC_LOCK 2167 2168 // internal lock initializer 2169 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2170 kmp_dyna_lockseq_t seq) { 2171 if (KMP_IS_D_LOCK(seq)) { 2172 KMP_INIT_D_LOCK(lock, seq); 2173 #if USE_ITT_BUILD 2174 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2175 #endif 2176 } else { 2177 KMP_INIT_I_LOCK(lock, seq); 2178 #if USE_ITT_BUILD 2179 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2180 __kmp_itt_lock_creating(ilk->lock, loc); 2181 #endif 2182 } 2183 } 2184 2185 // internal nest lock initializer 2186 static __forceinline void 2187 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2188 kmp_dyna_lockseq_t seq) { 2189 #if KMP_USE_TSX 2190 // Don't have nested lock implementation for speculative locks 2191 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2192 seq = __kmp_user_lock_seq; 2193 #endif 2194 switch (seq) { 2195 case lockseq_tas: 2196 seq = lockseq_nested_tas; 2197 break; 2198 #if KMP_USE_FUTEX 2199 case lockseq_futex: 2200 seq = lockseq_nested_futex; 2201 break; 2202 #endif 2203 case lockseq_ticket: 2204 seq = lockseq_nested_ticket; 2205 break; 2206 case lockseq_queuing: 2207 seq = lockseq_nested_queuing; 2208 break; 2209 case lockseq_drdpa: 2210 seq = lockseq_nested_drdpa; 2211 break; 2212 default: 2213 seq = lockseq_nested_queuing; 2214 } 2215 KMP_INIT_I_LOCK(lock, seq); 2216 #if USE_ITT_BUILD 2217 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2218 __kmp_itt_lock_creating(ilk->lock, loc); 2219 #endif 2220 } 2221 2222 /* initialize the lock with a hint */ 2223 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2224 uintptr_t hint) { 2225 KMP_DEBUG_ASSERT(__kmp_init_serial); 2226 if (__kmp_env_consistency_check && user_lock == NULL) { 2227 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2228 } 2229 2230 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2231 2232 #if OMPT_SUPPORT && OMPT_OPTIONAL 2233 // This is the case, if called from omp_init_lock_with_hint: 2234 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2235 if (!codeptr) 2236 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2237 if (ompt_enabled.ompt_callback_lock_init) { 2238 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2239 ompt_mutex_lock, (omp_lock_hint_t)hint, 2240 __ompt_get_mutex_impl_type(user_lock), 2241 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2242 } 2243 #endif 2244 } 2245 2246 /* initialize the lock with a hint */ 2247 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2248 void **user_lock, uintptr_t hint) { 2249 KMP_DEBUG_ASSERT(__kmp_init_serial); 2250 if (__kmp_env_consistency_check && user_lock == NULL) { 2251 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2252 } 2253 2254 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2255 2256 #if OMPT_SUPPORT && OMPT_OPTIONAL 2257 // This is the case, if called from omp_init_lock_with_hint: 2258 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2259 if (!codeptr) 2260 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2261 if (ompt_enabled.ompt_callback_lock_init) { 2262 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2263 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2264 __ompt_get_mutex_impl_type(user_lock), 2265 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2266 } 2267 #endif 2268 } 2269 2270 #endif // KMP_USE_DYNAMIC_LOCK 2271 2272 /* initialize the lock */ 2273 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2274 #if KMP_USE_DYNAMIC_LOCK 2275 2276 KMP_DEBUG_ASSERT(__kmp_init_serial); 2277 if (__kmp_env_consistency_check && user_lock == NULL) { 2278 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2279 } 2280 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2281 2282 #if OMPT_SUPPORT && OMPT_OPTIONAL 2283 // This is the case, if called from omp_init_lock_with_hint: 2284 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2285 if (!codeptr) 2286 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2287 if (ompt_enabled.ompt_callback_lock_init) { 2288 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2289 ompt_mutex_lock, omp_lock_hint_none, 2290 
__ompt_get_mutex_impl_type(user_lock), 2291 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2292 } 2293 #endif 2294 2295 #else // KMP_USE_DYNAMIC_LOCK 2296 2297 static char const *const func = "omp_init_lock"; 2298 kmp_user_lock_p lck; 2299 KMP_DEBUG_ASSERT(__kmp_init_serial); 2300 2301 if (__kmp_env_consistency_check) { 2302 if (user_lock == NULL) { 2303 KMP_FATAL(LockIsUninitialized, func); 2304 } 2305 } 2306 2307 KMP_CHECK_USER_LOCK_INIT(); 2308 2309 if ((__kmp_user_lock_kind == lk_tas) && 2310 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2311 lck = (kmp_user_lock_p)user_lock; 2312 } 2313 #if KMP_USE_FUTEX 2314 else if ((__kmp_user_lock_kind == lk_futex) && 2315 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2316 lck = (kmp_user_lock_p)user_lock; 2317 } 2318 #endif 2319 else { 2320 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2321 } 2322 INIT_LOCK(lck); 2323 __kmp_set_user_lock_location(lck, loc); 2324 2325 #if OMPT_SUPPORT && OMPT_OPTIONAL 2326 // This is the case, if called from omp_init_lock_with_hint: 2327 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2328 if (!codeptr) 2329 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2330 if (ompt_enabled.ompt_callback_lock_init) { 2331 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2332 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2333 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2334 } 2335 #endif 2336 2337 #if USE_ITT_BUILD 2338 __kmp_itt_lock_creating(lck); 2339 #endif /* USE_ITT_BUILD */ 2340 2341 #endif // KMP_USE_DYNAMIC_LOCK 2342 } // __kmpc_init_lock 2343 2344 /* initialize the lock */ 2345 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2346 #if KMP_USE_DYNAMIC_LOCK 2347 2348 KMP_DEBUG_ASSERT(__kmp_init_serial); 2349 if (__kmp_env_consistency_check && user_lock == NULL) { 2350 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2351 } 2352 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2353 2354 #if OMPT_SUPPORT && OMPT_OPTIONAL 2355 // This is the case, if called from omp_init_lock_with_hint: 2356 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2357 if (!codeptr) 2358 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2359 if (ompt_enabled.ompt_callback_lock_init) { 2360 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2361 ompt_mutex_nest_lock, omp_lock_hint_none, 2362 __ompt_get_mutex_impl_type(user_lock), 2363 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2364 } 2365 #endif 2366 2367 #else // KMP_USE_DYNAMIC_LOCK 2368 2369 static char const *const func = "omp_init_nest_lock"; 2370 kmp_user_lock_p lck; 2371 KMP_DEBUG_ASSERT(__kmp_init_serial); 2372 2373 if (__kmp_env_consistency_check) { 2374 if (user_lock == NULL) { 2375 KMP_FATAL(LockIsUninitialized, func); 2376 } 2377 } 2378 2379 KMP_CHECK_USER_LOCK_INIT(); 2380 2381 if ((__kmp_user_lock_kind == lk_tas) && 2382 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2383 OMP_NEST_LOCK_T_SIZE)) { 2384 lck = (kmp_user_lock_p)user_lock; 2385 } 2386 #if KMP_USE_FUTEX 2387 else if ((__kmp_user_lock_kind == lk_futex) && 2388 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2389 OMP_NEST_LOCK_T_SIZE)) { 2390 lck = (kmp_user_lock_p)user_lock; 2391 } 2392 #endif 2393 else { 2394 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2395 } 2396 2397 INIT_NESTED_LOCK(lck); 2398 __kmp_set_user_lock_location(lck, loc); 2399 2400 #if OMPT_SUPPORT && OMPT_OPTIONAL 2401 // This is the case, if called from omp_init_lock_with_hint: 2402 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 
2403 if (!codeptr) 2404 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2405 if (ompt_enabled.ompt_callback_lock_init) { 2406 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2407 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2408 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2409 } 2410 #endif 2411 2412 #if USE_ITT_BUILD 2413 __kmp_itt_lock_creating(lck); 2414 #endif /* USE_ITT_BUILD */ 2415 2416 #endif // KMP_USE_DYNAMIC_LOCK 2417 } // __kmpc_init_nest_lock 2418 2419 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2420 #if KMP_USE_DYNAMIC_LOCK 2421 2422 #if USE_ITT_BUILD 2423 kmp_user_lock_p lck; 2424 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2425 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2426 } else { 2427 lck = (kmp_user_lock_p)user_lock; 2428 } 2429 __kmp_itt_lock_destroyed(lck); 2430 #endif 2431 #if OMPT_SUPPORT && OMPT_OPTIONAL 2432 // This is the case, if called from omp_init_lock_with_hint: 2433 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2434 if (!codeptr) 2435 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2436 if (ompt_enabled.ompt_callback_lock_destroy) { 2437 kmp_user_lock_p lck; 2438 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2439 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2440 } else { 2441 lck = (kmp_user_lock_p)user_lock; 2442 } 2443 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2444 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2445 } 2446 #endif 2447 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2448 #else 2449 kmp_user_lock_p lck; 2450 2451 if ((__kmp_user_lock_kind == lk_tas) && 2452 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2453 lck = (kmp_user_lock_p)user_lock; 2454 } 2455 #if KMP_USE_FUTEX 2456 else if ((__kmp_user_lock_kind == lk_futex) && 2457 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2458 lck = (kmp_user_lock_p)user_lock; 2459 } 2460 #endif 2461 else { 2462 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2463 } 2464 2465 #if OMPT_SUPPORT && OMPT_OPTIONAL 2466 // This is the case, if called from omp_init_lock_with_hint: 2467 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2468 if (!codeptr) 2469 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2470 if (ompt_enabled.ompt_callback_lock_destroy) { 2471 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2472 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2473 } 2474 #endif 2475 2476 #if USE_ITT_BUILD 2477 __kmp_itt_lock_destroyed(lck); 2478 #endif /* USE_ITT_BUILD */ 2479 DESTROY_LOCK(lck); 2480 2481 if ((__kmp_user_lock_kind == lk_tas) && 2482 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2483 ; 2484 } 2485 #if KMP_USE_FUTEX 2486 else if ((__kmp_user_lock_kind == lk_futex) && 2487 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2488 ; 2489 } 2490 #endif 2491 else { 2492 __kmp_user_lock_free(user_lock, gtid, lck); 2493 } 2494 #endif // KMP_USE_DYNAMIC_LOCK 2495 } // __kmpc_destroy_lock 2496 2497 /* destroy the lock */ 2498 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2499 #if KMP_USE_DYNAMIC_LOCK 2500 2501 #if USE_ITT_BUILD 2502 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2503 __kmp_itt_lock_destroyed(ilk->lock); 2504 #endif 2505 #if OMPT_SUPPORT && OMPT_OPTIONAL 2506 // This is the case, if called from omp_init_lock_with_hint: 2507 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2508 if (!codeptr) 2509 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2510 if 
(ompt_enabled.ompt_callback_lock_destroy) { 2511 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2512 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2513 } 2514 #endif 2515 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2516 2517 #else // KMP_USE_DYNAMIC_LOCK 2518 2519 kmp_user_lock_p lck; 2520 2521 if ((__kmp_user_lock_kind == lk_tas) && 2522 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2523 OMP_NEST_LOCK_T_SIZE)) { 2524 lck = (kmp_user_lock_p)user_lock; 2525 } 2526 #if KMP_USE_FUTEX 2527 else if ((__kmp_user_lock_kind == lk_futex) && 2528 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2529 OMP_NEST_LOCK_T_SIZE)) { 2530 lck = (kmp_user_lock_p)user_lock; 2531 } 2532 #endif 2533 else { 2534 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2535 } 2536 2537 #if OMPT_SUPPORT && OMPT_OPTIONAL 2538 // This is the case, if called from omp_init_lock_with_hint: 2539 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2540 if (!codeptr) 2541 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2542 if (ompt_enabled.ompt_callback_lock_destroy) { 2543 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2544 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2545 } 2546 #endif 2547 2548 #if USE_ITT_BUILD 2549 __kmp_itt_lock_destroyed(lck); 2550 #endif /* USE_ITT_BUILD */ 2551 2552 DESTROY_NESTED_LOCK(lck); 2553 2554 if ((__kmp_user_lock_kind == lk_tas) && 2555 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2556 OMP_NEST_LOCK_T_SIZE)) { 2557 ; 2558 } 2559 #if KMP_USE_FUTEX 2560 else if ((__kmp_user_lock_kind == lk_futex) && 2561 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2562 OMP_NEST_LOCK_T_SIZE)) { 2563 ; 2564 } 2565 #endif 2566 else { 2567 __kmp_user_lock_free(user_lock, gtid, lck); 2568 } 2569 #endif // KMP_USE_DYNAMIC_LOCK 2570 } // __kmpc_destroy_nest_lock 2571 2572 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2573 KMP_COUNT_BLOCK(OMP_set_lock); 2574 #if KMP_USE_DYNAMIC_LOCK 2575 int tag = KMP_EXTRACT_D_TAG(user_lock); 2576 #if USE_ITT_BUILD 2577 __kmp_itt_lock_acquiring( 2578 (kmp_user_lock_p) 2579 user_lock); // itt function will get to the right lock object. 
2580 #endif 2581 #if OMPT_SUPPORT && OMPT_OPTIONAL 2582 // This is the case, if called from omp_init_lock_with_hint: 2583 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2584 if (!codeptr) 2585 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2586 if (ompt_enabled.ompt_callback_mutex_acquire) { 2587 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2588 ompt_mutex_lock, omp_lock_hint_none, 2589 __ompt_get_mutex_impl_type(user_lock), 2590 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2591 } 2592 #endif 2593 #if KMP_USE_INLINED_TAS 2594 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2595 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2596 } else 2597 #elif KMP_USE_INLINED_FUTEX 2598 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2599 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2600 } else 2601 #endif 2602 { 2603 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2604 } 2605 #if USE_ITT_BUILD 2606 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2607 #endif 2608 #if OMPT_SUPPORT && OMPT_OPTIONAL 2609 if (ompt_enabled.ompt_callback_mutex_acquired) { 2610 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2611 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2612 } 2613 #endif 2614 2615 #else // KMP_USE_DYNAMIC_LOCK 2616 2617 kmp_user_lock_p lck; 2618 2619 if ((__kmp_user_lock_kind == lk_tas) && 2620 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2621 lck = (kmp_user_lock_p)user_lock; 2622 } 2623 #if KMP_USE_FUTEX 2624 else if ((__kmp_user_lock_kind == lk_futex) && 2625 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2626 lck = (kmp_user_lock_p)user_lock; 2627 } 2628 #endif 2629 else { 2630 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2631 } 2632 2633 #if USE_ITT_BUILD 2634 __kmp_itt_lock_acquiring(lck); 2635 #endif /* USE_ITT_BUILD */ 2636 #if OMPT_SUPPORT && OMPT_OPTIONAL 2637 // This is the case, if called from omp_init_lock_with_hint: 2638 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2639 if (!codeptr) 2640 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2641 if (ompt_enabled.ompt_callback_mutex_acquire) { 2642 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2643 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2644 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2645 } 2646 #endif 2647 2648 ACQUIRE_LOCK(lck, gtid); 2649 2650 #if USE_ITT_BUILD 2651 __kmp_itt_lock_acquired(lck); 2652 #endif /* USE_ITT_BUILD */ 2653 2654 #if OMPT_SUPPORT && OMPT_OPTIONAL 2655 if (ompt_enabled.ompt_callback_mutex_acquired) { 2656 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2657 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2658 } 2659 #endif 2660 2661 #endif // KMP_USE_DYNAMIC_LOCK 2662 } 2663 2664 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2665 #if KMP_USE_DYNAMIC_LOCK 2666 2667 #if USE_ITT_BUILD 2668 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2669 #endif 2670 #if OMPT_SUPPORT && OMPT_OPTIONAL 2671 // This is the case, if called from omp_init_lock_with_hint: 2672 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2673 if (!codeptr) 2674 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2675 if (ompt_enabled.enabled) { 2676 if (ompt_enabled.ompt_callback_mutex_acquire) { 2677 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2678 ompt_mutex_nest_lock, omp_lock_hint_none, 2679 __ompt_get_mutex_impl_type(user_lock), 2680 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2681 } 2682 } 2683 #endif 2684 int acquire_status = 2685 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2686 (void) acquire_status; 2687 #if USE_ITT_BUILD 2688 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2689 #endif 2690 2691 #if OMPT_SUPPORT && OMPT_OPTIONAL 2692 if (ompt_enabled.enabled) { 2693 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2694 if (ompt_enabled.ompt_callback_mutex_acquired) { 2695 // lock_first 2696 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2697 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2698 codeptr); 2699 } 2700 } else { 2701 if (ompt_enabled.ompt_callback_nest_lock) { 2702 // lock_next 2703 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2704 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2705 } 2706 } 2707 } 2708 #endif 2709 2710 #else // KMP_USE_DYNAMIC_LOCK 2711 int acquire_status; 2712 kmp_user_lock_p lck; 2713 2714 if ((__kmp_user_lock_kind == lk_tas) && 2715 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2716 OMP_NEST_LOCK_T_SIZE)) { 2717 lck = (kmp_user_lock_p)user_lock; 2718 } 2719 #if KMP_USE_FUTEX 2720 else if ((__kmp_user_lock_kind == lk_futex) && 2721 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2722 OMP_NEST_LOCK_T_SIZE)) { 2723 lck = (kmp_user_lock_p)user_lock; 2724 } 2725 #endif 2726 else { 2727 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2728 } 2729 2730 #if USE_ITT_BUILD 2731 __kmp_itt_lock_acquiring(lck); 2732 #endif /* USE_ITT_BUILD */ 2733 #if OMPT_SUPPORT && OMPT_OPTIONAL 2734 // This is the case, if called from omp_init_lock_with_hint: 2735 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2736 if (!codeptr) 2737 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2738 if (ompt_enabled.enabled) { 2739 if (ompt_enabled.ompt_callback_mutex_acquire) { 2740 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2741 ompt_mutex_nest_lock, omp_lock_hint_none, 2742 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2743 codeptr); 2744 } 2745 } 2746 #endif 2747 2748 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2749 2750 #if USE_ITT_BUILD 2751 __kmp_itt_lock_acquired(lck); 2752 #endif /* USE_ITT_BUILD */ 2753 2754 #if OMPT_SUPPORT && OMPT_OPTIONAL 2755 if (ompt_enabled.enabled) { 2756 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2757 if (ompt_enabled.ompt_callback_mutex_acquired) { 2758 // lock_first 2759 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2760 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2761 } 2762 } else { 2763 if (ompt_enabled.ompt_callback_nest_lock) { 2764 // lock_next 2765 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2766 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2767 } 2768 } 2769 } 2770 #endif 2771 2772 #endif // KMP_USE_DYNAMIC_LOCK 2773 } 2774 2775 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2776 #if KMP_USE_DYNAMIC_LOCK 2777 2778 int tag = KMP_EXTRACT_D_TAG(user_lock); 2779 #if USE_ITT_BUILD 2780 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2781 #endif 2782 #if KMP_USE_INLINED_TAS 2783 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2784 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2785 } else 2786 #elif KMP_USE_INLINED_FUTEX 2787 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2788 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2789 } else 2790 #endif 2791 { 2792 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2793 } 2794 2795 #if OMPT_SUPPORT && OMPT_OPTIONAL 2796 // This is the case, if called from omp_init_lock_with_hint: 2797 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2798 if (!codeptr) 2799 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2800 if (ompt_enabled.ompt_callback_mutex_released) { 2801 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2802 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2803 } 2804 #endif 2805 2806 #else // KMP_USE_DYNAMIC_LOCK 2807 2808 kmp_user_lock_p lck; 2809 2810 /* Can't use serial interval since not block structured */ 2811 /* release the lock */ 2812 2813 if ((__kmp_user_lock_kind == lk_tas) && 2814 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2815 #if KMP_OS_LINUX && \ 2816 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2817 // "fast" path implemented to fix customer performance issue 2818 #if USE_ITT_BUILD 2819 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2820 #endif /* USE_ITT_BUILD */ 2821 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2822 KMP_MB(); 2823 2824 #if OMPT_SUPPORT && OMPT_OPTIONAL 2825 // This is the case, if called from omp_init_lock_with_hint: 2826 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2827 if (!codeptr) 2828 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2829 if (ompt_enabled.ompt_callback_mutex_released) { 2830 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2831 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2832 } 2833 #endif 2834 2835 return; 2836 #else 2837 lck = (kmp_user_lock_p)user_lock; 2838 #endif 2839 } 2840 #if KMP_USE_FUTEX 2841 else if ((__kmp_user_lock_kind == lk_futex) && 2842 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2843 lck = (kmp_user_lock_p)user_lock; 2844 } 2845 #endif 2846 else { 2847 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2848 } 2849 2850 #if USE_ITT_BUILD 2851 __kmp_itt_lock_releasing(lck); 2852 #endif /* USE_ITT_BUILD */ 2853 2854 RELEASE_LOCK(lck, gtid); 2855 2856 #if OMPT_SUPPORT && OMPT_OPTIONAL 2857 // This is the case, if called from omp_init_lock_with_hint: 2858 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2859 if (!codeptr) 2860 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2861 if (ompt_enabled.ompt_callback_mutex_released) { 2862 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2863 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2864 } 2865 #endif 2866 2867 #endif // KMP_USE_DYNAMIC_LOCK 2868 } 2869 2870 /* release the lock */ 2871 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2872 #if KMP_USE_DYNAMIC_LOCK 2873 2874 #if USE_ITT_BUILD 2875 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2876 #endif 2877 int release_status = 2878 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2879 (void) release_status; 2880 2881 #if OMPT_SUPPORT && OMPT_OPTIONAL 2882 // This is the case, if called from omp_init_lock_with_hint: 2883 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2884 if (!codeptr) 2885 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2886 if (ompt_enabled.enabled) { 2887 if (release_status == KMP_LOCK_RELEASED) { 2888 if (ompt_enabled.ompt_callback_mutex_released) { 2889 // release_lock_last 2890 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2891 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2892 codeptr); 2893 } 2894 } else if (ompt_enabled.ompt_callback_nest_lock) { 2895 // release_lock_prev 2896 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2897 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2898 } 2899 } 2900 #endif 2901 2902 #else // KMP_USE_DYNAMIC_LOCK 2903 2904 kmp_user_lock_p lck; 
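// The branches below select the underlying lock representation: TAS (and, where
// available, futex) nest locks that fit in omp_nest_lock_t are released through
// the user-provided storage directly (with a Linux fast path for TAS), while all
// other lock kinds are first looked up in the runtime's lock table.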
2905 2906 /* Can't use serial interval since not block structured */ 2907 2908 if ((__kmp_user_lock_kind == lk_tas) && 2909 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2910 OMP_NEST_LOCK_T_SIZE)) { 2911 #if KMP_OS_LINUX && \ 2912 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2913 // "fast" path implemented to fix customer performance issue 2914 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2915 #if USE_ITT_BUILD 2916 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2917 #endif /* USE_ITT_BUILD */ 2918 2919 #if OMPT_SUPPORT && OMPT_OPTIONAL 2920 int release_status = KMP_LOCK_STILL_HELD; 2921 #endif 2922 2923 if (--(tl->lk.depth_locked) == 0) { 2924 TCW_4(tl->lk.poll, 0); 2925 #if OMPT_SUPPORT && OMPT_OPTIONAL 2926 release_status = KMP_LOCK_RELEASED; 2927 #endif 2928 } 2929 KMP_MB(); 2930 2931 #if OMPT_SUPPORT && OMPT_OPTIONAL 2932 // This is the case, if called from omp_init_lock_with_hint: 2933 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2934 if (!codeptr) 2935 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2936 if (ompt_enabled.enabled) { 2937 if (release_status == KMP_LOCK_RELEASED) { 2938 if (ompt_enabled.ompt_callback_mutex_released) { 2939 // release_lock_last 2940 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2941 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2942 } 2943 } else if (ompt_enabled.ompt_callback_nest_lock) { 2944 // release_lock_previous 2945 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2946 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2947 } 2948 } 2949 #endif 2950 2951 return; 2952 #else 2953 lck = (kmp_user_lock_p)user_lock; 2954 #endif 2955 } 2956 #if KMP_USE_FUTEX 2957 else if ((__kmp_user_lock_kind == lk_futex) && 2958 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2959 OMP_NEST_LOCK_T_SIZE)) { 2960 lck = (kmp_user_lock_p)user_lock; 2961 } 2962 #endif 2963 else { 2964 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2965 } 2966 2967 #if USE_ITT_BUILD 2968 __kmp_itt_lock_releasing(lck); 2969 #endif /* USE_ITT_BUILD */ 2970 2971 int release_status; 2972 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2973 #if OMPT_SUPPORT && OMPT_OPTIONAL 2974 // This is the case, if called from omp_init_lock_with_hint: 2975 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2976 if (!codeptr) 2977 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2978 if (ompt_enabled.enabled) { 2979 if (release_status == KMP_LOCK_RELEASED) { 2980 if (ompt_enabled.ompt_callback_mutex_released) { 2981 // release_lock_last 2982 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2983 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2984 } 2985 } else if (ompt_enabled.ompt_callback_nest_lock) { 2986 // release_lock_previous 2987 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2988 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2989 } 2990 } 2991 #endif 2992 2993 #endif // KMP_USE_DYNAMIC_LOCK 2994 } 2995 2996 /* try to acquire the lock */ 2997 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2998 KMP_COUNT_BLOCK(OMP_test_lock); 2999 3000 #if KMP_USE_DYNAMIC_LOCK 3001 int rc; 3002 int tag = KMP_EXTRACT_D_TAG(user_lock); 3003 #if USE_ITT_BUILD 3004 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3005 #endif 3006 #if OMPT_SUPPORT && OMPT_OPTIONAL 3007 // This is the case, if called from omp_init_lock_with_hint: 3008 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3009 if (!codeptr) 3010 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 3011 if (ompt_enabled.ompt_callback_mutex_acquire) { 3012 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3013 ompt_mutex_lock, omp_lock_hint_none, 3014 __ompt_get_mutex_impl_type(user_lock), 3015 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3016 } 3017 #endif 3018 #if KMP_USE_INLINED_TAS 3019 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3020 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3021 } else 3022 #elif KMP_USE_INLINED_FUTEX 3023 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3024 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3025 } else 3026 #endif 3027 { 3028 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3029 } 3030 if (rc) { 3031 #if USE_ITT_BUILD 3032 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3033 #endif 3034 #if OMPT_SUPPORT && OMPT_OPTIONAL 3035 if (ompt_enabled.ompt_callback_mutex_acquired) { 3036 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3037 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3038 } 3039 #endif 3040 return FTN_TRUE; 3041 } else { 3042 #if USE_ITT_BUILD 3043 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3044 #endif 3045 return FTN_FALSE; 3046 } 3047 3048 #else // KMP_USE_DYNAMIC_LOCK 3049 3050 kmp_user_lock_p lck; 3051 int rc; 3052 3053 if ((__kmp_user_lock_kind == lk_tas) && 3054 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3055 lck = (kmp_user_lock_p)user_lock; 3056 } 3057 #if KMP_USE_FUTEX 3058 else if ((__kmp_user_lock_kind == lk_futex) && 3059 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3060 lck = (kmp_user_lock_p)user_lock; 3061 } 3062 #endif 3063 else { 3064 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3065 } 3066 3067 #if USE_ITT_BUILD 3068 __kmp_itt_lock_acquiring(lck); 3069 #endif /* USE_ITT_BUILD */ 3070 #if OMPT_SUPPORT && OMPT_OPTIONAL 3071 // This is the case, if called from omp_init_lock_with_hint: 3072 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3073 if (!codeptr) 3074 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3075 if (ompt_enabled.ompt_callback_mutex_acquire) { 3076 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3077 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3078 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3079 } 3080 #endif 3081 3082 rc = TEST_LOCK(lck, gtid); 3083 #if USE_ITT_BUILD 3084 if (rc) { 3085 __kmp_itt_lock_acquired(lck); 3086 } else { 3087 __kmp_itt_lock_cancelled(lck); 3088 } 3089 #endif /* USE_ITT_BUILD */ 3090 #if OMPT_SUPPORT && OMPT_OPTIONAL 3091 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3092 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3093 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3094 } 3095 #endif 3096 3097 return (rc ? 
FTN_TRUE : FTN_FALSE); 3098 3099 /* Can't use serial interval since not block structured */ 3100 3101 #endif // KMP_USE_DYNAMIC_LOCK 3102 } 3103 3104 /* try to acquire the lock */ 3105 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3106 #if KMP_USE_DYNAMIC_LOCK 3107 int rc; 3108 #if USE_ITT_BUILD 3109 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3110 #endif 3111 #if OMPT_SUPPORT && OMPT_OPTIONAL 3112 // This is the case, if called from omp_init_lock_with_hint: 3113 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3114 if (!codeptr) 3115 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3116 if (ompt_enabled.ompt_callback_mutex_acquire) { 3117 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3118 ompt_mutex_nest_lock, omp_lock_hint_none, 3119 __ompt_get_mutex_impl_type(user_lock), 3120 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3121 } 3122 #endif 3123 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3124 #if USE_ITT_BUILD 3125 if (rc) { 3126 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3127 } else { 3128 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3129 } 3130 #endif 3131 #if OMPT_SUPPORT && OMPT_OPTIONAL 3132 if (ompt_enabled.enabled && rc) { 3133 if (rc == 1) { 3134 if (ompt_enabled.ompt_callback_mutex_acquired) { 3135 // lock_first 3136 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3137 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3138 codeptr); 3139 } 3140 } else { 3141 if (ompt_enabled.ompt_callback_nest_lock) { 3142 // lock_next 3143 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3144 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3145 } 3146 } 3147 } 3148 #endif 3149 return rc; 3150 3151 #else // KMP_USE_DYNAMIC_LOCK 3152 3153 kmp_user_lock_p lck; 3154 int rc; 3155 3156 if ((__kmp_user_lock_kind == lk_tas) && 3157 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3158 OMP_NEST_LOCK_T_SIZE)) { 3159 lck = (kmp_user_lock_p)user_lock; 3160 } 3161 #if KMP_USE_FUTEX 3162 else if ((__kmp_user_lock_kind == lk_futex) && 3163 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3164 OMP_NEST_LOCK_T_SIZE)) { 3165 lck = (kmp_user_lock_p)user_lock; 3166 } 3167 #endif 3168 else { 3169 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3170 } 3171 3172 #if USE_ITT_BUILD 3173 __kmp_itt_lock_acquiring(lck); 3174 #endif /* USE_ITT_BUILD */ 3175 3176 #if OMPT_SUPPORT && OMPT_OPTIONAL 3177 // This is the case, if called from omp_init_lock_with_hint: 3178 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3179 if (!codeptr) 3180 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3181 if (ompt_enabled.enabled && 3182 ompt_enabled.ompt_callback_mutex_acquire) { 3183 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3184 ompt_mutex_nest_lock, omp_lock_hint_none, 3185 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3186 codeptr); 3187 } 3188 #endif 3189 3190 rc = TEST_NESTED_LOCK(lck, gtid); 3191 #if USE_ITT_BUILD 3192 if (rc) { 3193 __kmp_itt_lock_acquired(lck); 3194 } else { 3195 __kmp_itt_lock_cancelled(lck); 3196 } 3197 #endif /* USE_ITT_BUILD */ 3198 #if OMPT_SUPPORT && OMPT_OPTIONAL 3199 if (ompt_enabled.enabled && rc) { 3200 if (rc == 1) { 3201 if (ompt_enabled.ompt_callback_mutex_acquired) { 3202 // lock_first 3203 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3204 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3205 } 3206 } else { 3207 if (ompt_enabled.ompt_callback_nest_lock) { 3208
// lock_next 3209 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3210 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3211 } 3212 } 3213 } 3214 #endif 3215 return rc; 3216 3217 /* Can't use serial interval since not block structured */ 3218 3219 #endif // KMP_USE_DYNAMIC_LOCK 3220 } 3221 3222 // Interface to fast scalable reduce methods routines 3223 3224 // keep the selected method in a thread local structure for cross-function 3225 // usage: will be used in __kmpc_end_reduce* functions; 3226 // another solution: to re-determine the method one more time in 3227 // __kmpc_end_reduce* functions (new prototype required then) 3228 // AT: which solution is better? 3229 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3230 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3231 3232 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3233 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3234 3235 // description of the packed_reduction_method variable: look at the macros in 3236 // kmp.h 3237 3238 // used in a critical section reduce block 3239 static __forceinline void 3240 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3241 kmp_critical_name *crit) { 3242 3243 // this lock was visible to a customer and to the threading profile tool as a 3244 // serial overhead span (although it's used for an internal purpose only) 3245 // why was it visible in previous implementation? 3246 // should we keep it visible in new reduce block? 3247 kmp_user_lock_p lck; 3248 3249 #if KMP_USE_DYNAMIC_LOCK 3250 3251 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3252 // Check if it is initialized. 3253 if (*lk == 0) { 3254 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3255 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3256 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3257 } else { 3258 __kmp_init_indirect_csptr(crit, loc, global_tid, 3259 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3260 } 3261 } 3262 // Branch for accessing the actual lock object and set operation. This 3263 // branching is inevitable since this lock initialization does not follow the 3264 // normal dispatch path (lock table is not used). 3265 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3266 lck = (kmp_user_lock_p)lk; 3267 KMP_DEBUG_ASSERT(lck != NULL); 3268 if (__kmp_env_consistency_check) { 3269 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3270 } 3271 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3272 } else { 3273 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3274 lck = ilk->lock; 3275 KMP_DEBUG_ASSERT(lck != NULL); 3276 if (__kmp_env_consistency_check) { 3277 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3278 } 3279 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3280 } 3281 3282 #else // KMP_USE_DYNAMIC_LOCK 3283 3284 // We know that the fast reduction code is only emitted by Intel compilers 3285 // with 32 byte critical sections. If there isn't enough space, then we 3286 // have to use a pointer. 
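// (For example, lock kinds whose objects do not fit in the 32-byte
// kmp_critical_name area are allocated separately, and *crit then holds a
// pointer to the lock, obtained via __kmp_get_critical_section_ptr().)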
3287 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3288 lck = (kmp_user_lock_p)crit; 3289 } else { 3290 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3291 } 3292 KMP_DEBUG_ASSERT(lck != NULL); 3293 3294 if (__kmp_env_consistency_check) 3295 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3296 3297 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3298 3299 #endif // KMP_USE_DYNAMIC_LOCK 3300 } 3301 3302 // used in a critical section reduce block 3303 static __forceinline void 3304 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3305 kmp_critical_name *crit) { 3306 3307 kmp_user_lock_p lck; 3308 3309 #if KMP_USE_DYNAMIC_LOCK 3310 3311 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3312 lck = (kmp_user_lock_p)crit; 3313 if (__kmp_env_consistency_check) 3314 __kmp_pop_sync(global_tid, ct_critical, loc); 3315 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3316 } else { 3317 kmp_indirect_lock_t *ilk = 3318 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3319 if (__kmp_env_consistency_check) 3320 __kmp_pop_sync(global_tid, ct_critical, loc); 3321 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3322 } 3323 3324 #else // KMP_USE_DYNAMIC_LOCK 3325 3326 // We know that the fast reduction code is only emitted by Intel compilers 3327 // with 32 byte critical sections. If there isn't enough space, then we have 3328 // to use a pointer. 3329 if (__kmp_base_user_lock_size > 32) { 3330 lck = *((kmp_user_lock_p *)crit); 3331 KMP_ASSERT(lck != NULL); 3332 } else { 3333 lck = (kmp_user_lock_p)crit; 3334 } 3335 3336 if (__kmp_env_consistency_check) 3337 __kmp_pop_sync(global_tid, ct_critical, loc); 3338 3339 __kmp_release_user_lock_with_checks(lck, global_tid); 3340 3341 #endif // KMP_USE_DYNAMIC_LOCK 3342 } // __kmp_end_critical_section_reduce_block 3343 3344 #if OMP_40_ENABLED 3345 static __forceinline int 3346 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3347 int *task_state) { 3348 kmp_team_t *team; 3349 3350 // Check if we are inside the teams construct? 3351 if (th->th.th_teams_microtask) { 3352 *team_p = team = th->th.th_team; 3353 if (team->t.t_level == th->th.th_teams_level) { 3354 // This is reduction at teams construct. 3355 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3356 // Let's swap teams temporarily for the reduction. 3357 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3358 th->th.th_team = team->t.t_parent; 3359 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3360 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3361 *task_state = th->th.th_task_state; 3362 th->th.th_task_state = 0; 3363 3364 return 1; 3365 } 3366 } 3367 return 0; 3368 } 3369 3370 static __forceinline void 3371 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3372 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3373 th->th.th_info.ds.ds_tid = 0; 3374 th->th.th_team = team; 3375 th->th.th_team_nproc = team->t.t_nproc; 3376 th->th.th_task_team = team->t.t_task_team[task_state]; 3377 th->th.th_task_state = task_state; 3378 } 3379 #endif 3380 3381 /* 2.a.i. Reduce Block without a terminating barrier */ 3382 /*! 
3383 @ingroup SYNCHRONIZATION 3384 @param loc source location information 3385 @param global_tid global thread number 3386 @param num_vars number of items (variables) to be reduced 3387 @param reduce_size size of data in bytes to be reduced 3388 @param reduce_data pointer to data to be reduced 3389 @param reduce_func callback function providing reduction operation on two 3390 operands and returning result of reduction in lhs_data 3391 @param lck pointer to the unique lock data structure 3392 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3393 threads if atomic reduction needed 3394 3395 The nowait version is used for a reduce clause with the nowait argument. 3396 */ 3397 kmp_int32 3398 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3399 size_t reduce_size, void *reduce_data, 3400 void (*reduce_func)(void *lhs_data, void *rhs_data), 3401 kmp_critical_name *lck) { 3402 3403 KMP_COUNT_BLOCK(REDUCE_nowait); 3404 int retval = 0; 3405 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3406 #if OMP_40_ENABLED 3407 kmp_info_t *th; 3408 kmp_team_t *team; 3409 int teams_swapped = 0, task_state; 3410 #endif 3411 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3412 3413 // why do we need this initialization here at all? 3414 // Reduction clause can not be used as a stand-alone directive. 3415 3416 // do not call __kmp_serial_initialize(), it will be called by 3417 // __kmp_parallel_initialize() if needed 3418 // possible detection of false-positive race by the threadchecker ??? 3419 if (!TCR_4(__kmp_init_parallel)) 3420 __kmp_parallel_initialize(); 3421 3422 #if OMP_50_ENABLED 3423 __kmp_resume_if_soft_paused(); 3424 #endif 3425 3426 // check correctness of reduce block nesting 3427 #if KMP_USE_DYNAMIC_LOCK 3428 if (__kmp_env_consistency_check) 3429 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3430 #else 3431 if (__kmp_env_consistency_check) 3432 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3433 #endif 3434 3435 #if OMP_40_ENABLED 3436 th = __kmp_thread_from_gtid(global_tid); 3437 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3438 #endif // OMP_40_ENABLED 3439 3440 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3441 // the value should be kept in a variable 3442 // the variable should be either a construct-specific or thread-specific 3443 // property, not a team specific property 3444 // (a thread can reach the next reduce block on the next construct, reduce 3445 // method may differ on the next construct) 3446 // an ident_t "loc" parameter could be used as a construct-specific property 3447 // (what if loc == 0?) 
3448 // (if both construct-specific and team-specific variables were shared, 3449 // then unnecessary extra syncs would be needed) 3450 // a thread-specific variable is better regarding two issues above (next 3451 // construct and extra syncs) 3452 // a thread-specific "th_local.reduction_method" variable is used currently 3453 // each thread executes 'determine' and 'set' lines (no need to execute by one 3454 // thread, to avoid unnecessary extra syncs) 3455 3456 packed_reduction_method = __kmp_determine_reduction_method( 3457 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3458 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3459 3460 if (packed_reduction_method == critical_reduce_block) { 3461 3462 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3463 retval = 1; 3464 3465 } else if (packed_reduction_method == empty_reduce_block) { 3466 3467 // usage: if team size == 1, no synchronization is required ( Intel 3468 // platforms only ) 3469 retval = 1; 3470 3471 } else if (packed_reduction_method == atomic_reduce_block) { 3472 3473 retval = 2; 3474 3475 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3476 // won't be called by the code gen) 3477 // (it's not quite good, because the checking block has been closed by 3478 // this 'pop', 3479 // but the atomic operation has not been executed yet; it will be executed 3480 // slightly later, literally on the next instruction) 3481 if (__kmp_env_consistency_check) 3482 __kmp_pop_sync(global_tid, ct_reduce, loc); 3483 3484 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3485 tree_reduce_block)) { 3486 3487 // AT: performance issue: a real barrier here 3488 // AT: (if master goes slow, other threads are blocked here waiting for the 3489 // master to come and release them) 3490 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3491 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3492 // be confusing to a customer) 3493 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3494 // might go faster and be more in line with the sense of NOWAIT 3495 // AT: TO DO: do epcc test and compare times 3496 3497 // this barrier should be invisible to a customer and to the threading profile 3498 // tool (it's neither a terminating barrier nor customer's code, it's 3499 // used for an internal purpose) 3500 #if OMPT_SUPPORT 3501 // JP: can this barrier potentially lead to task scheduling? 3502 // JP: as long as there is a barrier in the implementation, OMPT should and 3503 // will provide the barrier events 3504 // so we set up the necessary frame/return addresses. 3505 ompt_frame_t *ompt_frame; 3506 if (ompt_enabled.enabled) { 3507 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3508 if (ompt_frame->enter_frame.ptr == NULL) 3509 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3510 OMPT_STORE_RETURN_ADDRESS(global_tid); 3511 } 3512 #endif 3513 #if USE_ITT_NOTIFY 3514 __kmp_threads[global_tid]->th.th_ident = loc; 3515 #endif 3516 retval = 3517 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3518 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3519 retval = (retval != 0) ?
(0) : (1); 3520 #if OMPT_SUPPORT && OMPT_OPTIONAL 3521 if (ompt_enabled.enabled) { 3522 ompt_frame->enter_frame = ompt_data_none; 3523 } 3524 #endif 3525 3526 // all workers except the master should do this pop here 3527 // (none of the other workers will get to __kmpc_end_reduce_nowait()) 3528 if (__kmp_env_consistency_check) { 3529 if (retval == 0) { 3530 __kmp_pop_sync(global_tid, ct_reduce, loc); 3531 } 3532 } 3533 3534 } else { 3535 3536 // should never reach this block 3537 KMP_ASSERT(0); // "unexpected method" 3538 } 3539 #if OMP_40_ENABLED 3540 if (teams_swapped) { 3541 __kmp_restore_swapped_teams(th, team, task_state); 3542 } 3543 #endif 3544 KA_TRACE( 3545 10, 3546 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3547 global_tid, packed_reduction_method, retval)); 3548 3549 return retval; 3550 } 3551 3552 /*! 3553 @ingroup SYNCHRONIZATION 3554 @param loc source location information 3555 @param global_tid global thread id. 3556 @param lck pointer to the unique lock data structure 3557 3558 Finish the execution of a reduce nowait. 3559 */ 3560 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3561 kmp_critical_name *lck) { 3562 3563 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3564 3565 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3566 3567 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3568 3569 if (packed_reduction_method == critical_reduce_block) { 3570 3571 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3572 3573 } else if (packed_reduction_method == empty_reduce_block) { 3574 3575 // usage: if team size == 1, no synchronization is required ( on Intel 3576 // platforms only ) 3577 3578 } else if (packed_reduction_method == atomic_reduce_block) { 3579 3580 // neither the master nor the other workers should get here 3581 // (code gen does not generate this call in case 2: atomic reduce block) 3582 // it would actually be better to remove this else-if entirely; 3583 // after removal this value would be checked by the 'else' and would assert 3584 3585 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3586 tree_reduce_block)) { 3587 3588 // only the master gets here 3589 3590 } else { 3591 3592 // should never reach this block 3593 KMP_ASSERT(0); // "unexpected method" 3594 } 3595 3596 if (__kmp_env_consistency_check) 3597 __kmp_pop_sync(global_tid, ct_reduce, loc); 3598 3599 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3600 global_tid, packed_reduction_method)); 3601 3602 return; 3603 } 3604 3605 /* 2.a.ii. Reduce Block with a terminating barrier */ 3606 3607 /*! 3608 @ingroup SYNCHRONIZATION 3609 @param loc source location information 3610 @param global_tid global thread number 3611 @param num_vars number of items (variables) to be reduced 3612 @param reduce_size size of data in bytes to be reduced 3613 @param reduce_data pointer to data to be reduced 3614 @param reduce_func callback function providing reduction operation on two 3615 operands and returning result of reduction in lhs_data 3616 @param lck pointer to the unique lock data structure 3617 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3618 threads if atomic reduction needed 3619 3620 A blocking reduce that includes an implicit barrier.
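
As an illustration only (the exact lowering is compiler-specific, and the names
<tt>loc</tt>, <tt>gtid</tt>, <tt>lck</tt>, <tt>priv_sum</tt>, <tt>shared_sum</tt>
and <tt>reduce_sum</tt> below are hypothetical), a <tt>reduction(+:sum)</tt>
clause might be lowered onto this entry point and __kmpc_end_reduce() roughly
as follows:

@code
// Combines two partial results; matches the reduce_func callback signature.
static void reduce_sum(void *lhs_data, void *rhs_data) {
  *(int *)lhs_data += *(int *)rhs_data;
}

int priv_sum = 0; // thread-private partial sum accumulated by this thread
switch (__kmpc_reduce(&loc, gtid, 1, sizeof(int), &priv_sum, reduce_sum,
                      &lck)) {
case 1: // this thread combines its partial result into the shared variable
  shared_sum += priv_sum;
  __kmpc_end_reduce(&loc, gtid, &lck);
  break;
case 2: // the runtime selected the atomic method: each thread updates the
        // shared variable atomically (typically via a __kmpc_atomic_* entry
        // point, elided here), then finishes the construct
  __kmpc_end_reduce(&loc, gtid, &lck);
  break;
default: // 0: nothing to combine here (e.g. tree method, non-master thread)
  break;
}
@endcode

The nowait variant, __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait(), is
driven in the same way for a reduction combined with a nowait clause.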
3621 */ 3622 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3623 size_t reduce_size, void *reduce_data, 3624 void (*reduce_func)(void *lhs_data, void *rhs_data), 3625 kmp_critical_name *lck) { 3626 KMP_COUNT_BLOCK(REDUCE_wait); 3627 int retval = 0; 3628 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3629 #if OMP_40_ENABLED 3630 kmp_info_t *th; 3631 kmp_team_t *team; 3632 int teams_swapped = 0, task_state; 3633 #endif 3634 3635 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3636 3637 // why do we need this initialization here at all? 3638 // Reduction clause can not be a stand-alone directive. 3639 3640 // do not call __kmp_serial_initialize(), it will be called by 3641 // __kmp_parallel_initialize() if needed 3642 // possible detection of false-positive race by the threadchecker ??? 3643 if (!TCR_4(__kmp_init_parallel)) 3644 __kmp_parallel_initialize(); 3645 3646 #if OMP_50_ENABLED 3647 __kmp_resume_if_soft_paused(); 3648 #endif 3649 3650 // check correctness of reduce block nesting 3651 #if KMP_USE_DYNAMIC_LOCK 3652 if (__kmp_env_consistency_check) 3653 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3654 #else 3655 if (__kmp_env_consistency_check) 3656 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3657 #endif 3658 3659 #if OMP_40_ENABLED 3660 th = __kmp_thread_from_gtid(global_tid); 3661 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3662 #endif // OMP_40_ENABLED 3663 3664 packed_reduction_method = __kmp_determine_reduction_method( 3665 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3666 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3667 3668 if (packed_reduction_method == critical_reduce_block) { 3669 3670 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3671 retval = 1; 3672 3673 } else if (packed_reduction_method == empty_reduce_block) { 3674 3675 // usage: if team size == 1, no synchronization is required ( Intel 3676 // platforms only ) 3677 retval = 1; 3678 3679 } else if (packed_reduction_method == atomic_reduce_block) { 3680 3681 retval = 2; 3682 3683 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3684 tree_reduce_block)) { 3685 3686 // case tree_reduce_block: 3687 // this barrier should be visible to a customer and to the threading profile 3688 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3689 #if OMPT_SUPPORT 3690 ompt_frame_t *ompt_frame; 3691 if (ompt_enabled.enabled) { 3692 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3693 if (ompt_frame->enter_frame.ptr == NULL) 3694 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3695 OMPT_STORE_RETURN_ADDRESS(global_tid); 3696 } 3697 #endif 3698 #if USE_ITT_NOTIFY 3699 __kmp_threads[global_tid]->th.th_ident = 3700 loc; // needed for correct notification of frames 3701 #endif 3702 retval = 3703 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3704 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3705 retval = (retval != 0) ? 
(0) : (1); 3706 #if OMPT_SUPPORT && OMPT_OPTIONAL 3707 if (ompt_enabled.enabled) { 3708 ompt_frame->enter_frame = ompt_data_none; 3709 } 3710 #endif 3711 3712 // all other workers except master should do this pop here 3713 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3714 if (__kmp_env_consistency_check) { 3715 if (retval == 0) { // 0: all other workers; 1: master 3716 __kmp_pop_sync(global_tid, ct_reduce, loc); 3717 } 3718 } 3719 3720 } else { 3721 3722 // should never reach this block 3723 KMP_ASSERT(0); // "unexpected method" 3724 } 3725 #if OMP_40_ENABLED 3726 if (teams_swapped) { 3727 __kmp_restore_swapped_teams(th, team, task_state); 3728 } 3729 #endif 3730 3731 KA_TRACE(10, 3732 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3733 global_tid, packed_reduction_method, retval)); 3734 3735 return retval; 3736 } 3737 3738 /*! 3739 @ingroup SYNCHRONIZATION 3740 @param loc source location information 3741 @param global_tid global thread id. 3742 @param lck pointer to the unique lock data structure 3743 3744 Finish the execution of a blocking reduce. 3745 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3746 start function. 3747 */ 3748 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3749 kmp_critical_name *lck) { 3750 3751 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3752 #if OMP_40_ENABLED 3753 kmp_info_t *th; 3754 kmp_team_t *team; 3755 int teams_swapped = 0, task_state; 3756 #endif 3757 3758 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3759 3760 #if OMP_40_ENABLED 3761 th = __kmp_thread_from_gtid(global_tid); 3762 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3763 #endif // OMP_40_ENABLED 3764 3765 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3766 3767 // this barrier should be visible to a customer and to the threading profile 3768 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3769 3770 if (packed_reduction_method == critical_reduce_block) { 3771 3772 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3773 3774 // TODO: implicit barrier: should be exposed 3775 #if OMPT_SUPPORT 3776 ompt_frame_t *ompt_frame; 3777 if (ompt_enabled.enabled) { 3778 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3779 if (ompt_frame->enter_frame.ptr == NULL) 3780 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3781 OMPT_STORE_RETURN_ADDRESS(global_tid); 3782 } 3783 #endif 3784 #if USE_ITT_NOTIFY 3785 __kmp_threads[global_tid]->th.th_ident = loc; 3786 #endif 3787 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3788 #if OMPT_SUPPORT && OMPT_OPTIONAL 3789 if (ompt_enabled.enabled) { 3790 ompt_frame->enter_frame = ompt_data_none; 3791 } 3792 #endif 3793 3794 } else if (packed_reduction_method == empty_reduce_block) { 3795 3796 // usage: if team size==1, no synchronization is required (Intel platforms only) 3797 3798 // TODO: implicit barrier: should be exposed 3799 #if OMPT_SUPPORT 3800 ompt_frame_t *ompt_frame; 3801 if (ompt_enabled.enabled) { 3802 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3803 if (ompt_frame->enter_frame.ptr == NULL) 3804 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3805 OMPT_STORE_RETURN_ADDRESS(global_tid); 3806 } 3807 #endif 3808 #if USE_ITT_NOTIFY 3809 __kmp_threads[global_tid]->th.th_ident = loc; 3810 #endif 3811 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3812 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 3813 if (ompt_enabled.enabled) { 3814 ompt_frame->enter_frame = ompt_data_none; 3815 } 3816 #endif 3817 3818 } else if (packed_reduction_method == atomic_reduce_block) { 3819 3820 #if OMPT_SUPPORT 3821 ompt_frame_t *ompt_frame; 3822 if (ompt_enabled.enabled) { 3823 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3824 if (ompt_frame->enter_frame.ptr == NULL) 3825 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3826 OMPT_STORE_RETURN_ADDRESS(global_tid); 3827 } 3828 #endif 3829 // TODO: implicit barrier: should be exposed 3830 #if USE_ITT_NOTIFY 3831 __kmp_threads[global_tid]->th.th_ident = loc; 3832 #endif 3833 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3834 #if OMPT_SUPPORT && OMPT_OPTIONAL 3835 if (ompt_enabled.enabled) { 3836 ompt_frame->enter_frame = ompt_data_none; 3837 } 3838 #endif 3839 3840 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3841 tree_reduce_block)) { 3842 3843 // only the master executes here (the master releases all other workers) 3844 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3845 global_tid); 3846 3847 } else { 3848 3849 // should never reach this block 3850 KMP_ASSERT(0); // "unexpected method" 3851 } 3852 #if OMP_40_ENABLED 3853 if (teams_swapped) { 3854 __kmp_restore_swapped_teams(th, team, task_state); 3855 } 3856 #endif 3857 3858 if (__kmp_env_consistency_check) 3859 __kmp_pop_sync(global_tid, ct_reduce, loc); 3860 3861 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3862 global_tid, packed_reduction_method)); 3863 3864 return; 3865 } 3866 3867 #undef __KMP_GET_REDUCTION_METHOD 3868 #undef __KMP_SET_REDUCTION_METHOD 3869 3870 /* end of interface to fast scalable reduce routines */ 3871 3872 kmp_uint64 __kmpc_get_taskid() { 3873 3874 kmp_int32 gtid; 3875 kmp_info_t *thread; 3876 3877 gtid = __kmp_get_gtid(); 3878 if (gtid < 0) { 3879 return 0; 3880 } 3881 thread = __kmp_thread_from_gtid(gtid); 3882 return thread->th.th_current_task->td_task_id; 3883 3884 } // __kmpc_get_taskid 3885 3886 kmp_uint64 __kmpc_get_parent_taskid() { 3887 3888 kmp_int32 gtid; 3889 kmp_info_t *thread; 3890 kmp_taskdata_t *parent_task; 3891 3892 gtid = __kmp_get_gtid(); 3893 if (gtid < 0) { 3894 return 0; 3895 } 3896 thread = __kmp_thread_from_gtid(gtid); 3897 parent_task = thread->th.th_current_task->td_parent; 3898 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3899 3900 } // __kmpc_get_parent_taskid 3901 3902 #if OMP_45_ENABLED 3903 /*! 3904 @ingroup WORK_SHARING 3905 @param loc source location information. 3906 @param gtid global thread number. 3907 @param num_dims number of associated doacross loops. 3908 @param dims info on loop bounds. 3909 3910 Initialize doacross loop information. 3911 The compiler is expected to send us inclusive bounds, 3912 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
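
As a rough illustration only (the lowering is compiler-specific, and the names
<tt>loc</tt>, <tt>gtid</tt>, <tt>i_lo</tt> and <tt>i_up</tt> below are
hypothetical), a loop such as

@code
#pragma omp for ordered(1)
for (i = 2; i < 9; i += 2) {
  #pragma omp ordered depend(sink : i - 2)
  // ... consume data produced by iteration i - 2 ...
  #pragma omp ordered depend(source)
}
@endcode

might drive the doacross entry points on each thread roughly as:

@code
struct kmp_dim dim;
dim.lo = 2; dim.up = 8; dim.st = 2;           // inclusive bounds and stride
__kmpc_doacross_init(&loc, gtid, 1, &dim);
for (kmp_int64 i = i_lo; i <= i_up; i += 2) { // this thread's iterations
  kmp_int64 sink_vec[1] = {i - 2};
  __kmpc_doacross_wait(&loc, gtid, sink_vec); // ordered depend(sink: i - 2)
  // ... loop body ...
  kmp_int64 src_vec[1] = {i};
  __kmpc_doacross_post(&loc, gtid, src_vec);  // ordered depend(source)
}
__kmpc_doacross_fini(&loc, gtid);
@endcode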
3913 */ 3914 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3915 const struct kmp_dim *dims) { 3916 int j, idx; 3917 kmp_int64 last, trace_count; 3918 kmp_info_t *th = __kmp_threads[gtid]; 3919 kmp_team_t *team = th->th.th_team; 3920 kmp_uint32 *flags; 3921 kmp_disp_t *pr_buf = th->th.th_dispatch; 3922 dispatch_shared_info_t *sh_buf; 3923 3924 KA_TRACE( 3925 20, 3926 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3927 gtid, num_dims, !team->t.t_serialized)); 3928 KMP_DEBUG_ASSERT(dims != NULL); 3929 KMP_DEBUG_ASSERT(num_dims > 0); 3930 3931 if (team->t.t_serialized) { 3932 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3933 return; // no dependencies if team is serialized 3934 } 3935 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3936 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3937 // the next loop 3938 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3939 3940 // Save bounds info into allocated private buffer 3941 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3942 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3943 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3944 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3945 pr_buf->th_doacross_info[0] = 3946 (kmp_int64)num_dims; // first element is number of dimensions 3947 // Save also address of num_done in order to access it later without knowing 3948 // the buffer index 3949 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3950 pr_buf->th_doacross_info[2] = dims[0].lo; 3951 pr_buf->th_doacross_info[3] = dims[0].up; 3952 pr_buf->th_doacross_info[4] = dims[0].st; 3953 last = 5; 3954 for (j = 1; j < num_dims; ++j) { 3955 kmp_int64 3956 range_length; // To keep ranges of all dimensions but the first dims[0] 3957 if (dims[j].st == 1) { // most common case 3958 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3959 range_length = dims[j].up - dims[j].lo + 1; 3960 } else { 3961 if (dims[j].st > 0) { 3962 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3963 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3964 } else { // negative increment 3965 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3966 range_length = 3967 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3968 } 3969 } 3970 pr_buf->th_doacross_info[last++] = range_length; 3971 pr_buf->th_doacross_info[last++] = dims[j].lo; 3972 pr_buf->th_doacross_info[last++] = dims[j].up; 3973 pr_buf->th_doacross_info[last++] = dims[j].st; 3974 } 3975 3976 // Compute total trip count. 3977 // Start with range of dims[0] which we don't need to keep in the buffer. 
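  // Worked example (illustration only): for dims[0] = {lo=2, up=8, st=2} the
  // computation below yields (8 - 2) / 2 + 1 = 4 iterations; if a second
  // dimension with a kept range of 3 followed, the loop over the kept ranges
  // below would make the total trip count 4 * 3 = 12.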
3978 if (dims[0].st == 1) { // most common case 3979 trace_count = dims[0].up - dims[0].lo + 1; 3980 } else if (dims[0].st > 0) { 3981 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3982 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3983 } else { // negative increment 3984 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3985 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3986 } 3987 for (j = 1; j < num_dims; ++j) { 3988 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3989 } 3990 KMP_DEBUG_ASSERT(trace_count > 0); 3991 3992 // Check if shared buffer is not occupied by other loop (idx - 3993 // __kmp_dispatch_num_buffers) 3994 if (idx != sh_buf->doacross_buf_idx) { 3995 // Shared buffer is occupied, wait for it to be free 3996 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3997 __kmp_eq_4, NULL); 3998 } 3999 #if KMP_32_BIT_ARCH 4000 // Check if we are the first thread. After the CAS the first thread gets 0, 4001 // others get 1 if initialization is in progress, allocated pointer otherwise. 4002 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4003 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4004 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4005 #else 4006 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4007 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4008 #endif 4009 if (flags == NULL) { 4010 // we are the first thread, allocate the array of flags 4011 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 4012 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4013 KMP_MB(); 4014 sh_buf->doacross_flags = flags; 4015 } else if (flags == (kmp_uint32 *)1) { 4016 #if KMP_32_BIT_ARCH 4017 // initialization is still in progress, need to wait 4018 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4019 #else 4020 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4021 #endif 4022 KMP_YIELD(TRUE); 4023 KMP_MB(); 4024 } else { 4025 KMP_MB(); 4026 } 4027 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4028 pr_buf->th_doacross_flags = 4029 sh_buf->doacross_flags; // save private copy in order to not 4030 // touch shared buffer on each iteration 4031 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4032 } 4033 4034 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4035 kmp_int32 shft, num_dims, i; 4036 kmp_uint32 flag; 4037 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4038 kmp_info_t *th = __kmp_threads[gtid]; 4039 kmp_team_t *team = th->th.th_team; 4040 kmp_disp_t *pr_buf; 4041 kmp_int64 lo, up, st; 4042 4043 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4044 if (team->t.t_serialized) { 4045 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4046 return; // no dependencies if team is serialized 4047 } 4048 4049 // calculate sequential iteration number and check out-of-bounds condition 4050 pr_buf = th->th.th_dispatch; 4051 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4052 num_dims = pr_buf->th_doacross_info[0]; 4053 lo = pr_buf->th_doacross_info[2]; 4054 up = pr_buf->th_doacross_info[3]; 4055 st = pr_buf->th_doacross_info[4]; 4056 if (st == 1) { // most common case 4057 if (vec[0] < lo || vec[0] > up) { 4058 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4059 "bounds [%lld,%lld]\n", 4060 gtid, vec[0], lo, up)); 4061 return; 4062 } 4063 iter_number = 
vec[0] - lo; 4064 } else if (st > 0) { 4065 if (vec[0] < lo || vec[0] > up) { 4066 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4067 "bounds [%lld,%lld]\n", 4068 gtid, vec[0], lo, up)); 4069 return; 4070 } 4071 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4072 } else { // negative increment 4073 if (vec[0] > lo || vec[0] < up) { 4074 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4075 "bounds [%lld,%lld]\n", 4076 gtid, vec[0], lo, up)); 4077 return; 4078 } 4079 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4080 } 4081 for (i = 1; i < num_dims; ++i) { 4082 kmp_int64 iter, ln; 4083 kmp_int32 j = i * 4; 4084 ln = pr_buf->th_doacross_info[j + 1]; 4085 lo = pr_buf->th_doacross_info[j + 2]; 4086 up = pr_buf->th_doacross_info[j + 3]; 4087 st = pr_buf->th_doacross_info[j + 4]; 4088 if (st == 1) { 4089 if (vec[i] < lo || vec[i] > up) { 4090 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4091 "bounds [%lld,%lld]\n", 4092 gtid, vec[i], lo, up)); 4093 return; 4094 } 4095 iter = vec[i] - lo; 4096 } else if (st > 0) { 4097 if (vec[i] < lo || vec[i] > up) { 4098 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4099 "bounds [%lld,%lld]\n", 4100 gtid, vec[i], lo, up)); 4101 return; 4102 } 4103 iter = (kmp_uint64)(vec[i] - lo) / st; 4104 } else { // st < 0 4105 if (vec[i] > lo || vec[i] < up) { 4106 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4107 "bounds [%lld,%lld]\n", 4108 gtid, vec[i], lo, up)); 4109 return; 4110 } 4111 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4112 } 4113 iter_number = iter + ln * iter_number; 4114 } 4115 shft = iter_number % 32; // use 32-bit granularity 4116 iter_number >>= 5; // divided by 32 4117 flag = 1 << shft; 4118 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4119 KMP_YIELD(TRUE); 4120 } 4121 KMP_MB(); 4122 KA_TRACE(20, 4123 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4124 gtid, (iter_number << 5) + shft)); 4125 } 4126 4127 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4128 kmp_int32 shft, num_dims, i; 4129 kmp_uint32 flag; 4130 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4131 kmp_info_t *th = __kmp_threads[gtid]; 4132 kmp_team_t *team = th->th.th_team; 4133 kmp_disp_t *pr_buf; 4134 kmp_int64 lo, st; 4135 4136 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4137 if (team->t.t_serialized) { 4138 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4139 return; // no dependencies if team is serialized 4140 } 4141 4142 // calculate sequential iteration number (same as in "wait" but no 4143 // out-of-bounds checks) 4144 pr_buf = th->th.th_dispatch; 4145 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4146 num_dims = pr_buf->th_doacross_info[0]; 4147 lo = pr_buf->th_doacross_info[2]; 4148 st = pr_buf->th_doacross_info[4]; 4149 if (st == 1) { // most common case 4150 iter_number = vec[0] - lo; 4151 } else if (st > 0) { 4152 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4153 } else { // negative increment 4154 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4155 } 4156 for (i = 1; i < num_dims; ++i) { 4157 kmp_int64 iter, ln; 4158 kmp_int32 j = i * 4; 4159 ln = pr_buf->th_doacross_info[j + 1]; 4160 lo = pr_buf->th_doacross_info[j + 2]; 4161 st = pr_buf->th_doacross_info[j + 4]; 4162 if (st == 1) { 4163 iter = vec[i] - lo; 4164 } else if (st > 0) { 4165 iter = (kmp_uint64)(vec[i] - lo) / st; 4166 } else { // st < 0 4167 iter = 
(kmp_uint64)(lo - vec[i]) / (-st); 4168 } 4169 iter_number = iter + ln * iter_number; 4170 } 4171 shft = iter_number % 32; // use 32-bit granularity 4172 iter_number >>= 5; // divided by 32 4173 flag = 1 << shft; 4174 KMP_MB(); 4175 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4176 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4177 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4178 (iter_number << 5) + shft)); 4179 } 4180 4181 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4182 kmp_int32 num_done; 4183 kmp_info_t *th = __kmp_threads[gtid]; 4184 kmp_team_t *team = th->th.th_team; 4185 kmp_disp_t *pr_buf = th->th.th_dispatch; 4186 4187 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4188 if (team->t.t_serialized) { 4189 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4190 return; // nothing to do 4191 } 4192 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4193 if (num_done == th->th.th_team_nproc) { 4194 // we are the last thread, need to free shared resources 4195 int idx = pr_buf->th_doacross_buf_idx - 1; 4196 dispatch_shared_info_t *sh_buf = 4197 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4198 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4199 (kmp_int64)&sh_buf->doacross_num_done); 4200 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4201 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4202 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4203 sh_buf->doacross_flags = NULL; 4204 sh_buf->doacross_num_done = 0; 4205 sh_buf->doacross_buf_idx += 4206 __kmp_dispatch_num_buffers; // free buffer for future re-use 4207 } 4208 // free private resources (need to keep buffer index forever) 4209 pr_buf->th_doacross_flags = NULL; 4210 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4211 pr_buf->th_doacross_info = NULL; 4212 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4213 } 4214 #endif 4215 4216 #if OMP_50_ENABLED 4217 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */ 4218 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4219 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4220 } 4221 4222 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4223 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4224 } 4225 4226 int __kmpc_get_target_offload(void) { 4227 if (!__kmp_init_serial) { 4228 __kmp_serial_initialize(); 4229 } 4230 return __kmp_target_offload; 4231 } 4232 4233 int __kmpc_pause_resource(kmp_pause_status_t level) { 4234 if (!__kmp_init_serial) { 4235 return 1; // Can't pause if runtime is not initialized 4236 } 4237 return __kmp_pause_resource(level); 4238 } 4239 #endif // OMP_50_ENABLED 4240 4241 // end of file // 4242