/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  // KMP_INITIAL_THREAD_BIND=true forces middle initialization up front so the
  // initial thread gets bound before the first parallel region.
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  // TCR_4 appears to be a consistent 4-byte read of the shared counter --
  // NOTE(review): semantics defined in kmp.h, not visible here.
  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  // Translate the global thread id of the caller into a team-local tid.
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  // Size of the team the calling thread currently belongs to.
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
157 */ 158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 159 #ifndef KMP_DEBUG 160 161 return TRUE; 162 163 #else 164 165 const char *semi2; 166 const char *semi3; 167 int line_no; 168 169 if (__kmp_par_range == 0) { 170 return TRUE; 171 } 172 semi2 = loc->psource; 173 if (semi2 == NULL) { 174 return TRUE; 175 } 176 semi2 = strchr(semi2, ';'); 177 if (semi2 == NULL) { 178 return TRUE; 179 } 180 semi2 = strchr(semi2 + 1, ';'); 181 if (semi2 == NULL) { 182 return TRUE; 183 } 184 if (__kmp_par_range_filename[0]) { 185 const char *name = semi2 - 1; 186 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 187 name--; 188 } 189 if ((*name == '/') || (*name == ';')) { 190 name++; 191 } 192 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 193 return __kmp_par_range < 0; 194 } 195 } 196 semi3 = strchr(semi2 + 1, ';'); 197 if (__kmp_par_range_routine[0]) { 198 if ((semi3 != NULL) && (semi3 > semi2) && 199 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 200 return __kmp_par_range < 0; 201 } 202 } 203 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 204 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 205 return __kmp_par_range > 0; 206 } 207 return __kmp_par_range < 0; 208 } 209 return TRUE; 210 211 #endif /* KMP_DEBUG */ 212 } 213 214 /*! 215 @ingroup THREAD_STATES 216 @param loc Source location information. 217 @return 1 if this thread is executing inside an active parallel region, zero if 218 not. 219 */ 220 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 221 return __kmp_entry_thread()->th.th_root->r.r_active; 222 } 223 224 /*! 225 @ingroup PARALLEL 226 @param loc source location information 227 @param global_tid global thread number 228 @param num_threads number of threads requested for this parallel construct 229 230 Set the number of threads to be used by the next fork spawned by this thread. 231 This call is only required if the parallel construct has a `num_threads` clause. 
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  // Delegate to the runtime-internal implementation; the request only affects
  // the next fork performed by this thread.
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

#if OMP_40_ENABLED

// Record the proc_bind policy requested for the next parallel region forked by
// this thread (OpenMP 4.0 `proc_bind` clause).
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

#endif /* OMP_40_ENABLED */

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
{
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    // Publish the caller's frame so OMPT tools can walk the stack: use the
    // serialized team's task frame when one exists, otherwise the implicit
    // task of this thread in the parent team.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  // Mirror the timer transition done on entry: back to the serial timer if we
  // came from a serial region, otherwise pop the overhead timer.
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

#if OMP_40_ENABLED
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...)
{
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

  KMP_COUNT_BLOCK(OMP_TEAMS);

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  // Fork with __kmp_teams_master as the wrapped task; it in turn launches the
  // user microtask in each team.
  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  __kmp_free(tmp);
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  // Clear teams bookkeeping; th_teams_size is zeroed as a single 64-bit store.
  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
}
#endif /* OMP_40_ENABLED */

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

#if OMP_45_ENABLED
  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
#endif

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  // Report implicit-task end and parallel end to any attached tool, then
  // unlink the lightweight task team for the serialized region.
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
#if OMP_50_ENABLED
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
#endif

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    // Restore the FP control state saved when the serialized region began.
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates.
*/

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
// C74404
// This is to address non-temporal store instructions (sfence needed).
// The clflush instruction is addressed either (mfence needed).
// Probably the non-temporal load movntdqa instruction should also be
// addressed.
// mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  // while (!flag) {
  //   #pragma omp flush(flag)
  // }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield(TRUE);
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }

    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  // Publish the caller frame before blocking so a tool sampling during the
  // barrier can unwind past the runtime.
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number .
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  // Only the master thread of the team executes the master region.
  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number .

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // Treat the ordered entry as a mutex acquire for tool purposes: record the
  // wait id / state before blocking and fire mutex_acquire.
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  // Dispatch-specific "enter ordered" hook if installed, else the default.
  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  // Dispatch-specific "exit ordered" hook if installed, else the default.
  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  // Race to publish the lock into *crit; only one thread's CAS succeeds.
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      if (TCR_4(__kmp_nth) >                                                   \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
        KMP_YIELD(TRUE);                                                       \
      } else {                                                                 \
        KMP_YIELD_SPIN(spins);                                                 \
      }                                                                        \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      while (                                                                  \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
        __kmp_spin_backoff(&backoff);                                          \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      /* low bit of poll marks "there is a waiter"; set it before sleeping */  \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    /* note: gtid + 1 << 1 parses as (gtid + 1) << 1 -- same encoding as */    \
    /* KMP_ACQUIRE_FUTEX_LOCK's gtid_code */                                   \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      /* a waiter was recorded -- wake exactly one */                          \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD(TCR_4(__kmp_nth) >                                               \
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it.
There 1127 // are chances the lock will destroyed with no usage, but it is not a 1128 // problem, because this is not real event seen by user but rather setting 1129 // name for object (lock). See more details in kmp_itt.h. 1130 #endif /* USE_ITT_BUILD */ 1131 1132 // Use a cmpxchg instruction to slam the start of the critical section with 1133 // the lock pointer. If another thread beat us to it, deallocate the lock, 1134 // and use the lock that the other thread allocated. 1135 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1136 1137 if (status == 0) { 1138 // Deallocate the lock and reload the value. 1139 #if USE_ITT_BUILD 1140 __kmp_itt_critical_destroyed(lck); 1141 // Let ITT know the lock is destroyed and the same memory location may be reused 1142 // for another purpose. 1143 #endif /* USE_ITT_BUILD */ 1144 __kmp_destroy_user_lock_with_checks(lck); 1145 __kmp_user_lock_free(&idx, gtid, lck); 1146 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1147 KMP_DEBUG_ASSERT(lck != NULL); 1148 } 1149 } 1150 return lck; 1151 } 1152 1153 #endif // KMP_USE_DYNAMIC_LOCK 1154 1155 /*! 1156 @ingroup WORK_SHARING 1157 @param loc source location information. 1158 @param global_tid global thread number . 1159 @param crit identity of the critical section. This could be a pointer to a lock 1160 associated with the critical section, or some other suitably unique value. 1161 1162 Enter code protected by a `critical` construct. 1163 This function blocks until the executing thread can enter the critical section. 
1164 */ 1165 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1166 kmp_critical_name *crit) { 1167 #if KMP_USE_DYNAMIC_LOCK 1168 #if OMPT_SUPPORT && OMPT_OPTIONAL 1169 OMPT_STORE_RETURN_ADDRESS(global_tid); 1170 #endif // OMPT_SUPPORT 1171 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1172 #else 1173 KMP_COUNT_BLOCK(OMP_CRITICAL); 1174 #if OMPT_SUPPORT && OMPT_OPTIONAL 1175 ompt_state_t prev_state = ompt_state_undefined; 1176 ompt_thread_info_t ti; 1177 #endif 1178 kmp_user_lock_p lck; 1179 1180 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1181 1182 // TODO: add THR_OVHD_STATE 1183 1184 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1185 KMP_CHECK_USER_LOCK_INIT(); 1186 1187 if ((__kmp_user_lock_kind == lk_tas) && 1188 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1189 lck = (kmp_user_lock_p)crit; 1190 } 1191 #if KMP_USE_FUTEX 1192 else if ((__kmp_user_lock_kind == lk_futex) && 1193 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1194 lck = (kmp_user_lock_p)crit; 1195 } 1196 #endif 1197 else { // ticket, queuing or drdpa 1198 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1199 } 1200 1201 if (__kmp_env_consistency_check) 1202 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1203 1204 // since the critical directive binds to all threads, not just the current 1205 // team we have to check this even if we are in a serialized team. 1206 // also, even if we are the uber thread, we still have to conduct the lock, 1207 // as we have to contend with sibling threads. 
1208 1209 #if USE_ITT_BUILD 1210 __kmp_itt_critical_acquiring(lck); 1211 #endif /* USE_ITT_BUILD */ 1212 #if OMPT_SUPPORT && OMPT_OPTIONAL 1213 OMPT_STORE_RETURN_ADDRESS(gtid); 1214 void *codeptr_ra = NULL; 1215 if (ompt_enabled.enabled) { 1216 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1217 /* OMPT state update */ 1218 prev_state = ti.state; 1219 ti.wait_id = (ompt_wait_id_t)lck; 1220 ti.state = ompt_state_wait_critical; 1221 1222 /* OMPT event callback */ 1223 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1224 if (ompt_enabled.ompt_callback_mutex_acquire) { 1225 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1226 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1227 (ompt_wait_id_t)crit, codeptr_ra); 1228 } 1229 } 1230 #endif 1231 // Value of 'crit' should be good for using as a critical_id of the critical 1232 // section directive. 1233 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1234 1235 #if USE_ITT_BUILD 1236 __kmp_itt_critical_acquired(lck); 1237 #endif /* USE_ITT_BUILD */ 1238 #if OMPT_SUPPORT && OMPT_OPTIONAL 1239 if (ompt_enabled.enabled) { 1240 /* OMPT state update */ 1241 ti.state = prev_state; 1242 ti.wait_id = 0; 1243 1244 /* OMPT event callback */ 1245 if (ompt_enabled.ompt_callback_mutex_acquired) { 1246 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1247 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra); 1248 } 1249 } 1250 #endif 1251 KMP_POP_PARTITIONED_TIMER(); 1252 1253 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1254 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1255 #endif // KMP_USE_DYNAMIC_LOCK 1256 } 1257 1258 #if KMP_USE_DYNAMIC_LOCK 1259 1260 // Converts the given hint to an internal lock implementation 1261 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1262 #if KMP_USE_TSX 1263 #define KMP_TSX_LOCK(seq) lockseq_##seq 1264 #else 1265 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1266 #endif 1267 1268 #if KMP_ARCH_X86 || 
KMP_ARCH_X86_64 1269 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1270 #else 1271 #define KMP_CPUINFO_RTM 0 1272 #endif 1273 1274 // Hints that do not require further logic 1275 if (hint & kmp_lock_hint_hle) 1276 return KMP_TSX_LOCK(hle); 1277 if (hint & kmp_lock_hint_rtm) 1278 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; 1279 if (hint & kmp_lock_hint_adaptive) 1280 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1281 1282 // Rule out conflicting hints first by returning the default lock 1283 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1284 return __kmp_user_lock_seq; 1285 if ((hint & omp_lock_hint_speculative) && 1286 (hint & omp_lock_hint_nonspeculative)) 1287 return __kmp_user_lock_seq; 1288 1289 // Do not even consider speculation when it appears to be contended 1290 if (hint & omp_lock_hint_contended) 1291 return lockseq_queuing; 1292 1293 // Uncontended lock without speculation 1294 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1295 return lockseq_tas; 1296 1297 // HLE lock for speculation 1298 if (hint & omp_lock_hint_speculative) 1299 return KMP_TSX_LOCK(hle); 1300 1301 return __kmp_user_lock_seq; 1302 } 1303 1304 #if OMPT_SUPPORT && OMPT_OPTIONAL 1305 #if KMP_USE_DYNAMIC_LOCK 1306 static kmp_mutex_impl_t 1307 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1308 if (user_lock) { 1309 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1310 case 0: 1311 break; 1312 #if KMP_USE_FUTEX 1313 case locktag_futex: 1314 return kmp_mutex_impl_queuing; 1315 #endif 1316 case locktag_tas: 1317 return kmp_mutex_impl_spin; 1318 #if KMP_USE_TSX 1319 case locktag_hle: 1320 return kmp_mutex_impl_speculative; 1321 #endif 1322 default: 1323 return kmp_mutex_impl_none; 1324 } 1325 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1326 } 1327 KMP_ASSERT(ilock); 1328 switch (ilock->type) { 1329 #if KMP_USE_TSX 1330 case locktag_adaptive: 1331 case locktag_rtm: 1332 
return kmp_mutex_impl_speculative; 1333 #endif 1334 case locktag_nested_tas: 1335 return kmp_mutex_impl_spin; 1336 #if KMP_USE_FUTEX 1337 case locktag_nested_futex: 1338 #endif 1339 case locktag_ticket: 1340 case locktag_queuing: 1341 case locktag_drdpa: 1342 case locktag_nested_ticket: 1343 case locktag_nested_queuing: 1344 case locktag_nested_drdpa: 1345 return kmp_mutex_impl_queuing; 1346 default: 1347 return kmp_mutex_impl_none; 1348 } 1349 } 1350 #else 1351 // For locks without dynamic binding 1352 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1353 switch (__kmp_user_lock_kind) { 1354 case lk_tas: 1355 return kmp_mutex_impl_spin; 1356 #if KMP_USE_FUTEX 1357 case lk_futex: 1358 #endif 1359 case lk_ticket: 1360 case lk_queuing: 1361 case lk_drdpa: 1362 return kmp_mutex_impl_queuing; 1363 #if KMP_USE_TSX 1364 case lk_hle: 1365 case lk_rtm: 1366 case lk_adaptive: 1367 return kmp_mutex_impl_speculative; 1368 #endif 1369 default: 1370 return kmp_mutex_impl_none; 1371 } 1372 } 1373 #endif // KMP_USE_DYNAMIC_LOCK 1374 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1375 1376 /*! 1377 @ingroup WORK_SHARING 1378 @param loc source location information. 1379 @param global_tid global thread number. 1380 @param crit identity of the critical section. This could be a pointer to a lock 1381 associated with the critical section, or some other suitably unique value. 1382 @param hint the lock hint. 1383 1384 Enter code protected by a `critical` construct with a hint. The hint value is 1385 used to suggest a lock implementation. This function blocks until the executing 1386 thread can enter the critical section unless the hint suggests use of 1387 speculative execution and the hardware supports it. 
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      // Direct lock: the tag is stored right in the critical-name word.
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      // Indirect lock: allocate one and publish its pointer through 'crit'.
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    // Direct lock path.
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      // NOTE(review): 'ti' is a by-value copy of ompt_thread_info; the state
      // updates below modify the copy only — confirm this is intended.
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    // Indirect lock path: 'crit' holds a pointer to the allocated lock.
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number .
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    // Direct lock: 'crit' itself is the lock word.
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    // Indirect lock: 'crit' holds a pointer to the allocated lock object.
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // Mirror of the lock selection in __kmpc_critical: small TAS/futex locks
  // live in 'crit' itself; other kinds are looked up through the pointer.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  // NOTE(review): the return address is stored for 'global_tid' but loaded
  // with argument 0 below — verify the store/load macros are meant to differ.
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  // TRUE: this barrier elects a master (status is nonzero for non-masters).
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  // Return 1 for the thread that should execute the master block, 0 otherwise.
  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the
end-of-master bookkeeping is performed inside this call (see the
consistency-check code below).
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  // FALSE: plain barrier (no master election here; __kmpc_master decides).
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */

    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */

      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls, rather the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      // Executor thread: only scope_begin here; the matching scope_end is
      // reported by __kmpc_end_single.
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      // Non-executor threads skip the single body, so begin and end are
      // reported back to back.
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  // Only the executor calls this entry point (see doc above), so report the
  // executor's scope_end event.
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use default set above.
        // a warning about this case is provided in __kmpc_for_static_init
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}

// User routines which take C-style arguments (call by value)
// different from the Fortran equivalent routines

void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__nested(thread, flag ? TRUE : FALSE);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* TO DO */
  /* we want per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}

#if OMP_50_ENABLED
/* OpenMP 5.0 Affinity Format API */

// Set the process-wide affinity format string (truncated to the buffer size).
void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

// Copy the current affinity format into 'buffer' (if non-NULL) and return the
// length of the full format string.
size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

// Display the calling thread's affinity using the given format.
void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

// Render the calling thread's affinity into 'buffer' (if non-NULL) and return
// the number of characters required.
size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
#endif /* OMP_50_ENABLED */

void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done.
For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  KMP_MB();

  // Team-private slot through which the single thread publishes its
  // cpy_data pointer to the rest of the team.
  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  // Only the single thread (didit == 1) publishes its buffer pointer.
  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  // After the barrier every thread sees the published pointer; all
  // non-single threads perform the copy via the user-supplied helper.
  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

  // Consider next barrier a user-visible barrier for barrier region boundaries
  // Nesting checks are already handled by the single construct checks

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
// tasks can overwrite the location)
#endif
  // Second barrier keeps the source buffer alive until all copies finish.
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* -------------------------------------------------------------------------- */

// Map the user-lock entry points below onto the "with_checks" lock
// implementations.
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer: picks the direct (D) or indirect (I) lock
// representation from the requested sequence.
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}

// internal nest lock initializer
static __forceinline
void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
#endif
  // Map the base lock sequence onto its nested counterpart; anything
  // without a nested variant falls back to nested queuing.
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
#if KMP_USE_FUTEX
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
#endif
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  // Nested locks always use the indirect (I) representation.
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}

/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
}

/* initialize the nested lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock,
                                     uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif
}

#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  // If the lock state fits inside the user's omp_lock_t, store it in place;
  // otherwise allocate a runtime-managed lock object.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock

/* initialize the nested lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
        codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  // Nested locks need room for poll + depth counter inside omp_nest_lock_t;
  // otherwise a runtime-managed lock object is allocated.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

/* destroy the lock */
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    // Tag 0 means indirect lock: resolve the real lock object first.
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    // NOTE(review): lck is resolved here but the callback reports user_lock;
    // confirm whether the resolved lck is still needed.
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
#else
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  // Free the lock object only if it was runtime-allocated (i.e. not stored
  // in place inside the user's omp_lock_t).
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
#endif
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

/* destroy the lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /*
USE_ITT_BUILD */ 2539 2540 DESTROY_NESTED_LOCK(lck); 2541 2542 if ((__kmp_user_lock_kind == lk_tas) && 2543 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2544 OMP_NEST_LOCK_T_SIZE)) { 2545 ; 2546 } 2547 #if KMP_USE_FUTEX 2548 else if ((__kmp_user_lock_kind == lk_futex) && 2549 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2550 OMP_NEST_LOCK_T_SIZE)) { 2551 ; 2552 } 2553 #endif 2554 else { 2555 __kmp_user_lock_free(user_lock, gtid, lck); 2556 } 2557 #endif // KMP_USE_DYNAMIC_LOCK 2558 } // __kmpc_destroy_nest_lock 2559 2560 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2561 KMP_COUNT_BLOCK(OMP_set_lock); 2562 #if KMP_USE_DYNAMIC_LOCK 2563 int tag = KMP_EXTRACT_D_TAG(user_lock); 2564 #if USE_ITT_BUILD 2565 __kmp_itt_lock_acquiring( 2566 (kmp_user_lock_p) 2567 user_lock); // itt function will get to the right lock object. 2568 #endif 2569 #if OMPT_SUPPORT && OMPT_OPTIONAL 2570 // This is the case, if called from omp_init_lock_with_hint: 2571 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2572 if (!codeptr) 2573 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2574 if (ompt_enabled.ompt_callback_mutex_acquire) { 2575 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2576 ompt_mutex_lock, omp_lock_hint_none, 2577 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2578 codeptr); 2579 } 2580 #endif 2581 #if KMP_USE_INLINED_TAS 2582 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2583 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2584 } else 2585 #elif KMP_USE_INLINED_FUTEX 2586 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2587 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2588 } else 2589 #endif 2590 { 2591 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2592 } 2593 #if USE_ITT_BUILD 2594 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2595 #endif 2596 #if OMPT_SUPPORT && OMPT_OPTIONAL 2597 if (ompt_enabled.ompt_callback_mutex_acquired) { 2598 
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2599 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2600 } 2601 #endif 2602 2603 #else // KMP_USE_DYNAMIC_LOCK 2604 2605 kmp_user_lock_p lck; 2606 2607 if ((__kmp_user_lock_kind == lk_tas) && 2608 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2609 lck = (kmp_user_lock_p)user_lock; 2610 } 2611 #if KMP_USE_FUTEX 2612 else if ((__kmp_user_lock_kind == lk_futex) && 2613 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2614 lck = (kmp_user_lock_p)user_lock; 2615 } 2616 #endif 2617 else { 2618 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2619 } 2620 2621 #if USE_ITT_BUILD 2622 __kmp_itt_lock_acquiring(lck); 2623 #endif /* USE_ITT_BUILD */ 2624 #if OMPT_SUPPORT && OMPT_OPTIONAL 2625 // This is the case, if called from omp_init_lock_with_hint: 2626 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2627 if (!codeptr) 2628 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2629 if (ompt_enabled.ompt_callback_mutex_acquire) { 2630 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2631 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2632 (ompt_wait_id_t)lck, codeptr); 2633 } 2634 #endif 2635 2636 ACQUIRE_LOCK(lck, gtid); 2637 2638 #if USE_ITT_BUILD 2639 __kmp_itt_lock_acquired(lck); 2640 #endif /* USE_ITT_BUILD */ 2641 2642 #if OMPT_SUPPORT && OMPT_OPTIONAL 2643 if (ompt_enabled.ompt_callback_mutex_acquired) { 2644 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2645 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 2646 } 2647 #endif 2648 2649 #endif // KMP_USE_DYNAMIC_LOCK 2650 } 2651 2652 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2653 #if KMP_USE_DYNAMIC_LOCK 2654 2655 #if USE_ITT_BUILD 2656 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2657 #endif 2658 #if OMPT_SUPPORT && OMPT_OPTIONAL 2659 // This is the case, if called from omp_init_lock_with_hint: 2660 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2661 if (!codeptr) 2662 
codeptr = OMPT_GET_RETURN_ADDRESS(0); 2663 if (ompt_enabled.enabled) { 2664 if (ompt_enabled.ompt_callback_mutex_acquire) { 2665 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2666 ompt_mutex_nest_lock, omp_lock_hint_none, 2667 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2668 codeptr); 2669 } 2670 } 2671 #endif 2672 int acquire_status = 2673 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2674 (void) acquire_status; 2675 #if USE_ITT_BUILD 2676 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2677 #endif 2678 2679 #if OMPT_SUPPORT && OMPT_OPTIONAL 2680 if (ompt_enabled.enabled) { 2681 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2682 if (ompt_enabled.ompt_callback_mutex_acquired) { 2683 // lock_first 2684 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2685 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2686 } 2687 } else { 2688 if (ompt_enabled.ompt_callback_nest_lock) { 2689 // lock_next 2690 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2691 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); 2692 } 2693 } 2694 } 2695 #endif 2696 2697 #else // KMP_USE_DYNAMIC_LOCK 2698 int acquire_status; 2699 kmp_user_lock_p lck; 2700 2701 if ((__kmp_user_lock_kind == lk_tas) && 2702 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2703 OMP_NEST_LOCK_T_SIZE)) { 2704 lck = (kmp_user_lock_p)user_lock; 2705 } 2706 #if KMP_USE_FUTEX 2707 else if ((__kmp_user_lock_kind == lk_futex) && 2708 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2709 OMP_NEST_LOCK_T_SIZE)) { 2710 lck = (kmp_user_lock_p)user_lock; 2711 } 2712 #endif 2713 else { 2714 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2715 } 2716 2717 #if USE_ITT_BUILD 2718 __kmp_itt_lock_acquiring(lck); 2719 #endif /* USE_ITT_BUILD */ 2720 #if OMPT_SUPPORT && OMPT_OPTIONAL 2721 // This is the case, if called from omp_init_lock_with_hint: 2722 void *codeptr = 
OMPT_LOAD_RETURN_ADDRESS(gtid); 2723 if (!codeptr) 2724 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2725 if (ompt_enabled.enabled) { 2726 if (ompt_enabled.ompt_callback_mutex_acquire) { 2727 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2728 ompt_mutex_nest_lock, omp_lock_hint_none, 2729 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); 2730 } 2731 } 2732 #endif 2733 2734 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2735 2736 #if USE_ITT_BUILD 2737 __kmp_itt_lock_acquired(lck); 2738 #endif /* USE_ITT_BUILD */ 2739 2740 #if OMPT_SUPPORT && OMPT_OPTIONAL 2741 if (ompt_enabled.enabled) { 2742 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2743 if (ompt_enabled.ompt_callback_mutex_acquired) { 2744 // lock_first 2745 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2746 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 2747 } 2748 } else { 2749 if (ompt_enabled.ompt_callback_nest_lock) { 2750 // lock_next 2751 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2752 ompt_scope_begin, (ompt_wait_id_t)lck, codeptr); 2753 } 2754 } 2755 } 2756 #endif 2757 2758 #endif // KMP_USE_DYNAMIC_LOCK 2759 } 2760 2761 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2762 #if KMP_USE_DYNAMIC_LOCK 2763 2764 int tag = KMP_EXTRACT_D_TAG(user_lock); 2765 #if USE_ITT_BUILD 2766 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2767 #endif 2768 #if KMP_USE_INLINED_TAS 2769 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2770 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2771 } else 2772 #elif KMP_USE_INLINED_FUTEX 2773 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2774 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2775 } else 2776 #endif 2777 { 2778 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2779 } 2780 2781 #if OMPT_SUPPORT && OMPT_OPTIONAL 2782 // This is the case, if called from omp_init_lock_with_hint: 2783 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2784 if (!codeptr) 2785 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2786 if (ompt_enabled.ompt_callback_mutex_released) { 2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2788 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2789 } 2790 #endif 2791 2792 #else // KMP_USE_DYNAMIC_LOCK 2793 2794 kmp_user_lock_p lck; 2795 2796 /* Can't use serial interval since not block structured */ 2797 /* release the lock */ 2798 2799 if ((__kmp_user_lock_kind == lk_tas) && 2800 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2801 #if KMP_OS_LINUX && \ 2802 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2803 // "fast" path implemented to fix customer performance issue 2804 #if USE_ITT_BUILD 2805 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2806 #endif /* USE_ITT_BUILD */ 2807 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2808 KMP_MB(); 2809 2810 #if OMPT_SUPPORT && OMPT_OPTIONAL 2811 // This is the case, if called from omp_init_lock_with_hint: 2812 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2813 if (!codeptr) 2814 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2815 if (ompt_enabled.ompt_callback_mutex_released) { 2816 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2817 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 2818 } 2819 #endif 2820 2821 return; 2822 #else 2823 lck = (kmp_user_lock_p)user_lock; 2824 #endif 2825 } 2826 #if KMP_USE_FUTEX 2827 else if ((__kmp_user_lock_kind == lk_futex) && 2828 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2829 lck = (kmp_user_lock_p)user_lock; 2830 } 2831 #endif 2832 else { 2833 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2834 } 2835 2836 #if USE_ITT_BUILD 2837 __kmp_itt_lock_releasing(lck); 2838 #endif /* USE_ITT_BUILD */ 2839 2840 RELEASE_LOCK(lck, gtid); 2841 2842 #if OMPT_SUPPORT && OMPT_OPTIONAL 2843 // This is the case, if called from omp_init_lock_with_hint: 2844 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2845 if (!codeptr) 2846 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2847 if 
(ompt_enabled.ompt_callback_mutex_released) { 2848 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2849 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 2850 } 2851 #endif 2852 2853 #endif // KMP_USE_DYNAMIC_LOCK 2854 } 2855 2856 /* release the lock */ 2857 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2858 #if KMP_USE_DYNAMIC_LOCK 2859 2860 #if USE_ITT_BUILD 2861 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2862 #endif 2863 int release_status = 2864 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2865 (void) release_status; 2866 2867 #if OMPT_SUPPORT && OMPT_OPTIONAL 2868 // This is the case, if called from omp_init_lock_with_hint: 2869 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2870 if (!codeptr) 2871 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2872 if (ompt_enabled.enabled) { 2873 if (release_status == KMP_LOCK_RELEASED) { 2874 if (ompt_enabled.ompt_callback_mutex_released) { 2875 // release_lock_last 2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2877 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2878 } 2879 } else if (ompt_enabled.ompt_callback_nest_lock) { 2880 // release_lock_prev 2881 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2882 ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr); 2883 } 2884 } 2885 #endif 2886 2887 #else // KMP_USE_DYNAMIC_LOCK 2888 2889 kmp_user_lock_p lck; 2890 2891 /* Can't use serial interval since not block structured */ 2892 2893 if ((__kmp_user_lock_kind == lk_tas) && 2894 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2895 OMP_NEST_LOCK_T_SIZE)) { 2896 #if KMP_OS_LINUX && \ 2897 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2898 // "fast" path implemented to fix customer performance issue 2899 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2900 #if USE_ITT_BUILD 2901 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2902 #endif /* USE_ITT_BUILD */ 2903 2904 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 2905 int release_status = KMP_LOCK_STILL_HELD; 2906 #endif 2907 2908 if (--(tl->lk.depth_locked) == 0) { 2909 TCW_4(tl->lk.poll, 0); 2910 #if OMPT_SUPPORT && OMPT_OPTIONAL 2911 release_status = KMP_LOCK_RELEASED; 2912 #endif 2913 } 2914 KMP_MB(); 2915 2916 #if OMPT_SUPPORT && OMPT_OPTIONAL 2917 // This is the case, if called from omp_init_lock_with_hint: 2918 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2919 if (!codeptr) 2920 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2921 if (ompt_enabled.enabled) { 2922 if (release_status == KMP_LOCK_RELEASED) { 2923 if (ompt_enabled.ompt_callback_mutex_released) { 2924 // release_lock_last 2925 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2926 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 2927 } 2928 } else if (ompt_enabled.ompt_callback_nest_lock) { 2929 // release_lock_previous 2930 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2931 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); 2932 } 2933 } 2934 #endif 2935 2936 return; 2937 #else 2938 lck = (kmp_user_lock_p)user_lock; 2939 #endif 2940 } 2941 #if KMP_USE_FUTEX 2942 else if ((__kmp_user_lock_kind == lk_futex) && 2943 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2944 OMP_NEST_LOCK_T_SIZE)) { 2945 lck = (kmp_user_lock_p)user_lock; 2946 } 2947 #endif 2948 else { 2949 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2950 } 2951 2952 #if USE_ITT_BUILD 2953 __kmp_itt_lock_releasing(lck); 2954 #endif /* USE_ITT_BUILD */ 2955 2956 int release_status; 2957 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2958 #if OMPT_SUPPORT && OMPT_OPTIONAL 2959 // This is the case, if called from omp_init_lock_with_hint: 2960 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2961 if (!codeptr) 2962 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2963 if (ompt_enabled.enabled) { 2964 if (release_status == KMP_LOCK_RELEASED) { 2965 if (ompt_enabled.ompt_callback_mutex_released) { 2966 // release_lock_last 2967 
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2968 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 2969 } 2970 } else if (ompt_enabled.ompt_callback_nest_lock) { 2971 // release_lock_previous 2972 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2973 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); 2974 } 2975 } 2976 #endif 2977 2978 #endif // KMP_USE_DYNAMIC_LOCK 2979 } 2980 2981 /* try to acquire the lock */ 2982 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2983 KMP_COUNT_BLOCK(OMP_test_lock); 2984 2985 #if KMP_USE_DYNAMIC_LOCK 2986 int rc; 2987 int tag = KMP_EXTRACT_D_TAG(user_lock); 2988 #if USE_ITT_BUILD 2989 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2990 #endif 2991 #if OMPT_SUPPORT && OMPT_OPTIONAL 2992 // This is the case, if called from omp_init_lock_with_hint: 2993 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2994 if (!codeptr) 2995 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2996 if (ompt_enabled.ompt_callback_mutex_acquire) { 2997 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2998 ompt_mutex_lock, omp_lock_hint_none, 2999 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 3000 codeptr); 3001 } 3002 #endif 3003 #if KMP_USE_INLINED_TAS 3004 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3005 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3006 } else 3007 #elif KMP_USE_INLINED_FUTEX 3008 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3009 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3010 } else 3011 #endif 3012 { 3013 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3014 } 3015 if (rc) { 3016 #if USE_ITT_BUILD 3017 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3018 #endif 3019 #if OMPT_SUPPORT && OMPT_OPTIONAL 3020 if (ompt_enabled.ompt_callback_mutex_acquired) { 3021 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3022 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 3023 } 3024 #endif 3025 return FTN_TRUE; 3026 
} else { 3027 #if USE_ITT_BUILD 3028 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3029 #endif 3030 return FTN_FALSE; 3031 } 3032 3033 #else // KMP_USE_DYNAMIC_LOCK 3034 3035 kmp_user_lock_p lck; 3036 int rc; 3037 3038 if ((__kmp_user_lock_kind == lk_tas) && 3039 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3040 lck = (kmp_user_lock_p)user_lock; 3041 } 3042 #if KMP_USE_FUTEX 3043 else if ((__kmp_user_lock_kind == lk_futex) && 3044 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3045 lck = (kmp_user_lock_p)user_lock; 3046 } 3047 #endif 3048 else { 3049 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3050 } 3051 3052 #if USE_ITT_BUILD 3053 __kmp_itt_lock_acquiring(lck); 3054 #endif /* USE_ITT_BUILD */ 3055 #if OMPT_SUPPORT && OMPT_OPTIONAL 3056 // This is the case, if called from omp_init_lock_with_hint: 3057 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3058 if (!codeptr) 3059 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3060 if (ompt_enabled.ompt_callback_mutex_acquire) { 3061 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3062 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3063 (ompt_wait_id_t)lck, codeptr); 3064 } 3065 #endif 3066 3067 rc = TEST_LOCK(lck, gtid); 3068 #if USE_ITT_BUILD 3069 if (rc) { 3070 __kmp_itt_lock_acquired(lck); 3071 } else { 3072 __kmp_itt_lock_cancelled(lck); 3073 } 3074 #endif /* USE_ITT_BUILD */ 3075 #if OMPT_SUPPORT && OMPT_OPTIONAL 3076 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3077 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3078 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 3079 } 3080 #endif 3081 3082 return (rc ? 
FTN_TRUE : FTN_FALSE); 3083 3084 /* Can't use serial interval since not block structured */ 3085 3086 #endif // KMP_USE_DYNAMIC_LOCK 3087 } 3088 3089 /* try to acquire the lock */ 3090 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3091 #if KMP_USE_DYNAMIC_LOCK 3092 int rc; 3093 #if USE_ITT_BUILD 3094 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3095 #endif 3096 #if OMPT_SUPPORT && OMPT_OPTIONAL 3097 // This is the case, if called from omp_init_lock_with_hint: 3098 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3099 if (!codeptr) 3100 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3101 if (ompt_enabled.ompt_callback_mutex_acquire) { 3102 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3103 ompt_mutex_nest_lock, omp_lock_hint_none, 3104 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 3105 codeptr); 3106 } 3107 #endif 3108 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3109 #if USE_ITT_BUILD 3110 if (rc) { 3111 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3112 } else { 3113 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3114 } 3115 #endif 3116 #if OMPT_SUPPORT && OMPT_OPTIONAL 3117 if (ompt_enabled.enabled && rc) { 3118 if (rc == 1) { 3119 if (ompt_enabled.ompt_callback_mutex_acquired) { 3120 // lock_first 3121 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3122 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 3123 } 3124 } else { 3125 if (ompt_enabled.ompt_callback_nest_lock) { 3126 // lock_next 3127 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3128 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); 3129 } 3130 } 3131 } 3132 #endif 3133 return rc; 3134 3135 #else // KMP_USE_DYNAMIC_LOCK 3136 3137 kmp_user_lock_p lck; 3138 int rc; 3139 3140 if ((__kmp_user_lock_kind == lk_tas) && 3141 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3142 OMP_NEST_LOCK_T_SIZE)) { 3143 lck = (kmp_user_lock_p)user_lock; 3144 } 3145 #if 
KMP_USE_FUTEX 3146 else if ((__kmp_user_lock_kind == lk_futex) && 3147 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3148 OMP_NEST_LOCK_T_SIZE)) { 3149 lck = (kmp_user_lock_p)user_lock; 3150 } 3151 #endif 3152 else { 3153 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3154 } 3155 3156 #if USE_ITT_BUILD 3157 __kmp_itt_lock_acquiring(lck); 3158 #endif /* USE_ITT_BUILD */ 3159 3160 #if OMPT_SUPPORT && OMPT_OPTIONAL 3161 // This is the case, if called from omp_init_lock_with_hint: 3162 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3163 if (!codeptr) 3164 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3165 if (ompt_enabled.enabled) && 3166 ompt_enabled.ompt_callback_mutex_acquire) { 3167 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3168 ompt_mutex_nest_lock, omp_lock_hint_none, 3169 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); 3170 } 3171 #endif 3172 3173 rc = TEST_NESTED_LOCK(lck, gtid); 3174 #if USE_ITT_BUILD 3175 if (rc) { 3176 __kmp_itt_lock_acquired(lck); 3177 } else { 3178 __kmp_itt_lock_cancelled(lck); 3179 } 3180 #endif /* USE_ITT_BUILD */ 3181 #if OMPT_SUPPORT && OMPT_OPTIONAL 3182 if (ompt_enabled.enabled && rc) { 3183 if (rc == 1) { 3184 if (ompt_enabled.ompt_callback_mutex_acquired) { 3185 // lock_first 3186 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3187 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 3188 } 3189 } else { 3190 if (ompt_enabled.ompt_callback_nest_lock) { 3191 // lock_next 3192 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3193 ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr); 3194 } 3195 } 3196 } 3197 #endif 3198 return rc; 3199 3200 /* Can't use serial interval since not block structured */ 3201 3202 #endif // KMP_USE_DYNAMIC_LOCK 3203 } 3204 3205 // Interface to fast scalable reduce methods routines 3206 3207 // keep the selected method in a thread local structure for cross-function 3208 // usage: will be used in __kmpc_end_reduce* functions; 
3209 // another solution: to re-determine the method one more time in 3210 // __kmpc_end_reduce* functions (new prototype required then) 3211 // AT: which solution is better? 3212 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3213 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3214 3215 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3216 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3217 3218 // description of the packed_reduction_method variable: look at the macros in 3219 // kmp.h 3220 3221 // used in a critical section reduce block 3222 static __forceinline void 3223 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3224 kmp_critical_name *crit) { 3225 3226 // this lock was visible to a customer and to the threading profile tool as a 3227 // serial overhead span (although it's used for an internal purpose only) 3228 // why was it visible in previous implementation? 3229 // should we keep it visible in new reduce block? 3230 kmp_user_lock_p lck; 3231 3232 #if KMP_USE_DYNAMIC_LOCK 3233 3234 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3235 // Check if it is initialized. 3236 if (*lk == 0) { 3237 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3238 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3239 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3240 } else { 3241 __kmp_init_indirect_csptr(crit, loc, global_tid, 3242 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3243 } 3244 } 3245 // Branch for accessing the actual lock object and set operation. This 3246 // branching is inevitable since this lock initialization does not follow the 3247 // normal dispatch path (lock table is not used). 
3248 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3249 lck = (kmp_user_lock_p)lk; 3250 KMP_DEBUG_ASSERT(lck != NULL); 3251 if (__kmp_env_consistency_check) { 3252 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3253 } 3254 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3255 } else { 3256 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3257 lck = ilk->lock; 3258 KMP_DEBUG_ASSERT(lck != NULL); 3259 if (__kmp_env_consistency_check) { 3260 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3261 } 3262 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3263 } 3264 3265 #else // KMP_USE_DYNAMIC_LOCK 3266 3267 // We know that the fast reduction code is only emitted by Intel compilers 3268 // with 32 byte critical sections. If there isn't enough space, then we 3269 // have to use a pointer. 3270 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3271 lck = (kmp_user_lock_p)crit; 3272 } else { 3273 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3274 } 3275 KMP_DEBUG_ASSERT(lck != NULL); 3276 3277 if (__kmp_env_consistency_check) 3278 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3279 3280 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3281 3282 #endif // KMP_USE_DYNAMIC_LOCK 3283 } 3284 3285 // used in a critical section reduce block 3286 static __forceinline void 3287 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3288 kmp_critical_name *crit) { 3289 3290 kmp_user_lock_p lck; 3291 3292 #if KMP_USE_DYNAMIC_LOCK 3293 3294 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3295 lck = (kmp_user_lock_p)crit; 3296 if (__kmp_env_consistency_check) 3297 __kmp_pop_sync(global_tid, ct_critical, loc); 3298 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3299 } else { 3300 kmp_indirect_lock_t *ilk = 3301 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3302 if (__kmp_env_consistency_check) 3303 __kmp_pop_sync(global_tid, ct_critical, loc); 3304 KMP_I_LOCK_FUNC(ilk, 
unset)(ilk->lock, global_tid); 3305 } 3306 3307 #else // KMP_USE_DYNAMIC_LOCK 3308 3309 // We know that the fast reduction code is only emitted by Intel compilers 3310 // with 32 byte critical sections. If there isn't enough space, then we have 3311 // to use a pointer. 3312 if (__kmp_base_user_lock_size > 32) { 3313 lck = *((kmp_user_lock_p *)crit); 3314 KMP_ASSERT(lck != NULL); 3315 } else { 3316 lck = (kmp_user_lock_p)crit; 3317 } 3318 3319 if (__kmp_env_consistency_check) 3320 __kmp_pop_sync(global_tid, ct_critical, loc); 3321 3322 __kmp_release_user_lock_with_checks(lck, global_tid); 3323 3324 #endif // KMP_USE_DYNAMIC_LOCK 3325 } // __kmp_end_critical_section_reduce_block 3326 3327 #if OMP_40_ENABLED 3328 static __forceinline int 3329 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3330 int *task_state) { 3331 kmp_team_t *team; 3332 3333 // Check if we are inside the teams construct? 3334 if (th->th.th_teams_microtask) { 3335 *team_p = team = th->th.th_team; 3336 if (team->t.t_level == th->th.th_teams_level) { 3337 // This is reduction at teams construct. 3338 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3339 // Let's swap teams temporarily for the reduction. 3340 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3341 th->th.th_team = team->t.t_parent; 3342 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3343 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3344 *task_state = th->th.th_task_state; 3345 th->th.th_task_state = 0; 3346 3347 return 1; 3348 } 3349 } 3350 return 0; 3351 } 3352 3353 static __forceinline void 3354 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3355 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 
3356 th->th.th_info.ds.ds_tid = 0; 3357 th->th.th_team = team; 3358 th->th.th_team_nproc = team->t.t_nproc; 3359 th->th.th_task_team = team->t.t_task_team[task_state]; 3360 th->th.th_task_state = task_state; 3361 } 3362 #endif 3363 3364 /* 2.a.i. Reduce Block without a terminating barrier */ 3365 /*! 3366 @ingroup SYNCHRONIZATION 3367 @param loc source location information 3368 @param global_tid global thread number 3369 @param num_vars number of items (variables) to be reduced 3370 @param reduce_size size of data in bytes to be reduced 3371 @param reduce_data pointer to data to be reduced 3372 @param reduce_func callback function providing reduction operation on two 3373 operands and returning result of reduction in lhs_data 3374 @param lck pointer to the unique lock data structure 3375 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3376 threads if atomic reduction needed 3377 3378 The nowait version is used for a reduce clause with the nowait argument. 3379 */ 3380 kmp_int32 3381 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3382 size_t reduce_size, void *reduce_data, 3383 void (*reduce_func)(void *lhs_data, void *rhs_data), 3384 kmp_critical_name *lck) { 3385 3386 KMP_COUNT_BLOCK(REDUCE_nowait); 3387 int retval = 0; 3388 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3389 #if OMP_40_ENABLED 3390 kmp_info_t *th; 3391 kmp_team_t *team; 3392 int teams_swapped = 0, task_state; 3393 #endif 3394 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3395 3396 // why do we need this initialization here at all? 3397 // Reduction clause can not be used as a stand-alone directive. 3398 3399 // do not call __kmp_serial_initialize(), it will be called by 3400 // __kmp_parallel_initialize() if needed 3401 // possible detection of false-positive race by the threadchecker ??? 
3402 if (!TCR_4(__kmp_init_parallel)) 3403 __kmp_parallel_initialize(); 3404 3405 #if OMP_50_ENABLED 3406 __kmp_resume_if_soft_paused(); 3407 #endif 3408 3409 // check correctness of reduce block nesting 3410 #if KMP_USE_DYNAMIC_LOCK 3411 if (__kmp_env_consistency_check) 3412 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3413 #else 3414 if (__kmp_env_consistency_check) 3415 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3416 #endif 3417 3418 #if OMP_40_ENABLED 3419 th = __kmp_thread_from_gtid(global_tid); 3420 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3421 #endif // OMP_40_ENABLED 3422 3423 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3424 // the value should be kept in a variable 3425 // the variable should be either a construct-specific or thread-specific 3426 // property, not a team specific property 3427 // (a thread can reach the next reduce block on the next construct, reduce 3428 // method may differ on the next construct) 3429 // an ident_t "loc" parameter could be used as a construct-specific property 3430 // (what if loc == 0?) 
3431 // (if both construct-specific and team-specific variables were shared, 3432 // then unness extra syncs should be needed) 3433 // a thread-specific variable is better regarding two issues above (next 3434 // construct and extra syncs) 3435 // a thread-specific "th_local.reduction_method" variable is used currently 3436 // each thread executes 'determine' and 'set' lines (no need to execute by one 3437 // thread, to avoid unness extra syncs) 3438 3439 packed_reduction_method = __kmp_determine_reduction_method( 3440 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3441 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3442 3443 if (packed_reduction_method == critical_reduce_block) { 3444 3445 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3446 retval = 1; 3447 3448 } else if (packed_reduction_method == empty_reduce_block) { 3449 3450 // usage: if team size == 1, no synchronization is required ( Intel 3451 // platforms only ) 3452 retval = 1; 3453 3454 } else if (packed_reduction_method == atomic_reduce_block) { 3455 3456 retval = 2; 3457 3458 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3459 // won't be called by the code gen) 3460 // (it's not quite good, because the checking block has been closed by 3461 // this 'pop', 3462 // but atomic operation has not been executed yet, will be executed 3463 // slightly later, literally on next instruction) 3464 if (__kmp_env_consistency_check) 3465 __kmp_pop_sync(global_tid, ct_reduce, loc); 3466 3467 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3468 tree_reduce_block)) { 3469 3470 // AT: performance issue: a real barrier here 3471 // AT: (if master goes slow, other threads are blocked here waiting for the 3472 // master to come and release them) 3473 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3474 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3475 // be confusing to 
a customer) 3476 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3477 // might go faster and be more in line with sense of NOWAIT 3478 // AT: TO DO: do epcc test and compare times 3479 3480 // this barrier should be invisible to a customer and to the threading profile 3481 // tool (it's neither a terminating barrier nor customer's code, it's 3482 // used for an internal purpose) 3483 #if OMPT_SUPPORT 3484 // JP: can this barrier potentially leed to task scheduling? 3485 // JP: as long as there is a barrier in the implementation, OMPT should and 3486 // will provide the barrier events 3487 // so we set-up the necessary frame/return addresses. 3488 ompt_frame_t *ompt_frame; 3489 if (ompt_enabled.enabled) { 3490 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3491 if (ompt_frame->enter_frame.ptr == NULL) 3492 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3493 OMPT_STORE_RETURN_ADDRESS(global_tid); 3494 } 3495 #endif 3496 #if USE_ITT_NOTIFY 3497 __kmp_threads[global_tid]->th.th_ident = loc; 3498 #endif 3499 retval = 3500 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3501 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3502 retval = (retval != 0) ? 
(0) : (1); 3503 #if OMPT_SUPPORT && OMPT_OPTIONAL 3504 if (ompt_enabled.enabled) { 3505 ompt_frame->enter_frame = ompt_data_none; 3506 } 3507 #endif 3508 3509 // all other workers except master should do this pop here 3510 // ( none of other workers will get to __kmpc_end_reduce_nowait() ) 3511 if (__kmp_env_consistency_check) { 3512 if (retval == 0) { 3513 __kmp_pop_sync(global_tid, ct_reduce, loc); 3514 } 3515 } 3516 3517 } else { 3518 3519 // should never reach this block 3520 KMP_ASSERT(0); // "unexpected method" 3521 } 3522 #if OMP_40_ENABLED 3523 if (teams_swapped) { 3524 __kmp_restore_swapped_teams(th, team, task_state); 3525 } 3526 #endif 3527 KA_TRACE( 3528 10, 3529 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3530 global_tid, packed_reduction_method, retval)); 3531 3532 return retval; 3533 } 3534 3535 /*! 3536 @ingroup SYNCHRONIZATION 3537 @param loc source location information 3538 @param global_tid global thread id. 3539 @param lck pointer to the unique lock data structure 3540 3541 Finish the execution of a reduce nowait. 
3542 */ 3543 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3544 kmp_critical_name *lck) { 3545 3546 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3547 3548 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3549 3550 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3551 3552 if (packed_reduction_method == critical_reduce_block) { 3553 3554 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3555 3556 } else if (packed_reduction_method == empty_reduce_block) { 3557 3558 // usage: if team size == 1, no synchronization is required ( on Intel 3559 // platforms only ) 3560 3561 } else if (packed_reduction_method == atomic_reduce_block) { 3562 3563 // neither master nor other workers should get here 3564 // (code gen does not generate this call in case 2: atomic reduce block) 3565 // actually it's better to remove this elseif at all; 3566 // after removal this value will checked by the 'else' and will assert 3567 3568 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3569 tree_reduce_block)) { 3570 3571 // only master gets here 3572 3573 } else { 3574 3575 // should never reach this block 3576 KMP_ASSERT(0); // "unexpected method" 3577 } 3578 3579 if (__kmp_env_consistency_check) 3580 __kmp_pop_sync(global_tid, ct_reduce, loc); 3581 3582 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3583 global_tid, packed_reduction_method)); 3584 3585 return; 3586 } 3587 3588 /* 2.a.ii. Reduce Block with a terminating barrier */ 3589 3590 /*! 
3591 @ingroup SYNCHRONIZATION 3592 @param loc source location information 3593 @param global_tid global thread number 3594 @param num_vars number of items (variables) to be reduced 3595 @param reduce_size size of data in bytes to be reduced 3596 @param reduce_data pointer to data to be reduced 3597 @param reduce_func callback function providing reduction operation on two 3598 operands and returning result of reduction in lhs_data 3599 @param lck pointer to the unique lock data structure 3600 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3601 threads if atomic reduction needed 3602 3603 A blocking reduce that includes an implicit barrier. 3604 */ 3605 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3606 size_t reduce_size, void *reduce_data, 3607 void (*reduce_func)(void *lhs_data, void *rhs_data), 3608 kmp_critical_name *lck) { 3609 KMP_COUNT_BLOCK(REDUCE_wait); 3610 int retval = 0; 3611 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3612 #if OMP_40_ENABLED 3613 kmp_info_t *th; 3614 kmp_team_t *team; 3615 int teams_swapped = 0, task_state; 3616 #endif 3617 3618 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3619 3620 // why do we need this initialization here at all? 3621 // Reduction clause can not be a stand-alone directive. 3622 3623 // do not call __kmp_serial_initialize(), it will be called by 3624 // __kmp_parallel_initialize() if needed 3625 // possible detection of false-positive race by the threadchecker ??? 
3626 if (!TCR_4(__kmp_init_parallel)) 3627 __kmp_parallel_initialize(); 3628 3629 #if OMP_50_ENABLED 3630 __kmp_resume_if_soft_paused(); 3631 #endif 3632 3633 // check correctness of reduce block nesting 3634 #if KMP_USE_DYNAMIC_LOCK 3635 if (__kmp_env_consistency_check) 3636 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3637 #else 3638 if (__kmp_env_consistency_check) 3639 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3640 #endif 3641 3642 #if OMP_40_ENABLED 3643 th = __kmp_thread_from_gtid(global_tid); 3644 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3645 #endif // OMP_40_ENABLED 3646 3647 packed_reduction_method = __kmp_determine_reduction_method( 3648 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3649 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3650 3651 if (packed_reduction_method == critical_reduce_block) { 3652 3653 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3654 retval = 1; 3655 3656 } else if (packed_reduction_method == empty_reduce_block) { 3657 3658 // usage: if team size == 1, no synchronization is required ( Intel 3659 // platforms only ) 3660 retval = 1; 3661 3662 } else if (packed_reduction_method == atomic_reduce_block) { 3663 3664 retval = 2; 3665 3666 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3667 tree_reduce_block)) { 3668 3669 // case tree_reduce_block: 3670 // this barrier should be visible to a customer and to the threading profile 3671 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3672 #if OMPT_SUPPORT 3673 ompt_frame_t *ompt_frame; 3674 if (ompt_enabled.enabled) { 3675 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3676 if (ompt_frame->enter_frame.ptr == NULL) 3677 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3678 OMPT_STORE_RETURN_ADDRESS(global_tid); 3679 } 3680 #endif 3681 #if USE_ITT_NOTIFY 3682 __kmp_threads[global_tid]->th.th_ident = 3683 
loc; // needed for correct notification of frames 3684 #endif 3685 retval = 3686 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3687 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3688 retval = (retval != 0) ? (0) : (1); 3689 #if OMPT_SUPPORT && OMPT_OPTIONAL 3690 if (ompt_enabled.enabled) { 3691 ompt_frame->enter_frame = ompt_data_none; 3692 } 3693 #endif 3694 3695 // all other workers except master should do this pop here 3696 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3697 if (__kmp_env_consistency_check) { 3698 if (retval == 0) { // 0: all other workers; 1: master 3699 __kmp_pop_sync(global_tid, ct_reduce, loc); 3700 } 3701 } 3702 3703 } else { 3704 3705 // should never reach this block 3706 KMP_ASSERT(0); // "unexpected method" 3707 } 3708 #if OMP_40_ENABLED 3709 if (teams_swapped) { 3710 __kmp_restore_swapped_teams(th, team, task_state); 3711 } 3712 #endif 3713 3714 KA_TRACE(10, 3715 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3716 global_tid, packed_reduction_method, retval)); 3717 3718 return retval; 3719 } 3720 3721 /*! 3722 @ingroup SYNCHRONIZATION 3723 @param loc source location information 3724 @param global_tid global thread id. 3725 @param lck pointer to the unique lock data structure 3726 3727 Finish the execution of a blocking reduce. 3728 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3729 start function. 
3730 */ 3731 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3732 kmp_critical_name *lck) { 3733 3734 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3735 #if OMP_40_ENABLED 3736 kmp_info_t *th; 3737 kmp_team_t *team; 3738 int teams_swapped = 0, task_state; 3739 #endif 3740 3741 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3742 3743 #if OMP_40_ENABLED 3744 th = __kmp_thread_from_gtid(global_tid); 3745 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3746 #endif // OMP_40_ENABLED 3747 3748 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3749 3750 // this barrier should be visible to a customer and to the threading profile 3751 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3752 3753 if (packed_reduction_method == critical_reduce_block) { 3754 3755 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3756 3757 // TODO: implicit barrier: should be exposed 3758 #if OMPT_SUPPORT 3759 ompt_frame_t *ompt_frame; 3760 if (ompt_enabled.enabled) { 3761 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3762 if (ompt_frame->enter_frame.ptr == NULL) 3763 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3764 OMPT_STORE_RETURN_ADDRESS(global_tid); 3765 } 3766 #endif 3767 #if USE_ITT_NOTIFY 3768 __kmp_threads[global_tid]->th.th_ident = loc; 3769 #endif 3770 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3771 #if OMPT_SUPPORT && OMPT_OPTIONAL 3772 if (ompt_enabled.enabled) { 3773 ompt_frame->enter_frame = ompt_data_none; 3774 } 3775 #endif 3776 3777 } else if (packed_reduction_method == empty_reduce_block) { 3778 3779 // usage: if team size==1, no synchronization is required (Intel platforms only) 3780 3781 // TODO: implicit barrier: should be exposed 3782 #if OMPT_SUPPORT 3783 ompt_frame_t *ompt_frame; 3784 if (ompt_enabled.enabled) { 3785 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 
3786 if (ompt_frame->enter_frame.ptr == NULL) 3787 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3788 OMPT_STORE_RETURN_ADDRESS(global_tid); 3789 } 3790 #endif 3791 #if USE_ITT_NOTIFY 3792 __kmp_threads[global_tid]->th.th_ident = loc; 3793 #endif 3794 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3795 #if OMPT_SUPPORT && OMPT_OPTIONAL 3796 if (ompt_enabled.enabled) { 3797 ompt_frame->enter_frame = ompt_data_none; 3798 } 3799 #endif 3800 3801 } else if (packed_reduction_method == atomic_reduce_block) { 3802 3803 #if OMPT_SUPPORT 3804 ompt_frame_t *ompt_frame; 3805 if (ompt_enabled.enabled) { 3806 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3807 if (ompt_frame->enter_frame.ptr == NULL) 3808 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3809 OMPT_STORE_RETURN_ADDRESS(global_tid); 3810 } 3811 #endif 3812 // TODO: implicit barrier: should be exposed 3813 #if USE_ITT_NOTIFY 3814 __kmp_threads[global_tid]->th.th_ident = loc; 3815 #endif 3816 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3817 #if OMPT_SUPPORT && OMPT_OPTIONAL 3818 if (ompt_enabled.enabled) { 3819 ompt_frame->enter_frame = ompt_data_none; 3820 } 3821 #endif 3822 3823 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3824 tree_reduce_block)) { 3825 3826 // only master executes here (master releases all other workers) 3827 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3828 global_tid); 3829 3830 } else { 3831 3832 // should never reach this block 3833 KMP_ASSERT(0); // "unexpected method" 3834 } 3835 #if OMP_40_ENABLED 3836 if (teams_swapped) { 3837 __kmp_restore_swapped_teams(th, team, task_state); 3838 } 3839 #endif 3840 3841 if (__kmp_env_consistency_check) 3842 __kmp_pop_sync(global_tid, ct_reduce, loc); 3843 3844 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3845 global_tid, packed_reduction_method)); 3846 3847 return; 3848 } 3849 3850 #undef 
__KMP_GET_REDUCTION_METHOD 3851 #undef __KMP_SET_REDUCTION_METHOD 3852 3853 /* end of interface to fast scalable reduce routines */ 3854 3855 kmp_uint64 __kmpc_get_taskid() { 3856 3857 kmp_int32 gtid; 3858 kmp_info_t *thread; 3859 3860 gtid = __kmp_get_gtid(); 3861 if (gtid < 0) { 3862 return 0; 3863 } 3864 thread = __kmp_thread_from_gtid(gtid); 3865 return thread->th.th_current_task->td_task_id; 3866 3867 } // __kmpc_get_taskid 3868 3869 kmp_uint64 __kmpc_get_parent_taskid() { 3870 3871 kmp_int32 gtid; 3872 kmp_info_t *thread; 3873 kmp_taskdata_t *parent_task; 3874 3875 gtid = __kmp_get_gtid(); 3876 if (gtid < 0) { 3877 return 0; 3878 } 3879 thread = __kmp_thread_from_gtid(gtid); 3880 parent_task = thread->th.th_current_task->td_parent; 3881 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3882 3883 } // __kmpc_get_parent_taskid 3884 3885 #if OMP_45_ENABLED 3886 /*! 3887 @ingroup WORK_SHARING 3888 @param loc source location information. 3889 @param gtid global thread number. 3890 @param num_dims number of associated doacross loops. 3891 @param dims info on loops bounds. 3892 3893 Initialize doacross loop information. 3894 Expect compiler send us inclusive bounds, 3895 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
3896 */ 3897 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3898 const struct kmp_dim *dims) { 3899 int j, idx; 3900 kmp_int64 last, trace_count; 3901 kmp_info_t *th = __kmp_threads[gtid]; 3902 kmp_team_t *team = th->th.th_team; 3903 kmp_uint32 *flags; 3904 kmp_disp_t *pr_buf = th->th.th_dispatch; 3905 dispatch_shared_info_t *sh_buf; 3906 3907 KA_TRACE( 3908 20, 3909 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3910 gtid, num_dims, !team->t.t_serialized)); 3911 KMP_DEBUG_ASSERT(dims != NULL); 3912 KMP_DEBUG_ASSERT(num_dims > 0); 3913 3914 if (team->t.t_serialized) { 3915 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3916 return; // no dependencies if team is serialized 3917 } 3918 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3919 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3920 // the next loop 3921 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3922 3923 // Save bounds info into allocated private buffer 3924 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3925 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3926 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3927 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3928 pr_buf->th_doacross_info[0] = 3929 (kmp_int64)num_dims; // first element is number of dimensions 3930 // Save also address of num_done in order to access it later without knowing 3931 // the buffer index 3932 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3933 pr_buf->th_doacross_info[2] = dims[0].lo; 3934 pr_buf->th_doacross_info[3] = dims[0].up; 3935 pr_buf->th_doacross_info[4] = dims[0].st; 3936 last = 5; 3937 for (j = 1; j < num_dims; ++j) { 3938 kmp_int64 3939 range_length; // To keep ranges of all dimensions but the first dims[0] 3940 if (dims[j].st == 1) { // most common case 3941 // AC: should we care of ranges bigger than LLONG_MAX? 
(not for now) 3942 range_length = dims[j].up - dims[j].lo + 1; 3943 } else { 3944 if (dims[j].st > 0) { 3945 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3946 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3947 } else { // negative increment 3948 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3949 range_length = 3950 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3951 } 3952 } 3953 pr_buf->th_doacross_info[last++] = range_length; 3954 pr_buf->th_doacross_info[last++] = dims[j].lo; 3955 pr_buf->th_doacross_info[last++] = dims[j].up; 3956 pr_buf->th_doacross_info[last++] = dims[j].st; 3957 } 3958 3959 // Compute total trip count. 3960 // Start with range of dims[0] which we don't need to keep in the buffer. 3961 if (dims[0].st == 1) { // most common case 3962 trace_count = dims[0].up - dims[0].lo + 1; 3963 } else if (dims[0].st > 0) { 3964 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3965 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3966 } else { // negative increment 3967 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3968 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3969 } 3970 for (j = 1; j < num_dims; ++j) { 3971 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3972 } 3973 KMP_DEBUG_ASSERT(trace_count > 0); 3974 3975 // Check if shared buffer is not occupied by other loop (idx - 3976 // __kmp_dispatch_num_buffers) 3977 if (idx != sh_buf->doacross_buf_idx) { 3978 // Shared buffer is occupied, wait for it to be free 3979 __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3980 __kmp_eq_4, NULL); 3981 } 3982 #if KMP_32_BIT_ARCH 3983 // Check if we are the first thread. After the CAS the first thread gets 0, 3984 // others get 1 if initialization is in progress, allocated pointer otherwise. 3985 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 
3986 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3987 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3988 #else 3989 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3990 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3991 #endif 3992 if (flags == NULL) { 3993 // we are the first thread, allocate the array of flags 3994 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3995 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3996 KMP_MB(); 3997 sh_buf->doacross_flags = flags; 3998 } else if (flags == (kmp_uint32 *)1) { 3999 #if KMP_32_BIT_ARCH 4000 // initialization is still in progress, need to wait 4001 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4002 #else 4003 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4004 #endif 4005 KMP_YIELD(TRUE); 4006 KMP_MB(); 4007 } else { 4008 KMP_MB(); 4009 } 4010 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4011 pr_buf->th_doacross_flags = 4012 sh_buf->doacross_flags; // save private copy in order to not 4013 // touch shared buffer on each iteration 4014 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4015 } 4016 4017 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4018 kmp_int32 shft, num_dims, i; 4019 kmp_uint32 flag; 4020 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4021 kmp_info_t *th = __kmp_threads[gtid]; 4022 kmp_team_t *team = th->th.th_team; 4023 kmp_disp_t *pr_buf; 4024 kmp_int64 lo, up, st; 4025 4026 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4027 if (team->t.t_serialized) { 4028 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4029 return; // no dependencies if team is serialized 4030 } 4031 4032 // calculate sequential iteration number and check out-of-bounds condition 4033 pr_buf = th->th.th_dispatch; 4034 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4035 num_dims = 
pr_buf->th_doacross_info[0]; 4036 lo = pr_buf->th_doacross_info[2]; 4037 up = pr_buf->th_doacross_info[3]; 4038 st = pr_buf->th_doacross_info[4]; 4039 if (st == 1) { // most common case 4040 if (vec[0] < lo || vec[0] > up) { 4041 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4042 "bounds [%lld,%lld]\n", 4043 gtid, vec[0], lo, up)); 4044 return; 4045 } 4046 iter_number = vec[0] - lo; 4047 } else if (st > 0) { 4048 if (vec[0] < lo || vec[0] > up) { 4049 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4050 "bounds [%lld,%lld]\n", 4051 gtid, vec[0], lo, up)); 4052 return; 4053 } 4054 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4055 } else { // negative increment 4056 if (vec[0] > lo || vec[0] < up) { 4057 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4058 "bounds [%lld,%lld]\n", 4059 gtid, vec[0], lo, up)); 4060 return; 4061 } 4062 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4063 } 4064 for (i = 1; i < num_dims; ++i) { 4065 kmp_int64 iter, ln; 4066 kmp_int32 j = i * 4; 4067 ln = pr_buf->th_doacross_info[j + 1]; 4068 lo = pr_buf->th_doacross_info[j + 2]; 4069 up = pr_buf->th_doacross_info[j + 3]; 4070 st = pr_buf->th_doacross_info[j + 4]; 4071 if (st == 1) { 4072 if (vec[i] < lo || vec[i] > up) { 4073 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4074 "bounds [%lld,%lld]\n", 4075 gtid, vec[i], lo, up)); 4076 return; 4077 } 4078 iter = vec[i] - lo; 4079 } else if (st > 0) { 4080 if (vec[i] < lo || vec[i] > up) { 4081 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4082 "bounds [%lld,%lld]\n", 4083 gtid, vec[i], lo, up)); 4084 return; 4085 } 4086 iter = (kmp_uint64)(vec[i] - lo) / st; 4087 } else { // st < 0 4088 if (vec[i] > lo || vec[i] < up) { 4089 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4090 "bounds [%lld,%lld]\n", 4091 gtid, vec[i], lo, up)); 4092 return; 4093 } 4094 iter = (kmp_uint64)(lo - vec[i]) / (-st); 
4095 } 4096 iter_number = iter + ln * iter_number; 4097 } 4098 shft = iter_number % 32; // use 32-bit granularity 4099 iter_number >>= 5; // divided by 32 4100 flag = 1 << shft; 4101 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4102 KMP_YIELD(TRUE); 4103 } 4104 KMP_MB(); 4105 KA_TRACE(20, 4106 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4107 gtid, (iter_number << 5) + shft)); 4108 } 4109 4110 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4111 kmp_int32 shft, num_dims, i; 4112 kmp_uint32 flag; 4113 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4114 kmp_info_t *th = __kmp_threads[gtid]; 4115 kmp_team_t *team = th->th.th_team; 4116 kmp_disp_t *pr_buf; 4117 kmp_int64 lo, st; 4118 4119 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4120 if (team->t.t_serialized) { 4121 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4122 return; // no dependencies if team is serialized 4123 } 4124 4125 // calculate sequential iteration number (same as in "wait" but no 4126 // out-of-bounds checks) 4127 pr_buf = th->th.th_dispatch; 4128 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4129 num_dims = pr_buf->th_doacross_info[0]; 4130 lo = pr_buf->th_doacross_info[2]; 4131 st = pr_buf->th_doacross_info[4]; 4132 if (st == 1) { // most common case 4133 iter_number = vec[0] - lo; 4134 } else if (st > 0) { 4135 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4136 } else { // negative increment 4137 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4138 } 4139 for (i = 1; i < num_dims; ++i) { 4140 kmp_int64 iter, ln; 4141 kmp_int32 j = i * 4; 4142 ln = pr_buf->th_doacross_info[j + 1]; 4143 lo = pr_buf->th_doacross_info[j + 2]; 4144 st = pr_buf->th_doacross_info[j + 4]; 4145 if (st == 1) { 4146 iter = vec[i] - lo; 4147 } else if (st > 0) { 4148 iter = (kmp_uint64)(vec[i] - lo) / st; 4149 } else { // st < 0 4150 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4151 } 4152 
iter_number = iter + ln * iter_number; 4153 } 4154 shft = iter_number % 32; // use 32-bit granularity 4155 iter_number >>= 5; // divided by 32 4156 flag = 1 << shft; 4157 KMP_MB(); 4158 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4159 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4160 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4161 (iter_number << 5) + shft)); 4162 } 4163 4164 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4165 kmp_int32 num_done; 4166 kmp_info_t *th = __kmp_threads[gtid]; 4167 kmp_team_t *team = th->th.th_team; 4168 kmp_disp_t *pr_buf = th->th.th_dispatch; 4169 4170 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4171 if (team->t.t_serialized) { 4172 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4173 return; // nothing to do 4174 } 4175 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4176 if (num_done == th->th.th_team_nproc) { 4177 // we are the last thread, need to free shared resources 4178 int idx = pr_buf->th_doacross_buf_idx - 1; 4179 dispatch_shared_info_t *sh_buf = 4180 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4181 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4182 (kmp_int64)&sh_buf->doacross_num_done); 4183 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4184 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4185 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4186 sh_buf->doacross_flags = NULL; 4187 sh_buf->doacross_num_done = 0; 4188 sh_buf->doacross_buf_idx += 4189 __kmp_dispatch_num_buffers; // free buffer for future re-use 4190 } 4191 // free private resources (need to keep buffer index forever) 4192 pr_buf->th_doacross_flags = NULL; 4193 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4194 pr_buf->th_doacross_info = NULL; 4195 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4196 } 4197 #endif 4198 4199 #if 
OMP_50_ENABLED 4200 int __kmpc_get_target_offload(void) { 4201 if (!__kmp_init_serial) { 4202 __kmp_serial_initialize(); 4203 } 4204 return __kmp_target_offload; 4205 } 4206 4207 int __kmpc_pause_resource(kmp_pause_status_t level) { 4208 if (!__kmp_init_serial) { 4209 return 1; // Can't pause if runtime is not initialized 4210 } 4211 return __kmp_pause_resource(level); 4212 } 4213 #endif // OMP_50_ENABLED 4214 4215 // end of file // 4216