/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
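
/* Illustrative only (not part of the runtime): compiler-generated code usually
   obtains the global thread number once per routine and reuses it for later
   entry-point calls rather than re-querying it. A rough sketch of such
   generated code; the ident_t object and its initializer are hypothetical:
  @code
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;0;0;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  // later calls reuse the cached gtid, e.g. __kmpc_barrier(&loc, gtid);
  @endcode
*/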

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    &ap
#else
                    ap
#endif
                    );
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
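
/* Illustrative only (not generated by this file): a sketch of how a compiler
   front end typically lowers
  @code
  #pragma omp parallel num_threads(4) shared(a)
  { a[omp_get_thread_num()] = 1; }
  @endcode
   into calls to the entry points above. The outlined routine and the ident_t
   initializer are hypothetical names used purely for illustration:
  @code
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;0;0;;"};
  void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *a) {
    a[*btid] = 1; // body of the parallel region
  }
  // in the encountering function:
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_push_num_threads(&loc, gtid, 4);             // num_threads clause
  __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, a); // one shared argument
  @endcode
*/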

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct.
This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is addressed either (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
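
/* Illustrative only (not generated by this file): stand-alone flush and
   barrier directives lower to single calls, reusing a cached gtid; loc is the
   hypothetical ident_t object from the earlier sketches:
  @code
  // #pragma omp flush
  __kmpc_flush(&loc);
  // #pragma omp barrier
  __kmpc_barrier(&loc, gtid);
  @endcode
*/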

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
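
/* Illustrative only (not generated by this file): a sketch of how
   "#pragma omp master" is typically lowered around the guarded block:
  @code
  if (__kmpc_master(&loc, gtid)) {
    // ... body of the master region ...
    __kmpc_end_master(&loc, gtid);
  }
  @endcode
*/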

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
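
/* Illustrative only (not generated by this file): inside the outlined body of
   a loop declared with "#pragma omp for ordered", each ordered region is
   bracketed as follows (gtid and loc as in the earlier sketches):
  @code
  __kmpc_ordered(&loc, gtid);
  // ... statements that must execute in iteration order ...
  __kmpc_end_ordered(&loc, gtid);
  @endcode
*/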

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      kmp_uint32 spins; \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
      do { \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD(TRUE); \
        } else { \
          KMP_YIELD_SPIN(spins); \
        } \
        __kmp_spin_backoff(&backoff); \
      } while ( \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    } \
    KMP_FSYNC_ACQUIRED(l); \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  { \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_MB(); \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
      if (!cond) { \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         poll_val | \
                                             KMP_LOCK_BUSY(1, futex))) { \
          continue; \
        } \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      } \
      kmp_int32 rc; \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
        continue; \
      } \
      gtid_code |= 1; \
    } \
    KMP_FSYNC_ACQUIRED(ftx); \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed with no usage, but that is not
// a problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team, we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to acquire the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
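
/* Illustrative only (not generated by this file): a named critical construct
   is typically lowered around a static, zero-initialized kmp_critical_name
   object shared by every occurrence of that name; when a hint clause is
   present, the hinted entry point is expected instead of __kmpc_critical.
   The object name below is hypothetical:
  @code
  static kmp_critical_name crit_name = {0};
  // #pragma omp critical (name)            [hint(omp_sync_hint_contended)]
  __kmpc_critical(&loc, gtid, &crit_name);
  // or: __kmpc_critical_with_hint(&loc, gtid, &crit_name,
  //                               omp_lock_hint_contended);
  // ... protected code ...
  __kmpc_end_critical(&loc, gtid, &crit_name);
  @endcode
*/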

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the other threads do not wait for
the master thread to complete its code (there is an implied nowait).
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */

    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */

      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}

/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls, rather the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}

/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
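
/* Illustrative only (not generated by this file): a sketch of how
   "#pragma omp single" is typically lowered; the trailing explicit barrier is
   omitted when a nowait clause is present:
  @code
  if (__kmpc_single(&loc, gtid)) {
    // ... body of the single region ...
    __kmpc_end_single(&loc, gtid);
  }
  __kmpc_barrier(&loc, gtid); // implicit barrier, unless nowait
  @endcode
*/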
1789 */ 1790 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1791 __kmp_exit_single(global_tid); 1792 KMP_POP_PARTITIONED_TIMER(); 1793 1794 #if OMPT_SUPPORT && OMPT_OPTIONAL 1795 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1796 kmp_team_t *team = this_thr->th.th_team; 1797 int tid = __kmp_tid_from_gtid(global_tid); 1798 1799 if (ompt_enabled.ompt_callback_work) { 1800 ompt_callbacks.ompt_callback(ompt_callback_work)( 1801 ompt_work_single_executor, ompt_scope_end, 1802 &(team->t.ompt_team_info.parallel_data), 1803 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1804 OMPT_GET_RETURN_ADDRESS(0)); 1805 } 1806 #endif 1807 } 1808 1809 /*! 1810 @ingroup WORK_SHARING 1811 @param loc Source location 1812 @param global_tid Global thread id 1813 1814 Mark the end of a statically scheduled loop. 1815 */ 1816 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1817 KMP_POP_PARTITIONED_TIMER(); 1818 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1819 1820 #if OMPT_SUPPORT && OMPT_OPTIONAL 1821 if (ompt_enabled.ompt_callback_work) { 1822 ompt_work_t ompt_work_type = ompt_work_loop; 1823 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1824 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1825 // Determine workshare type 1826 if (loc != NULL) { 1827 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1828 ompt_work_type = ompt_work_loop; 1829 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1830 ompt_work_type = ompt_work_sections; 1831 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1832 ompt_work_type = ompt_work_distribute; 1833 } else { 1834 // use default set above. 1835 // a warning about this case is provided in __kmpc_for_static_init 1836 } 1837 KMP_DEBUG_ASSERT(ompt_work_type); 1838 } 1839 ompt_callbacks.ompt_callback(ompt_callback_work)( 1840 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1841 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1842 } 1843 #endif 1844 if (__kmp_env_consistency_check) 1845 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1846 } 1847 1848 // User routines which take C-style arguments (call by value) 1849 // different from the Fortran equivalent routines 1850 1851 void ompc_set_num_threads(int arg) { 1852 // !!!!! TODO: check the per-task binding 1853 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1854 } 1855 1856 void ompc_set_dynamic(int flag) { 1857 kmp_info_t *thread; 1858 1859 /* For the thread-private implementation of the internal controls */ 1860 thread = __kmp_entry_thread(); 1861 1862 __kmp_save_internal_controls(thread); 1863 1864 set__dynamic(thread, flag ? TRUE : FALSE); 1865 } 1866 1867 void ompc_set_nested(int flag) { 1868 kmp_info_t *thread; 1869 1870 /* For the thread-private internal controls implementation */ 1871 thread = __kmp_entry_thread(); 1872 1873 __kmp_save_internal_controls(thread); 1874 1875 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1876 } 1877 1878 void ompc_set_max_active_levels(int max_active_levels) { 1879 /* TO DO */ 1880 /* we want per-task implementation of this internal control */ 1881 1882 /* For the per-thread internal controls implementation */ 1883 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1884 } 1885 1886 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1887 // !!!!! 
TODO: check the per-task binding 1888 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1889 } 1890 1891 int ompc_get_ancestor_thread_num(int level) { 1892 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1893 } 1894 1895 int ompc_get_team_size(int level) { 1896 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1897 } 1898 1899 /* OpenMP 5.0 Affinity Format API */ 1900 1901 void ompc_set_affinity_format(char const *format) { 1902 if (!__kmp_init_serial) { 1903 __kmp_serial_initialize(); 1904 } 1905 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1906 format, KMP_STRLEN(format) + 1); 1907 } 1908 1909 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1910 size_t format_size; 1911 if (!__kmp_init_serial) { 1912 __kmp_serial_initialize(); 1913 } 1914 format_size = KMP_STRLEN(__kmp_affinity_format); 1915 if (buffer && size) { 1916 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1917 format_size + 1); 1918 } 1919 return format_size; 1920 } 1921 1922 void ompc_display_affinity(char const *format) { 1923 int gtid; 1924 if (!TCR_4(__kmp_init_middle)) { 1925 __kmp_middle_initialize(); 1926 } 1927 gtid = __kmp_get_gtid(); 1928 __kmp_aux_display_affinity(gtid, format); 1929 } 1930 1931 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1932 char const *format) { 1933 int gtid; 1934 size_t num_required; 1935 kmp_str_buf_t capture_buf; 1936 if (!TCR_4(__kmp_init_middle)) { 1937 __kmp_middle_initialize(); 1938 } 1939 gtid = __kmp_get_gtid(); 1940 __kmp_str_buf_init(&capture_buf); 1941 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1942 if (buffer && buf_size) { 1943 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1944 capture_buf.used + 1); 1945 } 1946 __kmp_str_buf_free(&capture_buf); 1947 return num_required; 1948 } 1949 1950 void kmpc_set_stacksize(int arg) { 1951 // __kmp_aux_set_stacksize initializes the library if needed 1952 __kmp_aux_set_stacksize(arg); 1953 } 1954 1955 void kmpc_set_stacksize_s(size_t arg) { 1956 // __kmp_aux_set_stacksize initializes the library if needed 1957 __kmp_aux_set_stacksize(arg); 1958 } 1959 1960 void kmpc_set_blocktime(int arg) { 1961 int gtid, tid; 1962 kmp_info_t *thread; 1963 1964 gtid = __kmp_entry_gtid(); 1965 tid = __kmp_tid_from_gtid(gtid); 1966 thread = __kmp_thread_from_gtid(gtid); 1967 1968 __kmp_aux_set_blocktime(arg, thread, tid); 1969 } 1970 1971 void kmpc_set_library(int arg) { 1972 // __kmp_user_set_library initializes the library if needed 1973 __kmp_user_set_library((enum library_type)arg); 1974 } 1975 1976 void kmpc_set_defaults(char const *str) { 1977 // __kmp_aux_set_defaults initializes the library if needed 1978 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1979 } 1980 1981 void kmpc_set_disp_num_buffers(int arg) { 1982 // ignore after initialization because some teams have already 1983 // allocated dispatch buffers 1984 if (__kmp_init_serial == 0 && arg > 0) 1985 __kmp_dispatch_num_buffers = arg; 1986 } 1987 1988 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1989 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1990 return -1; 1991 #else 1992 if (!TCR_4(__kmp_init_middle)) { 1993 __kmp_middle_initialize(); 1994 } 1995 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1996 #endif 1997 } 1998 1999 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2000 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2001 return -1; 2002 #else 2003 if (!TCR_4(__kmp_init_middle)) { 2004 __kmp_middle_initialize(); 
2005 } 2006 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2007 #endif 2008 } 2009 2010 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2011 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2012 return -1; 2013 #else 2014 if (!TCR_4(__kmp_init_middle)) { 2015 __kmp_middle_initialize(); 2016 } 2017 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2018 #endif 2019 } 2020 2021 /* -------------------------------------------------------------------------- */ 2022 /*! 2023 @ingroup THREADPRIVATE 2024 @param loc source location information 2025 @param gtid global thread number 2026 @param cpy_size size of the cpy_data buffer 2027 @param cpy_data pointer to data to be copied 2028 @param cpy_func helper function to call for copying data 2029 @param didit flag variable: 1=single thread; 0=not single thread 2030 2031 __kmpc_copyprivate implements the interface for the private data broadcast 2032 needed for the copyprivate clause associated with a single region in an 2033 OpenMP<sup>*</sup> program (both C and Fortran). 2034 All threads participating in the parallel region call this routine. 2035 One of the threads (called the single thread) should have the <tt>didit</tt> 2036 variable set to 1 and all other threads should have that variable set to 0. 2037 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2038 2039 The OpenMP specification forbids the use of nowait on the single region when a 2040 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2041 barrier internally to avoid race conditions, so the code generation for the 2042 single region should avoid generating a barrier after the call to @ref 2043 __kmpc_copyprivate. 2044 2045 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2046 The <tt>loc</tt> parameter is a pointer to source location information. 2047 2048 Internal implementation: The single thread will first copy its descriptor 2049 address (cpy_data) to a team-private location, then the other threads will each 2050 call the function pointed to by the parameter cpy_func, which carries out the 2051 copy by copying the data using the cpy_data buffer. 2052 2053 The cpy_func routine used for the copy and the contents of the data area defined 2054 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2055 to be done. For instance, the cpy_data buffer can hold the actual data to be 2056 copied or it may hold a list of pointers to the data. The cpy_func routine must 2057 interpret the cpy_data buffer appropriately. 2058 2059 The interface to cpy_func is as follows: 2060 @code 2061 void cpy_func( void *destination, void *source ) 2062 @endcode 2063 where void *destination is the cpy_data pointer for the thread being copied to 2064 and void *source is the cpy_data pointer for the thread being copied from. 
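
As a sketch only (the names <tt>x</tt>, <tt>copy_x</tt> and <tt>compute_x()</tt>
are illustrative assumptions, not part of this interface), a <tt>single</tt>
construct with a <tt>copyprivate(x)</tt> clause might be emitted as:
@code
static void copy_x(void *dst, void *src) {
  *(int *)dst = *(int *)src; // copy the broadcast value into this thread's x
}

int x; // private copy in every thread
kmp_int32 didit = 0;
if (__kmpc_single(loc, gtid)) {
  x = compute_x(); // executed by the single thread only
  didit = 1;
  __kmpc_end_single(loc, gtid);
}
__kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_x, didit);
// no separate barrier is generated here; __kmpc_copyprivate barriers internally
@endcode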
2065 */ 2066 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2067 void *cpy_data, void (*cpy_func)(void *, void *), 2068 kmp_int32 didit) { 2069 void **data_ptr; 2070 2071 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2072 2073 KMP_MB(); 2074 2075 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2076 2077 if (__kmp_env_consistency_check) { 2078 if (loc == 0) { 2079 KMP_WARNING(ConstructIdentInvalid); 2080 } 2081 } 2082 2083 // ToDo: Optimize the following two barriers into some kind of split barrier 2084 2085 if (didit) 2086 *data_ptr = cpy_data; 2087 2088 #if OMPT_SUPPORT 2089 ompt_frame_t *ompt_frame; 2090 if (ompt_enabled.enabled) { 2091 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2092 if (ompt_frame->enter_frame.ptr == NULL) 2093 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2094 OMPT_STORE_RETURN_ADDRESS(gtid); 2095 } 2096 #endif 2097 /* This barrier is not a barrier region boundary */ 2098 #if USE_ITT_NOTIFY 2099 __kmp_threads[gtid]->th.th_ident = loc; 2100 #endif 2101 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2102 2103 if (!didit) 2104 (*cpy_func)(cpy_data, *data_ptr); 2105 2106 // Consider next barrier a user-visible barrier for barrier region boundaries 2107 // Nesting checks are already handled by the single construct checks 2108 2109 #if OMPT_SUPPORT 2110 if (ompt_enabled.enabled) { 2111 OMPT_STORE_RETURN_ADDRESS(gtid); 2112 } 2113 #endif 2114 #if USE_ITT_NOTIFY 2115 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2116 // tasks can overwrite the location) 2117 #endif 2118 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2119 #if OMPT_SUPPORT && OMPT_OPTIONAL 2120 if (ompt_enabled.enabled) { 2121 ompt_frame->enter_frame = ompt_data_none; 2122 } 2123 #endif 2124 } 2125 2126 /* -------------------------------------------------------------------------- */ 2127 2128 #define INIT_LOCK __kmp_init_user_lock_with_checks 2129 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2130 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2131 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2132 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2133 #define ACQUIRE_NESTED_LOCK_TIMED \ 2134 __kmp_acquire_nested_user_lock_with_checks_timed 2135 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2136 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2137 #define TEST_LOCK __kmp_test_user_lock_with_checks 2138 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2139 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2140 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2141 2142 // TODO: Make check abort messages use location info & pass it into 2143 // with_checks routines 2144 2145 #if KMP_USE_DYNAMIC_LOCK 2146 2147 // internal lock initializer 2148 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2149 kmp_dyna_lockseq_t seq) { 2150 if (KMP_IS_D_LOCK(seq)) { 2151 KMP_INIT_D_LOCK(lock, seq); 2152 #if USE_ITT_BUILD 2153 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2154 #endif 2155 } else { 2156 KMP_INIT_I_LOCK(lock, seq); 2157 #if USE_ITT_BUILD 2158 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2159 __kmp_itt_lock_creating(ilk->lock, loc); 2160 #endif 2161 } 2162 } 2163 2164 // internal nest lock initializer 2165 static __forceinline void 2166 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2167 kmp_dyna_lockseq_t seq) { 2168 #if KMP_USE_TSX 2169 // Don't have nested lock implementation for speculative locks 2170 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2171 seq = __kmp_user_lock_seq; 2172 #endif 2173 switch (seq) { 2174 case lockseq_tas: 2175 seq = lockseq_nested_tas; 2176 break; 2177 #if KMP_USE_FUTEX 2178 case lockseq_futex: 2179 seq = lockseq_nested_futex; 2180 break; 2181 #endif 2182 case lockseq_ticket: 2183 seq = lockseq_nested_ticket; 2184 break; 2185 case lockseq_queuing: 2186 seq = lockseq_nested_queuing; 2187 break; 2188 case lockseq_drdpa: 2189 seq = lockseq_nested_drdpa; 2190 break; 2191 default: 2192 seq = lockseq_nested_queuing; 2193 } 2194 KMP_INIT_I_LOCK(lock, seq); 2195 #if USE_ITT_BUILD 2196 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2197 __kmp_itt_lock_creating(ilk->lock, loc); 2198 #endif 2199 } 2200 2201 /* initialize the lock with a hint */ 2202 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2203 uintptr_t hint) { 2204 KMP_DEBUG_ASSERT(__kmp_init_serial); 2205 if (__kmp_env_consistency_check && user_lock == NULL) { 2206 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2207 } 2208 2209 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2210 2211 #if OMPT_SUPPORT && OMPT_OPTIONAL 2212 // This is the case, if called from omp_init_lock_with_hint: 2213 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2214 if (!codeptr) 2215 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2216 if (ompt_enabled.ompt_callback_lock_init) { 2217 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2218 ompt_mutex_lock, (omp_lock_hint_t)hint, 2219 __ompt_get_mutex_impl_type(user_lock), 2220 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2221 } 2222 #endif 2223 } 2224 2225 /* initialize the lock with a hint */ 2226 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2227 void **user_lock, uintptr_t hint) { 2228 KMP_DEBUG_ASSERT(__kmp_init_serial); 2229 if (__kmp_env_consistency_check && user_lock == NULL) { 2230 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2231 } 2232 2233 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2234 2235 #if OMPT_SUPPORT && OMPT_OPTIONAL 2236 // This is the case, if called from omp_init_lock_with_hint: 2237 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2238 if (!codeptr) 2239 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2240 if (ompt_enabled.ompt_callback_lock_init) { 2241 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2242 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2243 __ompt_get_mutex_impl_type(user_lock), 2244 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2245 } 2246 #endif 2247 } 2248 2249 #endif // KMP_USE_DYNAMIC_LOCK 2250 2251 /* initialize the lock */ 2252 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2253 #if KMP_USE_DYNAMIC_LOCK 2254 2255 KMP_DEBUG_ASSERT(__kmp_init_serial); 2256 if (__kmp_env_consistency_check && user_lock == NULL) { 2257 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2258 } 2259 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2260 2261 #if OMPT_SUPPORT && OMPT_OPTIONAL 2262 // This is the case, if called from omp_init_lock_with_hint: 2263 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2264 if (!codeptr) 2265 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2266 if (ompt_enabled.ompt_callback_lock_init) { 2267 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2268 ompt_mutex_lock, omp_lock_hint_none, 2269 
__ompt_get_mutex_impl_type(user_lock), 2270 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2271 } 2272 #endif 2273 2274 #else // KMP_USE_DYNAMIC_LOCK 2275 2276 static char const *const func = "omp_init_lock"; 2277 kmp_user_lock_p lck; 2278 KMP_DEBUG_ASSERT(__kmp_init_serial); 2279 2280 if (__kmp_env_consistency_check) { 2281 if (user_lock == NULL) { 2282 KMP_FATAL(LockIsUninitialized, func); 2283 } 2284 } 2285 2286 KMP_CHECK_USER_LOCK_INIT(); 2287 2288 if ((__kmp_user_lock_kind == lk_tas) && 2289 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2290 lck = (kmp_user_lock_p)user_lock; 2291 } 2292 #if KMP_USE_FUTEX 2293 else if ((__kmp_user_lock_kind == lk_futex) && 2294 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2295 lck = (kmp_user_lock_p)user_lock; 2296 } 2297 #endif 2298 else { 2299 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2300 } 2301 INIT_LOCK(lck); 2302 __kmp_set_user_lock_location(lck, loc); 2303 2304 #if OMPT_SUPPORT && OMPT_OPTIONAL 2305 // This is the case, if called from omp_init_lock_with_hint: 2306 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2307 if (!codeptr) 2308 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2309 if (ompt_enabled.ompt_callback_lock_init) { 2310 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2311 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2312 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2313 } 2314 #endif 2315 2316 #if USE_ITT_BUILD 2317 __kmp_itt_lock_creating(lck); 2318 #endif /* USE_ITT_BUILD */ 2319 2320 #endif // KMP_USE_DYNAMIC_LOCK 2321 } // __kmpc_init_lock 2322 2323 /* initialize the lock */ 2324 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2325 #if KMP_USE_DYNAMIC_LOCK 2326 2327 KMP_DEBUG_ASSERT(__kmp_init_serial); 2328 if (__kmp_env_consistency_check && user_lock == NULL) { 2329 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2330 } 2331 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2332 2333 #if OMPT_SUPPORT && OMPT_OPTIONAL 2334 // This is the case, if called from omp_init_lock_with_hint: 2335 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2336 if (!codeptr) 2337 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2338 if (ompt_enabled.ompt_callback_lock_init) { 2339 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2340 ompt_mutex_nest_lock, omp_lock_hint_none, 2341 __ompt_get_mutex_impl_type(user_lock), 2342 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2343 } 2344 #endif 2345 2346 #else // KMP_USE_DYNAMIC_LOCK 2347 2348 static char const *const func = "omp_init_nest_lock"; 2349 kmp_user_lock_p lck; 2350 KMP_DEBUG_ASSERT(__kmp_init_serial); 2351 2352 if (__kmp_env_consistency_check) { 2353 if (user_lock == NULL) { 2354 KMP_FATAL(LockIsUninitialized, func); 2355 } 2356 } 2357 2358 KMP_CHECK_USER_LOCK_INIT(); 2359 2360 if ((__kmp_user_lock_kind == lk_tas) && 2361 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2362 OMP_NEST_LOCK_T_SIZE)) { 2363 lck = (kmp_user_lock_p)user_lock; 2364 } 2365 #if KMP_USE_FUTEX 2366 else if ((__kmp_user_lock_kind == lk_futex) && 2367 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2368 OMP_NEST_LOCK_T_SIZE)) { 2369 lck = (kmp_user_lock_p)user_lock; 2370 } 2371 #endif 2372 else { 2373 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2374 } 2375 2376 INIT_NESTED_LOCK(lck); 2377 __kmp_set_user_lock_location(lck, loc); 2378 2379 #if OMPT_SUPPORT && OMPT_OPTIONAL 2380 // This is the case, if called from omp_init_lock_with_hint: 2381 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 
2382 if (!codeptr) 2383 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2384 if (ompt_enabled.ompt_callback_lock_init) { 2385 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2386 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2387 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2388 } 2389 #endif 2390 2391 #if USE_ITT_BUILD 2392 __kmp_itt_lock_creating(lck); 2393 #endif /* USE_ITT_BUILD */ 2394 2395 #endif // KMP_USE_DYNAMIC_LOCK 2396 } // __kmpc_init_nest_lock 2397 2398 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2399 #if KMP_USE_DYNAMIC_LOCK 2400 2401 #if USE_ITT_BUILD 2402 kmp_user_lock_p lck; 2403 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2404 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2405 } else { 2406 lck = (kmp_user_lock_p)user_lock; 2407 } 2408 __kmp_itt_lock_destroyed(lck); 2409 #endif 2410 #if OMPT_SUPPORT && OMPT_OPTIONAL 2411 // This is the case, if called from omp_init_lock_with_hint: 2412 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2413 if (!codeptr) 2414 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2415 if (ompt_enabled.ompt_callback_lock_destroy) { 2416 kmp_user_lock_p lck; 2417 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2418 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2419 } else { 2420 lck = (kmp_user_lock_p)user_lock; 2421 } 2422 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2423 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2424 } 2425 #endif 2426 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2427 #else 2428 kmp_user_lock_p lck; 2429 2430 if ((__kmp_user_lock_kind == lk_tas) && 2431 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2432 lck = (kmp_user_lock_p)user_lock; 2433 } 2434 #if KMP_USE_FUTEX 2435 else if ((__kmp_user_lock_kind == lk_futex) && 2436 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2437 lck = (kmp_user_lock_p)user_lock; 2438 } 2439 #endif 2440 else { 2441 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2442 } 2443 2444 #if OMPT_SUPPORT && OMPT_OPTIONAL 2445 // This is the case, if called from omp_init_lock_with_hint: 2446 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2447 if (!codeptr) 2448 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2449 if (ompt_enabled.ompt_callback_lock_destroy) { 2450 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2451 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2452 } 2453 #endif 2454 2455 #if USE_ITT_BUILD 2456 __kmp_itt_lock_destroyed(lck); 2457 #endif /* USE_ITT_BUILD */ 2458 DESTROY_LOCK(lck); 2459 2460 if ((__kmp_user_lock_kind == lk_tas) && 2461 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2462 ; 2463 } 2464 #if KMP_USE_FUTEX 2465 else if ((__kmp_user_lock_kind == lk_futex) && 2466 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2467 ; 2468 } 2469 #endif 2470 else { 2471 __kmp_user_lock_free(user_lock, gtid, lck); 2472 } 2473 #endif // KMP_USE_DYNAMIC_LOCK 2474 } // __kmpc_destroy_lock 2475 2476 /* destroy the lock */ 2477 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2478 #if KMP_USE_DYNAMIC_LOCK 2479 2480 #if USE_ITT_BUILD 2481 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2482 __kmp_itt_lock_destroyed(ilk->lock); 2483 #endif 2484 #if OMPT_SUPPORT && OMPT_OPTIONAL 2485 // This is the case, if called from omp_init_lock_with_hint: 2486 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2487 if (!codeptr) 2488 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2489 if 
(ompt_enabled.ompt_callback_lock_destroy) { 2490 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2491 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2492 } 2493 #endif 2494 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2495 2496 #else // KMP_USE_DYNAMIC_LOCK 2497 2498 kmp_user_lock_p lck; 2499 2500 if ((__kmp_user_lock_kind == lk_tas) && 2501 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2502 OMP_NEST_LOCK_T_SIZE)) { 2503 lck = (kmp_user_lock_p)user_lock; 2504 } 2505 #if KMP_USE_FUTEX 2506 else if ((__kmp_user_lock_kind == lk_futex) && 2507 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2508 OMP_NEST_LOCK_T_SIZE)) { 2509 lck = (kmp_user_lock_p)user_lock; 2510 } 2511 #endif 2512 else { 2513 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2514 } 2515 2516 #if OMPT_SUPPORT && OMPT_OPTIONAL 2517 // This is the case, if called from omp_init_lock_with_hint: 2518 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2519 if (!codeptr) 2520 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2521 if (ompt_enabled.ompt_callback_lock_destroy) { 2522 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2523 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2524 } 2525 #endif 2526 2527 #if USE_ITT_BUILD 2528 __kmp_itt_lock_destroyed(lck); 2529 #endif /* USE_ITT_BUILD */ 2530 2531 DESTROY_NESTED_LOCK(lck); 2532 2533 if ((__kmp_user_lock_kind == lk_tas) && 2534 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2535 OMP_NEST_LOCK_T_SIZE)) { 2536 ; 2537 } 2538 #if KMP_USE_FUTEX 2539 else if ((__kmp_user_lock_kind == lk_futex) && 2540 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2541 OMP_NEST_LOCK_T_SIZE)) { 2542 ; 2543 } 2544 #endif 2545 else { 2546 __kmp_user_lock_free(user_lock, gtid, lck); 2547 } 2548 #endif // KMP_USE_DYNAMIC_LOCK 2549 } // __kmpc_destroy_nest_lock 2550 2551 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2552 KMP_COUNT_BLOCK(OMP_set_lock); 2553 #if KMP_USE_DYNAMIC_LOCK 2554 int tag = KMP_EXTRACT_D_TAG(user_lock); 2555 #if USE_ITT_BUILD 2556 __kmp_itt_lock_acquiring( 2557 (kmp_user_lock_p) 2558 user_lock); // itt function will get to the right lock object. 
2559 #endif 2560 #if OMPT_SUPPORT && OMPT_OPTIONAL 2561 // This is the case, if called from omp_init_lock_with_hint: 2562 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2563 if (!codeptr) 2564 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2565 if (ompt_enabled.ompt_callback_mutex_acquire) { 2566 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2567 ompt_mutex_lock, omp_lock_hint_none, 2568 __ompt_get_mutex_impl_type(user_lock), 2569 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2570 } 2571 #endif 2572 #if KMP_USE_INLINED_TAS 2573 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2574 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2575 } else 2576 #elif KMP_USE_INLINED_FUTEX 2577 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2578 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2579 } else 2580 #endif 2581 { 2582 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2583 } 2584 #if USE_ITT_BUILD 2585 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2586 #endif 2587 #if OMPT_SUPPORT && OMPT_OPTIONAL 2588 if (ompt_enabled.ompt_callback_mutex_acquired) { 2589 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2590 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2591 } 2592 #endif 2593 2594 #else // KMP_USE_DYNAMIC_LOCK 2595 2596 kmp_user_lock_p lck; 2597 2598 if ((__kmp_user_lock_kind == lk_tas) && 2599 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2600 lck = (kmp_user_lock_p)user_lock; 2601 } 2602 #if KMP_USE_FUTEX 2603 else if ((__kmp_user_lock_kind == lk_futex) && 2604 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2605 lck = (kmp_user_lock_p)user_lock; 2606 } 2607 #endif 2608 else { 2609 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2610 } 2611 2612 #if USE_ITT_BUILD 2613 __kmp_itt_lock_acquiring(lck); 2614 #endif /* USE_ITT_BUILD */ 2615 #if OMPT_SUPPORT && OMPT_OPTIONAL 2616 // This is the case, if called from omp_init_lock_with_hint: 2617 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2618 if (!codeptr) 2619 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2620 if (ompt_enabled.ompt_callback_mutex_acquire) { 2621 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2622 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2623 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2624 } 2625 #endif 2626 2627 ACQUIRE_LOCK(lck, gtid); 2628 2629 #if USE_ITT_BUILD 2630 __kmp_itt_lock_acquired(lck); 2631 #endif /* USE_ITT_BUILD */ 2632 2633 #if OMPT_SUPPORT && OMPT_OPTIONAL 2634 if (ompt_enabled.ompt_callback_mutex_acquired) { 2635 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2636 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2637 } 2638 #endif 2639 2640 #endif // KMP_USE_DYNAMIC_LOCK 2641 } 2642 2643 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2644 #if KMP_USE_DYNAMIC_LOCK 2645 2646 #if USE_ITT_BUILD 2647 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2648 #endif 2649 #if OMPT_SUPPORT && OMPT_OPTIONAL 2650 // This is the case, if called from omp_init_lock_with_hint: 2651 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2652 if (!codeptr) 2653 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2654 if (ompt_enabled.enabled) { 2655 if (ompt_enabled.ompt_callback_mutex_acquire) { 2656 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2657 ompt_mutex_nest_lock, omp_lock_hint_none, 2658 __ompt_get_mutex_impl_type(user_lock), 2659 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2660 } 2661 } 2662 #endif 2663 int acquire_status = 2664 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2665 (void) acquire_status; 2666 #if USE_ITT_BUILD 2667 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2668 #endif 2669 2670 #if OMPT_SUPPORT && OMPT_OPTIONAL 2671 if (ompt_enabled.enabled) { 2672 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2673 if (ompt_enabled.ompt_callback_mutex_acquired) { 2674 // lock_first 2675 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2676 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2677 codeptr); 2678 } 2679 } else { 2680 if (ompt_enabled.ompt_callback_nest_lock) { 2681 // lock_next 2682 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2683 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2684 } 2685 } 2686 } 2687 #endif 2688 2689 #else // KMP_USE_DYNAMIC_LOCK 2690 int acquire_status; 2691 kmp_user_lock_p lck; 2692 2693 if ((__kmp_user_lock_kind == lk_tas) && 2694 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2695 OMP_NEST_LOCK_T_SIZE)) { 2696 lck = (kmp_user_lock_p)user_lock; 2697 } 2698 #if KMP_USE_FUTEX 2699 else if ((__kmp_user_lock_kind == lk_futex) && 2700 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2701 OMP_NEST_LOCK_T_SIZE)) { 2702 lck = (kmp_user_lock_p)user_lock; 2703 } 2704 #endif 2705 else { 2706 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2707 } 2708 2709 #if USE_ITT_BUILD 2710 __kmp_itt_lock_acquiring(lck); 2711 #endif /* USE_ITT_BUILD */ 2712 #if OMPT_SUPPORT && OMPT_OPTIONAL 2713 // This is the case, if called from omp_init_lock_with_hint: 2714 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2715 if (!codeptr) 2716 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2717 if (ompt_enabled.enabled) { 2718 if (ompt_enabled.ompt_callback_mutex_acquire) { 2719 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2720 ompt_mutex_nest_lock, omp_lock_hint_none, 2721 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2722 codeptr); 2723 } 2724 } 2725 #endif 2726 2727 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2728 2729 #if USE_ITT_BUILD 2730 __kmp_itt_lock_acquired(lck); 2731 #endif /* USE_ITT_BUILD */ 2732 2733 #if OMPT_SUPPORT && OMPT_OPTIONAL 2734 if (ompt_enabled.enabled) { 2735 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2736 if (ompt_enabled.ompt_callback_mutex_acquired) { 2737 // lock_first 2738 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2739 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2740 } 2741 } else { 2742 if (ompt_enabled.ompt_callback_nest_lock) { 2743 // lock_next 2744 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2745 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2746 } 2747 } 2748 } 2749 #endif 2750 2751 #endif // KMP_USE_DYNAMIC_LOCK 2752 } 2753 2754 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2755 #if KMP_USE_DYNAMIC_LOCK 2756 2757 int tag = KMP_EXTRACT_D_TAG(user_lock); 2758 #if USE_ITT_BUILD 2759 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2760 #endif 2761 #if KMP_USE_INLINED_TAS 2762 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2763 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2764 } else 2765 #elif KMP_USE_INLINED_FUTEX 2766 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2767 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2768 } else 2769 #endif 2770 { 2771 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2772 } 2773 2774 #if OMPT_SUPPORT && OMPT_OPTIONAL 2775 // This is the case, if called from omp_init_lock_with_hint: 2776 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2777 if (!codeptr) 2778 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2779 if (ompt_enabled.ompt_callback_mutex_released) { 2780 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2781 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2782 } 2783 #endif 2784 2785 #else // KMP_USE_DYNAMIC_LOCK 2786 2787 kmp_user_lock_p lck; 2788 2789 /* Can't use serial interval since not block structured */ 2790 /* release the lock */ 2791 2792 if ((__kmp_user_lock_kind == lk_tas) && 2793 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2794 #if KMP_OS_LINUX && \ 2795 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2796 // "fast" path implemented to fix customer performance issue 2797 #if USE_ITT_BUILD 2798 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2799 #endif /* USE_ITT_BUILD */ 2800 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2801 KMP_MB(); 2802 2803 #if OMPT_SUPPORT && OMPT_OPTIONAL 2804 // This is the case, if called from omp_init_lock_with_hint: 2805 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2806 if (!codeptr) 2807 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2808 if (ompt_enabled.ompt_callback_mutex_released) { 2809 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2810 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2811 } 2812 #endif 2813 2814 return; 2815 #else 2816 lck = (kmp_user_lock_p)user_lock; 2817 #endif 2818 } 2819 #if KMP_USE_FUTEX 2820 else if ((__kmp_user_lock_kind == lk_futex) && 2821 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2822 lck = (kmp_user_lock_p)user_lock; 2823 } 2824 #endif 2825 else { 2826 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2827 } 2828 2829 #if USE_ITT_BUILD 2830 __kmp_itt_lock_releasing(lck); 2831 #endif /* USE_ITT_BUILD */ 2832 2833 RELEASE_LOCK(lck, gtid); 2834 2835 #if OMPT_SUPPORT && OMPT_OPTIONAL 2836 // This is the case, if called from omp_init_lock_with_hint: 2837 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2838 if (!codeptr) 2839 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2840 if (ompt_enabled.ompt_callback_mutex_released) { 2841 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2842 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2843 } 2844 #endif 2845 2846 #endif // KMP_USE_DYNAMIC_LOCK 2847 } 2848 2849 /* release the lock */ 2850 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2851 #if KMP_USE_DYNAMIC_LOCK 2852 2853 #if USE_ITT_BUILD 2854 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2855 #endif 2856 int release_status = 2857 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2858 (void) release_status; 2859 2860 #if OMPT_SUPPORT && OMPT_OPTIONAL 2861 // This is the case, if called from omp_init_lock_with_hint: 2862 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2863 if (!codeptr) 2864 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2865 if (ompt_enabled.enabled) { 2866 if (release_status == KMP_LOCK_RELEASED) { 2867 if (ompt_enabled.ompt_callback_mutex_released) { 2868 // release_lock_last 2869 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2870 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2871 codeptr); 2872 } 2873 } else if (ompt_enabled.ompt_callback_nest_lock) { 2874 // release_lock_prev 2875 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2876 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2877 } 2878 } 2879 #endif 2880 2881 #else // KMP_USE_DYNAMIC_LOCK 2882 2883 kmp_user_lock_p lck; 
  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr =
OMPT_GET_RETURN_ADDRESS(0); 2990 if (ompt_enabled.ompt_callback_mutex_acquire) { 2991 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2992 ompt_mutex_lock, omp_lock_hint_none, 2993 __ompt_get_mutex_impl_type(user_lock), 2994 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2995 } 2996 #endif 2997 #if KMP_USE_INLINED_TAS 2998 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2999 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3000 } else 3001 #elif KMP_USE_INLINED_FUTEX 3002 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3003 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3004 } else 3005 #endif 3006 { 3007 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3008 } 3009 if (rc) { 3010 #if USE_ITT_BUILD 3011 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3012 #endif 3013 #if OMPT_SUPPORT && OMPT_OPTIONAL 3014 if (ompt_enabled.ompt_callback_mutex_acquired) { 3015 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3016 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3017 } 3018 #endif 3019 return FTN_TRUE; 3020 } else { 3021 #if USE_ITT_BUILD 3022 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3023 #endif 3024 return FTN_FALSE; 3025 } 3026 3027 #else // KMP_USE_DYNAMIC_LOCK 3028 3029 kmp_user_lock_p lck; 3030 int rc; 3031 3032 if ((__kmp_user_lock_kind == lk_tas) && 3033 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3034 lck = (kmp_user_lock_p)user_lock; 3035 } 3036 #if KMP_USE_FUTEX 3037 else if ((__kmp_user_lock_kind == lk_futex) && 3038 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3039 lck = (kmp_user_lock_p)user_lock; 3040 } 3041 #endif 3042 else { 3043 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3044 } 3045 3046 #if USE_ITT_BUILD 3047 __kmp_itt_lock_acquiring(lck); 3048 #endif /* USE_ITT_BUILD */ 3049 #if OMPT_SUPPORT && OMPT_OPTIONAL 3050 // This is the case, if called from omp_init_lock_with_hint: 3051 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3052 if (!codeptr) 3053 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3054 if (ompt_enabled.ompt_callback_mutex_acquire) { 3055 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3056 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3057 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3058 } 3059 #endif 3060 3061 rc = TEST_LOCK(lck, gtid); 3062 #if USE_ITT_BUILD 3063 if (rc) { 3064 __kmp_itt_lock_acquired(lck); 3065 } else { 3066 __kmp_itt_lock_cancelled(lck); 3067 } 3068 #endif /* USE_ITT_BUILD */ 3069 #if OMPT_SUPPORT && OMPT_OPTIONAL 3070 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3071 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3072 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3073 } 3074 #endif 3075 3076 return (rc ? 
FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid) \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
3266 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3267 lck = (kmp_user_lock_p)crit; 3268 } else { 3269 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3270 } 3271 KMP_DEBUG_ASSERT(lck != NULL); 3272 3273 if (__kmp_env_consistency_check) 3274 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3275 3276 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3277 3278 #endif // KMP_USE_DYNAMIC_LOCK 3279 } 3280 3281 // used in a critical section reduce block 3282 static __forceinline void 3283 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3284 kmp_critical_name *crit) { 3285 3286 kmp_user_lock_p lck; 3287 3288 #if KMP_USE_DYNAMIC_LOCK 3289 3290 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3291 lck = (kmp_user_lock_p)crit; 3292 if (__kmp_env_consistency_check) 3293 __kmp_pop_sync(global_tid, ct_critical, loc); 3294 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3295 } else { 3296 kmp_indirect_lock_t *ilk = 3297 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3298 if (__kmp_env_consistency_check) 3299 __kmp_pop_sync(global_tid, ct_critical, loc); 3300 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3301 } 3302 3303 #else // KMP_USE_DYNAMIC_LOCK 3304 3305 // We know that the fast reduction code is only emitted by Intel compilers 3306 // with 32 byte critical sections. If there isn't enough space, then we have 3307 // to use a pointer. 3308 if (__kmp_base_user_lock_size > 32) { 3309 lck = *((kmp_user_lock_p *)crit); 3310 KMP_ASSERT(lck != NULL); 3311 } else { 3312 lck = (kmp_user_lock_p)crit; 3313 } 3314 3315 if (__kmp_env_consistency_check) 3316 __kmp_pop_sync(global_tid, ct_critical, loc); 3317 3318 __kmp_release_user_lock_with_checks(lck, global_tid); 3319 3320 #endif // KMP_USE_DYNAMIC_LOCK 3321 } // __kmp_end_critical_section_reduce_block 3322 3323 static __forceinline int 3324 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3325 int *task_state) { 3326 kmp_team_t *team; 3327 3328 // Check if we are inside the teams construct? 3329 if (th->th.th_teams_microtask) { 3330 *team_p = team = th->th.th_team; 3331 if (team->t.t_level == th->th.th_teams_level) { 3332 // This is reduction at teams construct. 3333 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3334 // Let's swap teams temporarily for the reduction. 3335 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3336 th->th.th_team = team->t.t_parent; 3337 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3338 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3339 *task_state = th->th.th_task_state; 3340 th->th.th_task_state = 0; 3341 3342 return 1; 3343 } 3344 } 3345 return 0; 3346 } 3347 3348 static __forceinline void 3349 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3350 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3351 th->th.th_info.ds.ds_tid = 0; 3352 th->th.th_team = team; 3353 th->th.th_team_nproc = team->t.t_nproc; 3354 th->th.th_task_team = team->t.t_task_team[task_state]; 3355 th->th.th_task_state = task_state; 3356 } 3357 3358 /* 2.a.i. Reduce Block without a terminating barrier */ 3359 /*! 
3360 @ingroup SYNCHRONIZATION 3361 @param loc source location information 3362 @param global_tid global thread number 3363 @param num_vars number of items (variables) to be reduced 3364 @param reduce_size size of data in bytes to be reduced 3365 @param reduce_data pointer to data to be reduced 3366 @param reduce_func callback function providing reduction operation on two 3367 operands and returning result of reduction in lhs_data 3368 @param lck pointer to the unique lock data structure 3369 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3370 threads if atomic reduction needed 3371 3372 The nowait version is used for a reduce clause with the nowait argument. 3373 */ 3374 kmp_int32 3375 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3376 size_t reduce_size, void *reduce_data, 3377 void (*reduce_func)(void *lhs_data, void *rhs_data), 3378 kmp_critical_name *lck) { 3379 3380 KMP_COUNT_BLOCK(REDUCE_nowait); 3381 int retval = 0; 3382 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3383 kmp_info_t *th; 3384 kmp_team_t *team; 3385 int teams_swapped = 0, task_state; 3386 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3387 3388 // why do we need this initialization here at all? 3389 // Reduction clause can not be used as a stand-alone directive. 3390 3391 // do not call __kmp_serial_initialize(), it will be called by 3392 // __kmp_parallel_initialize() if needed 3393 // possible detection of false-positive race by the threadchecker ??? 3394 if (!TCR_4(__kmp_init_parallel)) 3395 __kmp_parallel_initialize(); 3396 3397 __kmp_resume_if_soft_paused(); 3398 3399 // check correctness of reduce block nesting 3400 #if KMP_USE_DYNAMIC_LOCK 3401 if (__kmp_env_consistency_check) 3402 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3403 #else 3404 if (__kmp_env_consistency_check) 3405 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3406 #endif 3407 3408 th = __kmp_thread_from_gtid(global_tid); 3409 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3410 3411 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3412 // the value should be kept in a variable 3413 // the variable should be either a construct-specific or thread-specific 3414 // property, not a team specific property 3415 // (a thread can reach the next reduce block on the next construct, reduce 3416 // method may differ on the next construct) 3417 // an ident_t "loc" parameter could be used as a construct-specific property 3418 // (what if loc == 0?) 
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs would be needed)
  // a thread-specific variable is better regarding two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes 'determine' and 'set' lines (no need to execute by one
  // thread, to avoid unnecessary extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop',
    // but atomic operation has not been executed yet, will be executed
    // slightly later, literally on next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // AT: performance issue: a real barrier here
    // AT: (if master goes slow, other threads are blocked here waiting for the
    // master to come and release them)
    // AT: (it's not what a customer might expect specifying NOWAIT clause)
    // AT: (specifying NOWAIT won't result in improvement of performance, it'll
    // be confusing to a customer)
    // AT: another implementation of *barrier_gather*nowait() (or some other design)
    // might go faster and be more in line with sense of NOWAIT
    // AT: TO DO: do epcc test and compare times

    // this barrier should be invisible to a customer and to the threading profile
    // tool (it's neither a terminating barrier nor customer's code, it's
    // used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events
    // so we set-up the necessary frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ?
(0) : (1);
3491 #if OMPT_SUPPORT && OMPT_OPTIONAL
3492 if (ompt_enabled.enabled) {
3493 ompt_frame->enter_frame = ompt_data_none;
3494 }
3495 #endif
3496
3497 // all workers except the master should do this pop here
3498 // (no other worker will get to __kmpc_end_reduce_nowait())
3499 if (__kmp_env_consistency_check) {
3500 if (retval == 0) {
3501 __kmp_pop_sync(global_tid, ct_reduce, loc);
3502 }
3503 }
3504
3505 } else {
3506
3507 // should never reach this block
3508 KMP_ASSERT(0); // "unexpected method"
3509 }
3510 if (teams_swapped) {
3511 __kmp_restore_swapped_teams(th, team, task_state);
3512 }
3513 KA_TRACE(
3514 10,
3515 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3516 global_tid, packed_reduction_method, retval));
3517
3518 return retval;
3519 }
3520
3521 /*!
3522 @ingroup SYNCHRONIZATION
3523 @param loc source location information
3524 @param global_tid global thread id.
3525 @param lck pointer to the unique lock data structure
3526
3527 Finish the execution of a reduce nowait.
3528 */
3529 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3530 kmp_critical_name *lck) {
3531
3532 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3533
3534 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3535
3536 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3537
3538 if (packed_reduction_method == critical_reduce_block) {
3539
3540 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3541
3542 } else if (packed_reduction_method == empty_reduce_block) {
3543
3544 // usage: if team size == 1, no synchronization is required ( on Intel
3545 // platforms only )
3546
3547 } else if (packed_reduction_method == atomic_reduce_block) {
3548
3549 // neither the master nor the other workers should get here
3550 // (code gen does not generate this call in case 2: atomic reduce block)
3551 // it would actually be better to remove this elseif entirely;
3552 // after removal the value would be caught by the 'else' and would assert
3553
3554 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3555 tree_reduce_block)) {
3556
3557 // only the master gets here
3558
3559 } else {
3560
3561 // should never reach this block
3562 KMP_ASSERT(0); // "unexpected method"
3563 }
3564
3565 if (__kmp_env_consistency_check)
3566 __kmp_pop_sync(global_tid, ct_reduce, loc);
3567
3568 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3569 global_tid, packed_reduction_method));
3570
3571 return;
3572 }
3573
3574 /* 2.a.ii. Reduce Block with a terminating barrier */
3575
3576 /*!
3577 @ingroup SYNCHRONIZATION
3578 @param loc source location information
3579 @param global_tid global thread number
3580 @param num_vars number of items (variables) to be reduced
3581 @param reduce_size size of data in bytes to be reduced
3582 @param reduce_data pointer to data to be reduced
3583 @param reduce_func callback function providing reduction operation on two
3584 operands and returning result of reduction in lhs_data
3585 @param lck pointer to the unique lock data structure
3586 @result 1 for the master thread, 0 for all other team threads, 2 for all team
3587 threads if atomic reduction needed
3588
3589 A blocking reduce that includes an implicit barrier.
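
As a rough illustration only (a sketch, not the exact sequence any particular
compiler emits; the reduce_data layout, the variable names, and the atomic
combine shown below are placeholders), generated code for a simple sum
reduction might drive this entry point roughly as follows:

@code
// placeholders: loc is the ident_t for the construct, gtid the caller's global
// thread id, shared_sum the original list item, local_sum this thread's
// private partial result
typedef struct { int sum; } red_data_t;
static kmp_critical_name red_lock; // zero-initialized lock/name for the reduction

static void red_func(void *lhs, void *rhs) { // reduce_func: combine rhs into lhs
  ((red_data_t *)lhs)->sum += ((red_data_t *)rhs)->sum;
}

red_data_t priv = {local_sum};
switch (__kmpc_reduce(&loc, gtid, 1, sizeof(priv), &priv, red_func, &red_lock)) {
case 1: // combine serially into the shared variable, then finish the construct
  shared_sum += priv.sum;
  __kmpc_end_reduce(&loc, gtid, &red_lock);
  break;
case 2: // atomic reduction: every thread combines atomically; the end call may
        // still be emitted so the runtime can execute the terminating barrier
  __atomic_fetch_add(&shared_sum, priv.sum, __ATOMIC_RELAXED);
  __kmpc_end_reduce(&loc, gtid, &red_lock);
  break;
default: // 0: a tree reduction already combined the data; nothing more to do
  break;
}
@endcode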
3590 */ 3591 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3592 size_t reduce_size, void *reduce_data, 3593 void (*reduce_func)(void *lhs_data, void *rhs_data), 3594 kmp_critical_name *lck) { 3595 KMP_COUNT_BLOCK(REDUCE_wait); 3596 int retval = 0; 3597 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3598 kmp_info_t *th; 3599 kmp_team_t *team; 3600 int teams_swapped = 0, task_state; 3601 3602 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3603 3604 // why do we need this initialization here at all? 3605 // Reduction clause can not be a stand-alone directive. 3606 3607 // do not call __kmp_serial_initialize(), it will be called by 3608 // __kmp_parallel_initialize() if needed 3609 // possible detection of false-positive race by the threadchecker ??? 3610 if (!TCR_4(__kmp_init_parallel)) 3611 __kmp_parallel_initialize(); 3612 3613 __kmp_resume_if_soft_paused(); 3614 3615 // check correctness of reduce block nesting 3616 #if KMP_USE_DYNAMIC_LOCK 3617 if (__kmp_env_consistency_check) 3618 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3619 #else 3620 if (__kmp_env_consistency_check) 3621 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3622 #endif 3623 3624 th = __kmp_thread_from_gtid(global_tid); 3625 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3626 3627 packed_reduction_method = __kmp_determine_reduction_method( 3628 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3629 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3630 3631 if (packed_reduction_method == critical_reduce_block) { 3632 3633 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3634 retval = 1; 3635 3636 } else if (packed_reduction_method == empty_reduce_block) { 3637 3638 // usage: if team size == 1, no synchronization is required ( Intel 3639 // platforms only ) 3640 retval = 1; 3641 3642 } else if (packed_reduction_method == atomic_reduce_block) { 3643 3644 retval = 2; 3645 3646 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3647 tree_reduce_block)) { 3648 3649 // case tree_reduce_block: 3650 // this barrier should be visible to a customer and to the threading profile 3651 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3652 #if OMPT_SUPPORT 3653 ompt_frame_t *ompt_frame; 3654 if (ompt_enabled.enabled) { 3655 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3656 if (ompt_frame->enter_frame.ptr == NULL) 3657 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3658 OMPT_STORE_RETURN_ADDRESS(global_tid); 3659 } 3660 #endif 3661 #if USE_ITT_NOTIFY 3662 __kmp_threads[global_tid]->th.th_ident = 3663 loc; // needed for correct notification of frames 3664 #endif 3665 retval = 3666 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3667 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3668 retval = (retval != 0) ? 
(0) : (1);
3669 #if OMPT_SUPPORT && OMPT_OPTIONAL
3670 if (ompt_enabled.enabled) {
3671 ompt_frame->enter_frame = ompt_data_none;
3672 }
3673 #endif
3674
3675 // all workers except the master should do this pop here
3676 // (only the master will enter __kmpc_end_reduce())
3677 if (__kmp_env_consistency_check) {
3678 if (retval == 0) { // 0: all other workers; 1: master
3679 __kmp_pop_sync(global_tid, ct_reduce, loc);
3680 }
3681 }
3682
3683 } else {
3684
3685 // should never reach this block
3686 KMP_ASSERT(0); // "unexpected method"
3687 }
3688 if (teams_swapped) {
3689 __kmp_restore_swapped_teams(th, team, task_state);
3690 }
3691
3692 KA_TRACE(10,
3693 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3694 global_tid, packed_reduction_method, retval));
3695 return retval;
3696 }
3697
3698 /*!
3699 @ingroup SYNCHRONIZATION
3700 @param loc source location information
3701 @param global_tid global thread id.
3702 @param lck pointer to the unique lock data structure
3703
3704 Finish the execution of a blocking reduce.
3705 The <tt>lck</tt> pointer must be the same as that used in the corresponding
3706 start function.
3707 */
3708 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3709 kmp_critical_name *lck) {
3710
3711 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3712 kmp_info_t *th;
3713 kmp_team_t *team;
3714 int teams_swapped = 0, task_state;
3715
3716 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3717
3718 th = __kmp_thread_from_gtid(global_tid);
3719 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3720
3721 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3722
3723 // this barrier should be visible to a customer and to the threading profile
3724 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3725
3726 if (packed_reduction_method == critical_reduce_block) {
3727 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3728
3729 // TODO: implicit barrier: should be exposed
3730 #if OMPT_SUPPORT
3731 ompt_frame_t *ompt_frame;
3732 if (ompt_enabled.enabled) {
3733 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3734 if (ompt_frame->enter_frame.ptr == NULL)
3735 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3736 OMPT_STORE_RETURN_ADDRESS(global_tid);
3737 }
3738 #endif
3739 #if USE_ITT_NOTIFY
3740 __kmp_threads[global_tid]->th.th_ident = loc;
3741 #endif
3742 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3743 #if OMPT_SUPPORT && OMPT_OPTIONAL
3744 if (ompt_enabled.enabled) {
3745 ompt_frame->enter_frame = ompt_data_none;
3746 }
3747 #endif
3748
3749 } else if (packed_reduction_method == empty_reduce_block) {
3750
3751 // usage: if team size==1, no synchronization is required (Intel platforms only)
3752
3753 // TODO: implicit barrier: should be exposed
3754 #if OMPT_SUPPORT
3755 ompt_frame_t *ompt_frame;
3756 if (ompt_enabled.enabled) {
3757 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3758 if (ompt_frame->enter_frame.ptr == NULL)
3759 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3760 OMPT_STORE_RETURN_ADDRESS(global_tid);
3761 }
3762 #endif
3763 #if USE_ITT_NOTIFY
3764 __kmp_threads[global_tid]->th.th_ident = loc;
3765 #endif
3766 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3767 #if OMPT_SUPPORT && OMPT_OPTIONAL
3768 if (ompt_enabled.enabled) {
3769 ompt_frame->enter_frame = ompt_data_none;
3770 }
3771 #endif
3772
3773 } else if (packed_reduction_method == atomic_reduce_block) {
3774
3775 #if OMPT_SUPPORT
3776 ompt_frame_t *ompt_frame;
3777 if (ompt_enabled.enabled) {
3778 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3779 if (ompt_frame->enter_frame.ptr == NULL)
3780 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3781 OMPT_STORE_RETURN_ADDRESS(global_tid);
3782 }
3783 #endif
3784 // TODO: implicit barrier: should be exposed
3785 #if USE_ITT_NOTIFY
3786 __kmp_threads[global_tid]->th.th_ident = loc;
3787 #endif
3788 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3789 #if OMPT_SUPPORT && OMPT_OPTIONAL
3790 if (ompt_enabled.enabled) {
3791 ompt_frame->enter_frame = ompt_data_none;
3792 }
3793 #endif
3794
3795 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3796 tree_reduce_block)) {
3797
3798 // only the master executes here (it releases all other workers)
3799 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3800 global_tid);
3801
3802 } else {
3803
3804 // should never reach this block
3805 KMP_ASSERT(0); // "unexpected method"
3806 }
3807 if (teams_swapped) {
3808 __kmp_restore_swapped_teams(th, team, task_state);
3809 }
3810
3811 if (__kmp_env_consistency_check)
3812 __kmp_pop_sync(global_tid, ct_reduce, loc);
3813
3814 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3815 global_tid, packed_reduction_method));
3816
3817 return;
3818 }
3819
3820 #undef __KMP_GET_REDUCTION_METHOD
3821 #undef __KMP_SET_REDUCTION_METHOD
3822
3823 /* end of interface to fast scalable reduce routines */
3824
3825 kmp_uint64 __kmpc_get_taskid() {
3826
3827 kmp_int32 gtid;
3828 kmp_info_t *thread;
3829
3830 gtid = __kmp_get_gtid();
3831 if (gtid < 0) {
3832 return 0;
3833 }
3834 thread = __kmp_thread_from_gtid(gtid);
3835 return thread->th.th_current_task->td_task_id;
3836
3837 } // __kmpc_get_taskid
3838
3839 kmp_uint64 __kmpc_get_parent_taskid() {
3840
3841 kmp_int32 gtid;
3842 kmp_info_t *thread;
3843 kmp_taskdata_t *parent_task;
3844
3845 gtid = __kmp_get_gtid();
3846 if (gtid < 0) {
3847 return 0;
3848 }
3849 thread = __kmp_thread_from_gtid(gtid);
3850 parent_task = thread->th.th_current_task->td_parent;
3851 return (parent_task == NULL ? 0 : parent_task->td_task_id);
3852
3853 } // __kmpc_get_parent_taskid
3854
3855 /*!
3856 @ingroup WORK_SHARING
3857 @param loc source location information.
3858 @param gtid global thread number.
3859 @param num_dims number of associated doacross loops.
3860 @param dims info on loop bounds.
3861
3862 Initialize doacross loop information.
3863 The compiler is expected to send us inclusive bounds,
3864 e.g. for (i=2; i<9; i+=2) lo=2, up=8, st=2.
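
For illustration only, here is a sketch (not the exact lowering of any
particular compiler; loc and gtid are placeholders, and the worksharing
distribution of iterations is omitted) of how a loop with ordered(1) and
depend(sink)/depend(source) clauses might drive the doacross entry points:

@code
struct kmp_dim dim; // inclusive bounds, matching the example above
dim.lo = 2;
dim.up = 8;
dim.st = 2;
__kmpc_doacross_init(&loc, gtid, 1, &dim);
for (kmp_int64 i = 2; i < 9; i += 2) { // each thread runs only its own chunk
  kmp_int64 sink = i - 2;
  __kmpc_doacross_wait(&loc, gtid, &sink);   // ordered depend(sink: i - 2)
  // ... loop body ...
  kmp_int64 source = i;
  __kmpc_doacross_post(&loc, gtid, &source); // ordered depend(source)
}
__kmpc_doacross_fini(&loc, gtid);
@endcode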
3865 */
3866 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3867 const struct kmp_dim *dims) {
3868 int j, idx;
3869 kmp_int64 last, trace_count;
3870 kmp_info_t *th = __kmp_threads[gtid];
3871 kmp_team_t *team = th->th.th_team;
3872 kmp_uint32 *flags;
3873 kmp_disp_t *pr_buf = th->th.th_dispatch;
3874 dispatch_shared_info_t *sh_buf;
3875
3876 KA_TRACE(
3877 20,
3878 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3879 gtid, num_dims, !team->t.t_serialized));
3880 KMP_DEBUG_ASSERT(dims != NULL);
3881 KMP_DEBUG_ASSERT(num_dims > 0);
3882
3883 if (team->t.t_serialized) {
3884 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3885 return; // no dependencies if team is serialized
3886 }
3887 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3888 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3889 // the next loop
3890 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3891
3892 // Save bounds info into allocated private buffer
3893 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3894 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3895 th, sizeof(kmp_int64) * (4 * num_dims + 1));
3896 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3897 pr_buf->th_doacross_info[0] =
3898 (kmp_int64)num_dims; // first element is number of dimensions
3899 // Also save the address of num_done so it can be accessed later without
3900 // knowing the buffer index
3901 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3902 pr_buf->th_doacross_info[2] = dims[0].lo;
3903 pr_buf->th_doacross_info[3] = dims[0].up;
3904 pr_buf->th_doacross_info[4] = dims[0].st;
3905 last = 5;
3906 for (j = 1; j < num_dims; ++j) {
3907 kmp_int64
3908 range_length; // keeps the range of each dimension except the first, dims[0]
3909 if (dims[j].st == 1) { // most common case
3910 // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3911 range_length = dims[j].up - dims[j].lo + 1;
3912 } else {
3913 if (dims[j].st > 0) {
3914 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3915 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3916 } else { // negative increment
3917 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3918 range_length =
3919 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3920 }
3921 }
3922 pr_buf->th_doacross_info[last++] = range_length;
3923 pr_buf->th_doacross_info[last++] = dims[j].lo;
3924 pr_buf->th_doacross_info[last++] = dims[j].up;
3925 pr_buf->th_doacross_info[last++] = dims[j].st;
3926 }
3927
3928 // Compute total trip count.
3929 // Start with the range of dims[0], which we don't need to keep in the buffer.
3930 if (dims[0].st == 1) { // most common case 3931 trace_count = dims[0].up - dims[0].lo + 1; 3932 } else if (dims[0].st > 0) { 3933 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3934 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3935 } else { // negative increment 3936 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3937 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3938 } 3939 for (j = 1; j < num_dims; ++j) { 3940 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3941 } 3942 KMP_DEBUG_ASSERT(trace_count > 0); 3943 3944 // Check if shared buffer is not occupied by other loop (idx - 3945 // __kmp_dispatch_num_buffers) 3946 if (idx != sh_buf->doacross_buf_idx) { 3947 // Shared buffer is occupied, wait for it to be free 3948 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3949 __kmp_eq_4, NULL); 3950 } 3951 #if KMP_32_BIT_ARCH 3952 // Check if we are the first thread. After the CAS the first thread gets 0, 3953 // others get 1 if initialization is in progress, allocated pointer otherwise. 3954 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3955 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3956 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3957 #else 3958 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3959 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3960 #endif 3961 if (flags == NULL) { 3962 // we are the first thread, allocate the array of flags 3963 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3964 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3965 KMP_MB(); 3966 sh_buf->doacross_flags = flags; 3967 } else if (flags == (kmp_uint32 *)1) { 3968 #if KMP_32_BIT_ARCH 3969 // initialization is still in progress, need to wait 3970 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 3971 #else 3972 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 3973 #endif 3974 KMP_YIELD(TRUE); 3975 KMP_MB(); 3976 } else { 3977 KMP_MB(); 3978 } 3979 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 3980 pr_buf->th_doacross_flags = 3981 sh_buf->doacross_flags; // save private copy in order to not 3982 // touch shared buffer on each iteration 3983 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 3984 } 3985 3986 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 3987 kmp_int32 shft, num_dims, i; 3988 kmp_uint32 flag; 3989 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 3990 kmp_info_t *th = __kmp_threads[gtid]; 3991 kmp_team_t *team = th->th.th_team; 3992 kmp_disp_t *pr_buf; 3993 kmp_int64 lo, up, st; 3994 3995 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 3996 if (team->t.t_serialized) { 3997 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 3998 return; // no dependencies if team is serialized 3999 } 4000 4001 // calculate sequential iteration number and check out-of-bounds condition 4002 pr_buf = th->th.th_dispatch; 4003 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4004 num_dims = pr_buf->th_doacross_info[0]; 4005 lo = pr_buf->th_doacross_info[2]; 4006 up = pr_buf->th_doacross_info[3]; 4007 st = pr_buf->th_doacross_info[4]; 4008 if (st == 1) { // most common case 4009 if (vec[0] < lo || vec[0] > up) { 4010 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4011 "bounds [%lld,%lld]\n", 4012 gtid, vec[0], lo, up)); 4013 return; 4014 } 4015 iter_number = 
vec[0] - lo; 4016 } else if (st > 0) { 4017 if (vec[0] < lo || vec[0] > up) { 4018 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4019 "bounds [%lld,%lld]\n", 4020 gtid, vec[0], lo, up)); 4021 return; 4022 } 4023 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4024 } else { // negative increment 4025 if (vec[0] > lo || vec[0] < up) { 4026 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4027 "bounds [%lld,%lld]\n", 4028 gtid, vec[0], lo, up)); 4029 return; 4030 } 4031 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4032 } 4033 for (i = 1; i < num_dims; ++i) { 4034 kmp_int64 iter, ln; 4035 kmp_int32 j = i * 4; 4036 ln = pr_buf->th_doacross_info[j + 1]; 4037 lo = pr_buf->th_doacross_info[j + 2]; 4038 up = pr_buf->th_doacross_info[j + 3]; 4039 st = pr_buf->th_doacross_info[j + 4]; 4040 if (st == 1) { 4041 if (vec[i] < lo || vec[i] > up) { 4042 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4043 "bounds [%lld,%lld]\n", 4044 gtid, vec[i], lo, up)); 4045 return; 4046 } 4047 iter = vec[i] - lo; 4048 } else if (st > 0) { 4049 if (vec[i] < lo || vec[i] > up) { 4050 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4051 "bounds [%lld,%lld]\n", 4052 gtid, vec[i], lo, up)); 4053 return; 4054 } 4055 iter = (kmp_uint64)(vec[i] - lo) / st; 4056 } else { // st < 0 4057 if (vec[i] > lo || vec[i] < up) { 4058 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4059 "bounds [%lld,%lld]\n", 4060 gtid, vec[i], lo, up)); 4061 return; 4062 } 4063 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4064 } 4065 iter_number = iter + ln * iter_number; 4066 } 4067 shft = iter_number % 32; // use 32-bit granularity 4068 iter_number >>= 5; // divided by 32 4069 flag = 1 << shft; 4070 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4071 KMP_YIELD(TRUE); 4072 } 4073 KMP_MB(); 4074 KA_TRACE(20, 4075 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4076 gtid, (iter_number << 5) + shft)); 4077 } 4078 4079 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4080 kmp_int32 shft, num_dims, i; 4081 kmp_uint32 flag; 4082 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4083 kmp_info_t *th = __kmp_threads[gtid]; 4084 kmp_team_t *team = th->th.th_team; 4085 kmp_disp_t *pr_buf; 4086 kmp_int64 lo, st; 4087 4088 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4089 if (team->t.t_serialized) { 4090 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4091 return; // no dependencies if team is serialized 4092 } 4093 4094 // calculate sequential iteration number (same as in "wait" but no 4095 // out-of-bounds checks) 4096 pr_buf = th->th.th_dispatch; 4097 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4098 num_dims = pr_buf->th_doacross_info[0]; 4099 lo = pr_buf->th_doacross_info[2]; 4100 st = pr_buf->th_doacross_info[4]; 4101 if (st == 1) { // most common case 4102 iter_number = vec[0] - lo; 4103 } else if (st > 0) { 4104 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4105 } else { // negative increment 4106 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4107 } 4108 for (i = 1; i < num_dims; ++i) { 4109 kmp_int64 iter, ln; 4110 kmp_int32 j = i * 4; 4111 ln = pr_buf->th_doacross_info[j + 1]; 4112 lo = pr_buf->th_doacross_info[j + 2]; 4113 st = pr_buf->th_doacross_info[j + 4]; 4114 if (st == 1) { 4115 iter = vec[i] - lo; 4116 } else if (st > 0) { 4117 iter = (kmp_uint64)(vec[i] - lo) / st; 4118 } else { // st < 0 4119 iter = 
(kmp_uint64)(lo - vec[i]) / (-st); 4120 } 4121 iter_number = iter + ln * iter_number; 4122 } 4123 shft = iter_number % 32; // use 32-bit granularity 4124 iter_number >>= 5; // divided by 32 4125 flag = 1 << shft; 4126 KMP_MB(); 4127 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4128 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4129 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4130 (iter_number << 5) + shft)); 4131 } 4132 4133 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4134 kmp_int32 num_done; 4135 kmp_info_t *th = __kmp_threads[gtid]; 4136 kmp_team_t *team = th->th.th_team; 4137 kmp_disp_t *pr_buf = th->th.th_dispatch; 4138 4139 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4140 if (team->t.t_serialized) { 4141 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4142 return; // nothing to do 4143 } 4144 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4145 if (num_done == th->th.th_team_nproc) { 4146 // we are the last thread, need to free shared resources 4147 int idx = pr_buf->th_doacross_buf_idx - 1; 4148 dispatch_shared_info_t *sh_buf = 4149 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4150 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4151 (kmp_int64)&sh_buf->doacross_num_done); 4152 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4153 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4154 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4155 sh_buf->doacross_flags = NULL; 4156 sh_buf->doacross_num_done = 0; 4157 sh_buf->doacross_buf_idx += 4158 __kmp_dispatch_num_buffers; // free buffer for future re-use 4159 } 4160 // free private resources (need to keep buffer index forever) 4161 pr_buf->th_doacross_flags = NULL; 4162 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4163 pr_buf->th_doacross_info = NULL; 4164 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4165 } 4166 4167 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */ 4168 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4169 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4170 } 4171 4172 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4173 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4174 } 4175 4176 int __kmpc_get_target_offload(void) { 4177 if (!__kmp_init_serial) { 4178 __kmp_serial_initialize(); 4179 } 4180 return __kmp_target_offload; 4181 } 4182 4183 int __kmpc_pause_resource(kmp_pause_status_t level) { 4184 if (!__kmp_init_serial) { 4185 return 1; // Can't pause if runtime is not initialized 4186 } 4187 return __kmp_pause_resource(level); 4188 } 4189
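
/* A minimal user-level sketch of the omp_alloc()/omp_free() entry points defined
   above, using the predefined omp_default_mem_alloc allocator from <omp.h>
   (illustrative only; an application may instead construct its own allocator
   with omp_init_allocator()):

     #include <omp.h>

     double *buf = (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf, possibly from several threads of a parallel region ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/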