/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), so this call counts all such threads even
if they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
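 *
 * In debug builds the result may instead be FALSE: the __kmp_par_range
 * filtering in the body below (matching on file name, routine name and
 * line-number range) is used to serialize selected parallel regions for
 * debugging.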
157 */ 158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 159 #ifndef KMP_DEBUG 160 161 return TRUE; 162 163 #else 164 165 const char *semi2; 166 const char *semi3; 167 int line_no; 168 169 if (__kmp_par_range == 0) { 170 return TRUE; 171 } 172 semi2 = loc->psource; 173 if (semi2 == NULL) { 174 return TRUE; 175 } 176 semi2 = strchr(semi2, ';'); 177 if (semi2 == NULL) { 178 return TRUE; 179 } 180 semi2 = strchr(semi2 + 1, ';'); 181 if (semi2 == NULL) { 182 return TRUE; 183 } 184 if (__kmp_par_range_filename[0]) { 185 const char *name = semi2 - 1; 186 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 187 name--; 188 } 189 if ((*name == '/') || (*name == ';')) { 190 name++; 191 } 192 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 193 return __kmp_par_range < 0; 194 } 195 } 196 semi3 = strchr(semi2 + 1, ';'); 197 if (__kmp_par_range_routine[0]) { 198 if ((semi3 != NULL) && (semi3 > semi2) && 199 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 200 return __kmp_par_range < 0; 201 } 202 } 203 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 204 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 205 return __kmp_par_range > 0; 206 } 207 return __kmp_par_range < 0; 208 } 209 return TRUE; 210 211 #endif /* KMP_DEBUG */ 212 } 213 214 /*! 215 @ingroup THREAD_STATES 216 @param loc Source location information. 217 @return 1 if this thread is executing inside an active parallel region, zero if 218 not. 219 */ 220 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 221 return __kmp_entry_thread()->th.th_root->r.r_active; 222 } 223 224 /*! 225 @ingroup PARALLEL 226 @param loc source location information 227 @param global_tid global thread number 228 @param num_threads number of threads requested for this parallel construct 229 230 Set the number of threads to be used by the next fork spawned by this thread. 231 This call is only required if the parallel construct has a `num_threads` clause. 232 */ 233 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 234 kmp_int32 num_threads) { 235 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 236 global_tid, num_threads)); 237 238 __kmp_push_num_threads(loc, global_tid, num_threads); 239 } 240 241 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 242 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 243 244 /* the num_threads are automatically popped */ 245 } 246 247 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 248 kmp_int32 proc_bind) { 249 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 250 proc_bind)); 251 252 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 253 } 254 255 /*! 256 @ingroup PARALLEL 257 @param loc source location information 258 @param argc total number of arguments in the ellipsis 259 @param microtask pointer to callback routine consisting of outlined parallel 260 construct 261 @param ... pointers to shared variables that aren't global 262 263 Do the actual fork and call the microtask in the relevant number of threads. 264 */ 265 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{ 266 int gtid = __kmp_entry_gtid(); 267 268 #if (KMP_STATS_ENABLED) 269 // If we were in a serial region, then stop the serial timer, record 270 // the event, and start parallel region timer 271 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 272 if (previous_state == stats_state_e::SERIAL_REGION) { 273 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 274 } else { 275 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 276 } 277 int inParallel = __kmpc_in_parallel(loc); 278 if (inParallel) { 279 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 280 } else { 281 KMP_COUNT_BLOCK(OMP_PARALLEL); 282 } 283 #endif 284 285 // maybe to save thr_state is enough here 286 { 287 va_list ap; 288 va_start(ap, microtask); 289 290 #if OMPT_SUPPORT 291 ompt_frame_t *ompt_frame; 292 if (ompt_enabled.enabled) { 293 kmp_info_t *master_th = __kmp_threads[gtid]; 294 kmp_team_t *parent_team = master_th->th.th_team; 295 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 296 if (lwt) 297 ompt_frame = &(lwt->ompt_task_info.frame); 298 else { 299 int tid = __kmp_tid_from_gtid(gtid); 300 ompt_frame = &( 301 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 302 } 303 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 304 OMPT_STORE_RETURN_ADDRESS(gtid); 305 } 306 #endif 307 308 #if INCLUDE_SSC_MARKS 309 SSC_MARK_FORKING(); 310 #endif 311 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 312 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 313 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 314 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 315 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 316 &ap 317 #else 318 ap 319 #endif 320 ); 321 #if INCLUDE_SSC_MARKS 322 SSC_MARK_JOINING(); 323 #endif 324 __kmp_join_call(loc, gtid 325 #if OMPT_SUPPORT 326 , 327 fork_context_intel 328 #endif 329 ); 330 331 va_end(ap); 332 } 333 334 #if KMP_STATS_ENABLED 335 if (previous_state == stats_state_e::SERIAL_REGION) { 336 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 337 } else { 338 KMP_POP_PARTITIONED_TIMER(); 339 } 340 #endif // KMP_STATS_ENABLED 341 } 342 343 /*! 344 @ingroup PARALLEL 345 @param loc source location information 346 @param global_tid global thread number 347 @param num_teams number of teams requested for the teams construct 348 @param num_threads number of threads per team requested for the teams construct 349 350 Set the number of teams to be used by the teams construct. 351 This call is only required if the teams construct has a `num_teams` clause 352 or a `thread_limit` clause (or both). 353 */ 354 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 355 kmp_int32 num_teams, kmp_int32 num_threads) { 356 KA_TRACE(20, 357 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 358 global_tid, num_teams, num_threads)); 359 360 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 361 } 362 363 /*! 364 @ingroup PARALLEL 365 @param loc source location information 366 @param argc total number of arguments in the ellipsis 367 @param microtask pointer to callback routine consisting of outlined teams 368 construct 369 @param ... pointers to shared variables that aren't global 370 371 Do the actual fork and call the microtask in the relevant number of threads. 372 */ 373 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 374 ...) 
{
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                      __kmp_teams_master, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                  &ap
#else
                  ap
#endif
                  );
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct.
This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
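  // A matching dispatch buffer was pushed by __kmp_serialized_parallel when
  // this serialized level was entered; release that buffer here.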
KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 570 { 571 dispatch_private_info_t *disp_buffer = 572 serial_team->t.t_dispatch->th_disp_buffer; 573 serial_team->t.t_dispatch->th_disp_buffer = 574 serial_team->t.t_dispatch->th_disp_buffer->next; 575 __kmp_free(disp_buffer); 576 } 577 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 578 579 --serial_team->t.t_serialized; 580 if (serial_team->t.t_serialized == 0) { 581 582 /* return to the parallel section */ 583 584 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 585 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 586 __kmp_clear_x87_fpu_status_word(); 587 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); 588 __kmp_load_mxcsr(&serial_team->t.t_mxcsr); 589 } 590 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 591 592 this_thr->th.th_team = serial_team->t.t_parent; 593 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; 594 595 /* restore values cached in the thread */ 596 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ 597 this_thr->th.th_team_master = 598 serial_team->t.t_parent->t.t_threads[0]; /* JPH */ 599 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; 600 601 /* TODO the below shouldn't need to be adjusted for serialized teams */ 602 this_thr->th.th_dispatch = 603 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; 604 605 __kmp_pop_current_task_from_thread(this_thr); 606 607 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); 608 this_thr->th.th_current_task->td_flags.executing = 1; 609 610 if (__kmp_tasking_mode != tskm_immediate_exec) { 611 // Copy the task team from the new child / old parent team to the thread. 612 this_thr->th.th_task_team = 613 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; 614 KA_TRACE(20, 615 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 616 "team %p\n", 617 global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); 618 } 619 } else { 620 if (__kmp_tasking_mode != tskm_immediate_exec) { 621 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " 622 "depth of serial team %p to %d\n", 623 global_tid, serial_team, serial_team->t.t_serialized)); 624 } 625 } 626 627 if (__kmp_env_consistency_check) 628 __kmp_pop_parallel(global_tid, NULL); 629 #if OMPT_SUPPORT 630 if (ompt_enabled.enabled) 631 this_thr->th.ompt_thread_info.state = 632 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial 633 : ompt_state_work_parallel); 634 #endif 635 } 636 637 /*! 638 @ingroup SYNCHRONIZATION 639 @param loc source location information. 640 641 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though 642 depending on the memory ordering convention obeyed by the compiler 643 even that may not be necessary). 644 */ 645 void __kmpc_flush(ident_t *loc) { 646 KC_TRACE(10, ("__kmpc_flush: called\n")); 647 648 /* need explicit __mf() here since use volatile instead in library */ 649 KMP_MB(); /* Flush all pending memory write invalidates. */ 650 651 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 652 #if KMP_MIC 653 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 654 // We shouldn't need it, though, since the ABI rules require that 655 // * If the compiler generates NGO stores it also generates the fence 656 // * If users hand-code NGO stores they should insert the fence 657 // therefore no incomplete unordered stores should be visible. 
#else
  // C74404
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction needs to be addressed as well (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
  // Nothing to see here, move along
#elif KMP_ARCH_PPC64
  // Nothing needed here (we have a real MB above).
#if KMP_OS_CNK
  // The flushing thread needs to yield here; this prevents a
  // busy-waiting thread from saturating the pipeline. flush is
  // often used in loops like this:
  //   while (!flag) {
  //     #pragma omp flush(flag)
  //   }
  // and adding the yield here is good for at least a 10x speedup
  // when running >2 threads per core (on the NAS LU benchmark).
  __kmp_yield();
#endif
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  //   this function is called when 'barrier' directive is present or
  //   implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}

/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
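
A sketch of the calling sequence a compiler typically emits for an
<tt>omp master</tt> region (illustrative only; loc and gtid stand for the
compiler-supplied source location and global thread id):
@code
if (__kmpc_master(&loc, gtid)) {
  // body of the master region, executed by the master thread only
  __kmpc_end_master(&loc, gtid);
}
@endcode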
763 */ 764 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 765 int status = 0; 766 767 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 768 769 if (!TCR_4(__kmp_init_parallel)) 770 __kmp_parallel_initialize(); 771 772 __kmp_resume_if_soft_paused(); 773 774 if (KMP_MASTER_GTID(global_tid)) { 775 KMP_COUNT_BLOCK(OMP_MASTER); 776 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 777 status = 1; 778 } 779 780 #if OMPT_SUPPORT && OMPT_OPTIONAL 781 if (status) { 782 if (ompt_enabled.ompt_callback_master) { 783 kmp_info_t *this_thr = __kmp_threads[global_tid]; 784 kmp_team_t *team = this_thr->th.th_team; 785 786 int tid = __kmp_tid_from_gtid(global_tid); 787 ompt_callbacks.ompt_callback(ompt_callback_master)( 788 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 789 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 790 OMPT_GET_RETURN_ADDRESS(0)); 791 } 792 } 793 #endif 794 795 if (__kmp_env_consistency_check) { 796 #if KMP_USE_DYNAMIC_LOCK 797 if (status) 798 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 799 else 800 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 801 #else 802 if (status) 803 __kmp_push_sync(global_tid, ct_master, loc, NULL); 804 else 805 __kmp_check_sync(global_tid, ct_master, loc, NULL); 806 #endif 807 } 808 809 return status; 810 } 811 812 /*! 813 @ingroup WORK_SHARING 814 @param loc source location information. 815 @param global_tid global thread number . 816 817 Mark the end of a <tt>master</tt> region. This should only be called by the 818 thread that executes the <tt>master</tt> region. 819 */ 820 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 821 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 822 823 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 824 KMP_POP_PARTITIONED_TIMER(); 825 826 #if OMPT_SUPPORT && OMPT_OPTIONAL 827 kmp_info_t *this_thr = __kmp_threads[global_tid]; 828 kmp_team_t *team = this_thr->th.th_team; 829 if (ompt_enabled.ompt_callback_master) { 830 int tid = __kmp_tid_from_gtid(global_tid); 831 ompt_callbacks.ompt_callback(ompt_callback_master)( 832 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 833 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 834 OMPT_GET_RETURN_ADDRESS(0)); 835 } 836 #endif 837 838 if (__kmp_env_consistency_check) { 839 if (global_tid < 0) 840 KMP_WARNING(ThreadIdentInvalid); 841 842 if (KMP_MASTER_GTID(global_tid)) 843 __kmp_pop_sync(global_tid, ct_master, loc); 844 } 845 } 846 847 /*! 848 @ingroup WORK_SHARING 849 @param loc source location information. 850 @param gtid global thread number. 851 852 Start execution of an <tt>ordered</tt> construct. 
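
For reference, an illustrative sketch of the calls a compiler typically
places around the body of a loop iteration that contains an
<tt>omp ordered</tt> region (loc and gtid are placeholders):
@code
__kmpc_ordered(&loc, gtid);
// statements inside the ordered region, executed in iteration order
__kmpc_end_ordered(&loc, gtid);
@endcode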
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
963 void *idx; 964 kmp_indirect_lock_t **lck; 965 lck = (kmp_indirect_lock_t **)crit; 966 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 967 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 968 KMP_SET_I_LOCK_LOCATION(ilk, loc); 969 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 970 KA_TRACE(20, 971 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 972 #if USE_ITT_BUILD 973 __kmp_itt_critical_creating(ilk->lock, loc); 974 #endif 975 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 976 if (status == 0) { 977 #if USE_ITT_BUILD 978 __kmp_itt_critical_destroyed(ilk->lock); 979 #endif 980 // We don't really need to destroy the unclaimed lock here since it will be 981 // cleaned up at program exit. 982 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 983 } 984 KMP_DEBUG_ASSERT(*lck != NULL); 985 } 986 987 // Fast-path acquire tas lock 988 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 989 { \ 990 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 991 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 992 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 993 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 994 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 995 kmp_uint32 spins; \ 996 KMP_FSYNC_PREPARE(l); \ 997 KMP_INIT_YIELD(spins); \ 998 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 999 do { \ 1000 if (TCR_4(__kmp_nth) > \ 1001 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1002 KMP_YIELD(TRUE); \ 1003 } else { \ 1004 KMP_YIELD_SPIN(spins); \ 1005 } \ 1006 __kmp_spin_backoff(&backoff); \ 1007 } while ( \ 1008 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1009 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1010 } \ 1011 KMP_FSYNC_ACQUIRED(l); \ 1012 } 1013 1014 // Fast-path test tas lock 1015 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1016 { \ 1017 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1018 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1019 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1020 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1021 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1022 } 1023 1024 // Fast-path release tas lock 1025 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1026 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1027 1028 #if KMP_USE_FUTEX 1029 1030 #include <sys/syscall.h> 1031 #include <unistd.h> 1032 #ifndef FUTEX_WAIT 1033 #define FUTEX_WAIT 0 1034 #endif 1035 #ifndef FUTEX_WAKE 1036 #define FUTEX_WAKE 1 1037 #endif 1038 1039 // Fast-path acquire futex lock 1040 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1041 { \ 1042 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1043 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1044 KMP_MB(); \ 1045 KMP_FSYNC_PREPARE(ftx); \ 1046 kmp_int32 poll_val; \ 1047 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1048 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1049 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1050 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1051 if (!cond) { \ 1052 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1053 poll_val | \ 1054 KMP_LOCK_BUSY(1, futex))) { \ 1055 continue; \ 1056 } \ 1057 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1058 } \ 1059 kmp_int32 rc; \ 1060 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1061 NULL, NULL, 0)) != 0) { \ 1062 continue; \ 1063 } \ 1064 gtid_code |= 1; \ 1065 } \ 1066 KMP_FSYNC_ACQUIRED(ftx); \ 1067 } 1068 1069 // Fast-path test futex lock 1070 
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \
      rc = TRUE; \
    } else { \
      rc = FALSE; \
    } \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  { \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_MB(); \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    } \
    KMP_MB(); \
    KMP_YIELD_OVERSUB(); \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed with no usage, but that is
// not a problem, because this is not a real event seen by the user but
// rather setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
// Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
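
An illustrative sketch of typical compiler-generated usage; the name
crit_name is a placeholder for the per-construct variable the compiler
emits (a zero-initialized kmp_critical_name):
@code
static kmp_critical_name crit_name = {0};
__kmpc_critical(&loc, gtid, &crit_name);
// code protected by the critical construct
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode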
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
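  // Blocking acquisition point for the non-dynamic-lock path; the OMPT
  // mutex_acquired callback below is only issued once the lock is held.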
1228 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1229 1230 #if USE_ITT_BUILD 1231 __kmp_itt_critical_acquired(lck); 1232 #endif /* USE_ITT_BUILD */ 1233 #if OMPT_SUPPORT && OMPT_OPTIONAL 1234 if (ompt_enabled.enabled) { 1235 /* OMPT state update */ 1236 ti.state = prev_state; 1237 ti.wait_id = 0; 1238 1239 /* OMPT event callback */ 1240 if (ompt_enabled.ompt_callback_mutex_acquired) { 1241 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1242 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra); 1243 } 1244 } 1245 #endif 1246 KMP_POP_PARTITIONED_TIMER(); 1247 1248 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1249 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1250 #endif // KMP_USE_DYNAMIC_LOCK 1251 } 1252 1253 #if KMP_USE_DYNAMIC_LOCK 1254 1255 // Converts the given hint to an internal lock implementation 1256 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1257 #if KMP_USE_TSX 1258 #define KMP_TSX_LOCK(seq) lockseq_##seq 1259 #else 1260 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1261 #endif 1262 1263 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1264 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1265 #else 1266 #define KMP_CPUINFO_RTM 0 1267 #endif 1268 1269 // Hints that do not require further logic 1270 if (hint & kmp_lock_hint_hle) 1271 return KMP_TSX_LOCK(hle); 1272 if (hint & kmp_lock_hint_rtm) 1273 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; 1274 if (hint & kmp_lock_hint_adaptive) 1275 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1276 1277 // Rule out conflicting hints first by returning the default lock 1278 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1279 return __kmp_user_lock_seq; 1280 if ((hint & omp_lock_hint_speculative) && 1281 (hint & omp_lock_hint_nonspeculative)) 1282 return __kmp_user_lock_seq; 1283 1284 // Do not even consider speculation when it appears to be contended 1285 if (hint & omp_lock_hint_contended) 1286 return lockseq_queuing; 1287 1288 // Uncontended lock without speculation 1289 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1290 return lockseq_tas; 1291 1292 // HLE lock for speculation 1293 if (hint & omp_lock_hint_speculative) 1294 return KMP_TSX_LOCK(hle); 1295 1296 return __kmp_user_lock_seq; 1297 } 1298 1299 #if OMPT_SUPPORT && OMPT_OPTIONAL 1300 #if KMP_USE_DYNAMIC_LOCK 1301 static kmp_mutex_impl_t 1302 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1303 if (user_lock) { 1304 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1305 case 0: 1306 break; 1307 #if KMP_USE_FUTEX 1308 case locktag_futex: 1309 return kmp_mutex_impl_queuing; 1310 #endif 1311 case locktag_tas: 1312 return kmp_mutex_impl_spin; 1313 #if KMP_USE_TSX 1314 case locktag_hle: 1315 return kmp_mutex_impl_speculative; 1316 #endif 1317 default: 1318 return kmp_mutex_impl_none; 1319 } 1320 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1321 } 1322 KMP_ASSERT(ilock); 1323 switch (ilock->type) { 1324 #if KMP_USE_TSX 1325 case locktag_adaptive: 1326 case locktag_rtm: 1327 return kmp_mutex_impl_speculative; 1328 #endif 1329 case locktag_nested_tas: 1330 return kmp_mutex_impl_spin; 1331 #if KMP_USE_FUTEX 1332 case locktag_nested_futex: 1333 #endif 1334 case locktag_ticket: 1335 case locktag_queuing: 1336 case locktag_drdpa: 1337 case locktag_nested_ticket: 1338 case locktag_nested_queuing: 1339 case locktag_nested_drdpa: 1340 return kmp_mutex_impl_queuing; 1341 default: 1342 return kmp_mutex_impl_none; 
1343 } 1344 } 1345 #else 1346 // For locks without dynamic binding 1347 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1348 switch (__kmp_user_lock_kind) { 1349 case lk_tas: 1350 return kmp_mutex_impl_spin; 1351 #if KMP_USE_FUTEX 1352 case lk_futex: 1353 #endif 1354 case lk_ticket: 1355 case lk_queuing: 1356 case lk_drdpa: 1357 return kmp_mutex_impl_queuing; 1358 #if KMP_USE_TSX 1359 case lk_hle: 1360 case lk_rtm: 1361 case lk_adaptive: 1362 return kmp_mutex_impl_speculative; 1363 #endif 1364 default: 1365 return kmp_mutex_impl_none; 1366 } 1367 } 1368 #endif // KMP_USE_DYNAMIC_LOCK 1369 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1370 1371 /*! 1372 @ingroup WORK_SHARING 1373 @param loc source location information. 1374 @param global_tid global thread number. 1375 @param crit identity of the critical section. This could be a pointer to a lock 1376 associated with the critical section, or some other suitably unique value. 1377 @param hint the lock hint. 1378 1379 Enter code protected by a `critical` construct with a hint. The hint value is 1380 used to suggest a lock implementation. This function blocks until the executing 1381 thread can enter the critical section unless the hint suggests use of 1382 speculative execution and the hardware supports it. 1383 */ 1384 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1385 kmp_critical_name *crit, uint32_t hint) { 1386 KMP_COUNT_BLOCK(OMP_CRITICAL); 1387 kmp_user_lock_p lck; 1388 #if OMPT_SUPPORT && OMPT_OPTIONAL 1389 ompt_state_t prev_state = ompt_state_undefined; 1390 ompt_thread_info_t ti; 1391 // This is the case, if called from __kmpc_critical: 1392 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1393 if (!codeptr) 1394 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1395 #endif 1396 1397 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1398 1399 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1400 // Check if it is initialized. 1401 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1402 if (*lk == 0) { 1403 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1404 if (KMP_IS_D_LOCK(lckseq)) { 1405 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1406 KMP_GET_D_TAG(lckseq)); 1407 } else { 1408 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1409 } 1410 } 1411 // Branch for accessing the actual lock object and set operation. This 1412 // branching is inevitable since this lock initialization does not follow the 1413 // normal dispatch path (lock table is not used). 
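  // A nonzero direct-lock tag means the word stored in *crit is itself the
  // lock; a zero tag means *crit holds a pointer to an indirect lock object
  // that has to be dereferenced first.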
1414 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1415 lck = (kmp_user_lock_p)lk; 1416 if (__kmp_env_consistency_check) { 1417 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1418 __kmp_map_hint_to_lock(hint)); 1419 } 1420 #if USE_ITT_BUILD 1421 __kmp_itt_critical_acquiring(lck); 1422 #endif 1423 #if OMPT_SUPPORT && OMPT_OPTIONAL 1424 if (ompt_enabled.enabled) { 1425 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1426 /* OMPT state update */ 1427 prev_state = ti.state; 1428 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1429 ti.state = ompt_state_wait_critical; 1430 1431 /* OMPT event callback */ 1432 if (ompt_enabled.ompt_callback_mutex_acquire) { 1433 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1434 ompt_mutex_critical, (unsigned int)hint, 1435 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck, 1436 codeptr); 1437 } 1438 } 1439 #endif 1440 #if KMP_USE_INLINED_TAS 1441 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1442 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1443 } else 1444 #elif KMP_USE_INLINED_FUTEX 1445 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1446 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1447 } else 1448 #endif 1449 { 1450 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1451 } 1452 } else { 1453 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1454 lck = ilk->lock; 1455 if (__kmp_env_consistency_check) { 1456 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1457 __kmp_map_hint_to_lock(hint)); 1458 } 1459 #if USE_ITT_BUILD 1460 __kmp_itt_critical_acquiring(lck); 1461 #endif 1462 #if OMPT_SUPPORT && OMPT_OPTIONAL 1463 if (ompt_enabled.enabled) { 1464 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1465 /* OMPT state update */ 1466 prev_state = ti.state; 1467 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck; 1468 ti.state = ompt_state_wait_critical; 1469 1470 /* OMPT event callback */ 1471 if (ompt_enabled.ompt_callback_mutex_acquire) { 1472 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1473 ompt_mutex_critical, (unsigned int)hint, 1474 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck, 1475 codeptr); 1476 } 1477 } 1478 #endif 1479 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1480 } 1481 KMP_POP_PARTITIONED_TIMER(); 1482 1483 #if USE_ITT_BUILD 1484 __kmp_itt_critical_acquired(lck); 1485 #endif /* USE_ITT_BUILD */ 1486 #if OMPT_SUPPORT && OMPT_OPTIONAL 1487 if (ompt_enabled.enabled) { 1488 /* OMPT state update */ 1489 ti.state = prev_state; 1490 ti.wait_id = 0; 1491 1492 /* OMPT event callback */ 1493 if (ompt_enabled.ompt_callback_mutex_acquired) { 1494 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1495 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 1496 } 1497 } 1498 #endif 1499 1500 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1501 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1502 } // __kmpc_critical_with_hint 1503 1504 #endif // KMP_USE_DYNAMIC_LOCK 1505 1506 /*! 1507 @ingroup WORK_SHARING 1508 @param loc source location information. 1509 @param global_tid global thread number . 1510 @param crit identity of the critical section. This could be a pointer to a lock 1511 associated with the critical section, or some other suitably unique value. 1512 1513 Leave a critical section, releasing any lock that was held during its execution. 
1514 */ 1515 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1516 kmp_critical_name *crit) { 1517 kmp_user_lock_p lck; 1518 1519 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1520 1521 #if KMP_USE_DYNAMIC_LOCK 1522 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1523 lck = (kmp_user_lock_p)crit; 1524 KMP_ASSERT(lck != NULL); 1525 if (__kmp_env_consistency_check) { 1526 __kmp_pop_sync(global_tid, ct_critical, loc); 1527 } 1528 #if USE_ITT_BUILD 1529 __kmp_itt_critical_releasing(lck); 1530 #endif 1531 #if KMP_USE_INLINED_TAS 1532 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1533 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1534 } else 1535 #elif KMP_USE_INLINED_FUTEX 1536 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1537 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1538 } else 1539 #endif 1540 { 1541 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1542 } 1543 } else { 1544 kmp_indirect_lock_t *ilk = 1545 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1546 KMP_ASSERT(ilk != NULL); 1547 lck = ilk->lock; 1548 if (__kmp_env_consistency_check) { 1549 __kmp_pop_sync(global_tid, ct_critical, loc); 1550 } 1551 #if USE_ITT_BUILD 1552 __kmp_itt_critical_releasing(lck); 1553 #endif 1554 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1555 } 1556 1557 #else // KMP_USE_DYNAMIC_LOCK 1558 1559 if ((__kmp_user_lock_kind == lk_tas) && 1560 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1561 lck = (kmp_user_lock_p)crit; 1562 } 1563 #if KMP_USE_FUTEX 1564 else if ((__kmp_user_lock_kind == lk_futex) && 1565 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1566 lck = (kmp_user_lock_p)crit; 1567 } 1568 #endif 1569 else { // ticket, queuing or drdpa 1570 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1571 } 1572 1573 KMP_ASSERT(lck != NULL); 1574 1575 if (__kmp_env_consistency_check) 1576 __kmp_pop_sync(global_tid, ct_critical, loc); 1577 1578 #if USE_ITT_BUILD 1579 __kmp_itt_critical_releasing(lck); 1580 #endif /* USE_ITT_BUILD */ 1581 // Value of 'crit' should be good for using as a critical_id of the critical 1582 // section directive. 1583 __kmp_release_user_lock_with_checks(lck, global_tid); 1584 1585 #endif // KMP_USE_DYNAMIC_LOCK 1586 1587 #if OMPT_SUPPORT && OMPT_OPTIONAL 1588 /* OMPT release event triggers after lock is released; place here to trigger 1589 * for all #if branches */ 1590 OMPT_STORE_RETURN_ADDRESS(global_tid); 1591 if (ompt_enabled.ompt_callback_mutex_released) { 1592 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1593 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, 1594 OMPT_LOAD_RETURN_ADDRESS(0)); 1595 } 1596 #endif 1597 1598 KMP_POP_PARTITIONED_TIMER(); 1599 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1600 } 1601 1602 /*! 1603 @ingroup SYNCHRONIZATION 1604 @param loc source location information 1605 @param global_tid thread id. 1606 @return one if the thread should execute the master block, zero otherwise 1607 1608 Start execution of a combined barrier and master. The barrier is executed inside 1609 this function. 
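
An illustrative sketch of the expected pairing (not emitted by this file;
loc and gtid are placeholders):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // code executed by the master thread only
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode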
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;

  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the other threads are not held
at the end of the construct and there is nothing for the master thread to
release.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1684 } 1685 __kmp_check_barrier(global_tid, ct_barrier, loc); 1686 } 1687 1688 #if OMPT_SUPPORT 1689 ompt_frame_t *ompt_frame; 1690 if (ompt_enabled.enabled) { 1691 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1692 if (ompt_frame->enter_frame.ptr == NULL) 1693 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1694 OMPT_STORE_RETURN_ADDRESS(global_tid); 1695 } 1696 #endif 1697 #if USE_ITT_NOTIFY 1698 __kmp_threads[global_tid]->th.th_ident = loc; 1699 #endif 1700 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1701 #if OMPT_SUPPORT && OMPT_OPTIONAL 1702 if (ompt_enabled.enabled) { 1703 ompt_frame->enter_frame = ompt_data_none; 1704 } 1705 #endif 1706 1707 ret = __kmpc_master(loc, global_tid); 1708 1709 if (__kmp_env_consistency_check) { 1710 /* there's no __kmpc_end_master called; so the (stats) */ 1711 /* actions of __kmpc_end_master are done here */ 1712 1713 if (global_tid < 0) { 1714 KMP_WARNING(ThreadIdentInvalid); 1715 } 1716 if (ret) { 1717 /* only one thread should do the pop since only */ 1718 /* one did the push (see __kmpc_master()) */ 1719 1720 __kmp_pop_sync(global_tid, ct_master, loc); 1721 } 1722 } 1723 1724 return (ret); 1725 } 1726 1727 /* The BARRIER for a SINGLE process section is always explicit */ 1728 /*! 1729 @ingroup WORK_SHARING 1730 @param loc source location information 1731 @param global_tid global thread number 1732 @return One if this thread should execute the single construct, zero otherwise. 1733 1734 Test whether to execute a <tt>single</tt> construct. 1735 There are no implicit barriers in the two "single" calls, rather the compiler 1736 should introduce an explicit barrier if it is required. 1737 */ 1738 1739 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1740 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1741 1742 if (rc) { 1743 // We are going to execute the single statement, so we should count it. 1744 KMP_COUNT_BLOCK(OMP_SINGLE); 1745 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1746 } 1747 1748 #if OMPT_SUPPORT && OMPT_OPTIONAL 1749 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1750 kmp_team_t *team = this_thr->th.th_team; 1751 int tid = __kmp_tid_from_gtid(global_tid); 1752 1753 if (ompt_enabled.enabled) { 1754 if (rc) { 1755 if (ompt_enabled.ompt_callback_work) { 1756 ompt_callbacks.ompt_callback(ompt_callback_work)( 1757 ompt_work_single_executor, ompt_scope_begin, 1758 &(team->t.ompt_team_info.parallel_data), 1759 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1760 1, OMPT_GET_RETURN_ADDRESS(0)); 1761 } 1762 } else { 1763 if (ompt_enabled.ompt_callback_work) { 1764 ompt_callbacks.ompt_callback(ompt_callback_work)( 1765 ompt_work_single_other, ompt_scope_begin, 1766 &(team->t.ompt_team_info.parallel_data), 1767 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1768 1, OMPT_GET_RETURN_ADDRESS(0)); 1769 ompt_callbacks.ompt_callback(ompt_callback_work)( 1770 ompt_work_single_other, ompt_scope_end, 1771 &(team->t.ompt_team_info.parallel_data), 1772 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1773 1, OMPT_GET_RETURN_ADDRESS(0)); 1774 } 1775 } 1776 } 1777 #endif 1778 1779 return rc; 1780 } 1781 1782 /*! 1783 @ingroup WORK_SHARING 1784 @param loc source location information 1785 @param global_tid global thread number 1786 1787 Mark the end of a <tt>single</tt> construct. This function should 1788 only be called by the thread that executed the block of code protected 1789 by the `single` construct. 
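
For reference, an illustrative sketch of how the pair is typically used;
the trailing barrier is added separately by the compiler unless the
construct has a <tt>nowait</tt> clause (loc and gtid are placeholders):
@code
if (__kmpc_single(&loc, gtid)) {
  // code executed by the single winning thread
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // omitted when 'nowait' is present
@endcode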
1790 */ 1791 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1792 __kmp_exit_single(global_tid); 1793 KMP_POP_PARTITIONED_TIMER(); 1794 1795 #if OMPT_SUPPORT && OMPT_OPTIONAL 1796 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1797 kmp_team_t *team = this_thr->th.th_team; 1798 int tid = __kmp_tid_from_gtid(global_tid); 1799 1800 if (ompt_enabled.ompt_callback_work) { 1801 ompt_callbacks.ompt_callback(ompt_callback_work)( 1802 ompt_work_single_executor, ompt_scope_end, 1803 &(team->t.ompt_team_info.parallel_data), 1804 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1805 OMPT_GET_RETURN_ADDRESS(0)); 1806 } 1807 #endif 1808 } 1809 1810 /*! 1811 @ingroup WORK_SHARING 1812 @param loc Source location 1813 @param global_tid Global thread id 1814 1815 Mark the end of a statically scheduled loop. 1816 */ 1817 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1818 KMP_POP_PARTITIONED_TIMER(); 1819 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1820 1821 #if OMPT_SUPPORT && OMPT_OPTIONAL 1822 if (ompt_enabled.ompt_callback_work) { 1823 ompt_work_t ompt_work_type = ompt_work_loop; 1824 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1825 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1826 // Determine workshare type 1827 if (loc != NULL) { 1828 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1829 ompt_work_type = ompt_work_loop; 1830 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1831 ompt_work_type = ompt_work_sections; 1832 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1833 ompt_work_type = ompt_work_distribute; 1834 } else { 1835 // use default set above. 1836 // a warning about this case is provided in __kmpc_for_static_init 1837 } 1838 KMP_DEBUG_ASSERT(ompt_work_type); 1839 } 1840 ompt_callbacks.ompt_callback(ompt_callback_work)( 1841 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1842 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1843 } 1844 #endif 1845 if (__kmp_env_consistency_check) 1846 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1847 } 1848 1849 // User routines which take C-style arguments (call by value) 1850 // different from the Fortran equivalent routines 1851 1852 void ompc_set_num_threads(int arg) { 1853 // !!!!! TODO: check the per-task binding 1854 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1855 } 1856 1857 void ompc_set_dynamic(int flag) { 1858 kmp_info_t *thread; 1859 1860 /* For the thread-private implementation of the internal controls */ 1861 thread = __kmp_entry_thread(); 1862 1863 __kmp_save_internal_controls(thread); 1864 1865 set__dynamic(thread, flag ? TRUE : FALSE); 1866 } 1867 1868 void ompc_set_nested(int flag) { 1869 kmp_info_t *thread; 1870 1871 /* For the thread-private internal controls implementation */ 1872 thread = __kmp_entry_thread(); 1873 1874 __kmp_save_internal_controls(thread); 1875 1876 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1877 } 1878 1879 void ompc_set_max_active_levels(int max_active_levels) { 1880 /* TO DO */ 1881 /* we want per-task implementation of this internal control */ 1882 1883 /* For the per-thread internal controls implementation */ 1884 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1885 } 1886 1887 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1888 // !!!!! 
TODO: check the per-task binding 1889 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1890 } 1891 1892 int ompc_get_ancestor_thread_num(int level) { 1893 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1894 } 1895 1896 int ompc_get_team_size(int level) { 1897 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1898 } 1899 1900 /* OpenMP 5.0 Affinity Format API */ 1901 1902 void ompc_set_affinity_format(char const *format) { 1903 if (!__kmp_init_serial) { 1904 __kmp_serial_initialize(); 1905 } 1906 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1907 format, KMP_STRLEN(format) + 1); 1908 } 1909 1910 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1911 size_t format_size; 1912 if (!__kmp_init_serial) { 1913 __kmp_serial_initialize(); 1914 } 1915 format_size = KMP_STRLEN(__kmp_affinity_format); 1916 if (buffer && size) { 1917 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1918 format_size + 1); 1919 } 1920 return format_size; 1921 } 1922 1923 void ompc_display_affinity(char const *format) { 1924 int gtid; 1925 if (!TCR_4(__kmp_init_middle)) { 1926 __kmp_middle_initialize(); 1927 } 1928 gtid = __kmp_get_gtid(); 1929 __kmp_aux_display_affinity(gtid, format); 1930 } 1931 1932 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1933 char const *format) { 1934 int gtid; 1935 size_t num_required; 1936 kmp_str_buf_t capture_buf; 1937 if (!TCR_4(__kmp_init_middle)) { 1938 __kmp_middle_initialize(); 1939 } 1940 gtid = __kmp_get_gtid(); 1941 __kmp_str_buf_init(&capture_buf); 1942 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1943 if (buffer && buf_size) { 1944 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1945 capture_buf.used + 1); 1946 } 1947 __kmp_str_buf_free(&capture_buf); 1948 return num_required; 1949 } 1950 1951 void kmpc_set_stacksize(int arg) { 1952 // __kmp_aux_set_stacksize initializes the library if needed 1953 __kmp_aux_set_stacksize(arg); 1954 } 1955 1956 void kmpc_set_stacksize_s(size_t arg) { 1957 // __kmp_aux_set_stacksize initializes the library if needed 1958 __kmp_aux_set_stacksize(arg); 1959 } 1960 1961 void kmpc_set_blocktime(int arg) { 1962 int gtid, tid; 1963 kmp_info_t *thread; 1964 1965 gtid = __kmp_entry_gtid(); 1966 tid = __kmp_tid_from_gtid(gtid); 1967 thread = __kmp_thread_from_gtid(gtid); 1968 1969 __kmp_aux_set_blocktime(arg, thread, tid); 1970 } 1971 1972 void kmpc_set_library(int arg) { 1973 // __kmp_user_set_library initializes the library if needed 1974 __kmp_user_set_library((enum library_type)arg); 1975 } 1976 1977 void kmpc_set_defaults(char const *str) { 1978 // __kmp_aux_set_defaults initializes the library if needed 1979 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1980 } 1981 1982 void kmpc_set_disp_num_buffers(int arg) { 1983 // ignore after initialization because some teams have already 1984 // allocated dispatch buffers 1985 if (__kmp_init_serial == 0 && arg > 0) 1986 __kmp_dispatch_num_buffers = arg; 1987 } 1988 1989 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1990 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1991 return -1; 1992 #else 1993 if (!TCR_4(__kmp_init_middle)) { 1994 __kmp_middle_initialize(); 1995 } 1996 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1997 #endif 1998 } 1999 2000 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2001 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2002 return -1; 2003 #else 2004 if (!TCR_4(__kmp_init_middle)) { 2005 __kmp_middle_initialize(); 
2006 } 2007 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2008 #endif 2009 } 2010 2011 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2012 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2013 return -1; 2014 #else 2015 if (!TCR_4(__kmp_init_middle)) { 2016 __kmp_middle_initialize(); 2017 } 2018 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2019 #endif 2020 } 2021 2022 /* -------------------------------------------------------------------------- */ 2023 /*! 2024 @ingroup THREADPRIVATE 2025 @param loc source location information 2026 @param gtid global thread number 2027 @param cpy_size size of the cpy_data buffer 2028 @param cpy_data pointer to data to be copied 2029 @param cpy_func helper function to call for copying data 2030 @param didit flag variable: 1=single thread; 0=not single thread 2031 2032 __kmpc_copyprivate implements the interface for the private data broadcast 2033 needed for the copyprivate clause associated with a single region in an 2034 OpenMP<sup>*</sup> program (both C and Fortran). 2035 All threads participating in the parallel region call this routine. 2036 One of the threads (called the single thread) should have the <tt>didit</tt> 2037 variable set to 1 and all other threads should have that variable set to 0. 2038 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2039 2040 The OpenMP specification forbids the use of nowait on the single region when a 2041 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2042 barrier internally to avoid race conditions, so the code generation for the 2043 single region should avoid generating a barrier after the call to @ref 2044 __kmpc_copyprivate. 2045 2046 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2047 The <tt>loc</tt> parameter is a pointer to source location information. 2048 2049 Internal implementation: The single thread will first copy its descriptor 2050 address (cpy_data) to a team-private location, then the other threads will each 2051 call the function pointed to by the parameter cpy_func, which carries out the 2052 copy by copying the data using the cpy_data buffer. 2053 2054 The cpy_func routine used for the copy and the contents of the data area defined 2055 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2056 to be done. For instance, the cpy_data buffer can hold the actual data to be 2057 copied or it may hold a list of pointers to the data. The cpy_func routine must 2058 interpret the cpy_data buffer appropriately. 2059 2060 The interface to cpy_func is as follows: 2061 @code 2062 void cpy_func( void *destination, void *source ) 2063 @endcode 2064 where void *destination is the cpy_data pointer for the thread being copied to 2065 and void *source is the cpy_data pointer for the thread being copied from. 
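As an illustration only, a <tt>single copyprivate(x)</tt> region might be
lowered along the following lines. The names <tt>cpy_t</tt>,
<tt>cpy_helper</tt>, <tt>produce_value</tt>, <tt>x</tt>, <tt>loc</tt> and
<tt>gtid</tt> are placeholders, not part of the runtime interface; a real code
generator may lay out the buffer differently.

@code
typedef struct { int *x_ptr; } cpy_t;

static void cpy_helper(void *dst, void *src) {
  // copy the value produced by the single thread into this thread's x
  *((cpy_t *)dst)->x_ptr = *((cpy_t *)src)->x_ptr;
}

// executed by every thread of the team:
kmp_int32 didit = 0;
cpy_t cpy = {&x}; // x is this thread's private copy
if (__kmpc_single(&loc, gtid)) {
  x = produce_value(); // only the single thread initializes x
  didit = 1;
  __kmpc_end_single(&loc, gtid);
}
__kmpc_copyprivate(&loc, gtid, sizeof(cpy_t), &cpy, cpy_helper, didit);
// no trailing barrier is emitted: __kmpc_copyprivate synchronizes internally
@endcode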
2066 */ 2067 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2068 void *cpy_data, void (*cpy_func)(void *, void *), 2069 kmp_int32 didit) { 2070 void **data_ptr; 2071 2072 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2073 2074 KMP_MB(); 2075 2076 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2077 2078 if (__kmp_env_consistency_check) { 2079 if (loc == 0) { 2080 KMP_WARNING(ConstructIdentInvalid); 2081 } 2082 } 2083 2084 // ToDo: Optimize the following two barriers into some kind of split barrier 2085 2086 if (didit) 2087 *data_ptr = cpy_data; 2088 2089 #if OMPT_SUPPORT 2090 ompt_frame_t *ompt_frame; 2091 if (ompt_enabled.enabled) { 2092 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2093 if (ompt_frame->enter_frame.ptr == NULL) 2094 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2095 OMPT_STORE_RETURN_ADDRESS(gtid); 2096 } 2097 #endif 2098 /* This barrier is not a barrier region boundary */ 2099 #if USE_ITT_NOTIFY 2100 __kmp_threads[gtid]->th.th_ident = loc; 2101 #endif 2102 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2103 2104 if (!didit) 2105 (*cpy_func)(cpy_data, *data_ptr); 2106 2107 // Consider next barrier a user-visible barrier for barrier region boundaries 2108 // Nesting checks are already handled by the single construct checks 2109 2110 #if OMPT_SUPPORT 2111 if (ompt_enabled.enabled) { 2112 OMPT_STORE_RETURN_ADDRESS(gtid); 2113 } 2114 #endif 2115 #if USE_ITT_NOTIFY 2116 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2117 // tasks can overwrite the location) 2118 #endif 2119 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2120 #if OMPT_SUPPORT && OMPT_OPTIONAL 2121 if (ompt_enabled.enabled) { 2122 ompt_frame->enter_frame = ompt_data_none; 2123 } 2124 #endif 2125 } 2126 2127 /* -------------------------------------------------------------------------- */ 2128 2129 #define INIT_LOCK __kmp_init_user_lock_with_checks 2130 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2131 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2132 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2133 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2134 #define ACQUIRE_NESTED_LOCK_TIMED \ 2135 __kmp_acquire_nested_user_lock_with_checks_timed 2136 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2137 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2138 #define TEST_LOCK __kmp_test_user_lock_with_checks 2139 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2140 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2141 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2142 2143 // TODO: Make check abort messages use location info & pass it into 2144 // with_checks routines 2145 2146 #if KMP_USE_DYNAMIC_LOCK 2147 2148 // internal lock initializer 2149 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2150 kmp_dyna_lockseq_t seq) { 2151 if (KMP_IS_D_LOCK(seq)) { 2152 KMP_INIT_D_LOCK(lock, seq); 2153 #if USE_ITT_BUILD 2154 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2155 #endif 2156 } else { 2157 KMP_INIT_I_LOCK(lock, seq); 2158 #if USE_ITT_BUILD 2159 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2160 __kmp_itt_lock_creating(ilk->lock, loc); 2161 #endif 2162 } 2163 } 2164 2165 // internal nest lock initializer 2166 static __forceinline void 2167 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2168 kmp_dyna_lockseq_t seq) { 2169 #if KMP_USE_TSX 2170 // Don't have nested lock implementation for speculative locks 2171 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2172 seq = __kmp_user_lock_seq; 2173 #endif 2174 switch (seq) { 2175 case lockseq_tas: 2176 seq = lockseq_nested_tas; 2177 break; 2178 #if KMP_USE_FUTEX 2179 case lockseq_futex: 2180 seq = lockseq_nested_futex; 2181 break; 2182 #endif 2183 case lockseq_ticket: 2184 seq = lockseq_nested_ticket; 2185 break; 2186 case lockseq_queuing: 2187 seq = lockseq_nested_queuing; 2188 break; 2189 case lockseq_drdpa: 2190 seq = lockseq_nested_drdpa; 2191 break; 2192 default: 2193 seq = lockseq_nested_queuing; 2194 } 2195 KMP_INIT_I_LOCK(lock, seq); 2196 #if USE_ITT_BUILD 2197 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2198 __kmp_itt_lock_creating(ilk->lock, loc); 2199 #endif 2200 } 2201 2202 /* initialize the lock with a hint */ 2203 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2204 uintptr_t hint) { 2205 KMP_DEBUG_ASSERT(__kmp_init_serial); 2206 if (__kmp_env_consistency_check && user_lock == NULL) { 2207 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2208 } 2209 2210 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2211 2212 #if OMPT_SUPPORT && OMPT_OPTIONAL 2213 // This is the case, if called from omp_init_lock_with_hint: 2214 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2215 if (!codeptr) 2216 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2217 if (ompt_enabled.ompt_callback_lock_init) { 2218 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2219 ompt_mutex_lock, (omp_lock_hint_t)hint, 2220 __ompt_get_mutex_impl_type(user_lock), 2221 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2222 } 2223 #endif 2224 } 2225 2226 /* initialize the lock with a hint */ 2227 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2228 void **user_lock, uintptr_t hint) { 2229 KMP_DEBUG_ASSERT(__kmp_init_serial); 2230 if (__kmp_env_consistency_check && user_lock == NULL) { 2231 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2232 } 2233 2234 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2235 2236 #if OMPT_SUPPORT && OMPT_OPTIONAL 2237 // This is the case, if called from omp_init_lock_with_hint: 2238 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2239 if (!codeptr) 2240 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2241 if (ompt_enabled.ompt_callback_lock_init) { 2242 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2243 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2244 __ompt_get_mutex_impl_type(user_lock), 2245 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2246 } 2247 #endif 2248 } 2249 2250 #endif // KMP_USE_DYNAMIC_LOCK 2251 2252 /* initialize the lock */ 2253 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2254 #if KMP_USE_DYNAMIC_LOCK 2255 2256 KMP_DEBUG_ASSERT(__kmp_init_serial); 2257 if (__kmp_env_consistency_check && user_lock == NULL) { 2258 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2259 } 2260 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2261 2262 #if OMPT_SUPPORT && OMPT_OPTIONAL 2263 // This is the case, if called from omp_init_lock_with_hint: 2264 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2265 if (!codeptr) 2266 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2267 if (ompt_enabled.ompt_callback_lock_init) { 2268 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2269 ompt_mutex_lock, omp_lock_hint_none, 2270 
__ompt_get_mutex_impl_type(user_lock), 2271 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2272 } 2273 #endif 2274 2275 #else // KMP_USE_DYNAMIC_LOCK 2276 2277 static char const *const func = "omp_init_lock"; 2278 kmp_user_lock_p lck; 2279 KMP_DEBUG_ASSERT(__kmp_init_serial); 2280 2281 if (__kmp_env_consistency_check) { 2282 if (user_lock == NULL) { 2283 KMP_FATAL(LockIsUninitialized, func); 2284 } 2285 } 2286 2287 KMP_CHECK_USER_LOCK_INIT(); 2288 2289 if ((__kmp_user_lock_kind == lk_tas) && 2290 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2291 lck = (kmp_user_lock_p)user_lock; 2292 } 2293 #if KMP_USE_FUTEX 2294 else if ((__kmp_user_lock_kind == lk_futex) && 2295 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2296 lck = (kmp_user_lock_p)user_lock; 2297 } 2298 #endif 2299 else { 2300 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2301 } 2302 INIT_LOCK(lck); 2303 __kmp_set_user_lock_location(lck, loc); 2304 2305 #if OMPT_SUPPORT && OMPT_OPTIONAL 2306 // This is the case, if called from omp_init_lock_with_hint: 2307 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2308 if (!codeptr) 2309 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2310 if (ompt_enabled.ompt_callback_lock_init) { 2311 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2312 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2313 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2314 } 2315 #endif 2316 2317 #if USE_ITT_BUILD 2318 __kmp_itt_lock_creating(lck); 2319 #endif /* USE_ITT_BUILD */ 2320 2321 #endif // KMP_USE_DYNAMIC_LOCK 2322 } // __kmpc_init_lock 2323 2324 /* initialize the lock */ 2325 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2326 #if KMP_USE_DYNAMIC_LOCK 2327 2328 KMP_DEBUG_ASSERT(__kmp_init_serial); 2329 if (__kmp_env_consistency_check && user_lock == NULL) { 2330 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2331 } 2332 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2333 2334 #if OMPT_SUPPORT && OMPT_OPTIONAL 2335 // This is the case, if called from omp_init_lock_with_hint: 2336 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2337 if (!codeptr) 2338 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2339 if (ompt_enabled.ompt_callback_lock_init) { 2340 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2341 ompt_mutex_nest_lock, omp_lock_hint_none, 2342 __ompt_get_mutex_impl_type(user_lock), 2343 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2344 } 2345 #endif 2346 2347 #else // KMP_USE_DYNAMIC_LOCK 2348 2349 static char const *const func = "omp_init_nest_lock"; 2350 kmp_user_lock_p lck; 2351 KMP_DEBUG_ASSERT(__kmp_init_serial); 2352 2353 if (__kmp_env_consistency_check) { 2354 if (user_lock == NULL) { 2355 KMP_FATAL(LockIsUninitialized, func); 2356 } 2357 } 2358 2359 KMP_CHECK_USER_LOCK_INIT(); 2360 2361 if ((__kmp_user_lock_kind == lk_tas) && 2362 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2363 OMP_NEST_LOCK_T_SIZE)) { 2364 lck = (kmp_user_lock_p)user_lock; 2365 } 2366 #if KMP_USE_FUTEX 2367 else if ((__kmp_user_lock_kind == lk_futex) && 2368 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2369 OMP_NEST_LOCK_T_SIZE)) { 2370 lck = (kmp_user_lock_p)user_lock; 2371 } 2372 #endif 2373 else { 2374 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2375 } 2376 2377 INIT_NESTED_LOCK(lck); 2378 __kmp_set_user_lock_location(lck, loc); 2379 2380 #if OMPT_SUPPORT && OMPT_OPTIONAL 2381 // This is the case, if called from omp_init_lock_with_hint: 2382 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 
2383 if (!codeptr) 2384 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2385 if (ompt_enabled.ompt_callback_lock_init) { 2386 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2387 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2388 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2389 } 2390 #endif 2391 2392 #if USE_ITT_BUILD 2393 __kmp_itt_lock_creating(lck); 2394 #endif /* USE_ITT_BUILD */ 2395 2396 #endif // KMP_USE_DYNAMIC_LOCK 2397 } // __kmpc_init_nest_lock 2398 2399 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2400 #if KMP_USE_DYNAMIC_LOCK 2401 2402 #if USE_ITT_BUILD 2403 kmp_user_lock_p lck; 2404 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2405 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2406 } else { 2407 lck = (kmp_user_lock_p)user_lock; 2408 } 2409 __kmp_itt_lock_destroyed(lck); 2410 #endif 2411 #if OMPT_SUPPORT && OMPT_OPTIONAL 2412 // This is the case, if called from omp_init_lock_with_hint: 2413 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2414 if (!codeptr) 2415 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2416 if (ompt_enabled.ompt_callback_lock_destroy) { 2417 kmp_user_lock_p lck; 2418 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2419 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2420 } else { 2421 lck = (kmp_user_lock_p)user_lock; 2422 } 2423 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2424 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2425 } 2426 #endif 2427 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2428 #else 2429 kmp_user_lock_p lck; 2430 2431 if ((__kmp_user_lock_kind == lk_tas) && 2432 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2433 lck = (kmp_user_lock_p)user_lock; 2434 } 2435 #if KMP_USE_FUTEX 2436 else if ((__kmp_user_lock_kind == lk_futex) && 2437 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2438 lck = (kmp_user_lock_p)user_lock; 2439 } 2440 #endif 2441 else { 2442 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2443 } 2444 2445 #if OMPT_SUPPORT && OMPT_OPTIONAL 2446 // This is the case, if called from omp_init_lock_with_hint: 2447 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2448 if (!codeptr) 2449 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2450 if (ompt_enabled.ompt_callback_lock_destroy) { 2451 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2452 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2453 } 2454 #endif 2455 2456 #if USE_ITT_BUILD 2457 __kmp_itt_lock_destroyed(lck); 2458 #endif /* USE_ITT_BUILD */ 2459 DESTROY_LOCK(lck); 2460 2461 if ((__kmp_user_lock_kind == lk_tas) && 2462 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2463 ; 2464 } 2465 #if KMP_USE_FUTEX 2466 else if ((__kmp_user_lock_kind == lk_futex) && 2467 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2468 ; 2469 } 2470 #endif 2471 else { 2472 __kmp_user_lock_free(user_lock, gtid, lck); 2473 } 2474 #endif // KMP_USE_DYNAMIC_LOCK 2475 } // __kmpc_destroy_lock 2476 2477 /* destroy the lock */ 2478 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2479 #if KMP_USE_DYNAMIC_LOCK 2480 2481 #if USE_ITT_BUILD 2482 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2483 __kmp_itt_lock_destroyed(ilk->lock); 2484 #endif 2485 #if OMPT_SUPPORT && OMPT_OPTIONAL 2486 // This is the case, if called from omp_init_lock_with_hint: 2487 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2488 if (!codeptr) 2489 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2490 if 
(ompt_enabled.ompt_callback_lock_destroy) { 2491 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2492 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2493 } 2494 #endif 2495 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2496 2497 #else // KMP_USE_DYNAMIC_LOCK 2498 2499 kmp_user_lock_p lck; 2500 2501 if ((__kmp_user_lock_kind == lk_tas) && 2502 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2503 OMP_NEST_LOCK_T_SIZE)) { 2504 lck = (kmp_user_lock_p)user_lock; 2505 } 2506 #if KMP_USE_FUTEX 2507 else if ((__kmp_user_lock_kind == lk_futex) && 2508 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2509 OMP_NEST_LOCK_T_SIZE)) { 2510 lck = (kmp_user_lock_p)user_lock; 2511 } 2512 #endif 2513 else { 2514 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2515 } 2516 2517 #if OMPT_SUPPORT && OMPT_OPTIONAL 2518 // This is the case, if called from omp_init_lock_with_hint: 2519 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2520 if (!codeptr) 2521 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2522 if (ompt_enabled.ompt_callback_lock_destroy) { 2523 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2524 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2525 } 2526 #endif 2527 2528 #if USE_ITT_BUILD 2529 __kmp_itt_lock_destroyed(lck); 2530 #endif /* USE_ITT_BUILD */ 2531 2532 DESTROY_NESTED_LOCK(lck); 2533 2534 if ((__kmp_user_lock_kind == lk_tas) && 2535 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2536 OMP_NEST_LOCK_T_SIZE)) { 2537 ; 2538 } 2539 #if KMP_USE_FUTEX 2540 else if ((__kmp_user_lock_kind == lk_futex) && 2541 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2542 OMP_NEST_LOCK_T_SIZE)) { 2543 ; 2544 } 2545 #endif 2546 else { 2547 __kmp_user_lock_free(user_lock, gtid, lck); 2548 } 2549 #endif // KMP_USE_DYNAMIC_LOCK 2550 } // __kmpc_destroy_nest_lock 2551 2552 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2553 KMP_COUNT_BLOCK(OMP_set_lock); 2554 #if KMP_USE_DYNAMIC_LOCK 2555 int tag = KMP_EXTRACT_D_TAG(user_lock); 2556 #if USE_ITT_BUILD 2557 __kmp_itt_lock_acquiring( 2558 (kmp_user_lock_p) 2559 user_lock); // itt function will get to the right lock object. 
2560 #endif 2561 #if OMPT_SUPPORT && OMPT_OPTIONAL 2562 // This is the case, if called from omp_init_lock_with_hint: 2563 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2564 if (!codeptr) 2565 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2566 if (ompt_enabled.ompt_callback_mutex_acquire) { 2567 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2568 ompt_mutex_lock, omp_lock_hint_none, 2569 __ompt_get_mutex_impl_type(user_lock), 2570 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2571 } 2572 #endif 2573 #if KMP_USE_INLINED_TAS 2574 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2575 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2576 } else 2577 #elif KMP_USE_INLINED_FUTEX 2578 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2579 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2580 } else 2581 #endif 2582 { 2583 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2584 } 2585 #if USE_ITT_BUILD 2586 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2587 #endif 2588 #if OMPT_SUPPORT && OMPT_OPTIONAL 2589 if (ompt_enabled.ompt_callback_mutex_acquired) { 2590 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2591 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2592 } 2593 #endif 2594 2595 #else // KMP_USE_DYNAMIC_LOCK 2596 2597 kmp_user_lock_p lck; 2598 2599 if ((__kmp_user_lock_kind == lk_tas) && 2600 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2601 lck = (kmp_user_lock_p)user_lock; 2602 } 2603 #if KMP_USE_FUTEX 2604 else if ((__kmp_user_lock_kind == lk_futex) && 2605 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2606 lck = (kmp_user_lock_p)user_lock; 2607 } 2608 #endif 2609 else { 2610 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2611 } 2612 2613 #if USE_ITT_BUILD 2614 __kmp_itt_lock_acquiring(lck); 2615 #endif /* USE_ITT_BUILD */ 2616 #if OMPT_SUPPORT && OMPT_OPTIONAL 2617 // This is the case, if called from omp_init_lock_with_hint: 2618 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2619 if (!codeptr) 2620 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2621 if (ompt_enabled.ompt_callback_mutex_acquire) { 2622 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2623 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2624 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2625 } 2626 #endif 2627 2628 ACQUIRE_LOCK(lck, gtid); 2629 2630 #if USE_ITT_BUILD 2631 __kmp_itt_lock_acquired(lck); 2632 #endif /* USE_ITT_BUILD */ 2633 2634 #if OMPT_SUPPORT && OMPT_OPTIONAL 2635 if (ompt_enabled.ompt_callback_mutex_acquired) { 2636 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2637 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2638 } 2639 #endif 2640 2641 #endif // KMP_USE_DYNAMIC_LOCK 2642 } 2643 2644 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2645 #if KMP_USE_DYNAMIC_LOCK 2646 2647 #if USE_ITT_BUILD 2648 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2649 #endif 2650 #if OMPT_SUPPORT && OMPT_OPTIONAL 2651 // This is the case, if called from omp_init_lock_with_hint: 2652 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2653 if (!codeptr) 2654 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2655 if (ompt_enabled.enabled) { 2656 if (ompt_enabled.ompt_callback_mutex_acquire) { 2657 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2658 ompt_mutex_nest_lock, omp_lock_hint_none, 2659 __ompt_get_mutex_impl_type(user_lock), 2660 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2661 } 2662 } 2663 #endif 2664 int acquire_status = 2665 KMP_D_LOCK_FUNC(user_lock, 
set)((kmp_dyna_lock_t *)user_lock, gtid); 2666 (void) acquire_status; 2667 #if USE_ITT_BUILD 2668 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2669 #endif 2670 2671 #if OMPT_SUPPORT && OMPT_OPTIONAL 2672 if (ompt_enabled.enabled) { 2673 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2674 if (ompt_enabled.ompt_callback_mutex_acquired) { 2675 // lock_first 2676 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2677 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2678 codeptr); 2679 } 2680 } else { 2681 if (ompt_enabled.ompt_callback_nest_lock) { 2682 // lock_next 2683 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2684 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2685 } 2686 } 2687 } 2688 #endif 2689 2690 #else // KMP_USE_DYNAMIC_LOCK 2691 int acquire_status; 2692 kmp_user_lock_p lck; 2693 2694 if ((__kmp_user_lock_kind == lk_tas) && 2695 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2696 OMP_NEST_LOCK_T_SIZE)) { 2697 lck = (kmp_user_lock_p)user_lock; 2698 } 2699 #if KMP_USE_FUTEX 2700 else if ((__kmp_user_lock_kind == lk_futex) && 2701 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2702 OMP_NEST_LOCK_T_SIZE)) { 2703 lck = (kmp_user_lock_p)user_lock; 2704 } 2705 #endif 2706 else { 2707 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2708 } 2709 2710 #if USE_ITT_BUILD 2711 __kmp_itt_lock_acquiring(lck); 2712 #endif /* USE_ITT_BUILD */ 2713 #if OMPT_SUPPORT && OMPT_OPTIONAL 2714 // This is the case, if called from omp_init_lock_with_hint: 2715 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2716 if (!codeptr) 2717 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2718 if (ompt_enabled.enabled) { 2719 if (ompt_enabled.ompt_callback_mutex_acquire) { 2720 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2721 ompt_mutex_nest_lock, omp_lock_hint_none, 2722 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2723 codeptr); 2724 } 2725 } 2726 #endif 2727 2728 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2729 2730 #if USE_ITT_BUILD 2731 __kmp_itt_lock_acquired(lck); 2732 #endif /* USE_ITT_BUILD */ 2733 2734 #if OMPT_SUPPORT && OMPT_OPTIONAL 2735 if (ompt_enabled.enabled) { 2736 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2737 if (ompt_enabled.ompt_callback_mutex_acquired) { 2738 // lock_first 2739 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2740 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2741 } 2742 } else { 2743 if (ompt_enabled.ompt_callback_nest_lock) { 2744 // lock_next 2745 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2746 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2747 } 2748 } 2749 } 2750 #endif 2751 2752 #endif // KMP_USE_DYNAMIC_LOCK 2753 } 2754 2755 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2756 #if KMP_USE_DYNAMIC_LOCK 2757 2758 int tag = KMP_EXTRACT_D_TAG(user_lock); 2759 #if USE_ITT_BUILD 2760 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2761 #endif 2762 #if KMP_USE_INLINED_TAS 2763 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2764 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2765 } else 2766 #elif KMP_USE_INLINED_FUTEX 2767 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2768 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2769 } else 2770 #endif 2771 { 2772 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2773 } 2774 2775 #if OMPT_SUPPORT && OMPT_OPTIONAL 2776 // This is the case, if called from omp_init_lock_with_hint: 2777 void 
*codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2778 if (!codeptr) 2779 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2780 if (ompt_enabled.ompt_callback_mutex_released) { 2781 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2782 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2783 } 2784 #endif 2785 2786 #else // KMP_USE_DYNAMIC_LOCK 2787 2788 kmp_user_lock_p lck; 2789 2790 /* Can't use serial interval since not block structured */ 2791 /* release the lock */ 2792 2793 if ((__kmp_user_lock_kind == lk_tas) && 2794 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2795 #if KMP_OS_LINUX && \ 2796 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2797 // "fast" path implemented to fix customer performance issue 2798 #if USE_ITT_BUILD 2799 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2800 #endif /* USE_ITT_BUILD */ 2801 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2802 KMP_MB(); 2803 2804 #if OMPT_SUPPORT && OMPT_OPTIONAL 2805 // This is the case, if called from omp_init_lock_with_hint: 2806 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2807 if (!codeptr) 2808 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2809 if (ompt_enabled.ompt_callback_mutex_released) { 2810 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2811 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2812 } 2813 #endif 2814 2815 return; 2816 #else 2817 lck = (kmp_user_lock_p)user_lock; 2818 #endif 2819 } 2820 #if KMP_USE_FUTEX 2821 else if ((__kmp_user_lock_kind == lk_futex) && 2822 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2823 lck = (kmp_user_lock_p)user_lock; 2824 } 2825 #endif 2826 else { 2827 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2828 } 2829 2830 #if USE_ITT_BUILD 2831 __kmp_itt_lock_releasing(lck); 2832 #endif /* USE_ITT_BUILD */ 2833 2834 RELEASE_LOCK(lck, gtid); 2835 2836 #if OMPT_SUPPORT && OMPT_OPTIONAL 2837 // This is the case, if called from omp_init_lock_with_hint: 2838 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2839 if (!codeptr) 2840 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2841 if (ompt_enabled.ompt_callback_mutex_released) { 2842 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2843 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2844 } 2845 #endif 2846 2847 #endif // KMP_USE_DYNAMIC_LOCK 2848 } 2849 2850 /* release the lock */ 2851 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2852 #if KMP_USE_DYNAMIC_LOCK 2853 2854 #if USE_ITT_BUILD 2855 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2856 #endif 2857 int release_status = 2858 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2859 (void) release_status; 2860 2861 #if OMPT_SUPPORT && OMPT_OPTIONAL 2862 // This is the case, if called from omp_init_lock_with_hint: 2863 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2864 if (!codeptr) 2865 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2866 if (ompt_enabled.enabled) { 2867 if (release_status == KMP_LOCK_RELEASED) { 2868 if (ompt_enabled.ompt_callback_mutex_released) { 2869 // release_lock_last 2870 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2871 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2872 codeptr); 2873 } 2874 } else if (ompt_enabled.ompt_callback_nest_lock) { 2875 // release_lock_prev 2876 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2877 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2878 } 2879 } 2880 #endif 2881 2882 #else // KMP_USE_DYNAMIC_LOCK 2883 2884 kmp_user_lock_p lck; 
2885 2886 /* Can't use serial interval since not block structured */ 2887 2888 if ((__kmp_user_lock_kind == lk_tas) && 2889 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2890 OMP_NEST_LOCK_T_SIZE)) { 2891 #if KMP_OS_LINUX && \ 2892 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2893 // "fast" path implemented to fix customer performance issue 2894 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2895 #if USE_ITT_BUILD 2896 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2897 #endif /* USE_ITT_BUILD */ 2898 2899 #if OMPT_SUPPORT && OMPT_OPTIONAL 2900 int release_status = KMP_LOCK_STILL_HELD; 2901 #endif 2902 2903 if (--(tl->lk.depth_locked) == 0) { 2904 TCW_4(tl->lk.poll, 0); 2905 #if OMPT_SUPPORT && OMPT_OPTIONAL 2906 release_status = KMP_LOCK_RELEASED; 2907 #endif 2908 } 2909 KMP_MB(); 2910 2911 #if OMPT_SUPPORT && OMPT_OPTIONAL 2912 // This is the case, if called from omp_init_lock_with_hint: 2913 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2914 if (!codeptr) 2915 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2916 if (ompt_enabled.enabled) { 2917 if (release_status == KMP_LOCK_RELEASED) { 2918 if (ompt_enabled.ompt_callback_mutex_released) { 2919 // release_lock_last 2920 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2921 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2922 } 2923 } else if (ompt_enabled.ompt_callback_nest_lock) { 2924 // release_lock_previous 2925 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2926 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2927 } 2928 } 2929 #endif 2930 2931 return; 2932 #else 2933 lck = (kmp_user_lock_p)user_lock; 2934 #endif 2935 } 2936 #if KMP_USE_FUTEX 2937 else if ((__kmp_user_lock_kind == lk_futex) && 2938 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2939 OMP_NEST_LOCK_T_SIZE)) { 2940 lck = (kmp_user_lock_p)user_lock; 2941 } 2942 #endif 2943 else { 2944 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2945 } 2946 2947 #if USE_ITT_BUILD 2948 __kmp_itt_lock_releasing(lck); 2949 #endif /* USE_ITT_BUILD */ 2950 2951 int release_status; 2952 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2953 #if OMPT_SUPPORT && OMPT_OPTIONAL 2954 // This is the case, if called from omp_init_lock_with_hint: 2955 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2956 if (!codeptr) 2957 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2958 if (ompt_enabled.enabled) { 2959 if (release_status == KMP_LOCK_RELEASED) { 2960 if (ompt_enabled.ompt_callback_mutex_released) { 2961 // release_lock_last 2962 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2963 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2964 } 2965 } else if (ompt_enabled.ompt_callback_nest_lock) { 2966 // release_lock_previous 2967 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2968 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2969 } 2970 } 2971 #endif 2972 2973 #endif // KMP_USE_DYNAMIC_LOCK 2974 } 2975 2976 /* try to acquire the lock */ 2977 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2978 KMP_COUNT_BLOCK(OMP_test_lock); 2979 2980 #if KMP_USE_DYNAMIC_LOCK 2981 int rc; 2982 int tag = KMP_EXTRACT_D_TAG(user_lock); 2983 #if USE_ITT_BUILD 2984 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2985 #endif 2986 #if OMPT_SUPPORT && OMPT_OPTIONAL 2987 // This is the case, if called from omp_init_lock_with_hint: 2988 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2989 if (!codeptr) 2990 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2991 if (ompt_enabled.ompt_callback_mutex_acquire) { 2992 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2993 ompt_mutex_lock, omp_lock_hint_none, 2994 __ompt_get_mutex_impl_type(user_lock), 2995 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2996 } 2997 #endif 2998 #if KMP_USE_INLINED_TAS 2999 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3000 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3001 } else 3002 #elif KMP_USE_INLINED_FUTEX 3003 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3004 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3005 } else 3006 #endif 3007 { 3008 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3009 } 3010 if (rc) { 3011 #if USE_ITT_BUILD 3012 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3013 #endif 3014 #if OMPT_SUPPORT && OMPT_OPTIONAL 3015 if (ompt_enabled.ompt_callback_mutex_acquired) { 3016 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3017 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3018 } 3019 #endif 3020 return FTN_TRUE; 3021 } else { 3022 #if USE_ITT_BUILD 3023 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3024 #endif 3025 return FTN_FALSE; 3026 } 3027 3028 #else // KMP_USE_DYNAMIC_LOCK 3029 3030 kmp_user_lock_p lck; 3031 int rc; 3032 3033 if ((__kmp_user_lock_kind == lk_tas) && 3034 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3035 lck = (kmp_user_lock_p)user_lock; 3036 } 3037 #if KMP_USE_FUTEX 3038 else if ((__kmp_user_lock_kind == lk_futex) && 3039 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3040 lck = (kmp_user_lock_p)user_lock; 3041 } 3042 #endif 3043 else { 3044 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3045 } 3046 3047 #if USE_ITT_BUILD 3048 __kmp_itt_lock_acquiring(lck); 3049 #endif /* USE_ITT_BUILD */ 3050 #if OMPT_SUPPORT && OMPT_OPTIONAL 3051 // This is the case, if called from omp_init_lock_with_hint: 3052 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3053 if (!codeptr) 3054 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3055 if (ompt_enabled.ompt_callback_mutex_acquire) { 3056 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3057 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3058 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3059 } 3060 #endif 3061 3062 rc = TEST_LOCK(lck, gtid); 3063 #if USE_ITT_BUILD 3064 if (rc) { 3065 __kmp_itt_lock_acquired(lck); 3066 } else { 3067 __kmp_itt_lock_cancelled(lck); 3068 } 3069 #endif /* USE_ITT_BUILD */ 3070 #if OMPT_SUPPORT && OMPT_OPTIONAL 3071 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3072 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3073 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3074 } 3075 #endif 3076 3077 return (rc ? 
FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next 3189 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3190 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3191 } 3192 } 3193 } 3194 #endif 3195 return rc; 3196 3197 /* Can't use serial interval since not block structured */ 3198 3199 #endif // KMP_USE_DYNAMIC_LOCK 3200 } 3201 3202 // Interface to fast scalable reduce methods routines 3203 3204 // keep the selected method in a thread local structure for cross-function 3205 // usage: will be used in __kmpc_end_reduce* functions; 3206 // another solution: to re-determine the method one more time in 3207 // __kmpc_end_reduce* functions (new prototype required then) 3208 // AT: which solution is better? 3209 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3210 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3211 3212 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3213 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3214 3215 // description of the packed_reduction_method variable: look at the macros in 3216 // kmp.h 3217 3218 // used in a critical section reduce block 3219 static __forceinline void 3220 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3221 kmp_critical_name *crit) { 3222 3223 // this lock was visible to a customer and to the threading profile tool as a 3224 // serial overhead span (although it's used for an internal purpose only) 3225 // why was it visible in previous implementation? 3226 // should we keep it visible in new reduce block? 3227 kmp_user_lock_p lck; 3228 3229 #if KMP_USE_DYNAMIC_LOCK 3230 3231 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3232 // Check if it is initialized. 3233 if (*lk == 0) { 3234 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3235 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3236 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3237 } else { 3238 __kmp_init_indirect_csptr(crit, loc, global_tid, 3239 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3240 } 3241 } 3242 // Branch for accessing the actual lock object and set operation. This 3243 // branching is inevitable since this lock initialization does not follow the 3244 // normal dispatch path (lock table is not used). 3245 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3246 lck = (kmp_user_lock_p)lk; 3247 KMP_DEBUG_ASSERT(lck != NULL); 3248 if (__kmp_env_consistency_check) { 3249 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3250 } 3251 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3252 } else { 3253 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3254 lck = ilk->lock; 3255 KMP_DEBUG_ASSERT(lck != NULL); 3256 if (__kmp_env_consistency_check) { 3257 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3258 } 3259 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3260 } 3261 3262 #else // KMP_USE_DYNAMIC_LOCK 3263 3264 // We know that the fast reduction code is only emitted by Intel compilers 3265 // with 32 byte critical sections. If there isn't enough space, then we 3266 // have to use a pointer. 
3267 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3268 lck = (kmp_user_lock_p)crit; 3269 } else { 3270 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3271 } 3272 KMP_DEBUG_ASSERT(lck != NULL); 3273 3274 if (__kmp_env_consistency_check) 3275 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3276 3277 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3278 3279 #endif // KMP_USE_DYNAMIC_LOCK 3280 } 3281 3282 // used in a critical section reduce block 3283 static __forceinline void 3284 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3285 kmp_critical_name *crit) { 3286 3287 kmp_user_lock_p lck; 3288 3289 #if KMP_USE_DYNAMIC_LOCK 3290 3291 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3292 lck = (kmp_user_lock_p)crit; 3293 if (__kmp_env_consistency_check) 3294 __kmp_pop_sync(global_tid, ct_critical, loc); 3295 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3296 } else { 3297 kmp_indirect_lock_t *ilk = 3298 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3299 if (__kmp_env_consistency_check) 3300 __kmp_pop_sync(global_tid, ct_critical, loc); 3301 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3302 } 3303 3304 #else // KMP_USE_DYNAMIC_LOCK 3305 3306 // We know that the fast reduction code is only emitted by Intel compilers 3307 // with 32 byte critical sections. If there isn't enough space, then we have 3308 // to use a pointer. 3309 if (__kmp_base_user_lock_size > 32) { 3310 lck = *((kmp_user_lock_p *)crit); 3311 KMP_ASSERT(lck != NULL); 3312 } else { 3313 lck = (kmp_user_lock_p)crit; 3314 } 3315 3316 if (__kmp_env_consistency_check) 3317 __kmp_pop_sync(global_tid, ct_critical, loc); 3318 3319 __kmp_release_user_lock_with_checks(lck, global_tid); 3320 3321 #endif // KMP_USE_DYNAMIC_LOCK 3322 } // __kmp_end_critical_section_reduce_block 3323 3324 static __forceinline int 3325 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3326 int *task_state) { 3327 kmp_team_t *team; 3328 3329 // Check if we are inside the teams construct? 3330 if (th->th.th_teams_microtask) { 3331 *team_p = team = th->th.th_team; 3332 if (team->t.t_level == th->th.th_teams_level) { 3333 // This is reduction at teams construct. 3334 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3335 // Let's swap teams temporarily for the reduction. 3336 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3337 th->th.th_team = team->t.t_parent; 3338 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3339 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3340 *task_state = th->th.th_task_state; 3341 th->th.th_task_state = 0; 3342 3343 return 1; 3344 } 3345 } 3346 return 0; 3347 } 3348 3349 static __forceinline void 3350 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3351 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3352 th->th.th_info.ds.ds_tid = 0; 3353 th->th.th_team = team; 3354 th->th.th_team_nproc = team->t.t_nproc; 3355 th->th.th_task_team = team->t.t_task_team[task_state]; 3356 th->th.th_task_state = task_state; 3357 } 3358 3359 /* 2.a.i. Reduce Block without a terminating barrier */ 3360 /*! 
3361 @ingroup SYNCHRONIZATION 3362 @param loc source location information 3363 @param global_tid global thread number 3364 @param num_vars number of items (variables) to be reduced 3365 @param reduce_size size of data in bytes to be reduced 3366 @param reduce_data pointer to data to be reduced 3367 @param reduce_func callback function providing reduction operation on two 3368 operands and returning result of reduction in lhs_data 3369 @param lck pointer to the unique lock data structure 3370 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3371 threads if atomic reduction needed 3372 3373 The nowait version is used for a reduce clause with the nowait argument. 3374 */ 3375 kmp_int32 3376 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3377 size_t reduce_size, void *reduce_data, 3378 void (*reduce_func)(void *lhs_data, void *rhs_data), 3379 kmp_critical_name *lck) { 3380 3381 KMP_COUNT_BLOCK(REDUCE_nowait); 3382 int retval = 0; 3383 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3384 kmp_info_t *th; 3385 kmp_team_t *team; 3386 int teams_swapped = 0, task_state; 3387 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3388 3389 // why do we need this initialization here at all? 3390 // Reduction clause can not be used as a stand-alone directive. 3391 3392 // do not call __kmp_serial_initialize(), it will be called by 3393 // __kmp_parallel_initialize() if needed 3394 // possible detection of false-positive race by the threadchecker ??? 3395 if (!TCR_4(__kmp_init_parallel)) 3396 __kmp_parallel_initialize(); 3397 3398 __kmp_resume_if_soft_paused(); 3399 3400 // check correctness of reduce block nesting 3401 #if KMP_USE_DYNAMIC_LOCK 3402 if (__kmp_env_consistency_check) 3403 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3404 #else 3405 if (__kmp_env_consistency_check) 3406 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3407 #endif 3408 3409 th = __kmp_thread_from_gtid(global_tid); 3410 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3411 3412 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3413 // the value should be kept in a variable 3414 // the variable should be either a construct-specific or thread-specific 3415 // property, not a team specific property 3416 // (a thread can reach the next reduce block on the next construct, reduce 3417 // method may differ on the next construct) 3418 // an ident_t "loc" parameter could be used as a construct-specific property 3419 // (what if loc == 0?) 
3420 // (if both construct-specific and team-specific variables were shared, 3421 // then unness extra syncs should be needed) 3422 // a thread-specific variable is better regarding two issues above (next 3423 // construct and extra syncs) 3424 // a thread-specific "th_local.reduction_method" variable is used currently 3425 // each thread executes 'determine' and 'set' lines (no need to execute by one 3426 // thread, to avoid unness extra syncs) 3427 3428 packed_reduction_method = __kmp_determine_reduction_method( 3429 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3430 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3431 3432 if (packed_reduction_method == critical_reduce_block) { 3433 3434 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3435 retval = 1; 3436 3437 } else if (packed_reduction_method == empty_reduce_block) { 3438 3439 // usage: if team size == 1, no synchronization is required ( Intel 3440 // platforms only ) 3441 retval = 1; 3442 3443 } else if (packed_reduction_method == atomic_reduce_block) { 3444 3445 retval = 2; 3446 3447 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3448 // won't be called by the code gen) 3449 // (it's not quite good, because the checking block has been closed by 3450 // this 'pop', 3451 // but atomic operation has not been executed yet, will be executed 3452 // slightly later, literally on next instruction) 3453 if (__kmp_env_consistency_check) 3454 __kmp_pop_sync(global_tid, ct_reduce, loc); 3455 3456 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3457 tree_reduce_block)) { 3458 3459 // AT: performance issue: a real barrier here 3460 // AT: (if master goes slow, other threads are blocked here waiting for the 3461 // master to come and release them) 3462 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3463 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3464 // be confusing to a customer) 3465 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3466 // might go faster and be more in line with sense of NOWAIT 3467 // AT: TO DO: do epcc test and compare times 3468 3469 // this barrier should be invisible to a customer and to the threading profile 3470 // tool (it's neither a terminating barrier nor customer's code, it's 3471 // used for an internal purpose) 3472 #if OMPT_SUPPORT 3473 // JP: can this barrier potentially leed to task scheduling? 3474 // JP: as long as there is a barrier in the implementation, OMPT should and 3475 // will provide the barrier events 3476 // so we set-up the necessary frame/return addresses. 3477 ompt_frame_t *ompt_frame; 3478 if (ompt_enabled.enabled) { 3479 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3480 if (ompt_frame->enter_frame.ptr == NULL) 3481 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3482 OMPT_STORE_RETURN_ADDRESS(global_tid); 3483 } 3484 #endif 3485 #if USE_ITT_NOTIFY 3486 __kmp_threads[global_tid]->th.th_ident = loc; 3487 #endif 3488 retval = 3489 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3490 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3491 retval = (retval != 0) ? 
(0) : (1); 3492 #if OMPT_SUPPORT && OMPT_OPTIONAL 3493 if (ompt_enabled.enabled) { 3494 ompt_frame->enter_frame = ompt_data_none; 3495 } 3496 #endif 3497 3498 // all workers except the master should do this pop here 3499 // (none of the other workers will get to __kmpc_end_reduce_nowait()) 3500 if (__kmp_env_consistency_check) { 3501 if (retval == 0) { 3502 __kmp_pop_sync(global_tid, ct_reduce, loc); 3503 } 3504 } 3505 3506 } else { 3507 3508 // should never reach this block 3509 KMP_ASSERT(0); // "unexpected method" 3510 } 3511 if (teams_swapped) { 3512 __kmp_restore_swapped_teams(th, team, task_state); 3513 } 3514 KA_TRACE( 3515 10, 3516 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3517 global_tid, packed_reduction_method, retval)); 3518 3519 return retval; 3520 } 3521 3522 /*! 3523 @ingroup SYNCHRONIZATION 3524 @param loc source location information 3525 @param global_tid global thread id. 3526 @param lck pointer to the unique lock data structure 3527 3528 Finish the execution of a reduce nowait. 3529 */ 3530 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3531 kmp_critical_name *lck) { 3532 3533 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3534 3535 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3536 3537 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3538 3539 if (packed_reduction_method == critical_reduce_block) { 3540 3541 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3542 3543 } else if (packed_reduction_method == empty_reduce_block) { 3544 3545 // usage: if team size == 1, no synchronization is required ( on Intel 3546 // platforms only ) 3547 3548 } else if (packed_reduction_method == atomic_reduce_block) { 3549 3550 // neither master nor other workers should get here 3551 // (code gen does not generate this call in case 2: atomic reduce block) 3552 // actually it would be better to remove this else-if entirely; 3553 // after removal this value would be caught by the 'else' and would assert 3554 3555 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3556 tree_reduce_block)) { 3557 3558 // only the master gets here 3559 3560 } else { 3561 3562 // should never reach this block 3563 KMP_ASSERT(0); // "unexpected method" 3564 } 3565 3566 if (__kmp_env_consistency_check) 3567 __kmp_pop_sync(global_tid, ct_reduce, loc); 3568 3569 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3570 global_tid, packed_reduction_method)); 3571 3572 return; 3573 } 3574 3575 /* 2.a.ii. Reduce Block with a terminating barrier */ 3576 3577 /*! 3578 @ingroup SYNCHRONIZATION 3579 @param loc source location information 3580 @param global_tid global thread number 3581 @param num_vars number of items (variables) to be reduced 3582 @param reduce_size size of data in bytes to be reduced 3583 @param reduce_data pointer to data to be reduced 3584 @param reduce_func callback function providing reduction operation on two 3585 operands and returning result of reduction in lhs_data 3586 @param lck pointer to the unique lock data structure 3587 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3588 threads if atomic reduction needed 3589 3590 A blocking reduce that includes an implicit barrier.
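As an illustrative sketch only (not the exact code any particular compiler
emits), the call pattern expected around this entry point follows the @result
values above; the names loc, gtid, lck, local_sum, global_sum and reduce_cb
are hypothetical placeholders. The nowait variant follows the same pattern
with __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait().

@code
// reduction(+:global_sum); local_sum is this thread's partial result and
// reduce_cb(lhs, rhs) adds *rhs into *lhs
switch (__kmpc_reduce(&loc, gtid, 1, sizeof(double), &local_sum,
                      reduce_cb, &lck)) {
case 1:
  // combine the partial result directly (master for the tree method; every
  // thread for the critical/empty methods), then close the reduce region
  global_sum += local_sum;
  __kmpc_end_reduce(&loc, gtid, &lck);
  break;
case 2:
  // atomic method: each thread combines its own partial result with an
  // atomic update (e.g. via a __kmpc_atomic_* entry point); the blocking
  // form then calls __kmpc_end_reduce() so all threads reach the terminating
  // barrier (the nowait form omits the end call in this case, as noted in
  // __kmpc_end_reduce_nowait())
  __kmpc_end_reduce(&loc, gtid, &lck);
  break;
default:
  // 0: worker thread whose contribution was already combined inside the
  // reduction barrier via reduce_cb
  break;
}
@endcode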
3591 */ 3592 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3593 size_t reduce_size, void *reduce_data, 3594 void (*reduce_func)(void *lhs_data, void *rhs_data), 3595 kmp_critical_name *lck) { 3596 KMP_COUNT_BLOCK(REDUCE_wait); 3597 int retval = 0; 3598 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3599 kmp_info_t *th; 3600 kmp_team_t *team; 3601 int teams_swapped = 0, task_state; 3602 3603 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3604 3605 // why do we need this initialization here at all? 3606 // Reduction clause can not be a stand-alone directive. 3607 3608 // do not call __kmp_serial_initialize(), it will be called by 3609 // __kmp_parallel_initialize() if needed 3610 // possible detection of false-positive race by the threadchecker ??? 3611 if (!TCR_4(__kmp_init_parallel)) 3612 __kmp_parallel_initialize(); 3613 3614 __kmp_resume_if_soft_paused(); 3615 3616 // check correctness of reduce block nesting 3617 #if KMP_USE_DYNAMIC_LOCK 3618 if (__kmp_env_consistency_check) 3619 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3620 #else 3621 if (__kmp_env_consistency_check) 3622 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3623 #endif 3624 3625 th = __kmp_thread_from_gtid(global_tid); 3626 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3627 3628 packed_reduction_method = __kmp_determine_reduction_method( 3629 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3630 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3631 3632 if (packed_reduction_method == critical_reduce_block) { 3633 3634 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3635 retval = 1; 3636 3637 } else if (packed_reduction_method == empty_reduce_block) { 3638 3639 // usage: if team size == 1, no synchronization is required ( Intel 3640 // platforms only ) 3641 retval = 1; 3642 3643 } else if (packed_reduction_method == atomic_reduce_block) { 3644 3645 retval = 2; 3646 3647 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3648 tree_reduce_block)) { 3649 3650 // case tree_reduce_block: 3651 // this barrier should be visible to a customer and to the threading profile 3652 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3653 #if OMPT_SUPPORT 3654 ompt_frame_t *ompt_frame; 3655 if (ompt_enabled.enabled) { 3656 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3657 if (ompt_frame->enter_frame.ptr == NULL) 3658 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3659 OMPT_STORE_RETURN_ADDRESS(global_tid); 3660 } 3661 #endif 3662 #if USE_ITT_NOTIFY 3663 __kmp_threads[global_tid]->th.th_ident = 3664 loc; // needed for correct notification of frames 3665 #endif 3666 retval = 3667 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3668 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3669 retval = (retval != 0) ? 
(0) : (1); 3670 #if OMPT_SUPPORT && OMPT_OPTIONAL 3671 if (ompt_enabled.enabled) { 3672 ompt_frame->enter_frame = ompt_data_none; 3673 } 3674 #endif 3675 3676 // all other workers except master should do this pop here 3677 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3678 if (__kmp_env_consistency_check) { 3679 if (retval == 0) { // 0: all other workers; 1: master 3680 __kmp_pop_sync(global_tid, ct_reduce, loc); 3681 } 3682 } 3683 3684 } else { 3685 3686 // should never reach this block 3687 KMP_ASSERT(0); // "unexpected method" 3688 } 3689 if (teams_swapped) { 3690 __kmp_restore_swapped_teams(th, team, task_state); 3691 } 3692 3693 KA_TRACE(10, 3694 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3695 global_tid, packed_reduction_method, retval)); 3696 return retval; 3697 } 3698 3699 /*! 3700 @ingroup SYNCHRONIZATION 3701 @param loc source location information 3702 @param global_tid global thread id. 3703 @param lck pointer to the unique lock data structure 3704 3705 Finish the execution of a blocking reduce. 3706 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3707 start function. 3708 */ 3709 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3710 kmp_critical_name *lck) { 3711 3712 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3713 kmp_info_t *th; 3714 kmp_team_t *team; 3715 int teams_swapped = 0, task_state; 3716 3717 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3718 3719 th = __kmp_thread_from_gtid(global_tid); 3720 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3721 3722 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3723 3724 // this barrier should be visible to a customer and to the threading profile 3725 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3726 3727 if (packed_reduction_method == critical_reduce_block) { 3728 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3729 3730 // TODO: implicit barrier: should be exposed 3731 #if OMPT_SUPPORT 3732 ompt_frame_t *ompt_frame; 3733 if (ompt_enabled.enabled) { 3734 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3735 if (ompt_frame->enter_frame.ptr == NULL) 3736 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3737 OMPT_STORE_RETURN_ADDRESS(global_tid); 3738 } 3739 #endif 3740 #if USE_ITT_NOTIFY 3741 __kmp_threads[global_tid]->th.th_ident = loc; 3742 #endif 3743 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3744 #if OMPT_SUPPORT && OMPT_OPTIONAL 3745 if (ompt_enabled.enabled) { 3746 ompt_frame->enter_frame = ompt_data_none; 3747 } 3748 #endif 3749 3750 } else if (packed_reduction_method == empty_reduce_block) { 3751 3752 // usage: if team size==1, no synchronization is required (Intel platforms only) 3753 3754 // TODO: implicit barrier: should be exposed 3755 #if OMPT_SUPPORT 3756 ompt_frame_t *ompt_frame; 3757 if (ompt_enabled.enabled) { 3758 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3759 if (ompt_frame->enter_frame.ptr == NULL) 3760 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3761 OMPT_STORE_RETURN_ADDRESS(global_tid); 3762 } 3763 #endif 3764 #if USE_ITT_NOTIFY 3765 __kmp_threads[global_tid]->th.th_ident = loc; 3766 #endif 3767 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3768 #if OMPT_SUPPORT && OMPT_OPTIONAL 3769 if (ompt_enabled.enabled) { 3770 ompt_frame->enter_frame = ompt_data_none; 3771 } 3772 #endif 3773 
3774 } else if (packed_reduction_method == atomic_reduce_block) { 3775 3776 #if OMPT_SUPPORT 3777 ompt_frame_t *ompt_frame; 3778 if (ompt_enabled.enabled) { 3779 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3780 if (ompt_frame->enter_frame.ptr == NULL) 3781 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3782 OMPT_STORE_RETURN_ADDRESS(global_tid); 3783 } 3784 #endif 3785 // TODO: implicit barrier: should be exposed 3786 #if USE_ITT_NOTIFY 3787 __kmp_threads[global_tid]->th.th_ident = loc; 3788 #endif 3789 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3790 #if OMPT_SUPPORT && OMPT_OPTIONAL 3791 if (ompt_enabled.enabled) { 3792 ompt_frame->enter_frame = ompt_data_none; 3793 } 3794 #endif 3795 3796 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3797 tree_reduce_block)) { 3798 3799 // only the master executes here (the master releases all other workers) 3800 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3801 global_tid); 3802 3803 } else { 3804 3805 // should never reach this block 3806 KMP_ASSERT(0); // "unexpected method" 3807 } 3808 if (teams_swapped) { 3809 __kmp_restore_swapped_teams(th, team, task_state); 3810 } 3811 3812 if (__kmp_env_consistency_check) 3813 __kmp_pop_sync(global_tid, ct_reduce, loc); 3814 3815 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3816 global_tid, packed_reduction_method)); 3817 3818 return; 3819 } 3820 3821 #undef __KMP_GET_REDUCTION_METHOD 3822 #undef __KMP_SET_REDUCTION_METHOD 3823 3824 /* end of interface to fast scalable reduce routines */ 3825 3826 kmp_uint64 __kmpc_get_taskid() { 3827 3828 kmp_int32 gtid; 3829 kmp_info_t *thread; 3830 3831 gtid = __kmp_get_gtid(); 3832 if (gtid < 0) { 3833 return 0; 3834 } 3835 thread = __kmp_thread_from_gtid(gtid); 3836 return thread->th.th_current_task->td_task_id; 3837 3838 } // __kmpc_get_taskid 3839 3840 kmp_uint64 __kmpc_get_parent_taskid() { 3841 3842 kmp_int32 gtid; 3843 kmp_info_t *thread; 3844 kmp_taskdata_t *parent_task; 3845 3846 gtid = __kmp_get_gtid(); 3847 if (gtid < 0) { 3848 return 0; 3849 } 3850 thread = __kmp_thread_from_gtid(gtid); 3851 parent_task = thread->th.th_current_task->td_parent; 3852 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3853 3854 } // __kmpc_get_parent_taskid 3855 3856 /*! 3857 @ingroup WORK_SHARING 3858 @param loc source location information. 3859 @param gtid global thread number. 3860 @param num_dims number of associated doacross loops. 3861 @param dims info on loop bounds. 3862 3863 Initialize doacross loop information. 3864 The compiler is expected to send us inclusive bounds, 3865 e.g. for (i=2; i<9; i+=2) gives lo=2, up=8, st=2.
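For orientation, an illustrative sketch (not exact compiler output) of the
full doacross sequence for a loop such as "#pragma omp for ordered(1)" over
the example above; lb and ub stand for this thread's hypothetical chunk
bounds:

@code
struct kmp_dim dims[1];
dims[0].lo = 2; // inclusive lower bound
dims[0].up = 8; // inclusive upper bound
dims[0].st = 2; // stride
__kmpc_doacross_init(&loc, gtid, 1, dims);
for (kmp_int64 i = lb; i <= ub; i += 2) {
  kmp_int64 sink[1] = {i - 2};
  __kmpc_doacross_wait(&loc, gtid, sink); // ordered depend(sink: i - 2)
  // ... loop body ...
  kmp_int64 source[1] = {i};
  __kmpc_doacross_post(&loc, gtid, source); // ordered depend(source)
}
__kmpc_doacross_fini(&loc, gtid);
@endcode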
3866 */ 3867 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3868 const struct kmp_dim *dims) { 3869 int j, idx; 3870 kmp_int64 last, trace_count; 3871 kmp_info_t *th = __kmp_threads[gtid]; 3872 kmp_team_t *team = th->th.th_team; 3873 kmp_uint32 *flags; 3874 kmp_disp_t *pr_buf = th->th.th_dispatch; 3875 dispatch_shared_info_t *sh_buf; 3876 3877 KA_TRACE( 3878 20, 3879 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3880 gtid, num_dims, !team->t.t_serialized)); 3881 KMP_DEBUG_ASSERT(dims != NULL); 3882 KMP_DEBUG_ASSERT(num_dims > 0); 3883 3884 if (team->t.t_serialized) { 3885 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3886 return; // no dependencies if team is serialized 3887 } 3888 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3889 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3890 // the next loop 3891 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3892 3893 // Save bounds info into allocated private buffer 3894 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3895 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3896 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3897 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3898 pr_buf->th_doacross_info[0] = 3899 (kmp_int64)num_dims; // first element is number of dimensions 3900 // Save also address of num_done in order to access it later without knowing 3901 // the buffer index 3902 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3903 pr_buf->th_doacross_info[2] = dims[0].lo; 3904 pr_buf->th_doacross_info[3] = dims[0].up; 3905 pr_buf->th_doacross_info[4] = dims[0].st; 3906 last = 5; 3907 for (j = 1; j < num_dims; ++j) { 3908 kmp_int64 3909 range_length; // To keep ranges of all dimensions but the first dims[0] 3910 if (dims[j].st == 1) { // most common case 3911 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3912 range_length = dims[j].up - dims[j].lo + 1; 3913 } else { 3914 if (dims[j].st > 0) { 3915 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3916 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3917 } else { // negative increment 3918 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3919 range_length = 3920 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3921 } 3922 } 3923 pr_buf->th_doacross_info[last++] = range_length; 3924 pr_buf->th_doacross_info[last++] = dims[j].lo; 3925 pr_buf->th_doacross_info[last++] = dims[j].up; 3926 pr_buf->th_doacross_info[last++] = dims[j].st; 3927 } 3928 3929 // Compute total trip count. 3930 // Start with range of dims[0] which we don't need to keep in the buffer. 
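  // As a worked example (matching the case in the header comment above),
  // for (i=2; i<9; i+=2) gives lo=2, up=8, st=2, so this dimension contributes
  // (8 - 2) / 2 + 1 = 4 iterations; the product over all dimensions is the
  // total trip count, which sizes the shared flags array (one bit/iteration).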
3931 if (dims[0].st == 1) { // most common case 3932 trace_count = dims[0].up - dims[0].lo + 1; 3933 } else if (dims[0].st > 0) { 3934 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3935 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3936 } else { // negative increment 3937 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3938 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3939 } 3940 for (j = 1; j < num_dims; ++j) { 3941 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3942 } 3943 KMP_DEBUG_ASSERT(trace_count > 0); 3944 3945 // Check if shared buffer is not occupied by other loop (idx - 3946 // __kmp_dispatch_num_buffers) 3947 if (idx != sh_buf->doacross_buf_idx) { 3948 // Shared buffer is occupied, wait for it to be free 3949 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3950 __kmp_eq_4, NULL); 3951 } 3952 #if KMP_32_BIT_ARCH 3953 // Check if we are the first thread. After the CAS the first thread gets 0, 3954 // others get 1 if initialization is in progress, allocated pointer otherwise. 3955 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3956 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3957 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3958 #else 3959 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3960 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3961 #endif 3962 if (flags == NULL) { 3963 // we are the first thread, allocate the array of flags 3964 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3965 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3966 KMP_MB(); 3967 sh_buf->doacross_flags = flags; 3968 } else if (flags == (kmp_uint32 *)1) { 3969 #if KMP_32_BIT_ARCH 3970 // initialization is still in progress, need to wait 3971 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 3972 #else 3973 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 3974 #endif 3975 KMP_YIELD(TRUE); 3976 KMP_MB(); 3977 } else { 3978 KMP_MB(); 3979 } 3980 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 3981 pr_buf->th_doacross_flags = 3982 sh_buf->doacross_flags; // save private copy in order to not 3983 // touch shared buffer on each iteration 3984 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 3985 } 3986 3987 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 3988 kmp_int32 shft, num_dims, i; 3989 kmp_uint32 flag; 3990 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 3991 kmp_info_t *th = __kmp_threads[gtid]; 3992 kmp_team_t *team = th->th.th_team; 3993 kmp_disp_t *pr_buf; 3994 kmp_int64 lo, up, st; 3995 3996 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 3997 if (team->t.t_serialized) { 3998 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 3999 return; // no dependencies if team is serialized 4000 } 4001 4002 // calculate sequential iteration number and check out-of-bounds condition 4003 pr_buf = th->th.th_dispatch; 4004 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4005 num_dims = pr_buf->th_doacross_info[0]; 4006 lo = pr_buf->th_doacross_info[2]; 4007 up = pr_buf->th_doacross_info[3]; 4008 st = pr_buf->th_doacross_info[4]; 4009 if (st == 1) { // most common case 4010 if (vec[0] < lo || vec[0] > up) { 4011 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4012 "bounds [%lld,%lld]\n", 4013 gtid, vec[0], lo, up)); 4014 return; 4015 } 4016 iter_number = 
vec[0] - lo; 4017 } else if (st > 0) { 4018 if (vec[0] < lo || vec[0] > up) { 4019 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4020 "bounds [%lld,%lld]\n", 4021 gtid, vec[0], lo, up)); 4022 return; 4023 } 4024 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4025 } else { // negative increment 4026 if (vec[0] > lo || vec[0] < up) { 4027 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4028 "bounds [%lld,%lld]\n", 4029 gtid, vec[0], lo, up)); 4030 return; 4031 } 4032 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4033 } 4034 for (i = 1; i < num_dims; ++i) { 4035 kmp_int64 iter, ln; 4036 kmp_int32 j = i * 4; 4037 ln = pr_buf->th_doacross_info[j + 1]; 4038 lo = pr_buf->th_doacross_info[j + 2]; 4039 up = pr_buf->th_doacross_info[j + 3]; 4040 st = pr_buf->th_doacross_info[j + 4]; 4041 if (st == 1) { 4042 if (vec[i] < lo || vec[i] > up) { 4043 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4044 "bounds [%lld,%lld]\n", 4045 gtid, vec[i], lo, up)); 4046 return; 4047 } 4048 iter = vec[i] - lo; 4049 } else if (st > 0) { 4050 if (vec[i] < lo || vec[i] > up) { 4051 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4052 "bounds [%lld,%lld]\n", 4053 gtid, vec[i], lo, up)); 4054 return; 4055 } 4056 iter = (kmp_uint64)(vec[i] - lo) / st; 4057 } else { // st < 0 4058 if (vec[i] > lo || vec[i] < up) { 4059 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4060 "bounds [%lld,%lld]\n", 4061 gtid, vec[i], lo, up)); 4062 return; 4063 } 4064 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4065 } 4066 iter_number = iter + ln * iter_number; 4067 } 4068 shft = iter_number % 32; // use 32-bit granularity 4069 iter_number >>= 5; // divided by 32 4070 flag = 1 << shft; 4071 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4072 KMP_YIELD(TRUE); 4073 } 4074 KMP_MB(); 4075 KA_TRACE(20, 4076 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4077 gtid, (iter_number << 5) + shft)); 4078 } 4079 4080 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4081 kmp_int32 shft, num_dims, i; 4082 kmp_uint32 flag; 4083 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4084 kmp_info_t *th = __kmp_threads[gtid]; 4085 kmp_team_t *team = th->th.th_team; 4086 kmp_disp_t *pr_buf; 4087 kmp_int64 lo, st; 4088 4089 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4090 if (team->t.t_serialized) { 4091 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4092 return; // no dependencies if team is serialized 4093 } 4094 4095 // calculate sequential iteration number (same as in "wait" but no 4096 // out-of-bounds checks) 4097 pr_buf = th->th.th_dispatch; 4098 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4099 num_dims = pr_buf->th_doacross_info[0]; 4100 lo = pr_buf->th_doacross_info[2]; 4101 st = pr_buf->th_doacross_info[4]; 4102 if (st == 1) { // most common case 4103 iter_number = vec[0] - lo; 4104 } else if (st > 0) { 4105 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4106 } else { // negative increment 4107 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4108 } 4109 for (i = 1; i < num_dims; ++i) { 4110 kmp_int64 iter, ln; 4111 kmp_int32 j = i * 4; 4112 ln = pr_buf->th_doacross_info[j + 1]; 4113 lo = pr_buf->th_doacross_info[j + 2]; 4114 st = pr_buf->th_doacross_info[j + 4]; 4115 if (st == 1) { 4116 iter = vec[i] - lo; 4117 } else if (st > 0) { 4118 iter = (kmp_uint64)(vec[i] - lo) / st; 4119 } else { // st < 0 4120 iter = 
(kmp_uint64)(lo - vec[i]) / (-st); 4121 } 4122 iter_number = iter + ln * iter_number; 4123 } 4124 shft = iter_number % 32; // use 32-bit granularity 4125 iter_number >>= 5; // divided by 32 4126 flag = 1 << shft; 4127 KMP_MB(); 4128 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4129 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4130 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4131 (iter_number << 5) + shft)); 4132 } 4133 4134 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4135 kmp_int32 num_done; 4136 kmp_info_t *th = __kmp_threads[gtid]; 4137 kmp_team_t *team = th->th.th_team; 4138 kmp_disp_t *pr_buf = th->th.th_dispatch; 4139 4140 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4141 if (team->t.t_serialized) { 4142 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4143 return; // nothing to do 4144 } 4145 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4146 if (num_done == th->th.th_team_nproc) { 4147 // we are the last thread, need to free shared resources 4148 int idx = pr_buf->th_doacross_buf_idx - 1; 4149 dispatch_shared_info_t *sh_buf = 4150 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4151 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4152 (kmp_int64)&sh_buf->doacross_num_done); 4153 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4154 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4155 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4156 sh_buf->doacross_flags = NULL; 4157 sh_buf->doacross_num_done = 0; 4158 sh_buf->doacross_buf_idx += 4159 __kmp_dispatch_num_buffers; // free buffer for future re-use 4160 } 4161 // free private resources (need to keep buffer index forever) 4162 pr_buf->th_doacross_flags = NULL; 4163 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4164 pr_buf->th_doacross_info = NULL; 4165 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4166 } 4167 4168 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */ 4169 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4170 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); 4171 } 4172 4173 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4174 __kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4175 } 4176 4177 int __kmpc_get_target_offload(void) { 4178 if (!__kmp_init_serial) { 4179 __kmp_serial_initialize(); 4180 } 4181 return __kmp_target_offload; 4182 } 4183 4184 int __kmpc_pause_resource(kmp_pause_status_t level) { 4185 if (!__kmp_init_serial) { 4186 return 1; // Can't pause if runtime is not initialized 4187 } 4188 return __kmp_pause_resource(level); 4189 } 4190
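// Illustrative user-level sketch (hypothetical user code, not part of this
// file's logic): the omp_alloc/omp_free wrappers above simply forward to the
// __kmpc_alloc/__kmpc_free entry points with the calling thread's gtid, so a
// C/C++ call such as
//
//   double *buf = (double *)omp_alloc(1024 * sizeof(double),
//                                     omp_default_mem_alloc);
//   if (buf != NULL) {
//     // ... use buf ...
//     omp_free(buf, omp_default_mem_alloc);
//   }
//
// allocates and releases memory through the runtime's allocator support;
// omp_default_mem_alloc is one of the predefined allocator handles declared
// in omp.h, and any other omp_allocator_handle_t is handled the same way.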