/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, because the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high), this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
157 */ 158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { 159 #ifndef KMP_DEBUG 160 161 return TRUE; 162 163 #else 164 165 const char *semi2; 166 const char *semi3; 167 int line_no; 168 169 if (__kmp_par_range == 0) { 170 return TRUE; 171 } 172 semi2 = loc->psource; 173 if (semi2 == NULL) { 174 return TRUE; 175 } 176 semi2 = strchr(semi2, ';'); 177 if (semi2 == NULL) { 178 return TRUE; 179 } 180 semi2 = strchr(semi2 + 1, ';'); 181 if (semi2 == NULL) { 182 return TRUE; 183 } 184 if (__kmp_par_range_filename[0]) { 185 const char *name = semi2 - 1; 186 while ((name > loc->psource) && (*name != '/') && (*name != ';')) { 187 name--; 188 } 189 if ((*name == '/') || (*name == ';')) { 190 name++; 191 } 192 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { 193 return __kmp_par_range < 0; 194 } 195 } 196 semi3 = strchr(semi2 + 1, ';'); 197 if (__kmp_par_range_routine[0]) { 198 if ((semi3 != NULL) && (semi3 > semi2) && 199 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { 200 return __kmp_par_range < 0; 201 } 202 } 203 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { 204 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { 205 return __kmp_par_range > 0; 206 } 207 return __kmp_par_range < 0; 208 } 209 return TRUE; 210 211 #endif /* KMP_DEBUG */ 212 } 213 214 /*! 215 @ingroup THREAD_STATES 216 @param loc Source location information. 217 @return 1 if this thread is executing inside an active parallel region, zero if 218 not. 219 */ 220 kmp_int32 __kmpc_in_parallel(ident_t *loc) { 221 return __kmp_entry_thread()->th.th_root->r.r_active; 222 } 223 224 /*! 225 @ingroup PARALLEL 226 @param loc source location information 227 @param global_tid global thread number 228 @param num_threads number of threads requested for this parallel construct 229 230 Set the number of threads to be used by the next fork spawned by this thread. 231 This call is only required if the parallel construct has a `num_threads` clause. 232 */ 233 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 234 kmp_int32 num_threads) { 235 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", 236 global_tid, num_threads)); 237 238 __kmp_push_num_threads(loc, global_tid, num_threads); 239 } 240 241 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { 242 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); 243 244 /* the num_threads are automatically popped */ 245 } 246 247 #if OMP_40_ENABLED 248 249 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 250 kmp_int32 proc_bind) { 251 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, 252 proc_bind)); 253 254 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); 255 } 256 257 #endif /* OMP_40_ENABLED */ 258 259 /*! 260 @ingroup PARALLEL 261 @param loc source location information 262 @param argc total number of arguments in the ellipsis 263 @param microtask pointer to callback routine consisting of outlined parallel 264 construct 265 @param ... pointers to shared variables that aren't global 266 267 Do the actual fork and call the microtask in the relevant number of threads. 268 */ 269 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
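/* Illustrative sketch only (not the exact code any particular compiler emits;
   "outlined", "x" and "loc" are hypothetical names): a directive such as

     #pragma omp parallel shared(x)
     { body(&x); }

   is typically lowered to an outlined microtask following the kmpc_micro
   convention plus a fork call, with the shared variables passed through the
   ellipsis:

     void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *x) { body(x); }
     ...
     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x);

   where loc is a static ident_t describing the source location and argc (1
   here) is the number of trailing shared-variable pointers. */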
{ 270 int gtid = __kmp_entry_gtid(); 271 272 #if (KMP_STATS_ENABLED) 273 // If we were in a serial region, then stop the serial timer, record 274 // the event, and start parallel region timer 275 stats_state_e previous_state = KMP_GET_THREAD_STATE(); 276 if (previous_state == stats_state_e::SERIAL_REGION) { 277 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead); 278 } else { 279 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead); 280 } 281 int inParallel = __kmpc_in_parallel(loc); 282 if (inParallel) { 283 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); 284 } else { 285 KMP_COUNT_BLOCK(OMP_PARALLEL); 286 } 287 #endif 288 289 // maybe to save thr_state is enough here 290 { 291 va_list ap; 292 va_start(ap, microtask); 293 294 #if OMPT_SUPPORT 295 ompt_frame_t *ompt_frame; 296 if (ompt_enabled.enabled) { 297 kmp_info_t *master_th = __kmp_threads[gtid]; 298 kmp_team_t *parent_team = master_th->th.th_team; 299 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; 300 if (lwt) 301 ompt_frame = &(lwt->ompt_task_info.frame); 302 else { 303 int tid = __kmp_tid_from_gtid(gtid); 304 ompt_frame = &( 305 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); 306 } 307 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 308 OMPT_STORE_RETURN_ADDRESS(gtid); 309 } 310 #endif 311 312 #if INCLUDE_SSC_MARKS 313 SSC_MARK_FORKING(); 314 #endif 315 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 316 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task 317 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, 318 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 319 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 320 &ap 321 #else 322 ap 323 #endif 324 ); 325 #if INCLUDE_SSC_MARKS 326 SSC_MARK_JOINING(); 327 #endif 328 __kmp_join_call(loc, gtid 329 #if OMPT_SUPPORT 330 , 331 fork_context_intel 332 #endif 333 ); 334 335 va_end(ap); 336 } 337 338 #if KMP_STATS_ENABLED 339 if (previous_state == stats_state_e::SERIAL_REGION) { 340 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); 341 } else { 342 KMP_POP_PARTITIONED_TIMER(); 343 } 344 #endif // KMP_STATS_ENABLED 345 } 346 347 #if OMP_40_ENABLED 348 /*! 349 @ingroup PARALLEL 350 @param loc source location information 351 @param global_tid global thread number 352 @param num_teams number of teams requested for the teams construct 353 @param num_threads number of threads per team requested for the teams construct 354 355 Set the number of teams to be used by the teams construct. 356 This call is only required if the teams construct has a `num_teams` clause 357 or a `thread_limit` clause (or both). 358 */ 359 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 360 kmp_int32 num_teams, kmp_int32 num_threads) { 361 KA_TRACE(20, 362 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", 363 global_tid, num_teams, num_threads)); 364 365 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); 366 } 367 368 /*! 369 @ingroup PARALLEL 370 @param loc source location information 371 @param argc total number of arguments in the ellipsis 372 @param microtask pointer to callback routine consisting of outlined teams 373 construct 374 @param ... pointers to shared variables that aren't global 375 376 Do the actual fork and call the microtask in the relevant number of threads. 377 */ 378 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, 379 ...) 
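/* Illustrative sketch only, under the same assumptions as the fork_call
   example above ("outlined_teams" and "loc" are hypothetical names): a teams
   construct with clauses, e.g.

     #pragma omp teams num_teams(4) thread_limit(8)
     { body(); }

   might be lowered as

     void outlined_teams(kmp_int32 *gtid, kmp_int32 *bound_tid) { body(); }
     ...
     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_teams(&loc, gtid, 4, 8);
     __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams);

   Without a num_teams or thread_limit clause the __kmpc_push_num_teams() call
   is omitted and defaults are chosen here (see the th_teams_size.nteams check
   below). */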
{ 380 int gtid = __kmp_entry_gtid(); 381 kmp_info_t *this_thr = __kmp_threads[gtid]; 382 va_list ap; 383 va_start(ap, microtask); 384 385 KMP_COUNT_BLOCK(OMP_TEAMS); 386 387 // remember teams entry point and nesting level 388 this_thr->th.th_teams_microtask = microtask; 389 this_thr->th.th_teams_level = 390 this_thr->th.th_team->t.t_level; // AC: can be >0 on host 391 392 #if OMPT_SUPPORT 393 kmp_team_t *parent_team = this_thr->th.th_team; 394 int tid = __kmp_tid_from_gtid(gtid); 395 if (ompt_enabled.enabled) { 396 parent_team->t.t_implicit_task_taskdata[tid] 397 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 398 } 399 OMPT_STORE_RETURN_ADDRESS(gtid); 400 #endif 401 402 // check if __kmpc_push_num_teams called, set default number of teams 403 // otherwise 404 if (this_thr->th.th_teams_size.nteams == 0) { 405 __kmp_push_num_teams(loc, gtid, 0, 0); 406 } 407 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); 408 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); 409 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); 410 411 __kmp_fork_call(loc, gtid, fork_context_intel, argc, 412 VOLATILE_CAST(microtask_t) 413 __kmp_teams_master, // "wrapped" task 414 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, 415 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX 416 &ap 417 #else 418 ap 419 #endif 420 ); 421 __kmp_join_call(loc, gtid 422 #if OMPT_SUPPORT 423 , 424 fork_context_intel 425 #endif 426 ); 427 428 // Pop current CG root off list 429 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 430 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots; 431 this_thr->th.th_cg_roots = tmp->up; 432 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up" 433 " to node %p. cg_nthreads was %d\n", 434 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads)); 435 __kmp_free(tmp); 436 // Restore current task's thread_limit from CG root 437 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots); 438 this_thr->th.th_current_task->td_icvs.thread_limit = 439 this_thr->th.th_cg_roots->cg_thread_limit; 440 441 this_thr->th.th_teams_microtask = NULL; 442 this_thr->th.th_teams_level = 0; 443 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; 444 va_end(ap); 445 } 446 #endif /* OMP_40_ENABLED */ 447 448 // I don't think this function should ever have been exported. 449 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated 450 // openmp code ever called it, but it's been exported from the RTL for so 451 // long that I'm afraid to remove the definition. 452 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } 453 454 /*! 455 @ingroup PARALLEL 456 @param loc source location information 457 @param global_tid global thread number 458 459 Enter a serialized parallel construct. This interface is used to handle a 460 conditional parallel region, like this, 461 @code 462 #pragma omp parallel if (condition) 463 @endcode 464 when the condition is false. 465 */ 466 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 467 // The implementation is now in kmp_runtime.cpp so that it can share static 468 // functions with kmp_fork_call since the tasks to be done are similar in 469 // each case. 470 #if OMPT_SUPPORT 471 OMPT_STORE_RETURN_ADDRESS(global_tid); 472 #endif 473 __kmp_serialized_parallel(loc, global_tid); 474 } 475 476 /*! 477 @ingroup PARALLEL 478 @param loc source location information 479 @param global_tid global thread number 480 481 Leave a serialized parallel construct. 
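
A conditional parallel region such as
@code
#pragma omp parallel if (condition)
{ body(); }
@endcode
is typically lowered (sketch only; the exact code is compiler dependent, and
"outlined", "loc" and "zero" are illustrative names) to a runtime test that
either performs a real fork or runs the outlined body serialized on the
encountering thread, bracketed by the two serialized-parallel calls:
@code
if (condition) {
  __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined);
} else {
  __kmpc_serialized_parallel(&loc, gtid);
  outlined(&gtid, &zero); // run the body directly on this thread
  __kmpc_end_serialized_parallel(&loc, gtid);
}
@endcode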
482 */ 483 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { 484 kmp_internal_control_t *top; 485 kmp_info_t *this_thr; 486 kmp_team_t *serial_team; 487 488 KC_TRACE(10, 489 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); 490 491 /* skip all this code for autopar serialized loops since it results in 492 unacceptable overhead */ 493 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) 494 return; 495 496 // Not autopar code 497 if (!TCR_4(__kmp_init_parallel)) 498 __kmp_parallel_initialize(); 499 500 #if OMP_50_ENABLED 501 __kmp_resume_if_soft_paused(); 502 #endif 503 504 this_thr = __kmp_threads[global_tid]; 505 serial_team = this_thr->th.th_serial_team; 506 507 #if OMP_45_ENABLED 508 kmp_task_team_t *task_team = this_thr->th.th_task_team; 509 510 // we need to wait for the proxy tasks before finishing the thread 511 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) 512 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); 513 #endif 514 515 KMP_MB(); 516 KMP_DEBUG_ASSERT(serial_team); 517 KMP_ASSERT(serial_team->t.t_serialized); 518 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); 519 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); 520 KMP_DEBUG_ASSERT(serial_team->t.t_threads); 521 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); 522 523 #if OMPT_SUPPORT 524 if (ompt_enabled.enabled && 525 this_thr->th.ompt_thread_info.state != ompt_state_overhead) { 526 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; 527 if (ompt_enabled.ompt_callback_implicit_task) { 528 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( 529 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, 530 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); 531 } 532 533 // reset clear the task id only after unlinking the task 534 ompt_data_t *parent_task_data; 535 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL); 536 537 if (ompt_enabled.ompt_callback_parallel_end) { 538 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( 539 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, 540 ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid)); 541 } 542 __ompt_lw_taskteam_unlink(this_thr); 543 this_thr->th.ompt_thread_info.state = ompt_state_overhead; 544 } 545 #endif 546 547 /* If necessary, pop the internal control stack values and replace the team 548 * values */ 549 top = serial_team->t.t_control_stack_top; 550 if (top && top->serial_nesting_level == serial_team->t.t_serialized) { 551 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); 552 serial_team->t.t_control_stack_top = top->next; 553 __kmp_free(top); 554 } 555 556 // if( serial_team -> t.t_serialized > 1 ) 557 serial_team->t.t_level--; 558 559 /* pop dispatch buffers stack */ 560 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); 561 { 562 dispatch_private_info_t *disp_buffer = 563 serial_team->t.t_dispatch->th_disp_buffer; 564 serial_team->t.t_dispatch->th_disp_buffer = 565 serial_team->t.t_dispatch->th_disp_buffer->next; 566 __kmp_free(disp_buffer); 567 } 568 #if OMP_50_ENABLED 569 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore 570 #endif 571 572 --serial_team->t.t_serialized; 573 if (serial_team->t.t_serialized == 0) { 574 575 /* return to the parallel section */ 576 577 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 578 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { 579 
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler,
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since we use volatile instead in the library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // C74404
  // This is needed for non-temporal store instructions (an sfence is needed).
  // It also covers the clflush instruction (an mfence is needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
663 } else { 664 #if KMP_COMPILER_ICC 665 _mm_mfence(); 666 #elif KMP_COMPILER_MSVC 667 MemoryBarrier(); 668 #else 669 __sync_synchronize(); 670 #endif // KMP_COMPILER_ICC 671 } 672 #endif // KMP_MIC 673 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64) 674 // Nothing to see here move along 675 #elif KMP_ARCH_PPC64 676 // Nothing needed here (we have a real MB above). 677 #if KMP_OS_CNK 678 // The flushing thread needs to yield here; this prevents a 679 // busy-waiting thread from saturating the pipeline. flush is 680 // often used in loops like this: 681 // while (!flag) { 682 // #pragma omp flush(flag) 683 // } 684 // and adding the yield here is good for at least a 10x speedup 685 // when running >2 threads per core (on the NAS LU benchmark). 686 __kmp_yield(); 687 #endif 688 #else 689 #error Unknown or unsupported architecture 690 #endif 691 692 #if OMPT_SUPPORT && OMPT_OPTIONAL 693 if (ompt_enabled.ompt_callback_flush) { 694 ompt_callbacks.ompt_callback(ompt_callback_flush)( 695 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0)); 696 } 697 #endif 698 } 699 700 /* -------------------------------------------------------------------------- */ 701 /*! 702 @ingroup SYNCHRONIZATION 703 @param loc source location information 704 @param global_tid thread id. 705 706 Execute a barrier. 707 */ 708 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { 709 KMP_COUNT_BLOCK(OMP_BARRIER); 710 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); 711 712 if (!TCR_4(__kmp_init_parallel)) 713 __kmp_parallel_initialize(); 714 715 #if OMP_50_ENABLED 716 __kmp_resume_if_soft_paused(); 717 #endif 718 719 if (__kmp_env_consistency_check) { 720 if (loc == 0) { 721 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 722 } 723 724 __kmp_check_barrier(global_tid, ct_barrier, loc); 725 } 726 727 #if OMPT_SUPPORT 728 ompt_frame_t *ompt_frame; 729 if (ompt_enabled.enabled) { 730 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 731 if (ompt_frame->enter_frame.ptr == NULL) 732 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 733 OMPT_STORE_RETURN_ADDRESS(global_tid); 734 } 735 #endif 736 __kmp_threads[global_tid]->th.th_ident = loc; 737 // TODO: explicit barrier_wait_id: 738 // this function is called when 'barrier' directive is present or 739 // implicit barrier at the end of a worksharing construct. 740 // 1) better to add a per-thread barrier counter to a thread data structure 741 // 2) set to 0 when a new team is created 742 // 4) no sync is required 743 744 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 745 #if OMPT_SUPPORT && OMPT_OPTIONAL 746 if (ompt_enabled.enabled) { 747 ompt_frame->enter_frame = ompt_data_none; 748 } 749 #endif 750 } 751 752 /* The BARRIER for a MASTER section is always explicit */ 753 /*! 754 @ingroup WORK_SHARING 755 @param loc source location information. 756 @param global_tid global thread number . 757 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise. 
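
A <tt>master</tt> construct has no implied barrier, so a typical lowering
(sketch only; the exact code is compiler dependent and "loc" is an
illustrative name) simply guards the block:
@code
if (__kmpc_master(&loc, gtid)) {
  // body of the master region, executed only by the master thread
  __kmpc_end_master(&loc, gtid);
}
@endcode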
758 */ 759 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { 760 int status = 0; 761 762 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); 763 764 if (!TCR_4(__kmp_init_parallel)) 765 __kmp_parallel_initialize(); 766 767 #if OMP_50_ENABLED 768 __kmp_resume_if_soft_paused(); 769 #endif 770 771 if (KMP_MASTER_GTID(global_tid)) { 772 KMP_COUNT_BLOCK(OMP_MASTER); 773 KMP_PUSH_PARTITIONED_TIMER(OMP_master); 774 status = 1; 775 } 776 777 #if OMPT_SUPPORT && OMPT_OPTIONAL 778 if (status) { 779 if (ompt_enabled.ompt_callback_master) { 780 kmp_info_t *this_thr = __kmp_threads[global_tid]; 781 kmp_team_t *team = this_thr->th.th_team; 782 783 int tid = __kmp_tid_from_gtid(global_tid); 784 ompt_callbacks.ompt_callback(ompt_callback_master)( 785 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), 786 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 787 OMPT_GET_RETURN_ADDRESS(0)); 788 } 789 } 790 #endif 791 792 if (__kmp_env_consistency_check) { 793 #if KMP_USE_DYNAMIC_LOCK 794 if (status) 795 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); 796 else 797 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); 798 #else 799 if (status) 800 __kmp_push_sync(global_tid, ct_master, loc, NULL); 801 else 802 __kmp_check_sync(global_tid, ct_master, loc, NULL); 803 #endif 804 } 805 806 return status; 807 } 808 809 /*! 810 @ingroup WORK_SHARING 811 @param loc source location information. 812 @param global_tid global thread number . 813 814 Mark the end of a <tt>master</tt> region. This should only be called by the 815 thread that executes the <tt>master</tt> region. 816 */ 817 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { 818 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); 819 820 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); 821 KMP_POP_PARTITIONED_TIMER(); 822 823 #if OMPT_SUPPORT && OMPT_OPTIONAL 824 kmp_info_t *this_thr = __kmp_threads[global_tid]; 825 kmp_team_t *team = this_thr->th.th_team; 826 if (ompt_enabled.ompt_callback_master) { 827 int tid = __kmp_tid_from_gtid(global_tid); 828 ompt_callbacks.ompt_callback(ompt_callback_master)( 829 ompt_scope_end, &(team->t.ompt_team_info.parallel_data), 830 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 831 OMPT_GET_RETURN_ADDRESS(0)); 832 } 833 #endif 834 835 if (__kmp_env_consistency_check) { 836 if (global_tid < 0) 837 KMP_WARNING(ThreadIdentInvalid); 838 839 if (KMP_MASTER_GTID(global_tid)) 840 __kmp_pop_sync(global_tid, ct_master, loc); 841 } 842 } 843 844 /*! 845 @ingroup WORK_SHARING 846 @param loc source location information. 847 @param gtid global thread number. 848 849 Start execution of an <tt>ordered</tt> construct. 
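
For an <tt>ordered</tt> region inside a loop the compiler brackets the body
with this call and __kmpc_end_ordered(), for example (illustrative sketch
only, inside the outlined loop body):
@code
__kmpc_ordered(&loc, gtid);
// ... code that must execute in iteration order ...
__kmpc_end_ordered(&loc, gtid);
@endcode
The call returns once it is this thread's turn in the iteration order.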
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if OMP_50_ENABLED
  __kmp_resume_if_soft_paused();
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
          (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
961 void *idx; 962 kmp_indirect_lock_t **lck; 963 lck = (kmp_indirect_lock_t **)crit; 964 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); 965 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); 966 KMP_SET_I_LOCK_LOCATION(ilk, loc); 967 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); 968 KA_TRACE(20, 969 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); 970 #if USE_ITT_BUILD 971 __kmp_itt_critical_creating(ilk->lock, loc); 972 #endif 973 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); 974 if (status == 0) { 975 #if USE_ITT_BUILD 976 __kmp_itt_critical_destroyed(ilk->lock); 977 #endif 978 // We don't really need to destroy the unclaimed lock here since it will be 979 // cleaned up at program exit. 980 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); 981 } 982 KMP_DEBUG_ASSERT(*lck != NULL); 983 } 984 985 // Fast-path acquire tas lock 986 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 987 { \ 988 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 989 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 990 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 991 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 992 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 993 kmp_uint32 spins; \ 994 KMP_FSYNC_PREPARE(l); \ 995 KMP_INIT_YIELD(spins); \ 996 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 997 do { \ 998 if (TCR_4(__kmp_nth) > \ 999 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1000 KMP_YIELD(TRUE); \ 1001 } else { \ 1002 KMP_YIELD_SPIN(spins); \ 1003 } \ 1004 __kmp_spin_backoff(&backoff); \ 1005 } while ( \ 1006 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1007 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1008 } \ 1009 KMP_FSYNC_ACQUIRED(l); \ 1010 } 1011 1012 // Fast-path test tas lock 1013 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1014 { \ 1015 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1016 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1017 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1018 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1019 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1020 } 1021 1022 // Fast-path release tas lock 1023 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1024 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1025 1026 #if KMP_USE_FUTEX 1027 1028 #include <sys/syscall.h> 1029 #include <unistd.h> 1030 #ifndef FUTEX_WAIT 1031 #define FUTEX_WAIT 0 1032 #endif 1033 #ifndef FUTEX_WAKE 1034 #define FUTEX_WAKE 1 1035 #endif 1036 1037 // Fast-path acquire futex lock 1038 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1039 { \ 1040 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1041 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1042 KMP_MB(); \ 1043 KMP_FSYNC_PREPARE(ftx); \ 1044 kmp_int32 poll_val; \ 1045 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1046 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1047 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1048 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1049 if (!cond) { \ 1050 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1051 poll_val | \ 1052 KMP_LOCK_BUSY(1, futex))) { \ 1053 continue; \ 1054 } \ 1055 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1056 } \ 1057 kmp_int32 rc; \ 1058 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1059 NULL, NULL, 0)) != 0) { \ 1060 continue; \ 1061 } \ 1062 gtid_code |= 1; \ 1063 } \ 1064 KMP_FSYNC_ACQUIRED(ftx); \ 1065 } 1066 1067 // Fast-path test futex lock 1068 
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1069 { \ 1070 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1071 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1072 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1073 KMP_FSYNC_ACQUIRED(ftx); \ 1074 rc = TRUE; \ 1075 } else { \ 1076 rc = FALSE; \ 1077 } \ 1078 } 1079 1080 // Fast-path release futex lock 1081 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1082 { \ 1083 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1084 KMP_MB(); \ 1085 KMP_FSYNC_RELEASING(ftx); \ 1086 kmp_int32 poll_val = \ 1087 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1088 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1089 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1090 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1091 } \ 1092 KMP_MB(); \ 1093 KMP_YIELD_OVERSUB(); \ 1094 } 1095 1096 #endif // KMP_USE_FUTEX 1097 1098 #else // KMP_USE_DYNAMIC_LOCK 1099 1100 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, 1101 ident_t const *loc, 1102 kmp_int32 gtid) { 1103 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; 1104 1105 // Because of the double-check, the following load doesn't need to be volatile 1106 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1107 1108 if (lck == NULL) { 1109 void *idx; 1110 1111 // Allocate & initialize the lock. 1112 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() 1113 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); 1114 __kmp_init_user_lock_with_checks(lck); 1115 __kmp_set_user_lock_location(lck, loc); 1116 #if USE_ITT_BUILD 1117 __kmp_itt_critical_creating(lck); 1118 // __kmp_itt_critical_creating() should be called *before* the first usage 1119 // of underlying lock. It is the only place where we can guarantee it. There 1120 // are chances the lock will destroyed with no usage, but it is not a 1121 // problem, because this is not real event seen by user but rather setting 1122 // name for object (lock). See more details in kmp_itt.h. 1123 #endif /* USE_ITT_BUILD */ 1124 1125 // Use a cmpxchg instruction to slam the start of the critical section with 1126 // the lock pointer. If another thread beat us to it, deallocate the lock, 1127 // and use the lock that the other thread allocated. 1128 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); 1129 1130 if (status == 0) { 1131 // Deallocate the lock and reload the value. 1132 #if USE_ITT_BUILD 1133 __kmp_itt_critical_destroyed(lck); 1134 // Let ITT know the lock is destroyed and the same memory location may be reused 1135 // for another purpose. 1136 #endif /* USE_ITT_BUILD */ 1137 __kmp_destroy_user_lock_with_checks(lck); 1138 __kmp_user_lock_free(&idx, gtid, lck); 1139 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); 1140 KMP_DEBUG_ASSERT(lck != NULL); 1141 } 1142 } 1143 return lck; 1144 } 1145 1146 #endif // KMP_USE_DYNAMIC_LOCK 1147 1148 /*! 1149 @ingroup WORK_SHARING 1150 @param loc source location information. 1151 @param global_tid global thread number . 1152 @param crit identity of the critical section. This could be a pointer to a lock 1153 associated with the critical section, or some other suitably unique value. 1154 1155 Enter code protected by a `critical` construct. 1156 This function blocks until the executing thread can enter the critical section. 
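
A named critical construct, e.g.
@code
#pragma omp critical(update)
{ body(); }
@endcode
is typically lowered (illustrative sketch only; the object name and exact code
are compiler dependent) using a zero-initialized, file-scope kmp_critical_name
object shared by every occurrence of that critical name:
@code
static kmp_critical_name crit_update; // one per critical name
...
__kmpc_critical(&loc, gtid, &crit_update);
body();
__kmpc_end_critical(&loc, gtid, &crit_update);
@endcode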
1157 */ 1158 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1159 kmp_critical_name *crit) { 1160 #if KMP_USE_DYNAMIC_LOCK 1161 #if OMPT_SUPPORT && OMPT_OPTIONAL 1162 OMPT_STORE_RETURN_ADDRESS(global_tid); 1163 #endif // OMPT_SUPPORT 1164 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); 1165 #else 1166 KMP_COUNT_BLOCK(OMP_CRITICAL); 1167 #if OMPT_SUPPORT && OMPT_OPTIONAL 1168 ompt_state_t prev_state = ompt_state_undefined; 1169 ompt_thread_info_t ti; 1170 #endif 1171 kmp_user_lock_p lck; 1172 1173 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1174 1175 // TODO: add THR_OVHD_STATE 1176 1177 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1178 KMP_CHECK_USER_LOCK_INIT(); 1179 1180 if ((__kmp_user_lock_kind == lk_tas) && 1181 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1182 lck = (kmp_user_lock_p)crit; 1183 } 1184 #if KMP_USE_FUTEX 1185 else if ((__kmp_user_lock_kind == lk_futex) && 1186 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1187 lck = (kmp_user_lock_p)crit; 1188 } 1189 #endif 1190 else { // ticket, queuing or drdpa 1191 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 1192 } 1193 1194 if (__kmp_env_consistency_check) 1195 __kmp_push_sync(global_tid, ct_critical, loc, lck); 1196 1197 // since the critical directive binds to all threads, not just the current 1198 // team we have to check this even if we are in a serialized team. 1199 // also, even if we are the uber thread, we still have to conduct the lock, 1200 // as we have to contend with sibling threads. 1201 1202 #if USE_ITT_BUILD 1203 __kmp_itt_critical_acquiring(lck); 1204 #endif /* USE_ITT_BUILD */ 1205 #if OMPT_SUPPORT && OMPT_OPTIONAL 1206 OMPT_STORE_RETURN_ADDRESS(gtid); 1207 void *codeptr_ra = NULL; 1208 if (ompt_enabled.enabled) { 1209 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1210 /* OMPT state update */ 1211 prev_state = ti.state; 1212 ti.wait_id = (ompt_wait_id_t)lck; 1213 ti.state = ompt_state_wait_critical; 1214 1215 /* OMPT event callback */ 1216 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); 1217 if (ompt_enabled.ompt_callback_mutex_acquire) { 1218 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1219 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 1220 (ompt_wait_id_t)crit, codeptr_ra); 1221 } 1222 } 1223 #endif 1224 // Value of 'crit' should be good for using as a critical_id of the critical 1225 // section directive. 
1226 __kmp_acquire_user_lock_with_checks(lck, global_tid); 1227 1228 #if USE_ITT_BUILD 1229 __kmp_itt_critical_acquired(lck); 1230 #endif /* USE_ITT_BUILD */ 1231 #if OMPT_SUPPORT && OMPT_OPTIONAL 1232 if (ompt_enabled.enabled) { 1233 /* OMPT state update */ 1234 ti.state = prev_state; 1235 ti.wait_id = 0; 1236 1237 /* OMPT event callback */ 1238 if (ompt_enabled.ompt_callback_mutex_acquired) { 1239 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1240 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra); 1241 } 1242 } 1243 #endif 1244 KMP_POP_PARTITIONED_TIMER(); 1245 1246 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1247 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1248 #endif // KMP_USE_DYNAMIC_LOCK 1249 } 1250 1251 #if KMP_USE_DYNAMIC_LOCK 1252 1253 // Converts the given hint to an internal lock implementation 1254 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { 1255 #if KMP_USE_TSX 1256 #define KMP_TSX_LOCK(seq) lockseq_##seq 1257 #else 1258 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1259 #endif 1260 1261 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1262 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1263 #else 1264 #define KMP_CPUINFO_RTM 0 1265 #endif 1266 1267 // Hints that do not require further logic 1268 if (hint & kmp_lock_hint_hle) 1269 return KMP_TSX_LOCK(hle); 1270 if (hint & kmp_lock_hint_rtm) 1271 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; 1272 if (hint & kmp_lock_hint_adaptive) 1273 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; 1274 1275 // Rule out conflicting hints first by returning the default lock 1276 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) 1277 return __kmp_user_lock_seq; 1278 if ((hint & omp_lock_hint_speculative) && 1279 (hint & omp_lock_hint_nonspeculative)) 1280 return __kmp_user_lock_seq; 1281 1282 // Do not even consider speculation when it appears to be contended 1283 if (hint & omp_lock_hint_contended) 1284 return lockseq_queuing; 1285 1286 // Uncontended lock without speculation 1287 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) 1288 return lockseq_tas; 1289 1290 // HLE lock for speculation 1291 if (hint & omp_lock_hint_speculative) 1292 return KMP_TSX_LOCK(hle); 1293 1294 return __kmp_user_lock_seq; 1295 } 1296 1297 #if OMPT_SUPPORT && OMPT_OPTIONAL 1298 #if KMP_USE_DYNAMIC_LOCK 1299 static kmp_mutex_impl_t 1300 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { 1301 if (user_lock) { 1302 switch (KMP_EXTRACT_D_TAG(user_lock)) { 1303 case 0: 1304 break; 1305 #if KMP_USE_FUTEX 1306 case locktag_futex: 1307 return kmp_mutex_impl_queuing; 1308 #endif 1309 case locktag_tas: 1310 return kmp_mutex_impl_spin; 1311 #if KMP_USE_TSX 1312 case locktag_hle: 1313 return kmp_mutex_impl_speculative; 1314 #endif 1315 default: 1316 return kmp_mutex_impl_none; 1317 } 1318 ilock = KMP_LOOKUP_I_LOCK(user_lock); 1319 } 1320 KMP_ASSERT(ilock); 1321 switch (ilock->type) { 1322 #if KMP_USE_TSX 1323 case locktag_adaptive: 1324 case locktag_rtm: 1325 return kmp_mutex_impl_speculative; 1326 #endif 1327 case locktag_nested_tas: 1328 return kmp_mutex_impl_spin; 1329 #if KMP_USE_FUTEX 1330 case locktag_nested_futex: 1331 #endif 1332 case locktag_ticket: 1333 case locktag_queuing: 1334 case locktag_drdpa: 1335 case locktag_nested_ticket: 1336 case locktag_nested_queuing: 1337 case locktag_nested_drdpa: 1338 return kmp_mutex_impl_queuing; 1339 default: 1340 return kmp_mutex_impl_none; 1341 } 1342 } 
1343 #else 1344 // For locks without dynamic binding 1345 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { 1346 switch (__kmp_user_lock_kind) { 1347 case lk_tas: 1348 return kmp_mutex_impl_spin; 1349 #if KMP_USE_FUTEX 1350 case lk_futex: 1351 #endif 1352 case lk_ticket: 1353 case lk_queuing: 1354 case lk_drdpa: 1355 return kmp_mutex_impl_queuing; 1356 #if KMP_USE_TSX 1357 case lk_hle: 1358 case lk_rtm: 1359 case lk_adaptive: 1360 return kmp_mutex_impl_speculative; 1361 #endif 1362 default: 1363 return kmp_mutex_impl_none; 1364 } 1365 } 1366 #endif // KMP_USE_DYNAMIC_LOCK 1367 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1368 1369 /*! 1370 @ingroup WORK_SHARING 1371 @param loc source location information. 1372 @param global_tid global thread number. 1373 @param crit identity of the critical section. This could be a pointer to a lock 1374 associated with the critical section, or some other suitably unique value. 1375 @param hint the lock hint. 1376 1377 Enter code protected by a `critical` construct with a hint. The hint value is 1378 used to suggest a lock implementation. This function blocks until the executing 1379 thread can enter the critical section unless the hint suggests use of 1380 speculative execution and the hardware supports it. 1381 */ 1382 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1383 kmp_critical_name *crit, uint32_t hint) { 1384 KMP_COUNT_BLOCK(OMP_CRITICAL); 1385 kmp_user_lock_p lck; 1386 #if OMPT_SUPPORT && OMPT_OPTIONAL 1387 ompt_state_t prev_state = ompt_state_undefined; 1388 ompt_thread_info_t ti; 1389 // This is the case, if called from __kmpc_critical: 1390 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); 1391 if (!codeptr) 1392 codeptr = OMPT_GET_RETURN_ADDRESS(0); 1393 #endif 1394 1395 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); 1396 1397 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 1398 // Check if it is initialized. 1399 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait); 1400 if (*lk == 0) { 1401 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); 1402 if (KMP_IS_D_LOCK(lckseq)) { 1403 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 1404 KMP_GET_D_TAG(lckseq)); 1405 } else { 1406 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); 1407 } 1408 } 1409 // Branch for accessing the actual lock object and set operation. This 1410 // branching is inevitable since this lock initialization does not follow the 1411 // normal dispatch path (lock table is not used). 
1412 if (KMP_EXTRACT_D_TAG(lk) != 0) { 1413 lck = (kmp_user_lock_p)lk; 1414 if (__kmp_env_consistency_check) { 1415 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1416 __kmp_map_hint_to_lock(hint)); 1417 } 1418 #if USE_ITT_BUILD 1419 __kmp_itt_critical_acquiring(lck); 1420 #endif 1421 #if OMPT_SUPPORT && OMPT_OPTIONAL 1422 if (ompt_enabled.enabled) { 1423 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1424 /* OMPT state update */ 1425 prev_state = ti.state; 1426 ti.wait_id = (ompt_wait_id_t)lck; 1427 ti.state = ompt_state_wait_critical; 1428 1429 /* OMPT event callback */ 1430 if (ompt_enabled.ompt_callback_mutex_acquire) { 1431 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1432 ompt_mutex_critical, (unsigned int)hint, 1433 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr); 1434 } 1435 } 1436 #endif 1437 #if KMP_USE_INLINED_TAS 1438 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1439 KMP_ACQUIRE_TAS_LOCK(lck, global_tid); 1440 } else 1441 #elif KMP_USE_INLINED_FUTEX 1442 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1443 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); 1444 } else 1445 #endif 1446 { 1447 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 1448 } 1449 } else { 1450 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 1451 lck = ilk->lock; 1452 if (__kmp_env_consistency_check) { 1453 __kmp_push_sync(global_tid, ct_critical, loc, lck, 1454 __kmp_map_hint_to_lock(hint)); 1455 } 1456 #if USE_ITT_BUILD 1457 __kmp_itt_critical_acquiring(lck); 1458 #endif 1459 #if OMPT_SUPPORT && OMPT_OPTIONAL 1460 if (ompt_enabled.enabled) { 1461 ti = __kmp_threads[global_tid]->th.ompt_thread_info; 1462 /* OMPT state update */ 1463 prev_state = ti.state; 1464 ti.wait_id = (ompt_wait_id_t)lck; 1465 ti.state = ompt_state_wait_critical; 1466 1467 /* OMPT event callback */ 1468 if (ompt_enabled.ompt_callback_mutex_acquire) { 1469 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 1470 ompt_mutex_critical, (unsigned int)hint, 1471 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr); 1472 } 1473 } 1474 #endif 1475 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 1476 } 1477 KMP_POP_PARTITIONED_TIMER(); 1478 1479 #if USE_ITT_BUILD 1480 __kmp_itt_critical_acquired(lck); 1481 #endif /* USE_ITT_BUILD */ 1482 #if OMPT_SUPPORT && OMPT_OPTIONAL 1483 if (ompt_enabled.enabled) { 1484 /* OMPT state update */ 1485 ti.state = prev_state; 1486 ti.wait_id = 0; 1487 1488 /* OMPT event callback */ 1489 if (ompt_enabled.ompt_callback_mutex_acquired) { 1490 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 1491 ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr); 1492 } 1493 } 1494 #endif 1495 1496 KMP_PUSH_PARTITIONED_TIMER(OMP_critical); 1497 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); 1498 } // __kmpc_critical_with_hint 1499 1500 #endif // KMP_USE_DYNAMIC_LOCK 1501 1502 /*! 1503 @ingroup WORK_SHARING 1504 @param loc source location information. 1505 @param global_tid global thread number . 1506 @param crit identity of the critical section. This could be a pointer to a lock 1507 associated with the critical section, or some other suitably unique value. 1508 1509 Leave a critical section, releasing any lock that was held during its execution. 
1510 */ 1511 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1512 kmp_critical_name *crit) { 1513 kmp_user_lock_p lck; 1514 1515 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); 1516 1517 #if KMP_USE_DYNAMIC_LOCK 1518 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 1519 lck = (kmp_user_lock_p)crit; 1520 KMP_ASSERT(lck != NULL); 1521 if (__kmp_env_consistency_check) { 1522 __kmp_pop_sync(global_tid, ct_critical, loc); 1523 } 1524 #if USE_ITT_BUILD 1525 __kmp_itt_critical_releasing(lck); 1526 #endif 1527 #if KMP_USE_INLINED_TAS 1528 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { 1529 KMP_RELEASE_TAS_LOCK(lck, global_tid); 1530 } else 1531 #elif KMP_USE_INLINED_FUTEX 1532 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { 1533 KMP_RELEASE_FUTEX_LOCK(lck, global_tid); 1534 } else 1535 #endif 1536 { 1537 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 1538 } 1539 } else { 1540 kmp_indirect_lock_t *ilk = 1541 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 1542 KMP_ASSERT(ilk != NULL); 1543 lck = ilk->lock; 1544 if (__kmp_env_consistency_check) { 1545 __kmp_pop_sync(global_tid, ct_critical, loc); 1546 } 1547 #if USE_ITT_BUILD 1548 __kmp_itt_critical_releasing(lck); 1549 #endif 1550 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); 1551 } 1552 1553 #else // KMP_USE_DYNAMIC_LOCK 1554 1555 if ((__kmp_user_lock_kind == lk_tas) && 1556 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { 1557 lck = (kmp_user_lock_p)crit; 1558 } 1559 #if KMP_USE_FUTEX 1560 else if ((__kmp_user_lock_kind == lk_futex) && 1561 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { 1562 lck = (kmp_user_lock_p)crit; 1563 } 1564 #endif 1565 else { // ticket, queuing or drdpa 1566 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); 1567 } 1568 1569 KMP_ASSERT(lck != NULL); 1570 1571 if (__kmp_env_consistency_check) 1572 __kmp_pop_sync(global_tid, ct_critical, loc); 1573 1574 #if USE_ITT_BUILD 1575 __kmp_itt_critical_releasing(lck); 1576 #endif /* USE_ITT_BUILD */ 1577 // Value of 'crit' should be good for using as a critical_id of the critical 1578 // section directive. 1579 __kmp_release_user_lock_with_checks(lck, global_tid); 1580 1581 #endif // KMP_USE_DYNAMIC_LOCK 1582 1583 #if OMPT_SUPPORT && OMPT_OPTIONAL 1584 /* OMPT release event triggers after lock is released; place here to trigger 1585 * for all #if branches */ 1586 OMPT_STORE_RETURN_ADDRESS(global_tid); 1587 if (ompt_enabled.ompt_callback_mutex_released) { 1588 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 1589 ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0)); 1590 } 1591 #endif 1592 1593 KMP_POP_PARTITIONED_TIMER(); 1594 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); 1595 } 1596 1597 /*! 1598 @ingroup SYNCHRONIZATION 1599 @param loc source location information 1600 @param global_tid thread id. 1601 @return one if the thread should execute the master block, zero otherwise 1602 1603 Start execution of a combined barrier and master. The barrier is executed inside 1604 this function. 
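
A typical use (sketch only; "loc" is an illustrative name) is a combined
"barrier, then master-only work" sequence emitted by the compiler:
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // master-only code; the other threads are still waiting at the barrier
  __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
}
@endcode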
1605 */ 1606 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1607 int status; 1608 1609 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1610 1611 if (!TCR_4(__kmp_init_parallel)) 1612 __kmp_parallel_initialize(); 1613 1614 #if OMP_50_ENABLED 1615 __kmp_resume_if_soft_paused(); 1616 #endif 1617 1618 if (__kmp_env_consistency_check) 1619 __kmp_check_barrier(global_tid, ct_barrier, loc); 1620 1621 #if OMPT_SUPPORT 1622 ompt_frame_t *ompt_frame; 1623 if (ompt_enabled.enabled) { 1624 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1625 if (ompt_frame->enter_frame.ptr == NULL) 1626 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1627 OMPT_STORE_RETURN_ADDRESS(global_tid); 1628 } 1629 #endif 1630 #if USE_ITT_NOTIFY 1631 __kmp_threads[global_tid]->th.th_ident = loc; 1632 #endif 1633 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1634 #if OMPT_SUPPORT && OMPT_OPTIONAL 1635 if (ompt_enabled.enabled) { 1636 ompt_frame->enter_frame = ompt_data_none; 1637 } 1638 #endif 1639 1640 return (status != 0) ? 0 : 1; 1641 } 1642 1643 /*! 1644 @ingroup SYNCHRONIZATION 1645 @param loc source location information 1646 @param global_tid thread id. 1647 1648 Complete the execution of a combined barrier and master. This function should 1649 only be called at the completion of the <tt>master</tt> code. Other threads will 1650 still be waiting at the barrier and this call releases them. 1651 */ 1652 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1653 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1654 1655 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1656 } 1657 1658 /*! 1659 @ingroup SYNCHRONIZATION 1660 @param loc source location information 1661 @param global_tid thread id. 1662 @return one if the thread should execute the master block, zero otherwise 1663 1664 Start execution of a combined barrier and master(nowait) construct. 1665 The barrier is executed inside this function. 1666 There is no equivalent "end" function, since the 1667 */ 1668 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1669 kmp_int32 ret; 1670 1671 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1672 1673 if (!TCR_4(__kmp_init_parallel)) 1674 __kmp_parallel_initialize(); 1675 1676 #if OMP_50_ENABLED 1677 __kmp_resume_if_soft_paused(); 1678 #endif 1679 1680 if (__kmp_env_consistency_check) { 1681 if (loc == 0) { 1682 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? 
1683 } 1684 __kmp_check_barrier(global_tid, ct_barrier, loc); 1685 } 1686 1687 #if OMPT_SUPPORT 1688 ompt_frame_t *ompt_frame; 1689 if (ompt_enabled.enabled) { 1690 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1691 if (ompt_frame->enter_frame.ptr == NULL) 1692 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1693 OMPT_STORE_RETURN_ADDRESS(global_tid); 1694 } 1695 #endif 1696 #if USE_ITT_NOTIFY 1697 __kmp_threads[global_tid]->th.th_ident = loc; 1698 #endif 1699 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1700 #if OMPT_SUPPORT && OMPT_OPTIONAL 1701 if (ompt_enabled.enabled) { 1702 ompt_frame->enter_frame = ompt_data_none; 1703 } 1704 #endif 1705 1706 ret = __kmpc_master(loc, global_tid); 1707 1708 if (__kmp_env_consistency_check) { 1709 /* there's no __kmpc_end_master called; so the (stats) */ 1710 /* actions of __kmpc_end_master are done here */ 1711 1712 if (global_tid < 0) { 1713 KMP_WARNING(ThreadIdentInvalid); 1714 } 1715 if (ret) { 1716 /* only one thread should do the pop since only */ 1717 /* one did the push (see __kmpc_master()) */ 1718 1719 __kmp_pop_sync(global_tid, ct_master, loc); 1720 } 1721 } 1722 1723 return (ret); 1724 } 1725 1726 /* The BARRIER for a SINGLE process section is always explicit */ 1727 /*! 1728 @ingroup WORK_SHARING 1729 @param loc source location information 1730 @param global_tid global thread number 1731 @return One if this thread should execute the single construct, zero otherwise. 1732 1733 Test whether to execute a <tt>single</tt> construct. 1734 There are no implicit barriers in the two "single" calls, rather the compiler 1735 should introduce an explicit barrier if it is required. 1736 */ 1737 1738 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1739 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1740 1741 if (rc) { 1742 // We are going to execute the single statement, so we should count it. 1743 KMP_COUNT_BLOCK(OMP_SINGLE); 1744 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1745 } 1746 1747 #if OMPT_SUPPORT && OMPT_OPTIONAL 1748 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1749 kmp_team_t *team = this_thr->th.th_team; 1750 int tid = __kmp_tid_from_gtid(global_tid); 1751 1752 if (ompt_enabled.enabled) { 1753 if (rc) { 1754 if (ompt_enabled.ompt_callback_work) { 1755 ompt_callbacks.ompt_callback(ompt_callback_work)( 1756 ompt_work_single_executor, ompt_scope_begin, 1757 &(team->t.ompt_team_info.parallel_data), 1758 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1759 1, OMPT_GET_RETURN_ADDRESS(0)); 1760 } 1761 } else { 1762 if (ompt_enabled.ompt_callback_work) { 1763 ompt_callbacks.ompt_callback(ompt_callback_work)( 1764 ompt_work_single_other, ompt_scope_begin, 1765 &(team->t.ompt_team_info.parallel_data), 1766 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1767 1, OMPT_GET_RETURN_ADDRESS(0)); 1768 ompt_callbacks.ompt_callback(ompt_callback_work)( 1769 ompt_work_single_other, ompt_scope_end, 1770 &(team->t.ompt_team_info.parallel_data), 1771 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1772 1, OMPT_GET_RETURN_ADDRESS(0)); 1773 } 1774 } 1775 } 1776 #endif 1777 1778 return rc; 1779 } 1780 1781 /*! 1782 @ingroup WORK_SHARING 1783 @param loc source location information 1784 @param global_tid global thread number 1785 1786 Mark the end of a <tt>single</tt> construct. This function should 1787 only be called by the thread that executed the block of code protected 1788 by the `single` construct. 
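
Since neither call introduces an implicit barrier, a lowering of
@code
#pragma omp single
{ body(); }
@endcode
looks roughly like this (sketch only; the exact code is compiler dependent),
with the compiler adding the trailing barrier unless a nowait clause is
present:
@code
if (__kmpc_single(&loc, gtid)) {
  body();
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // omitted when nowait is specified
@endcode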
1789 */ 1790 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1791 __kmp_exit_single(global_tid); 1792 KMP_POP_PARTITIONED_TIMER(); 1793 1794 #if OMPT_SUPPORT && OMPT_OPTIONAL 1795 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1796 kmp_team_t *team = this_thr->th.th_team; 1797 int tid = __kmp_tid_from_gtid(global_tid); 1798 1799 if (ompt_enabled.ompt_callback_work) { 1800 ompt_callbacks.ompt_callback(ompt_callback_work)( 1801 ompt_work_single_executor, ompt_scope_end, 1802 &(team->t.ompt_team_info.parallel_data), 1803 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1804 OMPT_GET_RETURN_ADDRESS(0)); 1805 } 1806 #endif 1807 } 1808 1809 /*! 1810 @ingroup WORK_SHARING 1811 @param loc Source location 1812 @param global_tid Global thread id 1813 1814 Mark the end of a statically scheduled loop. 1815 */ 1816 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1817 KMP_POP_PARTITIONED_TIMER(); 1818 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1819 1820 #if OMPT_SUPPORT && OMPT_OPTIONAL 1821 if (ompt_enabled.ompt_callback_work) { 1822 ompt_work_t ompt_work_type = ompt_work_loop; 1823 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1824 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1825 // Determine workshare type 1826 if (loc != NULL) { 1827 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1828 ompt_work_type = ompt_work_loop; 1829 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1830 ompt_work_type = ompt_work_sections; 1831 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1832 ompt_work_type = ompt_work_distribute; 1833 } else { 1834 // use default set above. 1835 // a warning about this case is provided in __kmpc_for_static_init 1836 } 1837 KMP_DEBUG_ASSERT(ompt_work_type); 1838 } 1839 ompt_callbacks.ompt_callback(ompt_callback_work)( 1840 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1841 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1842 } 1843 #endif 1844 if (__kmp_env_consistency_check) 1845 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1846 } 1847 1848 // User routines which take C-style arguments (call by value) 1849 // different from the Fortran equivalent routines 1850 1851 void ompc_set_num_threads(int arg) { 1852 // !!!!! TODO: check the per-task binding 1853 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1854 } 1855 1856 void ompc_set_dynamic(int flag) { 1857 kmp_info_t *thread; 1858 1859 /* For the thread-private implementation of the internal controls */ 1860 thread = __kmp_entry_thread(); 1861 1862 __kmp_save_internal_controls(thread); 1863 1864 set__dynamic(thread, flag ? TRUE : FALSE); 1865 } 1866 1867 void ompc_set_nested(int flag) { 1868 kmp_info_t *thread; 1869 1870 /* For the thread-private internal controls implementation */ 1871 thread = __kmp_entry_thread(); 1872 1873 __kmp_save_internal_controls(thread); 1874 1875 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 1876 } 1877 1878 void ompc_set_max_active_levels(int max_active_levels) { 1879 /* TO DO */ 1880 /* we want per-task implementation of this internal control */ 1881 1882 /* For the per-thread internal controls implementation */ 1883 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 1884 } 1885 1886 void ompc_set_schedule(omp_sched_t kind, int modifier) { 1887 // !!!!! 
TODO: check the per-task binding 1888 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 1889 } 1890 1891 int ompc_get_ancestor_thread_num(int level) { 1892 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 1893 } 1894 1895 int ompc_get_team_size(int level) { 1896 return __kmp_get_team_size(__kmp_entry_gtid(), level); 1897 } 1898 1899 #if OMP_50_ENABLED 1900 /* OpenMP 5.0 Affinity Format API */ 1901 1902 void ompc_set_affinity_format(char const *format) { 1903 if (!__kmp_init_serial) { 1904 __kmp_serial_initialize(); 1905 } 1906 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 1907 format, KMP_STRLEN(format) + 1); 1908 } 1909 1910 size_t ompc_get_affinity_format(char *buffer, size_t size) { 1911 size_t format_size; 1912 if (!__kmp_init_serial) { 1913 __kmp_serial_initialize(); 1914 } 1915 format_size = KMP_STRLEN(__kmp_affinity_format); 1916 if (buffer && size) { 1917 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 1918 format_size + 1); 1919 } 1920 return format_size; 1921 } 1922 1923 void ompc_display_affinity(char const *format) { 1924 int gtid; 1925 if (!TCR_4(__kmp_init_middle)) { 1926 __kmp_middle_initialize(); 1927 } 1928 gtid = __kmp_get_gtid(); 1929 __kmp_aux_display_affinity(gtid, format); 1930 } 1931 1932 size_t ompc_capture_affinity(char *buffer, size_t buf_size, 1933 char const *format) { 1934 int gtid; 1935 size_t num_required; 1936 kmp_str_buf_t capture_buf; 1937 if (!TCR_4(__kmp_init_middle)) { 1938 __kmp_middle_initialize(); 1939 } 1940 gtid = __kmp_get_gtid(); 1941 __kmp_str_buf_init(&capture_buf); 1942 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 1943 if (buffer && buf_size) { 1944 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 1945 capture_buf.used + 1); 1946 } 1947 __kmp_str_buf_free(&capture_buf); 1948 return num_required; 1949 } 1950 #endif /* OMP_50_ENABLED */ 1951 1952 void kmpc_set_stacksize(int arg) { 1953 // __kmp_aux_set_stacksize initializes the library if needed 1954 __kmp_aux_set_stacksize(arg); 1955 } 1956 1957 void kmpc_set_stacksize_s(size_t arg) { 1958 // __kmp_aux_set_stacksize initializes the library if needed 1959 __kmp_aux_set_stacksize(arg); 1960 } 1961 1962 void kmpc_set_blocktime(int arg) { 1963 int gtid, tid; 1964 kmp_info_t *thread; 1965 1966 gtid = __kmp_entry_gtid(); 1967 tid = __kmp_tid_from_gtid(gtid); 1968 thread = __kmp_thread_from_gtid(gtid); 1969 1970 __kmp_aux_set_blocktime(arg, thread, tid); 1971 } 1972 1973 void kmpc_set_library(int arg) { 1974 // __kmp_user_set_library initializes the library if needed 1975 __kmp_user_set_library((enum library_type)arg); 1976 } 1977 1978 void kmpc_set_defaults(char const *str) { 1979 // __kmp_aux_set_defaults initializes the library if needed 1980 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 1981 } 1982 1983 void kmpc_set_disp_num_buffers(int arg) { 1984 // ignore after initialization because some teams have already 1985 // allocated dispatch buffers 1986 if (__kmp_init_serial == 0 && arg > 0) 1987 __kmp_dispatch_num_buffers = arg; 1988 } 1989 1990 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 1991 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1992 return -1; 1993 #else 1994 if (!TCR_4(__kmp_init_middle)) { 1995 __kmp_middle_initialize(); 1996 } 1997 return __kmp_aux_set_affinity_mask_proc(proc, mask); 1998 #endif 1999 } 2000 2001 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2002 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2003 return -1; 2004 #else 2005 if 
(!TCR_4(__kmp_init_middle)) { 2006 __kmp_middle_initialize(); 2007 } 2008 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2009 #endif 2010 } 2011 2012 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2013 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2014 return -1; 2015 #else 2016 if (!TCR_4(__kmp_init_middle)) { 2017 __kmp_middle_initialize(); 2018 } 2019 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2020 #endif 2021 } 2022 2023 /* -------------------------------------------------------------------------- */ 2024 /*! 2025 @ingroup THREADPRIVATE 2026 @param loc source location information 2027 @param gtid global thread number 2028 @param cpy_size size of the cpy_data buffer 2029 @param cpy_data pointer to data to be copied 2030 @param cpy_func helper function to call for copying data 2031 @param didit flag variable: 1=single thread; 0=not single thread 2032 2033 __kmpc_copyprivate implements the interface for the private data broadcast 2034 needed for the copyprivate clause associated with a single region in an 2035 OpenMP<sup>*</sup> program (both C and Fortran). 2036 All threads participating in the parallel region call this routine. 2037 One of the threads (called the single thread) should have the <tt>didit</tt> 2038 variable set to 1 and all other threads should have that variable set to 0. 2039 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2040 2041 The OpenMP specification forbids the use of nowait on the single region when a 2042 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2043 barrier internally to avoid race conditions, so the code generation for the 2044 single region should avoid generating a barrier after the call to @ref 2045 __kmpc_copyprivate. 2046 2047 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2048 The <tt>loc</tt> parameter is a pointer to source location information. 2049 2050 Internal implementation: The single thread will first copy its descriptor 2051 address (cpy_data) to a team-private location, then the other threads will each 2052 call the function pointed to by the parameter cpy_func, which carries out the 2053 copy by copying the data using the cpy_data buffer. 2054 2055 The cpy_func routine used for the copy and the contents of the data area defined 2056 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2057 to be done. For instance, the cpy_data buffer can hold the actual data to be 2058 copied or it may hold a list of pointers to the data. The cpy_func routine must 2059 interpret the cpy_data buffer appropriately. 2060 2061 The interface to cpy_func is as follows: 2062 @code 2063 void cpy_func( void *destination, void *source ) 2064 @endcode 2065 where void *destination is the cpy_data pointer for the thread being copied to 2066 and void *source is the cpy_data pointer for the thread being copied from. 
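
For illustration only, a hypothetical cpy_func that broadcasts a single double,
with each thread passing the address of its local variable as cpy_data (the
function and variable names below are placeholders, not part of the runtime API):
@code
void copy_one_double(void *destination, void *source) {
  // destination and source are the cpy_data pointers of the receiving and
  // broadcasting threads respectively, so a plain assignment performs the copy
  *(double *)destination = *(double *)source;
}
@endcode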
2067 */ 2068 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2069 void *cpy_data, void (*cpy_func)(void *, void *), 2070 kmp_int32 didit) { 2071 void **data_ptr; 2072 2073 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2074 2075 KMP_MB(); 2076 2077 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2078 2079 if (__kmp_env_consistency_check) { 2080 if (loc == 0) { 2081 KMP_WARNING(ConstructIdentInvalid); 2082 } 2083 } 2084 2085 // ToDo: Optimize the following two barriers into some kind of split barrier 2086 2087 if (didit) 2088 *data_ptr = cpy_data; 2089 2090 #if OMPT_SUPPORT 2091 ompt_frame_t *ompt_frame; 2092 if (ompt_enabled.enabled) { 2093 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2094 if (ompt_frame->enter_frame.ptr == NULL) 2095 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2096 OMPT_STORE_RETURN_ADDRESS(gtid); 2097 } 2098 #endif 2099 /* This barrier is not a barrier region boundary */ 2100 #if USE_ITT_NOTIFY 2101 __kmp_threads[gtid]->th.th_ident = loc; 2102 #endif 2103 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2104 2105 if (!didit) 2106 (*cpy_func)(cpy_data, *data_ptr); 2107 2108 // Consider next barrier a user-visible barrier for barrier region boundaries 2109 // Nesting checks are already handled by the single construct checks 2110 2111 #if OMPT_SUPPORT 2112 if (ompt_enabled.enabled) { 2113 OMPT_STORE_RETURN_ADDRESS(gtid); 2114 } 2115 #endif 2116 #if USE_ITT_NOTIFY 2117 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2118 // tasks can overwrite the location) 2119 #endif 2120 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2121 #if OMPT_SUPPORT && OMPT_OPTIONAL 2122 if (ompt_enabled.enabled) { 2123 ompt_frame->enter_frame = ompt_data_none; 2124 } 2125 #endif 2126 } 2127 2128 /* -------------------------------------------------------------------------- */ 2129 2130 #define INIT_LOCK __kmp_init_user_lock_with_checks 2131 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2132 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2133 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2134 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2135 #define ACQUIRE_NESTED_LOCK_TIMED \ 2136 __kmp_acquire_nested_user_lock_with_checks_timed 2137 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2138 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2139 #define TEST_LOCK __kmp_test_user_lock_with_checks 2140 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2141 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2142 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2143 2144 // TODO: Make check abort messages use location info & pass it into 2145 // with_checks routines 2146 2147 #if KMP_USE_DYNAMIC_LOCK 2148 2149 // internal lock initializer 2150 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2151 kmp_dyna_lockseq_t seq) { 2152 if (KMP_IS_D_LOCK(seq)) { 2153 KMP_INIT_D_LOCK(lock, seq); 2154 #if USE_ITT_BUILD 2155 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2156 #endif 2157 } else { 2158 KMP_INIT_I_LOCK(lock, seq); 2159 #if USE_ITT_BUILD 2160 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2161 __kmp_itt_lock_creating(ilk->lock, loc); 2162 #endif 2163 } 2164 } 2165 2166 // internal nest lock initializer 2167 static __forceinline void 2168 __kmp_init_nest_lock_with_hint(ident_t *loc, 
void **lock, 2169 kmp_dyna_lockseq_t seq) { 2170 #if KMP_USE_TSX 2171 // Don't have nested lock implementation for speculative locks 2172 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) 2173 seq = __kmp_user_lock_seq; 2174 #endif 2175 switch (seq) { 2176 case lockseq_tas: 2177 seq = lockseq_nested_tas; 2178 break; 2179 #if KMP_USE_FUTEX 2180 case lockseq_futex: 2181 seq = lockseq_nested_futex; 2182 break; 2183 #endif 2184 case lockseq_ticket: 2185 seq = lockseq_nested_ticket; 2186 break; 2187 case lockseq_queuing: 2188 seq = lockseq_nested_queuing; 2189 break; 2190 case lockseq_drdpa: 2191 seq = lockseq_nested_drdpa; 2192 break; 2193 default: 2194 seq = lockseq_nested_queuing; 2195 } 2196 KMP_INIT_I_LOCK(lock, seq); 2197 #if USE_ITT_BUILD 2198 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2199 __kmp_itt_lock_creating(ilk->lock, loc); 2200 #endif 2201 } 2202 2203 /* initialize the lock with a hint */ 2204 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2205 uintptr_t hint) { 2206 KMP_DEBUG_ASSERT(__kmp_init_serial); 2207 if (__kmp_env_consistency_check && user_lock == NULL) { 2208 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2209 } 2210 2211 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2212 2213 #if OMPT_SUPPORT && OMPT_OPTIONAL 2214 // This is the case, if called from omp_init_lock_with_hint: 2215 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2216 if (!codeptr) 2217 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2218 if (ompt_enabled.ompt_callback_lock_init) { 2219 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2220 ompt_mutex_lock, (omp_lock_hint_t)hint, 2221 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2222 codeptr); 2223 } 2224 #endif 2225 } 2226 2227 /* initialize the lock with a hint */ 2228 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2229 void **user_lock, uintptr_t hint) { 2230 KMP_DEBUG_ASSERT(__kmp_init_serial); 2231 if (__kmp_env_consistency_check && user_lock == NULL) { 2232 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2233 } 2234 2235 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2236 2237 #if OMPT_SUPPORT && OMPT_OPTIONAL 2238 // This is the case, if called from omp_init_lock_with_hint: 2239 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2240 if (!codeptr) 2241 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2242 if (ompt_enabled.ompt_callback_lock_init) { 2243 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2244 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2245 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2246 codeptr); 2247 } 2248 #endif 2249 } 2250 2251 #endif // KMP_USE_DYNAMIC_LOCK 2252 2253 /* initialize the lock */ 2254 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2255 #if KMP_USE_DYNAMIC_LOCK 2256 2257 KMP_DEBUG_ASSERT(__kmp_init_serial); 2258 if (__kmp_env_consistency_check && user_lock == NULL) { 2259 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2260 } 2261 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2262 2263 #if OMPT_SUPPORT && OMPT_OPTIONAL 2264 // This is the case, if called from omp_init_lock_with_hint: 2265 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2266 if (!codeptr) 2267 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2268 if (ompt_enabled.ompt_callback_lock_init) { 2269 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2270 ompt_mutex_lock, omp_lock_hint_none, 2271 
__ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2272 codeptr); 2273 } 2274 #endif 2275 2276 #else // KMP_USE_DYNAMIC_LOCK 2277 2278 static char const *const func = "omp_init_lock"; 2279 kmp_user_lock_p lck; 2280 KMP_DEBUG_ASSERT(__kmp_init_serial); 2281 2282 if (__kmp_env_consistency_check) { 2283 if (user_lock == NULL) { 2284 KMP_FATAL(LockIsUninitialized, func); 2285 } 2286 } 2287 2288 KMP_CHECK_USER_LOCK_INIT(); 2289 2290 if ((__kmp_user_lock_kind == lk_tas) && 2291 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2292 lck = (kmp_user_lock_p)user_lock; 2293 } 2294 #if KMP_USE_FUTEX 2295 else if ((__kmp_user_lock_kind == lk_futex) && 2296 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2297 lck = (kmp_user_lock_p)user_lock; 2298 } 2299 #endif 2300 else { 2301 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2302 } 2303 INIT_LOCK(lck); 2304 __kmp_set_user_lock_location(lck, loc); 2305 2306 #if OMPT_SUPPORT && OMPT_OPTIONAL 2307 // This is the case, if called from omp_init_lock_with_hint: 2308 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2309 if (!codeptr) 2310 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2311 if (ompt_enabled.ompt_callback_lock_init) { 2312 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2313 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2314 (ompt_wait_id_t)user_lock, codeptr); 2315 } 2316 #endif 2317 2318 #if USE_ITT_BUILD 2319 __kmp_itt_lock_creating(lck); 2320 #endif /* USE_ITT_BUILD */ 2321 2322 #endif // KMP_USE_DYNAMIC_LOCK 2323 } // __kmpc_init_lock 2324 2325 /* initialize the lock */ 2326 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2327 #if KMP_USE_DYNAMIC_LOCK 2328 2329 KMP_DEBUG_ASSERT(__kmp_init_serial); 2330 if (__kmp_env_consistency_check && user_lock == NULL) { 2331 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2332 } 2333 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2334 2335 #if OMPT_SUPPORT && OMPT_OPTIONAL 2336 // This is the case, if called from omp_init_lock_with_hint: 2337 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2338 if (!codeptr) 2339 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2340 if (ompt_enabled.ompt_callback_lock_init) { 2341 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2342 ompt_mutex_nest_lock, omp_lock_hint_none, 2343 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2344 codeptr); 2345 } 2346 #endif 2347 2348 #else // KMP_USE_DYNAMIC_LOCK 2349 2350 static char const *const func = "omp_init_nest_lock"; 2351 kmp_user_lock_p lck; 2352 KMP_DEBUG_ASSERT(__kmp_init_serial); 2353 2354 if (__kmp_env_consistency_check) { 2355 if (user_lock == NULL) { 2356 KMP_FATAL(LockIsUninitialized, func); 2357 } 2358 } 2359 2360 KMP_CHECK_USER_LOCK_INIT(); 2361 2362 if ((__kmp_user_lock_kind == lk_tas) && 2363 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2364 OMP_NEST_LOCK_T_SIZE)) { 2365 lck = (kmp_user_lock_p)user_lock; 2366 } 2367 #if KMP_USE_FUTEX 2368 else if ((__kmp_user_lock_kind == lk_futex) && 2369 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2370 OMP_NEST_LOCK_T_SIZE)) { 2371 lck = (kmp_user_lock_p)user_lock; 2372 } 2373 #endif 2374 else { 2375 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2376 } 2377 2378 INIT_NESTED_LOCK(lck); 2379 __kmp_set_user_lock_location(lck, loc); 2380 2381 #if OMPT_SUPPORT && OMPT_OPTIONAL 2382 // This is the case, if called from omp_init_lock_with_hint: 2383 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2384 if (!codeptr) 2385 codeptr = 
OMPT_GET_RETURN_ADDRESS(0); 2386 if (ompt_enabled.ompt_callback_lock_init) { 2387 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2388 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2389 (ompt_wait_id_t)user_lock, codeptr); 2390 } 2391 #endif 2392 2393 #if USE_ITT_BUILD 2394 __kmp_itt_lock_creating(lck); 2395 #endif /* USE_ITT_BUILD */ 2396 2397 #endif // KMP_USE_DYNAMIC_LOCK 2398 } // __kmpc_init_nest_lock 2399 2400 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2401 #if KMP_USE_DYNAMIC_LOCK 2402 2403 #if USE_ITT_BUILD 2404 kmp_user_lock_p lck; 2405 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2406 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2407 } else { 2408 lck = (kmp_user_lock_p)user_lock; 2409 } 2410 __kmp_itt_lock_destroyed(lck); 2411 #endif 2412 #if OMPT_SUPPORT && OMPT_OPTIONAL 2413 // This is the case, if called from omp_init_lock_with_hint: 2414 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2415 if (!codeptr) 2416 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2417 if (ompt_enabled.ompt_callback_lock_destroy) { 2418 kmp_user_lock_p lck; 2419 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2420 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2421 } else { 2422 lck = (kmp_user_lock_p)user_lock; 2423 } 2424 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2425 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2426 } 2427 #endif 2428 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2429 #else 2430 kmp_user_lock_p lck; 2431 2432 if ((__kmp_user_lock_kind == lk_tas) && 2433 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2434 lck = (kmp_user_lock_p)user_lock; 2435 } 2436 #if KMP_USE_FUTEX 2437 else if ((__kmp_user_lock_kind == lk_futex) && 2438 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2439 lck = (kmp_user_lock_p)user_lock; 2440 } 2441 #endif 2442 else { 2443 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2444 } 2445 2446 #if OMPT_SUPPORT && OMPT_OPTIONAL 2447 // This is the case, if called from omp_init_lock_with_hint: 2448 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2449 if (!codeptr) 2450 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2451 if (ompt_enabled.ompt_callback_lock_destroy) { 2452 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2453 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2454 } 2455 #endif 2456 2457 #if USE_ITT_BUILD 2458 __kmp_itt_lock_destroyed(lck); 2459 #endif /* USE_ITT_BUILD */ 2460 DESTROY_LOCK(lck); 2461 2462 if ((__kmp_user_lock_kind == lk_tas) && 2463 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2464 ; 2465 } 2466 #if KMP_USE_FUTEX 2467 else if ((__kmp_user_lock_kind == lk_futex) && 2468 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2469 ; 2470 } 2471 #endif 2472 else { 2473 __kmp_user_lock_free(user_lock, gtid, lck); 2474 } 2475 #endif // KMP_USE_DYNAMIC_LOCK 2476 } // __kmpc_destroy_lock 2477 2478 /* destroy the lock */ 2479 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2480 #if KMP_USE_DYNAMIC_LOCK 2481 2482 #if USE_ITT_BUILD 2483 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2484 __kmp_itt_lock_destroyed(ilk->lock); 2485 #endif 2486 #if OMPT_SUPPORT && OMPT_OPTIONAL 2487 // This is the case, if called from omp_init_lock_with_hint: 2488 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2489 if (!codeptr) 2490 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2491 if (ompt_enabled.ompt_callback_lock_destroy) { 2492 
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2493 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2494 } 2495 #endif 2496 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2497 2498 #else // KMP_USE_DYNAMIC_LOCK 2499 2500 kmp_user_lock_p lck; 2501 2502 if ((__kmp_user_lock_kind == lk_tas) && 2503 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2504 OMP_NEST_LOCK_T_SIZE)) { 2505 lck = (kmp_user_lock_p)user_lock; 2506 } 2507 #if KMP_USE_FUTEX 2508 else if ((__kmp_user_lock_kind == lk_futex) && 2509 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2510 OMP_NEST_LOCK_T_SIZE)) { 2511 lck = (kmp_user_lock_p)user_lock; 2512 } 2513 #endif 2514 else { 2515 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2516 } 2517 2518 #if OMPT_SUPPORT && OMPT_OPTIONAL 2519 // This is the case, if called from omp_init_lock_with_hint: 2520 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2521 if (!codeptr) 2522 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2523 if (ompt_enabled.ompt_callback_lock_destroy) { 2524 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2525 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2526 } 2527 #endif 2528 2529 #if USE_ITT_BUILD 2530 __kmp_itt_lock_destroyed(lck); 2531 #endif /* USE_ITT_BUILD */ 2532 2533 DESTROY_NESTED_LOCK(lck); 2534 2535 if ((__kmp_user_lock_kind == lk_tas) && 2536 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2537 OMP_NEST_LOCK_T_SIZE)) { 2538 ; 2539 } 2540 #if KMP_USE_FUTEX 2541 else if ((__kmp_user_lock_kind == lk_futex) && 2542 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2543 OMP_NEST_LOCK_T_SIZE)) { 2544 ; 2545 } 2546 #endif 2547 else { 2548 __kmp_user_lock_free(user_lock, gtid, lck); 2549 } 2550 #endif // KMP_USE_DYNAMIC_LOCK 2551 } // __kmpc_destroy_nest_lock 2552 2553 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2554 KMP_COUNT_BLOCK(OMP_set_lock); 2555 #if KMP_USE_DYNAMIC_LOCK 2556 int tag = KMP_EXTRACT_D_TAG(user_lock); 2557 #if USE_ITT_BUILD 2558 __kmp_itt_lock_acquiring( 2559 (kmp_user_lock_p) 2560 user_lock); // itt function will get to the right lock object. 
2561 #endif 2562 #if OMPT_SUPPORT && OMPT_OPTIONAL 2563 // This is the case, if called from omp_init_lock_with_hint: 2564 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2565 if (!codeptr) 2566 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2567 if (ompt_enabled.ompt_callback_mutex_acquire) { 2568 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2569 ompt_mutex_lock, omp_lock_hint_none, 2570 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2571 codeptr); 2572 } 2573 #endif 2574 #if KMP_USE_INLINED_TAS 2575 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2576 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2577 } else 2578 #elif KMP_USE_INLINED_FUTEX 2579 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2580 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2581 } else 2582 #endif 2583 { 2584 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2585 } 2586 #if USE_ITT_BUILD 2587 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2588 #endif 2589 #if OMPT_SUPPORT && OMPT_OPTIONAL 2590 if (ompt_enabled.ompt_callback_mutex_acquired) { 2591 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2592 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2593 } 2594 #endif 2595 2596 #else // KMP_USE_DYNAMIC_LOCK 2597 2598 kmp_user_lock_p lck; 2599 2600 if ((__kmp_user_lock_kind == lk_tas) && 2601 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2602 lck = (kmp_user_lock_p)user_lock; 2603 } 2604 #if KMP_USE_FUTEX 2605 else if ((__kmp_user_lock_kind == lk_futex) && 2606 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2607 lck = (kmp_user_lock_p)user_lock; 2608 } 2609 #endif 2610 else { 2611 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2612 } 2613 2614 #if USE_ITT_BUILD 2615 __kmp_itt_lock_acquiring(lck); 2616 #endif /* USE_ITT_BUILD */ 2617 #if OMPT_SUPPORT && OMPT_OPTIONAL 2618 // This is the case, if called from omp_init_lock_with_hint: 2619 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2620 if (!codeptr) 2621 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2622 if (ompt_enabled.ompt_callback_mutex_acquire) { 2623 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2624 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2625 (ompt_wait_id_t)lck, codeptr); 2626 } 2627 #endif 2628 2629 ACQUIRE_LOCK(lck, gtid); 2630 2631 #if USE_ITT_BUILD 2632 __kmp_itt_lock_acquired(lck); 2633 #endif /* USE_ITT_BUILD */ 2634 2635 #if OMPT_SUPPORT && OMPT_OPTIONAL 2636 if (ompt_enabled.ompt_callback_mutex_acquired) { 2637 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2638 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 2639 } 2640 #endif 2641 2642 #endif // KMP_USE_DYNAMIC_LOCK 2643 } 2644 2645 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2646 #if KMP_USE_DYNAMIC_LOCK 2647 2648 #if USE_ITT_BUILD 2649 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2650 #endif 2651 #if OMPT_SUPPORT && OMPT_OPTIONAL 2652 // This is the case, if called from omp_init_lock_with_hint: 2653 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2654 if (!codeptr) 2655 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2656 if (ompt_enabled.enabled) { 2657 if (ompt_enabled.ompt_callback_mutex_acquire) { 2658 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2659 ompt_mutex_nest_lock, omp_lock_hint_none, 2660 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2661 codeptr); 2662 } 2663 } 2664 #endif 2665 int acquire_status = 2666 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2667 (void) acquire_status; 
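  // Note: acquire_status records whether this was the first acquisition of the
  // nest lock or a re-acquisition by the owning thread; it is only consumed by
  // the OMPT callbacks below, and the (void) cast keeps it from triggering an
  // unused-variable warning when those callbacks are compiled out.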
2668 #if USE_ITT_BUILD 2669 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2670 #endif 2671 2672 #if OMPT_SUPPORT && OMPT_OPTIONAL 2673 if (ompt_enabled.enabled) { 2674 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2675 if (ompt_enabled.ompt_callback_mutex_acquired) { 2676 // lock_first 2677 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2678 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2679 } 2680 } else { 2681 if (ompt_enabled.ompt_callback_nest_lock) { 2682 // lock_next 2683 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2684 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); 2685 } 2686 } 2687 } 2688 #endif 2689 2690 #else // KMP_USE_DYNAMIC_LOCK 2691 int acquire_status; 2692 kmp_user_lock_p lck; 2693 2694 if ((__kmp_user_lock_kind == lk_tas) && 2695 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2696 OMP_NEST_LOCK_T_SIZE)) { 2697 lck = (kmp_user_lock_p)user_lock; 2698 } 2699 #if KMP_USE_FUTEX 2700 else if ((__kmp_user_lock_kind == lk_futex) && 2701 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2702 OMP_NEST_LOCK_T_SIZE)) { 2703 lck = (kmp_user_lock_p)user_lock; 2704 } 2705 #endif 2706 else { 2707 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2708 } 2709 2710 #if USE_ITT_BUILD 2711 __kmp_itt_lock_acquiring(lck); 2712 #endif /* USE_ITT_BUILD */ 2713 #if OMPT_SUPPORT && OMPT_OPTIONAL 2714 // This is the case, if called from omp_init_lock_with_hint: 2715 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2716 if (!codeptr) 2717 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2718 if (ompt_enabled.enabled) { 2719 if (ompt_enabled.ompt_callback_mutex_acquire) { 2720 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2721 ompt_mutex_nest_lock, omp_lock_hint_none, 2722 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); 2723 } 2724 } 2725 #endif 2726 2727 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2728 2729 #if USE_ITT_BUILD 2730 __kmp_itt_lock_acquired(lck); 2731 #endif /* USE_ITT_BUILD */ 2732 2733 #if OMPT_SUPPORT && OMPT_OPTIONAL 2734 if (ompt_enabled.enabled) { 2735 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2736 if (ompt_enabled.ompt_callback_mutex_acquired) { 2737 // lock_first 2738 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2739 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 2740 } 2741 } else { 2742 if (ompt_enabled.ompt_callback_nest_lock) { 2743 // lock_next 2744 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2745 ompt_scope_begin, (ompt_wait_id_t)lck, codeptr); 2746 } 2747 } 2748 } 2749 #endif 2750 2751 #endif // KMP_USE_DYNAMIC_LOCK 2752 } 2753 2754 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2755 #if KMP_USE_DYNAMIC_LOCK 2756 2757 int tag = KMP_EXTRACT_D_TAG(user_lock); 2758 #if USE_ITT_BUILD 2759 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2760 #endif 2761 #if KMP_USE_INLINED_TAS 2762 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2763 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2764 } else 2765 #elif KMP_USE_INLINED_FUTEX 2766 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2767 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2768 } else 2769 #endif 2770 { 2771 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2772 } 2773 2774 #if OMPT_SUPPORT && OMPT_OPTIONAL 2775 // This is the case, if called from omp_init_lock_with_hint: 2776 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2777 if (!codeptr) 2778 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2779 if 
(ompt_enabled.ompt_callback_mutex_released) { 2780 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2781 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 2782 } 2783 #endif 2784 2785 #else // KMP_USE_DYNAMIC_LOCK 2786 2787 kmp_user_lock_p lck; 2788 2789 /* Can't use serial interval since not block structured */ 2790 /* release the lock */ 2791 2792 if ((__kmp_user_lock_kind == lk_tas) && 2793 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2794 #if KMP_OS_LINUX && \ 2795 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2796 // "fast" path implemented to fix customer performance issue 2797 #if USE_ITT_BUILD 2798 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2799 #endif /* USE_ITT_BUILD */ 2800 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2801 KMP_MB(); 2802 2803 #if OMPT_SUPPORT && OMPT_OPTIONAL 2804 // This is the case, if called from omp_init_lock_with_hint: 2805 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2806 if (!codeptr) 2807 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2808 if (ompt_enabled.ompt_callback_mutex_released) { 2809 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2810 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 2811 } 2812 #endif 2813 2814 return; 2815 #else 2816 lck = (kmp_user_lock_p)user_lock; 2817 #endif 2818 } 2819 #if KMP_USE_FUTEX 2820 else if ((__kmp_user_lock_kind == lk_futex) && 2821 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2822 lck = (kmp_user_lock_p)user_lock; 2823 } 2824 #endif 2825 else { 2826 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 2827 } 2828 2829 #if USE_ITT_BUILD 2830 __kmp_itt_lock_releasing(lck); 2831 #endif /* USE_ITT_BUILD */ 2832 2833 RELEASE_LOCK(lck, gtid); 2834 2835 #if OMPT_SUPPORT && OMPT_OPTIONAL 2836 // This is the case, if called from omp_init_lock_with_hint: 2837 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2838 if (!codeptr) 2839 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2840 if (ompt_enabled.ompt_callback_mutex_released) { 2841 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2842 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 2843 } 2844 #endif 2845 2846 #endif // KMP_USE_DYNAMIC_LOCK 2847 } 2848 2849 /* release the lock */ 2850 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2851 #if KMP_USE_DYNAMIC_LOCK 2852 2853 #if USE_ITT_BUILD 2854 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2855 #endif 2856 int release_status = 2857 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 2858 (void) release_status; 2859 2860 #if OMPT_SUPPORT && OMPT_OPTIONAL 2861 // This is the case, if called from omp_init_lock_with_hint: 2862 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2863 if (!codeptr) 2864 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2865 if (ompt_enabled.enabled) { 2866 if (release_status == KMP_LOCK_RELEASED) { 2867 if (ompt_enabled.ompt_callback_mutex_released) { 2868 // release_lock_last 2869 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2870 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); 2871 } 2872 } else if (ompt_enabled.ompt_callback_nest_lock) { 2873 // release_lock_prev 2874 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2875 ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr); 2876 } 2877 } 2878 #endif 2879 2880 #else // KMP_USE_DYNAMIC_LOCK 2881 2882 kmp_user_lock_p lck; 2883 2884 /* Can't use serial interval since not block structured */ 2885 2886 if ((__kmp_user_lock_kind == lk_tas) && 2887 (sizeof(lck->tas.lk.poll) + 
sizeof(lck->tas.lk.depth_locked) <= 2888 OMP_NEST_LOCK_T_SIZE)) { 2889 #if KMP_OS_LINUX && \ 2890 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2891 // "fast" path implemented to fix customer performance issue 2892 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 2893 #if USE_ITT_BUILD 2894 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2895 #endif /* USE_ITT_BUILD */ 2896 2897 #if OMPT_SUPPORT && OMPT_OPTIONAL 2898 int release_status = KMP_LOCK_STILL_HELD; 2899 #endif 2900 2901 if (--(tl->lk.depth_locked) == 0) { 2902 TCW_4(tl->lk.poll, 0); 2903 #if OMPT_SUPPORT && OMPT_OPTIONAL 2904 release_status = KMP_LOCK_RELEASED; 2905 #endif 2906 } 2907 KMP_MB(); 2908 2909 #if OMPT_SUPPORT && OMPT_OPTIONAL 2910 // This is the case, if called from omp_init_lock_with_hint: 2911 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2912 if (!codeptr) 2913 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2914 if (ompt_enabled.enabled) { 2915 if (release_status == KMP_LOCK_RELEASED) { 2916 if (ompt_enabled.ompt_callback_mutex_released) { 2917 // release_lock_last 2918 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2919 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 2920 } 2921 } else if (ompt_enabled.ompt_callback_nest_lock) { 2922 // release_lock_previous 2923 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2924 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); 2925 } 2926 } 2927 #endif 2928 2929 return; 2930 #else 2931 lck = (kmp_user_lock_p)user_lock; 2932 #endif 2933 } 2934 #if KMP_USE_FUTEX 2935 else if ((__kmp_user_lock_kind == lk_futex) && 2936 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2937 OMP_NEST_LOCK_T_SIZE)) { 2938 lck = (kmp_user_lock_p)user_lock; 2939 } 2940 #endif 2941 else { 2942 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 2943 } 2944 2945 #if USE_ITT_BUILD 2946 __kmp_itt_lock_releasing(lck); 2947 #endif /* USE_ITT_BUILD */ 2948 2949 int release_status; 2950 release_status = RELEASE_NESTED_LOCK(lck, gtid); 2951 #if OMPT_SUPPORT && OMPT_OPTIONAL 2952 // This is the case, if called from omp_init_lock_with_hint: 2953 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2954 if (!codeptr) 2955 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2956 if (ompt_enabled.enabled) { 2957 if (release_status == KMP_LOCK_RELEASED) { 2958 if (ompt_enabled.ompt_callback_mutex_released) { 2959 // release_lock_last 2960 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2961 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); 2962 } 2963 } else if (ompt_enabled.ompt_callback_nest_lock) { 2964 // release_lock_previous 2965 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2966 ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); 2967 } 2968 } 2969 #endif 2970 2971 #endif // KMP_USE_DYNAMIC_LOCK 2972 } 2973 2974 /* try to acquire the lock */ 2975 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2976 KMP_COUNT_BLOCK(OMP_test_lock); 2977 2978 #if KMP_USE_DYNAMIC_LOCK 2979 int rc; 2980 int tag = KMP_EXTRACT_D_TAG(user_lock); 2981 #if USE_ITT_BUILD 2982 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2983 #endif 2984 #if OMPT_SUPPORT && OMPT_OPTIONAL 2985 // This is the case, if called from omp_init_lock_with_hint: 2986 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2987 if (!codeptr) 2988 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2989 if (ompt_enabled.ompt_callback_mutex_acquire) { 2990 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2991 ompt_mutex_lock, omp_lock_hint_none, 2992 
__ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, 2993 codeptr); 2994 } 2995 #endif 2996 #if KMP_USE_INLINED_TAS 2997 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2998 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 2999 } else 3000 #elif KMP_USE_INLINED_FUTEX 3001 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3002 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3003 } else 3004 #endif 3005 { 3006 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3007 } 3008 if (rc) { 3009 #if USE_ITT_BUILD 3010 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3011 #endif 3012 #if OMPT_SUPPORT && OMPT_OPTIONAL 3013 if (ompt_enabled.ompt_callback_mutex_acquired) { 3014 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3015 ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); 3016 } 3017 #endif 3018 return FTN_TRUE; 3019 } else { 3020 #if USE_ITT_BUILD 3021 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3022 #endif 3023 return FTN_FALSE; 3024 } 3025 3026 #else // KMP_USE_DYNAMIC_LOCK 3027 3028 kmp_user_lock_p lck; 3029 int rc; 3030 3031 if ((__kmp_user_lock_kind == lk_tas) && 3032 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3033 lck = (kmp_user_lock_p)user_lock; 3034 } 3035 #if KMP_USE_FUTEX 3036 else if ((__kmp_user_lock_kind == lk_futex) && 3037 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3038 lck = (kmp_user_lock_p)user_lock; 3039 } 3040 #endif 3041 else { 3042 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3043 } 3044 3045 #if USE_ITT_BUILD 3046 __kmp_itt_lock_acquiring(lck); 3047 #endif /* USE_ITT_BUILD */ 3048 #if OMPT_SUPPORT && OMPT_OPTIONAL 3049 // This is the case, if called from omp_init_lock_with_hint: 3050 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3051 if (!codeptr) 3052 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3053 if (ompt_enabled.ompt_callback_mutex_acquire) { 3054 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3055 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 3056 (ompt_wait_id_t)lck, codeptr); 3057 } 3058 #endif 3059 3060 rc = TEST_LOCK(lck, gtid); 3061 #if USE_ITT_BUILD 3062 if (rc) { 3063 __kmp_itt_lock_acquired(lck); 3064 } else { 3065 __kmp_itt_lock_cancelled(lck); 3066 } 3067 #endif /* USE_ITT_BUILD */ 3068 #if OMPT_SUPPORT && OMPT_OPTIONAL 3069 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3070 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3071 ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); 3072 } 3073 #endif 3074 3075 return (rc ? 
FTN_TRUE : FTN_FALSE);
3076
3077 /* Can't use serial interval since not block structured */
3078
3079 #endif // KMP_USE_DYNAMIC_LOCK
3080 }
3081
3082 /* try to acquire the lock */
3083 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3084 #if KMP_USE_DYNAMIC_LOCK
3085 int rc;
3086 #if USE_ITT_BUILD
3087 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3088 #endif
3089 #if OMPT_SUPPORT && OMPT_OPTIONAL
3090 // This is the case, if called from omp_init_lock_with_hint:
3091 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3092 if (!codeptr)
3093 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3094 if (ompt_enabled.ompt_callback_mutex_acquire) {
3095 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3096 ompt_mutex_nest_lock, omp_lock_hint_none,
3097 __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
3098 codeptr);
3099 }
3100 #endif
3101 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3102 #if USE_ITT_BUILD
3103 if (rc) {
3104 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3105 } else {
3106 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3107 }
3108 #endif
3109 #if OMPT_SUPPORT && OMPT_OPTIONAL
3110 if (ompt_enabled.enabled && rc) {
3111 if (rc == 1) {
3112 if (ompt_enabled.ompt_callback_mutex_acquired) {
3113 // lock_first
3114 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3115 ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
3116 }
3117 } else {
3118 if (ompt_enabled.ompt_callback_nest_lock) {
3119 // lock_next
3120 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3121 ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
3122 }
3123 }
3124 }
3125 #endif
3126 return rc;
3127
3128 #else // KMP_USE_DYNAMIC_LOCK
3129
3130 kmp_user_lock_p lck;
3131 int rc;
3132
3133 if ((__kmp_user_lock_kind == lk_tas) &&
3134 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3135 OMP_NEST_LOCK_T_SIZE)) {
3136 lck = (kmp_user_lock_p)user_lock;
3137 }
3138 #if KMP_USE_FUTEX
3139 else if ((__kmp_user_lock_kind == lk_futex) &&
3140 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3141 OMP_NEST_LOCK_T_SIZE)) {
3142 lck = (kmp_user_lock_p)user_lock;
3143 }
3144 #endif
3145 else {
3146 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3147 }
3148
3149 #if USE_ITT_BUILD
3150 __kmp_itt_lock_acquiring(lck);
3151 #endif /* USE_ITT_BUILD */
3152
3153 #if OMPT_SUPPORT && OMPT_OPTIONAL
3154 // This is the case, if called from omp_init_lock_with_hint:
3155 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3156 if (!codeptr)
3157 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3158 if (ompt_enabled.enabled &&
3159 ompt_enabled.ompt_callback_mutex_acquire) {
3160 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3161 ompt_mutex_nest_lock, omp_lock_hint_none,
3162 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
3163 }
3164 #endif
3165
3166 rc = TEST_NESTED_LOCK(lck, gtid);
3167 #if USE_ITT_BUILD
3168 if (rc) {
3169 __kmp_itt_lock_acquired(lck);
3170 } else {
3171 __kmp_itt_lock_cancelled(lck);
3172 }
3173 #endif /* USE_ITT_BUILD */
3174 #if OMPT_SUPPORT && OMPT_OPTIONAL
3175 if (ompt_enabled.enabled && rc) {
3176 if (rc == 1) {
3177 if (ompt_enabled.ompt_callback_mutex_acquired) {
3178 // lock_first
3179 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3180 ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
3181 }
3182 } else {
3183 if (ompt_enabled.ompt_callback_nest_lock) {
3184 // lock_next
3185
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3186 ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr); 3187 } 3188 } 3189 } 3190 #endif 3191 return rc; 3192 3193 /* Can't use serial interval since not block structured */ 3194 3195 #endif // KMP_USE_DYNAMIC_LOCK 3196 } 3197 3198 // Interface to fast scalable reduce methods routines 3199 3200 // keep the selected method in a thread local structure for cross-function 3201 // usage: will be used in __kmpc_end_reduce* functions; 3202 // another solution: to re-determine the method one more time in 3203 // __kmpc_end_reduce* functions (new prototype required then) 3204 // AT: which solution is better? 3205 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3206 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3207 3208 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3209 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3210 3211 // description of the packed_reduction_method variable: look at the macros in 3212 // kmp.h 3213 3214 // used in a critical section reduce block 3215 static __forceinline void 3216 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3217 kmp_critical_name *crit) { 3218 3219 // this lock was visible to a customer and to the threading profile tool as a 3220 // serial overhead span (although it's used for an internal purpose only) 3221 // why was it visible in previous implementation? 3222 // should we keep it visible in new reduce block? 3223 kmp_user_lock_p lck; 3224 3225 #if KMP_USE_DYNAMIC_LOCK 3226 3227 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3228 // Check if it is initialized. 3229 if (*lk == 0) { 3230 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3231 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3232 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3233 } else { 3234 __kmp_init_indirect_csptr(crit, loc, global_tid, 3235 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3236 } 3237 } 3238 // Branch for accessing the actual lock object and set operation. This 3239 // branching is inevitable since this lock initialization does not follow the 3240 // normal dispatch path (lock table is not used). 3241 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3242 lck = (kmp_user_lock_p)lk; 3243 KMP_DEBUG_ASSERT(lck != NULL); 3244 if (__kmp_env_consistency_check) { 3245 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3246 } 3247 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3248 } else { 3249 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3250 lck = ilk->lock; 3251 KMP_DEBUG_ASSERT(lck != NULL); 3252 if (__kmp_env_consistency_check) { 3253 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3254 } 3255 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3256 } 3257 3258 #else // KMP_USE_DYNAMIC_LOCK 3259 3260 // We know that the fast reduction code is only emitted by Intel compilers 3261 // with 32 byte critical sections. If there isn't enough space, then we 3262 // have to use a pointer. 
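  // When a pointer is needed, the kmp_critical_name buffer does not hold the
  // lock itself: __kmp_get_critical_section_ptr() supplies a separately managed
  // lock whose address ends up stored in the buffer, which is why the matching
  // end routine reads it back via *((kmp_user_lock_p *)crit).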
3263 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3264 lck = (kmp_user_lock_p)crit; 3265 } else { 3266 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3267 } 3268 KMP_DEBUG_ASSERT(lck != NULL); 3269 3270 if (__kmp_env_consistency_check) 3271 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3272 3273 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3274 3275 #endif // KMP_USE_DYNAMIC_LOCK 3276 } 3277 3278 // used in a critical section reduce block 3279 static __forceinline void 3280 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3281 kmp_critical_name *crit) { 3282 3283 kmp_user_lock_p lck; 3284 3285 #if KMP_USE_DYNAMIC_LOCK 3286 3287 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3288 lck = (kmp_user_lock_p)crit; 3289 if (__kmp_env_consistency_check) 3290 __kmp_pop_sync(global_tid, ct_critical, loc); 3291 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3292 } else { 3293 kmp_indirect_lock_t *ilk = 3294 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3295 if (__kmp_env_consistency_check) 3296 __kmp_pop_sync(global_tid, ct_critical, loc); 3297 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3298 } 3299 3300 #else // KMP_USE_DYNAMIC_LOCK 3301 3302 // We know that the fast reduction code is only emitted by Intel compilers 3303 // with 32 byte critical sections. If there isn't enough space, then we have 3304 // to use a pointer. 3305 if (__kmp_base_user_lock_size > 32) { 3306 lck = *((kmp_user_lock_p *)crit); 3307 KMP_ASSERT(lck != NULL); 3308 } else { 3309 lck = (kmp_user_lock_p)crit; 3310 } 3311 3312 if (__kmp_env_consistency_check) 3313 __kmp_pop_sync(global_tid, ct_critical, loc); 3314 3315 __kmp_release_user_lock_with_checks(lck, global_tid); 3316 3317 #endif // KMP_USE_DYNAMIC_LOCK 3318 } // __kmp_end_critical_section_reduce_block 3319 3320 #if OMP_40_ENABLED 3321 static __forceinline int 3322 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3323 int *task_state) { 3324 kmp_team_t *team; 3325 3326 // Check if we are inside the teams construct? 3327 if (th->th.th_teams_microtask) { 3328 *team_p = team = th->th.th_team; 3329 if (team->t.t_level == th->th.th_teams_level) { 3330 // This is reduction at teams construct. 3331 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3332 // Let's swap teams temporarily for the reduction. 3333 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3334 th->th.th_team = team->t.t_parent; 3335 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3336 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3337 *task_state = th->th.th_task_state; 3338 th->th.th_task_state = 0; 3339 3340 return 1; 3341 } 3342 } 3343 return 0; 3344 } 3345 3346 static __forceinline void 3347 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3348 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3349 th->th.th_info.ds.ds_tid = 0; 3350 th->th.th_team = team; 3351 th->th.th_team_nproc = team->t.t_nproc; 3352 th->th.th_task_team = team->t.t_task_team[task_state]; 3353 th->th.th_task_state = task_state; 3354 } 3355 #endif 3356 3357 /* 2.a.i. Reduce Block without a terminating barrier */ 3358 /*! 
3359 @ingroup SYNCHRONIZATION 3360 @param loc source location information 3361 @param global_tid global thread number 3362 @param num_vars number of items (variables) to be reduced 3363 @param reduce_size size of data in bytes to be reduced 3364 @param reduce_data pointer to data to be reduced 3365 @param reduce_func callback function providing reduction operation on two 3366 operands and returning result of reduction in lhs_data 3367 @param lck pointer to the unique lock data structure 3368 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3369 threads if atomic reduction needed 3370 3371 The nowait version is used for a reduce clause with the nowait argument. 3372 */ 3373 kmp_int32 3374 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3375 size_t reduce_size, void *reduce_data, 3376 void (*reduce_func)(void *lhs_data, void *rhs_data), 3377 kmp_critical_name *lck) { 3378 3379 KMP_COUNT_BLOCK(REDUCE_nowait); 3380 int retval = 0; 3381 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3382 #if OMP_40_ENABLED 3383 kmp_info_t *th; 3384 kmp_team_t *team; 3385 int teams_swapped = 0, task_state; 3386 #endif 3387 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3388 3389 // why do we need this initialization here at all? 3390 // Reduction clause can not be used as a stand-alone directive. 3391 3392 // do not call __kmp_serial_initialize(), it will be called by 3393 // __kmp_parallel_initialize() if needed 3394 // possible detection of false-positive race by the threadchecker ??? 3395 if (!TCR_4(__kmp_init_parallel)) 3396 __kmp_parallel_initialize(); 3397 3398 #if OMP_50_ENABLED 3399 __kmp_resume_if_soft_paused(); 3400 #endif 3401 3402 // check correctness of reduce block nesting 3403 #if KMP_USE_DYNAMIC_LOCK 3404 if (__kmp_env_consistency_check) 3405 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3406 #else 3407 if (__kmp_env_consistency_check) 3408 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3409 #endif 3410 3411 #if OMP_40_ENABLED 3412 th = __kmp_thread_from_gtid(global_tid); 3413 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3414 #endif // OMP_40_ENABLED 3415 3416 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3417 // the value should be kept in a variable 3418 // the variable should be either a construct-specific or thread-specific 3419 // property, not a team specific property 3420 // (a thread can reach the next reduce block on the next construct, reduce 3421 // method may differ on the next construct) 3422 // an ident_t "loc" parameter could be used as a construct-specific property 3423 // (what if loc == 0?) 
3424 // (if both construct-specific and team-specific variables were shared, 3425 // then unness extra syncs should be needed) 3426 // a thread-specific variable is better regarding two issues above (next 3427 // construct and extra syncs) 3428 // a thread-specific "th_local.reduction_method" variable is used currently 3429 // each thread executes 'determine' and 'set' lines (no need to execute by one 3430 // thread, to avoid unness extra syncs) 3431 3432 packed_reduction_method = __kmp_determine_reduction_method( 3433 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3434 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3435 3436 if (packed_reduction_method == critical_reduce_block) { 3437 3438 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3439 retval = 1; 3440 3441 } else if (packed_reduction_method == empty_reduce_block) { 3442 3443 // usage: if team size == 1, no synchronization is required ( Intel 3444 // platforms only ) 3445 retval = 1; 3446 3447 } else if (packed_reduction_method == atomic_reduce_block) { 3448 3449 retval = 2; 3450 3451 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3452 // won't be called by the code gen) 3453 // (it's not quite good, because the checking block has been closed by 3454 // this 'pop', 3455 // but atomic operation has not been executed yet, will be executed 3456 // slightly later, literally on next instruction) 3457 if (__kmp_env_consistency_check) 3458 __kmp_pop_sync(global_tid, ct_reduce, loc); 3459 3460 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3461 tree_reduce_block)) { 3462 3463 // AT: performance issue: a real barrier here 3464 // AT: (if master goes slow, other threads are blocked here waiting for the 3465 // master to come and release them) 3466 // AT: (it's not what a customer might expect specifying NOWAIT clause) 3467 // AT: (specifying NOWAIT won't result in improvement of performance, it'll 3468 // be confusing to a customer) 3469 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3470 // might go faster and be more in line with sense of NOWAIT 3471 // AT: TO DO: do epcc test and compare times 3472 3473 // this barrier should be invisible to a customer and to the threading profile 3474 // tool (it's neither a terminating barrier nor customer's code, it's 3475 // used for an internal purpose) 3476 #if OMPT_SUPPORT 3477 // JP: can this barrier potentially leed to task scheduling? 3478 // JP: as long as there is a barrier in the implementation, OMPT should and 3479 // will provide the barrier events 3480 // so we set-up the necessary frame/return addresses. 3481 ompt_frame_t *ompt_frame; 3482 if (ompt_enabled.enabled) { 3483 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3484 if (ompt_frame->enter_frame.ptr == NULL) 3485 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3486 OMPT_STORE_RETURN_ADDRESS(global_tid); 3487 } 3488 #endif 3489 #if USE_ITT_NOTIFY 3490 __kmp_threads[global_tid]->th.th_ident = loc; 3491 #endif 3492 retval = 3493 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3494 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3495 retval = (retval != 0) ? 
(0) : (1); 3496 #if OMPT_SUPPORT && OMPT_OPTIONAL 3497 if (ompt_enabled.enabled) { 3498 ompt_frame->enter_frame = ompt_data_none; 3499 } 3500 #endif 3501 3502 // all other workers except master should do this pop here 3503 // ( none of the other workers will get to __kmpc_end_reduce_nowait() ) 3504 if (__kmp_env_consistency_check) { 3505 if (retval == 0) { 3506 __kmp_pop_sync(global_tid, ct_reduce, loc); 3507 } 3508 } 3509 3510 } else { 3511 3512 // should never reach this block 3513 KMP_ASSERT(0); // "unexpected method" 3514 } 3515 #if OMP_40_ENABLED 3516 if (teams_swapped) { 3517 __kmp_restore_swapped_teams(th, team, task_state); 3518 } 3519 #endif 3520 KA_TRACE( 3521 10, 3522 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3523 global_tid, packed_reduction_method, retval)); 3524 3525 return retval; 3526 } 3527 3528 /*! 3529 @ingroup SYNCHRONIZATION 3530 @param loc source location information 3531 @param global_tid global thread id. 3532 @param lck pointer to the unique lock data structure 3533 3534 Finish the execution of a reduce nowait. 3535 */ 3536 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3537 kmp_critical_name *lck) { 3538 3539 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3540 3541 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3542 3543 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3544 3545 if (packed_reduction_method == critical_reduce_block) { 3546 3547 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3548 3549 } else if (packed_reduction_method == empty_reduce_block) { 3550 3551 // usage: if team size == 1, no synchronization is required ( on Intel 3552 // platforms only ) 3553 3554 } else if (packed_reduction_method == atomic_reduce_block) { 3555 3556 // neither master nor other workers should get here 3557 // (code gen does not generate this call in case 2: atomic reduce block) 3558 // actually it would be better to remove this else-if branch entirely; 3559 // after removal this value would be checked by the 'else' branch and would assert 3560 3561 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3562 tree_reduce_block)) { 3563 3564 // only master gets here 3565 3566 } else { 3567 3568 // should never reach this block 3569 KMP_ASSERT(0); // "unexpected method" 3570 } 3571 3572 if (__kmp_env_consistency_check) 3573 __kmp_pop_sync(global_tid, ct_reduce, loc); 3574 3575 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3576 global_tid, packed_reduction_method)); 3577 3578 return; 3579 } 3580 3581 /* 2.a.ii. Reduce Block with a terminating barrier */ 3582 3583 /*! 3584 @ingroup SYNCHRONIZATION 3585 @param loc source location information 3586 @param global_tid global thread number 3587 @param num_vars number of items (variables) to be reduced 3588 @param reduce_size size of data in bytes to be reduced 3589 @param reduce_data pointer to data to be reduced 3590 @param reduce_func callback function providing reduction operation on two 3591 operands and returning result of reduction in lhs_data 3592 @param lck pointer to the unique lock data structure 3593 @result 1 for the master thread, 0 for all other team threads, 2 for all team 3594 threads if atomic reduction needed 3595 3596 A blocking reduce that includes an implicit barrier.
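As an illustrative sketch only (the names <tt>reduce_fn</tt>, <tt>crit</tt>, <tt>sum</tt> and
<tt>priv_sum</tt> are placeholders, not part of this interface), a compiler might lower
<tt>reduction(+ : sum)</tt> on a worksharing construct into a pattern keyed on this
return value:

@code
    int priv_sum = 0;                // thread-private partial result
    // ... this thread's loop chunk accumulates into priv_sum ...
    switch (__kmpc_reduce(&loc, gtid, 1, sizeof(int), &priv_sum,
                          reduce_fn, // folds rhs_data into lhs_data
                          &crit)) {
    case 1: // master thread (or every thread on the critical-section path):
            // combine into the original variable, then close the construct
      sum += priv_sum;
      __kmpc_end_reduce(&loc, gtid, &crit);
      break;
    case 2: // atomic path: each thread adds priv_sum to sum with an atomic
            // update (e.g. one of the __kmpc_atomic_* entry points), then
            // calls __kmpc_end_reduce() so the implied barrier is executed
      break;
    default: // 0: other workers; the tree reduction has already folded their
             // contribution into the master's reduce_data
      break;
    }
@endcode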
3597 */ 3598 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3599 size_t reduce_size, void *reduce_data, 3600 void (*reduce_func)(void *lhs_data, void *rhs_data), 3601 kmp_critical_name *lck) { 3602 KMP_COUNT_BLOCK(REDUCE_wait); 3603 int retval = 0; 3604 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3605 #if OMP_40_ENABLED 3606 kmp_info_t *th; 3607 kmp_team_t *team; 3608 int teams_swapped = 0, task_state; 3609 #endif 3610 3611 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3612 3613 // why do we need this initialization here at all? 3614 // Reduction clause can not be a stand-alone directive. 3615 3616 // do not call __kmp_serial_initialize(), it will be called by 3617 // __kmp_parallel_initialize() if needed 3618 // possible detection of false-positive race by the threadchecker ??? 3619 if (!TCR_4(__kmp_init_parallel)) 3620 __kmp_parallel_initialize(); 3621 3622 #if OMP_50_ENABLED 3623 __kmp_resume_if_soft_paused(); 3624 #endif 3625 3626 // check correctness of reduce block nesting 3627 #if KMP_USE_DYNAMIC_LOCK 3628 if (__kmp_env_consistency_check) 3629 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3630 #else 3631 if (__kmp_env_consistency_check) 3632 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3633 #endif 3634 3635 #if OMP_40_ENABLED 3636 th = __kmp_thread_from_gtid(global_tid); 3637 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3638 #endif // OMP_40_ENABLED 3639 3640 packed_reduction_method = __kmp_determine_reduction_method( 3641 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3642 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3643 3644 if (packed_reduction_method == critical_reduce_block) { 3645 3646 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3647 retval = 1; 3648 3649 } else if (packed_reduction_method == empty_reduce_block) { 3650 3651 // usage: if team size == 1, no synchronization is required ( Intel 3652 // platforms only ) 3653 retval = 1; 3654 3655 } else if (packed_reduction_method == atomic_reduce_block) { 3656 3657 retval = 2; 3658 3659 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3660 tree_reduce_block)) { 3661 3662 // case tree_reduce_block: 3663 // this barrier should be visible to a customer and to the threading profile 3664 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3665 #if OMPT_SUPPORT 3666 ompt_frame_t *ompt_frame; 3667 if (ompt_enabled.enabled) { 3668 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3669 if (ompt_frame->enter_frame.ptr == NULL) 3670 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3671 OMPT_STORE_RETURN_ADDRESS(global_tid); 3672 } 3673 #endif 3674 #if USE_ITT_NOTIFY 3675 __kmp_threads[global_tid]->th.th_ident = 3676 loc; // needed for correct notification of frames 3677 #endif 3678 retval = 3679 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3680 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3681 retval = (retval != 0) ? 
(0) : (1); 3682 #if OMPT_SUPPORT && OMPT_OPTIONAL 3683 if (ompt_enabled.enabled) { 3684 ompt_frame->enter_frame = ompt_data_none; 3685 } 3686 #endif 3687 3688 // all other workers except master should do this pop here 3689 // ( none of other workers except master will enter __kmpc_end_reduce() ) 3690 if (__kmp_env_consistency_check) { 3691 if (retval == 0) { // 0: all other workers; 1: master 3692 __kmp_pop_sync(global_tid, ct_reduce, loc); 3693 } 3694 } 3695 3696 } else { 3697 3698 // should never reach this block 3699 KMP_ASSERT(0); // "unexpected method" 3700 } 3701 #if OMP_40_ENABLED 3702 if (teams_swapped) { 3703 __kmp_restore_swapped_teams(th, team, task_state); 3704 } 3705 #endif 3706 3707 KA_TRACE(10, 3708 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3709 global_tid, packed_reduction_method, retval)); 3710 3711 return retval; 3712 } 3713 3714 /*! 3715 @ingroup SYNCHRONIZATION 3716 @param loc source location information 3717 @param global_tid global thread id. 3718 @param lck pointer to the unique lock data structure 3719 3720 Finish the execution of a blocking reduce. 3721 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3722 start function. 3723 */ 3724 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3725 kmp_critical_name *lck) { 3726 3727 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3728 #if OMP_40_ENABLED 3729 kmp_info_t *th; 3730 kmp_team_t *team; 3731 int teams_swapped = 0, task_state; 3732 #endif 3733 3734 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3735 3736 #if OMP_40_ENABLED 3737 th = __kmp_thread_from_gtid(global_tid); 3738 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3739 #endif // OMP_40_ENABLED 3740 3741 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3742 3743 // this barrier should be visible to a customer and to the threading profile 3744 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3745 3746 if (packed_reduction_method == critical_reduce_block) { 3747 3748 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3749 3750 // TODO: implicit barrier: should be exposed 3751 #if OMPT_SUPPORT 3752 ompt_frame_t *ompt_frame; 3753 if (ompt_enabled.enabled) { 3754 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3755 if (ompt_frame->enter_frame.ptr == NULL) 3756 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3757 OMPT_STORE_RETURN_ADDRESS(global_tid); 3758 } 3759 #endif 3760 #if USE_ITT_NOTIFY 3761 __kmp_threads[global_tid]->th.th_ident = loc; 3762 #endif 3763 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3764 #if OMPT_SUPPORT && OMPT_OPTIONAL 3765 if (ompt_enabled.enabled) { 3766 ompt_frame->enter_frame = ompt_data_none; 3767 } 3768 #endif 3769 3770 } else if (packed_reduction_method == empty_reduce_block) { 3771 3772 // usage: if team size==1, no synchronization is required (Intel platforms only) 3773 3774 // TODO: implicit barrier: should be exposed 3775 #if OMPT_SUPPORT 3776 ompt_frame_t *ompt_frame; 3777 if (ompt_enabled.enabled) { 3778 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3779 if (ompt_frame->enter_frame.ptr == NULL) 3780 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3781 OMPT_STORE_RETURN_ADDRESS(global_tid); 3782 } 3783 #endif 3784 #if USE_ITT_NOTIFY 3785 __kmp_threads[global_tid]->th.th_ident = loc; 3786 #endif 3787 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3788 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 3789 if (ompt_enabled.enabled) { 3790 ompt_frame->enter_frame = ompt_data_none; 3791 } 3792 #endif 3793 3794 } else if (packed_reduction_method == atomic_reduce_block) { 3795 3796 #if OMPT_SUPPORT 3797 ompt_frame_t *ompt_frame; 3798 if (ompt_enabled.enabled) { 3799 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3800 if (ompt_frame->enter_frame.ptr == NULL) 3801 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3802 OMPT_STORE_RETURN_ADDRESS(global_tid); 3803 } 3804 #endif 3805 // TODO: implicit barrier: should be exposed 3806 #if USE_ITT_NOTIFY 3807 __kmp_threads[global_tid]->th.th_ident = loc; 3808 #endif 3809 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3810 #if OMPT_SUPPORT && OMPT_OPTIONAL 3811 if (ompt_enabled.enabled) { 3812 ompt_frame->enter_frame = ompt_data_none; 3813 } 3814 #endif 3815 3816 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3817 tree_reduce_block)) { 3818 3819 // only master executes here (master releases all other workers) 3820 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3821 global_tid); 3822 3823 } else { 3824 3825 // should never reach this block 3826 KMP_ASSERT(0); // "unexpected method" 3827 } 3828 #if OMP_40_ENABLED 3829 if (teams_swapped) { 3830 __kmp_restore_swapped_teams(th, team, task_state); 3831 } 3832 #endif 3833 3834 if (__kmp_env_consistency_check) 3835 __kmp_pop_sync(global_tid, ct_reduce, loc); 3836 3837 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 3838 global_tid, packed_reduction_method)); 3839 3840 return; 3841 } 3842 3843 #undef __KMP_GET_REDUCTION_METHOD 3844 #undef __KMP_SET_REDUCTION_METHOD 3845 3846 /* end of interface to fast scalable reduce routines */ 3847 3848 kmp_uint64 __kmpc_get_taskid() { 3849 3850 kmp_int32 gtid; 3851 kmp_info_t *thread; 3852 3853 gtid = __kmp_get_gtid(); 3854 if (gtid < 0) { 3855 return 0; 3856 } 3857 thread = __kmp_thread_from_gtid(gtid); 3858 return thread->th.th_current_task->td_task_id; 3859 3860 } // __kmpc_get_taskid 3861 3862 kmp_uint64 __kmpc_get_parent_taskid() { 3863 3864 kmp_int32 gtid; 3865 kmp_info_t *thread; 3866 kmp_taskdata_t *parent_task; 3867 3868 gtid = __kmp_get_gtid(); 3869 if (gtid < 0) { 3870 return 0; 3871 } 3872 thread = __kmp_thread_from_gtid(gtid); 3873 parent_task = thread->th.th_current_task->td_parent; 3874 return (parent_task == NULL ? 0 : parent_task->td_task_id); 3875 3876 } // __kmpc_get_parent_taskid 3877 3878 #if OMP_45_ENABLED 3879 /*! 3880 @ingroup WORK_SHARING 3881 @param loc source location information. 3882 @param gtid global thread number. 3883 @param num_dims number of associated doacross loops. 3884 @param dims info on loops bounds. 3885 3886 Initialize doacross loop information. 3887 Expect compiler send us inclusive bounds, 3888 e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
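For illustration only (this lowering is schematic, not a normative code-generation
contract), a doacross loop such as

@code
    #pragma omp for ordered(1)
    for (int i = 2; i < 9; i += 2) {
      #pragma omp ordered depend(sink : i - 2)
      // ... consume the result of iteration i - 2 ...
      #pragma omp ordered depend(source)
    }
@endcode

maps onto this interface with one dimension, dims[0] holding lo=2, up=8, st=2:
each thread calls __kmpc_doacross_init() once on entering the loop,
__kmpc_doacross_wait() with the sink iteration vector and __kmpc_doacross_post()
with the current iteration vector inside the body, and __kmpc_doacross_fini()
after finishing its chunk.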
3889 */ 3890 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 3891 const struct kmp_dim *dims) { 3892 int j, idx; 3893 kmp_int64 last, trace_count; 3894 kmp_info_t *th = __kmp_threads[gtid]; 3895 kmp_team_t *team = th->th.th_team; 3896 kmp_uint32 *flags; 3897 kmp_disp_t *pr_buf = th->th.th_dispatch; 3898 dispatch_shared_info_t *sh_buf; 3899 3900 KA_TRACE( 3901 20, 3902 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 3903 gtid, num_dims, !team->t.t_serialized)); 3904 KMP_DEBUG_ASSERT(dims != NULL); 3905 KMP_DEBUG_ASSERT(num_dims > 0); 3906 3907 if (team->t.t_serialized) { 3908 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 3909 return; // no dependencies if team is serialized 3910 } 3911 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 3912 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 3913 // the next loop 3914 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 3915 3916 // Save bounds info into allocated private buffer 3917 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 3918 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 3919 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 3920 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 3921 pr_buf->th_doacross_info[0] = 3922 (kmp_int64)num_dims; // first element is number of dimensions 3923 // Save also address of num_done in order to access it later without knowing 3924 // the buffer index 3925 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 3926 pr_buf->th_doacross_info[2] = dims[0].lo; 3927 pr_buf->th_doacross_info[3] = dims[0].up; 3928 pr_buf->th_doacross_info[4] = dims[0].st; 3929 last = 5; 3930 for (j = 1; j < num_dims; ++j) { 3931 kmp_int64 3932 range_length; // To keep ranges of all dimensions but the first dims[0] 3933 if (dims[j].st == 1) { // most common case 3934 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 3935 range_length = dims[j].up - dims[j].lo + 1; 3936 } else { 3937 if (dims[j].st > 0) { 3938 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 3939 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 3940 } else { // negative increment 3941 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 3942 range_length = 3943 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 3944 } 3945 } 3946 pr_buf->th_doacross_info[last++] = range_length; 3947 pr_buf->th_doacross_info[last++] = dims[j].lo; 3948 pr_buf->th_doacross_info[last++] = dims[j].up; 3949 pr_buf->th_doacross_info[last++] = dims[j].st; 3950 } 3951 3952 // Compute total trip count. 3953 // Start with range of dims[0] which we don't need to keep in the buffer. 
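// For example, with the bounds from the description above (lo=2, up=8, st=2),
// dims[0] contributes (8 - 2) / 2 + 1 = 4 iterations; multiplying by the kept
// range lengths of the remaining dimensions yields trace_count, the number of
// iterations tracked by the flag bit-vector allocated below (one bit each).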
3954 if (dims[0].st == 1) { // most common case 3955 trace_count = dims[0].up - dims[0].lo + 1; 3956 } else if (dims[0].st > 0) { 3957 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 3958 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 3959 } else { // negative increment 3960 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 3961 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 3962 } 3963 for (j = 1; j < num_dims; ++j) { 3964 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 3965 } 3966 KMP_DEBUG_ASSERT(trace_count > 0); 3967 3968 // Check if shared buffer is not occupied by other loop (idx - 3969 // __kmp_dispatch_num_buffers) 3970 if (idx != sh_buf->doacross_buf_idx) { 3971 // Shared buffer is occupied, wait for it to be free 3972 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 3973 __kmp_eq_4, NULL); 3974 } 3975 #if KMP_32_BIT_ARCH 3976 // Check if we are the first thread. After the CAS the first thread gets 0, 3977 // others get 1 if initialization is in progress, allocated pointer otherwise. 3978 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 3979 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 3980 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 3981 #else 3982 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 3983 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 3984 #endif 3985 if (flags == NULL) { 3986 // we are the first thread, allocate the array of flags 3987 size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration 3988 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 3989 KMP_MB(); 3990 sh_buf->doacross_flags = flags; 3991 } else if (flags == (kmp_uint32 *)1) { 3992 #if KMP_32_BIT_ARCH 3993 // initialization is still in progress, need to wait 3994 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 3995 #else 3996 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 3997 #endif 3998 KMP_YIELD(TRUE); 3999 KMP_MB(); 4000 } else { 4001 KMP_MB(); 4002 } 4003 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4004 pr_buf->th_doacross_flags = 4005 sh_buf->doacross_flags; // save private copy in order to not 4006 // touch shared buffer on each iteration 4007 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4008 } 4009 4010 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4011 kmp_int32 shft, num_dims, i; 4012 kmp_uint32 flag; 4013 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4014 kmp_info_t *th = __kmp_threads[gtid]; 4015 kmp_team_t *team = th->th.th_team; 4016 kmp_disp_t *pr_buf; 4017 kmp_int64 lo, up, st; 4018 4019 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4020 if (team->t.t_serialized) { 4021 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4022 return; // no dependencies if team is serialized 4023 } 4024 4025 // calculate sequential iteration number and check out-of-bounds condition 4026 pr_buf = th->th.th_dispatch; 4027 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4028 num_dims = pr_buf->th_doacross_info[0]; 4029 lo = pr_buf->th_doacross_info[2]; 4030 up = pr_buf->th_doacross_info[3]; 4031 st = pr_buf->th_doacross_info[4]; 4032 if (st == 1) { // most common case 4033 if (vec[0] < lo || vec[0] > up) { 4034 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4035 "bounds [%lld,%lld]\n", 4036 gtid, vec[0], lo, up)); 4037 return; 4038 } 4039 iter_number = 
vec[0] - lo; 4040 } else if (st > 0) { 4041 if (vec[0] < lo || vec[0] > up) { 4042 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4043 "bounds [%lld,%lld]\n", 4044 gtid, vec[0], lo, up)); 4045 return; 4046 } 4047 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4048 } else { // negative increment 4049 if (vec[0] > lo || vec[0] < up) { 4050 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4051 "bounds [%lld,%lld]\n", 4052 gtid, vec[0], lo, up)); 4053 return; 4054 } 4055 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4056 } 4057 for (i = 1; i < num_dims; ++i) { 4058 kmp_int64 iter, ln; 4059 kmp_int32 j = i * 4; 4060 ln = pr_buf->th_doacross_info[j + 1]; 4061 lo = pr_buf->th_doacross_info[j + 2]; 4062 up = pr_buf->th_doacross_info[j + 3]; 4063 st = pr_buf->th_doacross_info[j + 4]; 4064 if (st == 1) { 4065 if (vec[i] < lo || vec[i] > up) { 4066 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4067 "bounds [%lld,%lld]\n", 4068 gtid, vec[i], lo, up)); 4069 return; 4070 } 4071 iter = vec[i] - lo; 4072 } else if (st > 0) { 4073 if (vec[i] < lo || vec[i] > up) { 4074 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4075 "bounds [%lld,%lld]\n", 4076 gtid, vec[i], lo, up)); 4077 return; 4078 } 4079 iter = (kmp_uint64)(vec[i] - lo) / st; 4080 } else { // st < 0 4081 if (vec[i] > lo || vec[i] < up) { 4082 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4083 "bounds [%lld,%lld]\n", 4084 gtid, vec[i], lo, up)); 4085 return; 4086 } 4087 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4088 } 4089 iter_number = iter + ln * iter_number; 4090 } 4091 shft = iter_number % 32; // use 32-bit granularity 4092 iter_number >>= 5; // divided by 32 4093 flag = 1 << shft; 4094 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4095 KMP_YIELD(TRUE); 4096 } 4097 KMP_MB(); 4098 KA_TRACE(20, 4099 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4100 gtid, (iter_number << 5) + shft)); 4101 } 4102 4103 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4104 kmp_int32 shft, num_dims, i; 4105 kmp_uint32 flag; 4106 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4107 kmp_info_t *th = __kmp_threads[gtid]; 4108 kmp_team_t *team = th->th.th_team; 4109 kmp_disp_t *pr_buf; 4110 kmp_int64 lo, st; 4111 4112 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4113 if (team->t.t_serialized) { 4114 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4115 return; // no dependencies if team is serialized 4116 } 4117 4118 // calculate sequential iteration number (same as in "wait" but no 4119 // out-of-bounds checks) 4120 pr_buf = th->th.th_dispatch; 4121 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4122 num_dims = pr_buf->th_doacross_info[0]; 4123 lo = pr_buf->th_doacross_info[2]; 4124 st = pr_buf->th_doacross_info[4]; 4125 if (st == 1) { // most common case 4126 iter_number = vec[0] - lo; 4127 } else if (st > 0) { 4128 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4129 } else { // negative increment 4130 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4131 } 4132 for (i = 1; i < num_dims; ++i) { 4133 kmp_int64 iter, ln; 4134 kmp_int32 j = i * 4; 4135 ln = pr_buf->th_doacross_info[j + 1]; 4136 lo = pr_buf->th_doacross_info[j + 2]; 4137 st = pr_buf->th_doacross_info[j + 4]; 4138 if (st == 1) { 4139 iter = vec[i] - lo; 4140 } else if (st > 0) { 4141 iter = (kmp_uint64)(vec[i] - lo) / st; 4142 } else { // st < 0 4143 iter = 
(kmp_uint64)(lo - vec[i]) / (-st); 4144 } 4145 iter_number = iter + ln * iter_number; 4146 } 4147 shft = iter_number % 32; // use 32-bit granularity 4148 iter_number >>= 5; // divided by 32 4149 flag = 1 << shft; 4150 KMP_MB(); 4151 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4152 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4153 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4154 (iter_number << 5) + shft)); 4155 } 4156 4157 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4158 kmp_int32 num_done; 4159 kmp_info_t *th = __kmp_threads[gtid]; 4160 kmp_team_t *team = th->th.th_team; 4161 kmp_disp_t *pr_buf = th->th.th_dispatch; 4162 4163 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4164 if (team->t.t_serialized) { 4165 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4166 return; // nothing to do 4167 } 4168 num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; 4169 if (num_done == th->th.th_team_nproc) { 4170 // we are the last thread, need to free shared resources 4171 int idx = pr_buf->th_doacross_buf_idx - 1; 4172 dispatch_shared_info_t *sh_buf = 4173 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4174 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4175 (kmp_int64)&sh_buf->doacross_num_done); 4176 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4177 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4178 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4179 sh_buf->doacross_flags = NULL; 4180 sh_buf->doacross_num_done = 0; 4181 sh_buf->doacross_buf_idx += 4182 __kmp_dispatch_num_buffers; // free buffer for future re-use 4183 } 4184 // free private resources (need to keep buffer index forever) 4185 pr_buf->th_doacross_flags = NULL; 4186 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4187 pr_buf->th_doacross_info = NULL; 4188 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4189 } 4190 #endif 4191 4192 #if OMP_50_ENABLED 4193 int __kmpc_get_target_offload(void) { 4194 if (!__kmp_init_serial) { 4195 __kmp_serial_initialize(); 4196 } 4197 return __kmp_target_offload; 4198 } 4199 4200 int __kmpc_pause_resource(kmp_pause_status_t level) { 4201 if (!__kmp_init_serial) { 4202 return 1; // Can't pause if runtime is not initialized 4203 } 4204 return __kmp_pause_resource(level); 4205 } 4206 #endif // OMP_50_ENABLED 4207 4208 // end of file // 4209