1 /*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22
23 #define MAX_MESSAGE 512
24
25 // flags will be used in future, e.g. to implement openmp_strict library
26 // restrictions
27
28 /*!
29 * @ingroup STARTUP_SHUTDOWN
30 * @param loc in source location information
31 * @param flags in for future use (currently ignored)
32 *
33 * Initialize the runtime library. This call is optional; if it is not made then
34 * it will be implicitly called by attempts to use other library functions.
35 */
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37 // By default __kmpc_begin() is no-op.
38 char *env;
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 __kmp_assign_root_init_mask();
43 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44 } else if (__kmp_ignore_mppbeg() == FALSE) {
45 // By default __kmp_ignore_mppbeg() returns TRUE.
46 __kmp_internal_begin();
47 KC_TRACE(10, ("__kmpc_begin: called\n"));
48 }
49 }
50
51 /*!
52 * @ingroup STARTUP_SHUTDOWN
53 * @param loc source location information
54 *
55 Shut down the runtime library. This call is also optional; even if it is made,
56 it does nothing unless the `KMP_IGNORE_MPPEND` environment variable is set to
57 zero.
58 */
59 void __kmpc_end(ident_t *loc) {
60 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63 // returns FALSE and __kmpc_end() will unregister this root (it can cause
64 // library shut down).
65 if (__kmp_ignore_mppend() == FALSE) {
66 KC_TRACE(10, ("__kmpc_end: called\n"));
67 KA_TRACE(30, ("__kmpc_end\n"));
68
69 __kmp_internal_end_thread(-1);
70 }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72 // Normal exit process on Windows does not allow worker threads of the final
73 // parallel region to finish reporting their events, so shutting down the
74 // library here fixes the issue at least for the cases where __kmpc_end() is
75 // placed properly.
76 if (ompt_enabled.enabled)
77 __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
80
81 /*!
82 @ingroup THREAD_STATES
83 @param loc Source location information.
84 @return The global thread index of the active thread.
85
86 This function can be called in any context.
87
88 If the runtime has only been entered at the outermost level from a
89 single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
90 that which would be returned by omp_get_thread_num() in the outermost
91 active parallel construct. (Or zero if there is no active parallel
92 construct, since the primary thread is necessarily thread zero).
93
94 If multiple non-OpenMP threads all enter an OpenMP construct then this
95 will be a unique thread identifier among all the threads created by
96 the OpenMP runtime (but the value cannot be defined in terms of
97 OpenMP thread ids returned by omp_get_thread_num()).
98 */
99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104 return gtid;
105 }
106
107 /*!
108 @ingroup THREAD_STATES
109 @param loc Source location information.
110 @return The number of threads under control of the OpenMP<sup>*</sup> runtime
111
112 This function can be called in any context.
113 It returns the total number of threads under the control of the OpenMP runtime.
114 That is not a number that can be determined by any OpenMP standard calls, since
115 the library may be called from more than one non-OpenMP thread, and this
116 reflects the total over all such calls. Similarly, because the runtime keeps
117 underlying threads alive even when they are not active (the cost of creating
118 and destroying OS threads is high), this call counts all such threads even if
119 they are not currently waiting for work.
120 */
121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122 KC_TRACE(10,
123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125 return TCR_4(__kmp_all_nth);
126 }
127
128 /*!
129 @ingroup THREAD_STATES
130 @param loc Source location information.
131 @return The thread number of the calling thread in the innermost active parallel
132 construct.
133 */
134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136 return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138
139 /*!
140 @ingroup THREAD_STATES
141 @param loc Source location information.
142 @return The number of threads in the innermost active parallel construct.
143 */
144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147 return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149
150 /*!
151 * @ingroup DEPRECATED
152 * @param loc location description
153 *
154 * This function need not be called. It always returns TRUE.
155 */
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158
159 return TRUE;
160
161 #else
162
163 const char *semi2;
164 const char *semi3;
165 int line_no;
166
167 if (__kmp_par_range == 0) {
168 return TRUE;
169 }
170 semi2 = loc->psource;
171 if (semi2 == NULL) {
172 return TRUE;
173 }
174 semi2 = strchr(semi2, ';');
175 if (semi2 == NULL) {
176 return TRUE;
177 }
178 semi2 = strchr(semi2 + 1, ';');
179 if (semi2 == NULL) {
180 return TRUE;
181 }
182 if (__kmp_par_range_filename[0]) {
183 const char *name = semi2 - 1;
184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185 name--;
186 }
187 if ((*name == '/') || (*name == ';')) {
188 name++;
189 }
190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191 return __kmp_par_range < 0;
192 }
193 }
194 semi3 = strchr(semi2 + 1, ';');
195 if (__kmp_par_range_routine[0]) {
196 if ((semi3 != NULL) && (semi3 > semi2) &&
197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198 return __kmp_par_range < 0;
199 }
200 }
201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203 return __kmp_par_range > 0;
204 }
205 return __kmp_par_range < 0;
206 }
207 return TRUE;
208
209 #endif /* KMP_DEBUG */
210 }
211
212 /*!
213 @ingroup THREAD_STATES
214 @param loc Source location information.
215 @return 1 if this thread is executing inside an active parallel region, zero if
216 not.
217 */
218 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219 return __kmp_entry_thread()->th.th_root->r.r_active;
220 }
221
222 /*!
223 @ingroup PARALLEL
224 @param loc source location information
225 @param global_tid global thread number
226 @param num_threads number of threads requested for this parallel construct
227
228 Set the number of threads to be used by the next fork spawned by this thread.
229 This call is only required if the parallel construct has a `num_threads` clause.
230 */
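// Illustrative sketch only (not emitted by this file): a front end compiling
//   #pragma omp parallel num_threads(4)
// might issue, roughly,
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_fork_call(&loc, /*argc=*/0, outlined_body);
// where `outlined_body` is a hypothetical outlined microtask; the exact
// lowering is compiler-specific.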
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232 kmp_int32 num_threads) {
233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234 global_tid, num_threads));
235 __kmp_assert_valid_gtid(global_tid);
236 __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241 /* the num_threads are automatically popped */
242 }
243
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245 kmp_int32 proc_bind) {
246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247 proc_bind));
248 __kmp_assert_valid_gtid(global_tid);
249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
251
252 /*!
253 @ingroup PARALLEL
254 @param loc source location information
255 @param argc total number of arguments in the ellipsis
256 @param microtask pointer to callback routine consisting of outlined parallel
257 construct
258 @param ... pointers to shared variables that aren't global
259
260 Do the actual fork and call the microtask in the relevant number of threads.
261 */
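// Illustrative sketch only: for
//   int x;
//   #pragma omp parallel shared(x)
//   { use(&x); }
// a front end typically outlines the region into a microtask matching the
// kmpc_micro signature and passes the shared variables by address, roughly:
//   void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *x) { use(x); }
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined, &x);
// `outlined` and `use` are placeholder names; the real lowering is
// compiler-specific.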
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263 int gtid = __kmp_entry_gtid();
264
265 #if (KMP_STATS_ENABLED)
266 // If we were in a serial region, then stop the serial timer, record
267 // the event, and start parallel region timer
268 stats_state_e previous_state = KMP_GET_THREAD_STATE();
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 } else {
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273 }
274 int inParallel = __kmpc_in_parallel(loc);
275 if (inParallel) {
276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277 } else {
278 KMP_COUNT_BLOCK(OMP_PARALLEL);
279 }
280 #endif
281
282 // maybe it is enough to save thr_state here
283 {
284 va_list ap;
285 va_start(ap, microtask);
286
287 #if OMPT_SUPPORT
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 kmp_team_t *parent_team = master_th->th.th_team;
292 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
293 if (lwt)
294 ompt_frame = &(lwt->ompt_task_info.frame);
295 else {
296 int tid = __kmp_tid_from_gtid(gtid);
297 ompt_frame = &(
298 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
299 }
300 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
301 }
302 OMPT_STORE_RETURN_ADDRESS(gtid);
303 #endif
304
305 #if INCLUDE_SSC_MARKS
306 SSC_MARK_FORKING();
307 #endif
308 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
309 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
310 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
311 kmp_va_addr_of(ap));
312 #if INCLUDE_SSC_MARKS
313 SSC_MARK_JOINING();
314 #endif
315 __kmp_join_call(loc, gtid
316 #if OMPT_SUPPORT
317 ,
318 fork_context_intel
319 #endif
320 );
321
322 va_end(ap);
323 }
324
325 #if KMP_STATS_ENABLED
326 if (previous_state == stats_state_e::SERIAL_REGION) {
327 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
328 KMP_SET_THREAD_STATE(previous_state);
329 } else {
330 KMP_POP_PARTITIONED_TIMER();
331 }
332 #endif // KMP_STATS_ENABLED
333 }
334
335 /*!
336 @ingroup PARALLEL
337 @param loc source location information
338 @param global_tid global thread number
339 @param num_teams number of teams requested for the teams construct
340 @param num_threads number of threads per team requested for the teams construct
341
342 Set the number of teams to be used by the teams construct.
343 This call is only required if the teams construct has a `num_teams` clause
344 or a `thread_limit` clause (or both).
345 */
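// Illustrative sketch only: for
//   #pragma omp teams num_teams(8) thread_limit(4)
// a front end might emit, roughly,
//   __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/8, /*num_threads=*/4);
// immediately before the corresponding __kmpc_fork_teams() call; passing 0
// for either argument leaves that value to the runtime default.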
346 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
347 kmp_int32 num_teams, kmp_int32 num_threads) {
348 KA_TRACE(20,
349 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
350 global_tid, num_teams, num_threads));
351 __kmp_assert_valid_gtid(global_tid);
352 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
353 }
354
355 /*!
356 @ingroup PARALLEL
357 @param loc source location information
358 @param global_tid global thread number
359 @param num_teams_lb lower bound on the number of teams requested for the teams
360 construct
361 @param num_teams_ub upper bound on the number of teams requested for the teams
362 construct
363 @param num_threads number of threads per team requested for the teams construct
364
365 Set the number of teams to be used by the teams construct. The number of initial
366 teams created will be greater than or equal to the lower bound and less than or
367 equal to the upper bound.
368 This call is only required if the teams construct has a `num_teams` clause
369 or a `thread_limit` clause (or both).
370 */
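// Illustrative sketch only: the OpenMP 5.1 form
//   #pragma omp teams num_teams(4 : 8)
// can be lowered to, roughly,
//   __kmpc_push_num_teams_51(&loc, gtid, /*lb=*/4, /*ub=*/8, /*num_threads=*/0);
// before the matching __kmpc_fork_teams() call.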
371 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
372 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
373 kmp_int32 num_threads) {
374 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
375 " num_teams_ub=%d num_threads=%d\n",
376 global_tid, num_teams_lb, num_teams_ub, num_threads));
377 __kmp_assert_valid_gtid(global_tid);
378 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
379 num_threads);
380 }
381
382 /*!
383 @ingroup PARALLEL
384 @param loc source location information
385 @param argc total number of arguments in the ellipsis
386 @param microtask pointer to callback routine consisting of outlined teams
387 construct
388 @param ... pointers to shared variables that aren't global
389
390 Do the actual fork and call the microtask in the relevant number of threads.
391 */
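// Illustrative sketch only: a bare
//   #pragma omp teams
//   { team_work(); }
// is typically lowered to an outlined microtask plus
//   __kmpc_fork_teams(&loc, /*argc=*/0, (kmpc_micro)outlined_teams_body);
// with any num_teams/thread_limit clauses communicated beforehand via
// __kmpc_push_num_teams(). `outlined_teams_body` is a placeholder name.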
392 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
393 ...) {
394 int gtid = __kmp_entry_gtid();
395 kmp_info_t *this_thr = __kmp_threads[gtid];
396 va_list ap;
397 va_start(ap, microtask);
398
399 #if KMP_STATS_ENABLED
400 KMP_COUNT_BLOCK(OMP_TEAMS);
401 stats_state_e previous_state = KMP_GET_THREAD_STATE();
402 if (previous_state == stats_state_e::SERIAL_REGION) {
403 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
404 } else {
405 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
406 }
407 #endif
408
409 // remember teams entry point and nesting level
410 this_thr->th.th_teams_microtask = microtask;
411 this_thr->th.th_teams_level =
412 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
413
414 #if OMPT_SUPPORT
415 kmp_team_t *parent_team = this_thr->th.th_team;
416 int tid = __kmp_tid_from_gtid(gtid);
417 if (ompt_enabled.enabled) {
418 parent_team->t.t_implicit_task_taskdata[tid]
419 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
420 }
421 OMPT_STORE_RETURN_ADDRESS(gtid);
422 #endif
423
424 // check if __kmpc_push_num_teams called, set default number of teams
425 // otherwise
426 if (this_thr->th.th_teams_size.nteams == 0) {
427 __kmp_push_num_teams(loc, gtid, 0, 0);
428 }
429 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
430 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
431 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
432
433 __kmp_fork_call(
434 loc, gtid, fork_context_intel, argc,
435 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
436 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
437 __kmp_join_call(loc, gtid
438 #if OMPT_SUPPORT
439 ,
440 fork_context_intel
441 #endif
442 );
443
444 // Pop current CG root off list
445 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
446 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
447 this_thr->th.th_cg_roots = tmp->up;
448 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
449 " to node %p. cg_nthreads was %d\n",
450 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
451 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
452 int i = tmp->cg_nthreads--;
453 if (i == 1) { // check if we are the last thread in CG (not always the case)
454 __kmp_free(tmp);
455 }
456 // Restore current task's thread_limit from CG root
457 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
458 this_thr->th.th_current_task->td_icvs.thread_limit =
459 this_thr->th.th_cg_roots->cg_thread_limit;
460
461 this_thr->th.th_teams_microtask = NULL;
462 this_thr->th.th_teams_level = 0;
463 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
464 va_end(ap);
465 #if KMP_STATS_ENABLED
466 if (previous_state == stats_state_e::SERIAL_REGION) {
467 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
468 KMP_SET_THREAD_STATE(previous_state);
469 } else {
470 KMP_POP_PARTITIONED_TIMER();
471 }
472 #endif // KMP_STATS_ENABLED
473 }
474
475 // I don't think this function should ever have been exported.
476 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
477 // openmp code ever called it, but it's been exported from the RTL for so
478 // long that I'm afraid to remove the definition.
479 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
480
481 /*!
482 @ingroup PARALLEL
483 @param loc source location information
484 @param global_tid global thread number
485
486 Enter a serialized parallel construct. This interface is used to handle a
487 conditional parallel region, like this,
488 @code
489 #pragma omp parallel if (condition)
490 @endcode
491 when the condition is false.
492 */
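// Illustrative sketch only: when the if-clause condition is false, a front end
// typically runs the outlined body on the encountering thread, roughly:
//   if (cond) {
//     __kmpc_fork_call(&loc, 0, body);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     kmp_int32 zero = 0;
//     body(&gtid, &zero);  // executed by this thread only
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }
// `body` is a placeholder for the outlined microtask.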
493 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
494 // The implementation is now in kmp_runtime.cpp so that it can share static
495 // functions with kmp_fork_call since the tasks to be done are similar in
496 // each case.
497 __kmp_assert_valid_gtid(global_tid);
498 #if OMPT_SUPPORT
499 OMPT_STORE_RETURN_ADDRESS(global_tid);
500 #endif
501 __kmp_serialized_parallel(loc, global_tid);
502 }
503
504 /*!
505 @ingroup PARALLEL
506 @param loc source location information
507 @param global_tid global thread number
508
509 Leave a serialized parallel construct.
510 */
511 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
512 kmp_internal_control_t *top;
513 kmp_info_t *this_thr;
514 kmp_team_t *serial_team;
515
516 KC_TRACE(10,
517 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
518
519 /* skip all this code for autopar serialized loops since it results in
520 unacceptable overhead */
521 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
522 return;
523
524 // Not autopar code
525 __kmp_assert_valid_gtid(global_tid);
526 if (!TCR_4(__kmp_init_parallel))
527 __kmp_parallel_initialize();
528
529 __kmp_resume_if_soft_paused();
530
531 this_thr = __kmp_threads[global_tid];
532 serial_team = this_thr->th.th_serial_team;
533
534 kmp_task_team_t *task_team = this_thr->th.th_task_team;
535 // we need to wait for the proxy tasks before finishing the thread
536 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
537 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
538
539 KMP_MB();
540 KMP_DEBUG_ASSERT(serial_team);
541 KMP_ASSERT(serial_team->t.t_serialized);
542 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
543 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
544 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
545 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
546
547 #if OMPT_SUPPORT
548 if (ompt_enabled.enabled &&
549 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
550 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
551 if (ompt_enabled.ompt_callback_implicit_task) {
552 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
553 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
554 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
555 }
556
557 // reset/clear the task id only after unlinking the task
558 ompt_data_t *parent_task_data;
559 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
560
561 if (ompt_enabled.ompt_callback_parallel_end) {
562 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
563 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
564 ompt_parallel_invoker_program | ompt_parallel_team,
565 OMPT_LOAD_RETURN_ADDRESS(global_tid));
566 }
567 __ompt_lw_taskteam_unlink(this_thr);
568 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
569 }
570 #endif
571
572 /* If necessary, pop the internal control stack values and replace the team
573 * values */
574 top = serial_team->t.t_control_stack_top;
575 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
576 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
577 serial_team->t.t_control_stack_top = top->next;
578 __kmp_free(top);
579 }
580
581 // if( serial_team -> t.t_serialized > 1 )
582 serial_team->t.t_level--;
583
584 /* pop dispatch buffers stack */
585 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
586 {
587 dispatch_private_info_t *disp_buffer =
588 serial_team->t.t_dispatch->th_disp_buffer;
589 serial_team->t.t_dispatch->th_disp_buffer =
590 serial_team->t.t_dispatch->th_disp_buffer->next;
591 __kmp_free(disp_buffer);
592 }
593 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
594
595 --serial_team->t.t_serialized;
596 if (serial_team->t.t_serialized == 0) {
597
598 /* return to the parallel section */
599
600 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
601 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
602 __kmp_clear_x87_fpu_status_word();
603 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
604 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
605 }
606 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
607
608 #if OMPD_SUPPORT
609 if (ompd_state & OMPD_ENABLE_BP)
610 ompd_bp_parallel_end();
611 #endif
612
613 this_thr->th.th_team = serial_team->t.t_parent;
614 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
615
616 /* restore values cached in the thread */
617 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
618 this_thr->th.th_team_master =
619 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
620 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
621
622 /* TODO the below shouldn't need to be adjusted for serialized teams */
623 this_thr->th.th_dispatch =
624 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
625
626 __kmp_pop_current_task_from_thread(this_thr);
627
628 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
629 this_thr->th.th_current_task->td_flags.executing = 1;
630
631 if (__kmp_tasking_mode != tskm_immediate_exec) {
632 // Copy the task team from the new child / old parent team to the thread.
633 this_thr->th.th_task_team =
634 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
635 KA_TRACE(20,
636 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
637 "team %p\n",
638 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
639 }
640 } else {
641 if (__kmp_tasking_mode != tskm_immediate_exec) {
642 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
643 "depth of serial team %p to %d\n",
644 global_tid, serial_team, serial_team->t.t_serialized));
645 }
646 }
647
648 if (__kmp_env_consistency_check)
649 __kmp_pop_parallel(global_tid, NULL);
650 #if OMPT_SUPPORT
651 if (ompt_enabled.enabled)
652 this_thr->th.ompt_thread_info.state =
653 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
654 : ompt_state_work_parallel);
655 #endif
656 }
657
658 /*!
659 @ingroup SYNCHRONIZATION
660 @param loc source location information.
661
662 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
663 depending on the memory ordering convention obeyed by the compiler
664 even that may not be necessary).
665 */
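// Illustrative sketch only:
//   #pragma omp flush
// is normally lowered to a single runtime call,
//   __kmpc_flush(&loc);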
666 void __kmpc_flush(ident_t *loc) {
667 KC_TRACE(10, ("__kmpc_flush: called\n"));
668
669 /* need an explicit fence here since the library uses volatile rather than atomics */
670 KMP_MB(); /* Flush all pending memory write invalidates. */
671
672 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
673 #if KMP_MIC
674 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
675 // We shouldn't need it, though, since the ABI rules require that
676 // * If the compiler generates NGO stores it also generates the fence
677 // * If users hand-code NGO stores they should insert the fence
678 // therefore no incomplete unordered stores should be visible.
679 #else
680 // C74404
681 // This is to address non-temporal store instructions (sfence needed).
682 // The clflush instruction is also addressed (mfence needed).
683 // Probably the non-temporal load movntdqa instruction should also be
684 // addressed.
685 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
686 if (!__kmp_cpuinfo.initialized) {
687 __kmp_query_cpuid(&__kmp_cpuinfo);
688 }
689 if (!__kmp_cpuinfo.sse2) {
690 // CPU cannot execute SSE2 instructions.
691 } else {
692 #if KMP_COMPILER_ICC
693 _mm_mfence();
694 #elif KMP_COMPILER_MSVC
695 MemoryBarrier();
696 #else
697 __sync_synchronize();
698 #endif // KMP_COMPILER_ICC
699 }
700 #endif // KMP_MIC
701 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
702 KMP_ARCH_RISCV64)
703 // Nothing to see here move along
704 #elif KMP_ARCH_PPC64
705 // Nothing needed here (we have a real MB above).
706 #else
707 #error Unknown or unsupported architecture
708 #endif
709
710 #if OMPT_SUPPORT && OMPT_OPTIONAL
711 if (ompt_enabled.ompt_callback_flush) {
712 ompt_callbacks.ompt_callback(ompt_callback_flush)(
713 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
714 }
715 #endif
716 }
717
718 /* -------------------------------------------------------------------------- */
719 /*!
720 @ingroup SYNCHRONIZATION
721 @param loc source location information
722 @param global_tid thread id.
723
724 Execute a barrier.
725 */
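// Illustrative sketch only: an explicit
//   #pragma omp barrier
// is normally lowered to
//   __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
// and the same entry point implements the implicit barrier at the end of
// worksharing constructs that lack a nowait clause.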
726 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
727 KMP_COUNT_BLOCK(OMP_BARRIER);
728 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
729 __kmp_assert_valid_gtid(global_tid);
730
731 if (!TCR_4(__kmp_init_parallel))
732 __kmp_parallel_initialize();
733
734 __kmp_resume_if_soft_paused();
735
736 if (__kmp_env_consistency_check) {
737 if (loc == 0) {
738 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
739 }
740 __kmp_check_barrier(global_tid, ct_barrier, loc);
741 }
742
743 #if OMPT_SUPPORT
744 ompt_frame_t *ompt_frame;
745 if (ompt_enabled.enabled) {
746 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
747 if (ompt_frame->enter_frame.ptr == NULL)
748 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
749 }
750 OMPT_STORE_RETURN_ADDRESS(global_tid);
751 #endif
752 __kmp_threads[global_tid]->th.th_ident = loc;
753 // TODO: explicit barrier_wait_id:
754 // this function is called when 'barrier' directive is present or
755 // implicit barrier at the end of a worksharing construct.
756 // 1) better to add a per-thread barrier counter to a thread data structure
757 // 2) set to 0 when a new team is created
758 // 4) no sync is required
759
760 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
761 #if OMPT_SUPPORT && OMPT_OPTIONAL
762 if (ompt_enabled.enabled) {
763 ompt_frame->enter_frame = ompt_data_none;
764 }
765 #endif
766 }
767
768 /* The BARRIER for a MASTER section is always explicit */
769 /*!
770 @ingroup WORK_SHARING
771 @param loc source location information.
772 @param global_tid global thread number.
773 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
774 */
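// Illustrative sketch only: a
//   #pragma omp master
// region is normally bracketed as
//   if (__kmpc_master(&loc, gtid)) {
//     ... master-only code ...
//     __kmpc_end_master(&loc, gtid);
//   }
// so only the thread for which __kmpc_master() returns 1 runs the block and
// calls the matching end routine.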
775 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
776 int status = 0;
777
778 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
779 __kmp_assert_valid_gtid(global_tid);
780
781 if (!TCR_4(__kmp_init_parallel))
782 __kmp_parallel_initialize();
783
784 __kmp_resume_if_soft_paused();
785
786 if (KMP_MASTER_GTID(global_tid)) {
787 KMP_COUNT_BLOCK(OMP_MASTER);
788 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
789 status = 1;
790 }
791
792 #if OMPT_SUPPORT && OMPT_OPTIONAL
793 if (status) {
794 if (ompt_enabled.ompt_callback_masked) {
795 kmp_info_t *this_thr = __kmp_threads[global_tid];
796 kmp_team_t *team = this_thr->th.th_team;
797
798 int tid = __kmp_tid_from_gtid(global_tid);
799 ompt_callbacks.ompt_callback(ompt_callback_masked)(
800 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
801 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
802 OMPT_GET_RETURN_ADDRESS(0));
803 }
804 }
805 #endif
806
807 if (__kmp_env_consistency_check) {
808 #if KMP_USE_DYNAMIC_LOCK
809 if (status)
810 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
811 else
812 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
813 #else
814 if (status)
815 __kmp_push_sync(global_tid, ct_master, loc, NULL);
816 else
817 __kmp_check_sync(global_tid, ct_master, loc, NULL);
818 #endif
819 }
820
821 return status;
822 }
823
824 /*!
825 @ingroup WORK_SHARING
826 @param loc source location information.
827 @param global_tid global thread number.
828
829 Mark the end of a <tt>master</tt> region. This should only be called by the
830 thread that executes the <tt>master</tt> region.
831 */
832 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
833 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
834 __kmp_assert_valid_gtid(global_tid);
835 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
836 KMP_POP_PARTITIONED_TIMER();
837
838 #if OMPT_SUPPORT && OMPT_OPTIONAL
839 kmp_info_t *this_thr = __kmp_threads[global_tid];
840 kmp_team_t *team = this_thr->th.th_team;
841 if (ompt_enabled.ompt_callback_masked) {
842 int tid = __kmp_tid_from_gtid(global_tid);
843 ompt_callbacks.ompt_callback(ompt_callback_masked)(
844 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
845 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
846 OMPT_GET_RETURN_ADDRESS(0));
847 }
848 #endif
849
850 if (__kmp_env_consistency_check) {
851 if (KMP_MASTER_GTID(global_tid))
852 __kmp_pop_sync(global_tid, ct_master, loc);
853 }
854 }
855
856 /*!
857 @ingroup WORK_SHARING
858 @param loc source location information.
859 @param global_tid global thread number.
860 @param filter result of evaluating filter clause on thread global_tid, or zero
861 if no filter clause present
862 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
863 */
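// Illustrative sketch only: a
//   #pragma omp masked filter(2)
// region can be lowered to
//   if (__kmpc_masked(&loc, gtid, 2)) {
//     ... masked code ...
//     __kmpc_end_masked(&loc, gtid);
//   }
// with the filter expression evaluated by the caller and passed in.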
864 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
865 int status = 0;
866 int tid;
867 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
868 __kmp_assert_valid_gtid(global_tid);
869
870 if (!TCR_4(__kmp_init_parallel))
871 __kmp_parallel_initialize();
872
873 __kmp_resume_if_soft_paused();
874
875 tid = __kmp_tid_from_gtid(global_tid);
876 if (tid == filter) {
877 KMP_COUNT_BLOCK(OMP_MASKED);
878 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
879 status = 1;
880 }
881
882 #if OMPT_SUPPORT && OMPT_OPTIONAL
883 if (status) {
884 if (ompt_enabled.ompt_callback_masked) {
885 kmp_info_t *this_thr = __kmp_threads[global_tid];
886 kmp_team_t *team = this_thr->th.th_team;
887 ompt_callbacks.ompt_callback(ompt_callback_masked)(
888 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
889 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
890 OMPT_GET_RETURN_ADDRESS(0));
891 }
892 }
893 #endif
894
895 if (__kmp_env_consistency_check) {
896 #if KMP_USE_DYNAMIC_LOCK
897 if (status)
898 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
899 else
900 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
901 #else
902 if (status)
903 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
904 else
905 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
906 #endif
907 }
908
909 return status;
910 }
911
912 /*!
913 @ingroup WORK_SHARING
914 @param loc source location information.
915 @param global_tid global thread number.
916
917 Mark the end of a <tt>masked</tt> region. This should only be called by the
918 thread that executes the <tt>masked</tt> region.
919 */
920 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
921 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
922 __kmp_assert_valid_gtid(global_tid);
923 KMP_POP_PARTITIONED_TIMER();
924
925 #if OMPT_SUPPORT && OMPT_OPTIONAL
926 kmp_info_t *this_thr = __kmp_threads[global_tid];
927 kmp_team_t *team = this_thr->th.th_team;
928 if (ompt_enabled.ompt_callback_masked) {
929 int tid = __kmp_tid_from_gtid(global_tid);
930 ompt_callbacks.ompt_callback(ompt_callback_masked)(
931 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
932 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
933 OMPT_GET_RETURN_ADDRESS(0));
934 }
935 #endif
936
937 if (__kmp_env_consistency_check) {
938 __kmp_pop_sync(global_tid, ct_masked, loc);
939 }
940 }
941
942 /*!
943 @ingroup WORK_SHARING
944 @param loc source location information.
945 @param gtid global thread number.
946
947 Start execution of an <tt>ordered</tt> construct.
948 */
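// Illustrative sketch only: inside a chunk handed out by the loop dispatch
// routines, an `ordered` block is bracketed as
//   __kmpc_ordered(&loc, gtid);
//   ... ordered body ...
//   __kmpc_end_ordered(&loc, gtid);
// so iterations enter the body in sequential iteration order.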
949 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
950 int cid = 0;
951 kmp_info_t *th;
952 KMP_DEBUG_ASSERT(__kmp_init_serial);
953
954 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
955 __kmp_assert_valid_gtid(gtid);
956
957 if (!TCR_4(__kmp_init_parallel))
958 __kmp_parallel_initialize();
959
960 __kmp_resume_if_soft_paused();
961
962 #if USE_ITT_BUILD
963 __kmp_itt_ordered_prep(gtid);
964 // TODO: ordered_wait_id
965 #endif /* USE_ITT_BUILD */
966
967 th = __kmp_threads[gtid];
968
969 #if OMPT_SUPPORT && OMPT_OPTIONAL
970 kmp_team_t *team;
971 ompt_wait_id_t lck;
972 void *codeptr_ra;
973 OMPT_STORE_RETURN_ADDRESS(gtid);
974 if (ompt_enabled.enabled) {
975 team = __kmp_team_from_gtid(gtid);
976 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
977 /* OMPT state update */
978 th->th.ompt_thread_info.wait_id = lck;
979 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
980
981 /* OMPT event callback */
982 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
983 if (ompt_enabled.ompt_callback_mutex_acquire) {
984 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
985 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
986 codeptr_ra);
987 }
988 }
989 #endif
990
991 if (th->th.th_dispatch->th_deo_fcn != 0)
992 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
993 else
994 __kmp_parallel_deo(&gtid, &cid, loc);
995
996 #if OMPT_SUPPORT && OMPT_OPTIONAL
997 if (ompt_enabled.enabled) {
998 /* OMPT state update */
999 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1000 th->th.ompt_thread_info.wait_id = 0;
1001
1002 /* OMPT event callback */
1003 if (ompt_enabled.ompt_callback_mutex_acquired) {
1004 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1005 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1006 }
1007 }
1008 #endif
1009
1010 #if USE_ITT_BUILD
1011 __kmp_itt_ordered_start(gtid);
1012 #endif /* USE_ITT_BUILD */
1013 }
1014
1015 /*!
1016 @ingroup WORK_SHARING
1017 @param loc source location information.
1018 @param gtid global thread number.
1019
1020 End execution of an <tt>ordered</tt> construct.
1021 */
1022 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1023 int cid = 0;
1024 kmp_info_t *th;
1025
1026 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1027 __kmp_assert_valid_gtid(gtid);
1028
1029 #if USE_ITT_BUILD
1030 __kmp_itt_ordered_end(gtid);
1031 // TODO: ordered_wait_id
1032 #endif /* USE_ITT_BUILD */
1033
1034 th = __kmp_threads[gtid];
1035
1036 if (th->th.th_dispatch->th_dxo_fcn != 0)
1037 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1038 else
1039 __kmp_parallel_dxo(&gtid, &cid, loc);
1040
1041 #if OMPT_SUPPORT && OMPT_OPTIONAL
1042 OMPT_STORE_RETURN_ADDRESS(gtid);
1043 if (ompt_enabled.ompt_callback_mutex_released) {
1044 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1045 ompt_mutex_ordered,
1046 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1047 ->t.t_ordered.dt.t_value,
1048 OMPT_LOAD_RETURN_ADDRESS(gtid));
1049 }
1050 #endif
1051 }
1052
1053 #if KMP_USE_DYNAMIC_LOCK
1054
1055 static __forceinline void
1056 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1057 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1058 // Pointer to the allocated indirect lock is written to crit, while indexing
1059 // is ignored.
1060 void *idx;
1061 kmp_indirect_lock_t **lck;
1062 lck = (kmp_indirect_lock_t **)crit;
1063 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1064 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1065 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1066 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1067 KA_TRACE(20,
1068 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1069 #if USE_ITT_BUILD
1070 __kmp_itt_critical_creating(ilk->lock, loc);
1071 #endif
1072 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1073 if (status == 0) {
1074 #if USE_ITT_BUILD
1075 __kmp_itt_critical_destroyed(ilk->lock);
1076 #endif
1077 // We don't really need to destroy the unclaimed lock here since it will be
1078 // cleaned up at program exit.
1079 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1080 }
1081 KMP_DEBUG_ASSERT(*lck != NULL);
1082 }
1083
1084 // Fast-path acquire tas lock
1085 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1086 { \
1087 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1088 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1089 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1090 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1091 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1092 kmp_uint32 spins; \
1093 KMP_FSYNC_PREPARE(l); \
1094 KMP_INIT_YIELD(spins); \
1095 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1096 do { \
1097 if (TCR_4(__kmp_nth) > \
1098 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1099 KMP_YIELD(TRUE); \
1100 } else { \
1101 KMP_YIELD_SPIN(spins); \
1102 } \
1103 __kmp_spin_backoff(&backoff); \
1104 } while ( \
1105 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1106 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1107 } \
1108 KMP_FSYNC_ACQUIRED(l); \
1109 }
1110
1111 // Fast-path test tas lock
1112 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1113 { \
1114 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1115 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1116 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1117 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1118 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1119 }
1120
1121 // Fast-path release tas lock
1122 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1123 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
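// For intuition only (this is not the runtime's code): the macros above form a
// test-and-test-and-set lock with backoff. In portable C++ the acquire path
// would look roughly like
//   std::atomic<int> poll{FREE};
//   int expected = FREE;
//   while (poll.load(std::memory_order_relaxed) != FREE ||
//          !poll.compare_exchange_weak(expected, BUSY,
//                                      std::memory_order_acquire)) {
//     expected = FREE;
//     back_off_or_yield();  // placeholder for the KMP_YIELD/backoff logic
//   }
// and the release path is a single store with release ordering, as in
// KMP_RELEASE_TAS_LOCK above.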
1124
1125 #if KMP_USE_FUTEX
1126
1127 #include <sys/syscall.h>
1128 #include <unistd.h>
1129 #ifndef FUTEX_WAIT
1130 #define FUTEX_WAIT 0
1131 #endif
1132 #ifndef FUTEX_WAKE
1133 #define FUTEX_WAKE 1
1134 #endif
1135
1136 // Fast-path acquire futex lock
1137 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1138 { \
1139 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1140 kmp_int32 gtid_code = (gtid + 1) << 1; \
1141 KMP_MB(); \
1142 KMP_FSYNC_PREPARE(ftx); \
1143 kmp_int32 poll_val; \
1144 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1145 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1146 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1147 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1148 if (!cond) { \
1149 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1150 poll_val | \
1151 KMP_LOCK_BUSY(1, futex))) { \
1152 continue; \
1153 } \
1154 poll_val |= KMP_LOCK_BUSY(1, futex); \
1155 } \
1156 kmp_int32 rc; \
1157 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1158 NULL, NULL, 0)) != 0) { \
1159 continue; \
1160 } \
1161 gtid_code |= 1; \
1162 } \
1163 KMP_FSYNC_ACQUIRED(ftx); \
1164 }
1165
1166 // Fast-path test futex lock
1167 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1168 { \
1169 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1170 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1171 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1172 KMP_FSYNC_ACQUIRED(ftx); \
1173 rc = TRUE; \
1174 } else { \
1175 rc = FALSE; \
1176 } \
1177 }
1178
1179 // Fast-path release futex lock
1180 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1181 { \
1182 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1183 KMP_MB(); \
1184 KMP_FSYNC_RELEASING(ftx); \
1185 kmp_int32 poll_val = \
1186 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1187 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1188 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1189 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1190 } \
1191 KMP_MB(); \
1192 KMP_YIELD_OVERSUB(); \
1193 }
1194
1195 #endif // KMP_USE_FUTEX
1196
1197 #else // KMP_USE_DYNAMIC_LOCK
1198
1199 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1200 ident_t const *loc,
1201 kmp_int32 gtid) {
1202 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1203
1204 // Because of the double-check, the following load doesn't need to be volatile
1205 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1206
1207 if (lck == NULL) {
1208 void *idx;
1209
1210 // Allocate & initialize the lock.
1211 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1212 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1213 __kmp_init_user_lock_with_checks(lck);
1214 __kmp_set_user_lock_location(lck, loc);
1215 #if USE_ITT_BUILD
1216 __kmp_itt_critical_creating(lck);
1217 // __kmp_itt_critical_creating() should be called *before* the first usage
1218 // of the underlying lock. It is the only place where we can guarantee it.
1219 // There is a chance the lock will be destroyed without ever being used, but
1220 // that is not a problem, because this is not a real event seen by the user,
1221 // just the setting of a name for an object (the lock). See kmp_itt.h for details.
1222 #endif /* USE_ITT_BUILD */
1223
1224 // Use a cmpxchg instruction to slam the start of the critical section with
1225 // the lock pointer. If another thread beat us to it, deallocate the lock,
1226 // and use the lock that the other thread allocated.
1227 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1228
1229 if (status == 0) {
1230 // Deallocate the lock and reload the value.
1231 #if USE_ITT_BUILD
1232 __kmp_itt_critical_destroyed(lck);
1233 // Let ITT know the lock is destroyed and the same memory location may be reused
1234 // for another purpose.
1235 #endif /* USE_ITT_BUILD */
1236 __kmp_destroy_user_lock_with_checks(lck);
1237 __kmp_user_lock_free(&idx, gtid, lck);
1238 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1239 KMP_DEBUG_ASSERT(lck != NULL);
1240 }
1241 }
1242 return lck;
1243 }
1244
1245 #endif // KMP_USE_DYNAMIC_LOCK
1246
1247 /*!
1248 @ingroup WORK_SHARING
1249 @param loc source location information.
1250 @param global_tid global thread number.
1251 @param crit identity of the critical section. This could be a pointer to a lock
1252 associated with the critical section, or some other suitably unique value.
1253
1254 Enter code protected by a `critical` construct.
1255 This function blocks until the executing thread can enter the critical section.
1256 */
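// Illustrative sketch only: a named
//   #pragma omp critical (name)
// is commonly lowered using a zero-initialized, file-scope lock word, e.g.
//   static kmp_critical_name crit_name;  // hypothetical compiler-generated name
//   __kmpc_critical(&loc, gtid, &crit_name);
//   ... protected code ...
//   __kmpc_end_critical(&loc, gtid, &crit_name);
// All critical constructs that share the same name share the same lock word.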
1257 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1258 kmp_critical_name *crit) {
1259 #if KMP_USE_DYNAMIC_LOCK
1260 #if OMPT_SUPPORT && OMPT_OPTIONAL
1261 OMPT_STORE_RETURN_ADDRESS(global_tid);
1262 #endif // OMPT_SUPPORT
1263 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1264 #else
1265 KMP_COUNT_BLOCK(OMP_CRITICAL);
1266 #if OMPT_SUPPORT && OMPT_OPTIONAL
1267 ompt_state_t prev_state = ompt_state_undefined;
1268 ompt_thread_info_t ti;
1269 #endif
1270 kmp_user_lock_p lck;
1271
1272 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1273 __kmp_assert_valid_gtid(global_tid);
1274
1275 // TODO: add THR_OVHD_STATE
1276
1277 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1278 KMP_CHECK_USER_LOCK_INIT();
1279
1280 if ((__kmp_user_lock_kind == lk_tas) &&
1281 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1282 lck = (kmp_user_lock_p)crit;
1283 }
1284 #if KMP_USE_FUTEX
1285 else if ((__kmp_user_lock_kind == lk_futex) &&
1286 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1287 lck = (kmp_user_lock_p)crit;
1288 }
1289 #endif
1290 else { // ticket, queuing or drdpa
1291 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1292 }
1293
1294 if (__kmp_env_consistency_check)
1295 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1296
1297 // Since the critical directive binds to all threads, not just the current
1298 // team, we have to check this even if we are in a serialized team.
1299 // Also, even if we are the uber thread, we still have to take the lock,
1300 // as we have to contend with sibling threads.
1301
1302 #if USE_ITT_BUILD
1303 __kmp_itt_critical_acquiring(lck);
1304 #endif /* USE_ITT_BUILD */
1305 #if OMPT_SUPPORT && OMPT_OPTIONAL
1306 OMPT_STORE_RETURN_ADDRESS(global_tid);
1307 void *codeptr_ra = NULL;
1308 if (ompt_enabled.enabled) {
1309 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1310 /* OMPT state update */
1311 prev_state = ti.state;
1312 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1313 ti.state = ompt_state_wait_critical;
1314
1315 /* OMPT event callback */
1316 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1317 if (ompt_enabled.ompt_callback_mutex_acquire) {
1318 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1319 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1320 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1321 }
1322 }
1323 #endif
1324 // Value of 'crit' should be good for using as a critical_id of the critical
1325 // section directive.
1326 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1327
1328 #if USE_ITT_BUILD
1329 __kmp_itt_critical_acquired(lck);
1330 #endif /* USE_ITT_BUILD */
1331 #if OMPT_SUPPORT && OMPT_OPTIONAL
1332 if (ompt_enabled.enabled) {
1333 /* OMPT state update */
1334 ti.state = prev_state;
1335 ti.wait_id = 0;
1336
1337 /* OMPT event callback */
1338 if (ompt_enabled.ompt_callback_mutex_acquired) {
1339 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1340 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1341 }
1342 }
1343 #endif
1344 KMP_POP_PARTITIONED_TIMER();
1345
1346 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1347 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1348 #endif // KMP_USE_DYNAMIC_LOCK
1349 }
1350
1351 #if KMP_USE_DYNAMIC_LOCK
1352
1353 // Converts the given hint to an internal lock implementation
1354 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1355 #if KMP_USE_TSX
1356 #define KMP_TSX_LOCK(seq) lockseq_##seq
1357 #else
1358 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1359 #endif
1360
1361 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1362 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1363 #else
1364 #define KMP_CPUINFO_RTM 0
1365 #endif
1366
1367 // Hints that do not require further logic
1368 if (hint & kmp_lock_hint_hle)
1369 return KMP_TSX_LOCK(hle);
1370 if (hint & kmp_lock_hint_rtm)
1371 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1372 if (hint & kmp_lock_hint_adaptive)
1373 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1374
1375 // Rule out conflicting hints first by returning the default lock
1376 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1377 return __kmp_user_lock_seq;
1378 if ((hint & omp_lock_hint_speculative) &&
1379 (hint & omp_lock_hint_nonspeculative))
1380 return __kmp_user_lock_seq;
1381
1382 // Do not even consider speculation when it appears to be contended
1383 if (hint & omp_lock_hint_contended)
1384 return lockseq_queuing;
1385
1386 // Uncontended lock without speculation
1387 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1388 return lockseq_tas;
1389
1390 // Use RTM lock for speculation
1391 if (hint & omp_lock_hint_speculative)
1392 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1393
1394 return __kmp_user_lock_seq;
1395 }
1396
1397 #if OMPT_SUPPORT && OMPT_OPTIONAL
1398 #if KMP_USE_DYNAMIC_LOCK
1399 static kmp_mutex_impl_t
1400 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1401 if (user_lock) {
1402 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1403 case 0:
1404 break;
1405 #if KMP_USE_FUTEX
1406 case locktag_futex:
1407 return kmp_mutex_impl_queuing;
1408 #endif
1409 case locktag_tas:
1410 return kmp_mutex_impl_spin;
1411 #if KMP_USE_TSX
1412 case locktag_hle:
1413 case locktag_rtm_spin:
1414 return kmp_mutex_impl_speculative;
1415 #endif
1416 default:
1417 return kmp_mutex_impl_none;
1418 }
1419 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1420 }
1421 KMP_ASSERT(ilock);
1422 switch (ilock->type) {
1423 #if KMP_USE_TSX
1424 case locktag_adaptive:
1425 case locktag_rtm_queuing:
1426 return kmp_mutex_impl_speculative;
1427 #endif
1428 case locktag_nested_tas:
1429 return kmp_mutex_impl_spin;
1430 #if KMP_USE_FUTEX
1431 case locktag_nested_futex:
1432 #endif
1433 case locktag_ticket:
1434 case locktag_queuing:
1435 case locktag_drdpa:
1436 case locktag_nested_ticket:
1437 case locktag_nested_queuing:
1438 case locktag_nested_drdpa:
1439 return kmp_mutex_impl_queuing;
1440 default:
1441 return kmp_mutex_impl_none;
1442 }
1443 }
1444 #else
1445 // For locks without dynamic binding
1446 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1447 switch (__kmp_user_lock_kind) {
1448 case lk_tas:
1449 return kmp_mutex_impl_spin;
1450 #if KMP_USE_FUTEX
1451 case lk_futex:
1452 #endif
1453 case lk_ticket:
1454 case lk_queuing:
1455 case lk_drdpa:
1456 return kmp_mutex_impl_queuing;
1457 #if KMP_USE_TSX
1458 case lk_hle:
1459 case lk_rtm_queuing:
1460 case lk_rtm_spin:
1461 case lk_adaptive:
1462 return kmp_mutex_impl_speculative;
1463 #endif
1464 default:
1465 return kmp_mutex_impl_none;
1466 }
1467 }
1468 #endif // KMP_USE_DYNAMIC_LOCK
1469 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1470
1471 /*!
1472 @ingroup WORK_SHARING
1473 @param loc source location information.
1474 @param global_tid global thread number.
1475 @param crit identity of the critical section. This could be a pointer to a lock
1476 associated with the critical section, or some other suitably unique value.
1477 @param hint the lock hint.
1478
1479 Enter code protected by a `critical` construct with a hint. The hint value is
1480 used to suggest a lock implementation. This function blocks until the executing
1481 thread can enter the critical section unless the hint suggests use of
1482 speculative execution and the hardware supports it.
1483 */
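// Illustrative sketch only: a
//   #pragma omp critical (name) hint(omp_lock_hint_contended)
// can be lowered to
//   __kmpc_critical_with_hint(&loc, gtid, &crit_name, omp_lock_hint_contended);
// with the hint translated to a concrete lock implementation by
// __kmp_map_hint_to_lock() above.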
1484 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1485 kmp_critical_name *crit, uint32_t hint) {
1486 KMP_COUNT_BLOCK(OMP_CRITICAL);
1487 kmp_user_lock_p lck;
1488 #if OMPT_SUPPORT && OMPT_OPTIONAL
1489 ompt_state_t prev_state = ompt_state_undefined;
1490 ompt_thread_info_t ti;
1491 // This is the case, if called from __kmpc_critical:
1492 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1493 if (!codeptr)
1494 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1495 #endif
1496
1497 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1498 __kmp_assert_valid_gtid(global_tid);
1499
1500 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1501 // Check if it is initialized.
1502 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1503 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1504 if (*lk == 0) {
1505 if (KMP_IS_D_LOCK(lockseq)) {
1506 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1507 KMP_GET_D_TAG(lockseq));
1508 } else {
1509 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1510 }
1511 }
1512 // Branch for accessing the actual lock object and set operation. This
1513 // branching is inevitable since this lock initialization does not follow the
1514 // normal dispatch path (lock table is not used).
1515 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1516 lck = (kmp_user_lock_p)lk;
1517 if (__kmp_env_consistency_check) {
1518 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1519 __kmp_map_hint_to_lock(hint));
1520 }
1521 #if USE_ITT_BUILD
1522 __kmp_itt_critical_acquiring(lck);
1523 #endif
1524 #if OMPT_SUPPORT && OMPT_OPTIONAL
1525 if (ompt_enabled.enabled) {
1526 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1527 /* OMPT state update */
1528 prev_state = ti.state;
1529 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1530 ti.state = ompt_state_wait_critical;
1531
1532 /* OMPT event callback */
1533 if (ompt_enabled.ompt_callback_mutex_acquire) {
1534 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1535 ompt_mutex_critical, (unsigned int)hint,
1536 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1537 codeptr);
1538 }
1539 }
1540 #endif
1541 #if KMP_USE_INLINED_TAS
1542 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1543 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1544 } else
1545 #elif KMP_USE_INLINED_FUTEX
1546 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1547 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1548 } else
1549 #endif
1550 {
1551 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1552 }
1553 } else {
1554 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1555 lck = ilk->lock;
1556 if (__kmp_env_consistency_check) {
1557 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1558 __kmp_map_hint_to_lock(hint));
1559 }
1560 #if USE_ITT_BUILD
1561 __kmp_itt_critical_acquiring(lck);
1562 #endif
1563 #if OMPT_SUPPORT && OMPT_OPTIONAL
1564 if (ompt_enabled.enabled) {
1565 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1566 /* OMPT state update */
1567 prev_state = ti.state;
1568 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1569 ti.state = ompt_state_wait_critical;
1570
1571 /* OMPT event callback */
1572 if (ompt_enabled.ompt_callback_mutex_acquire) {
1573 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1574 ompt_mutex_critical, (unsigned int)hint,
1575 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1576 codeptr);
1577 }
1578 }
1579 #endif
1580 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1581 }
1582 KMP_POP_PARTITIONED_TIMER();
1583
1584 #if USE_ITT_BUILD
1585 __kmp_itt_critical_acquired(lck);
1586 #endif /* USE_ITT_BUILD */
1587 #if OMPT_SUPPORT && OMPT_OPTIONAL
1588 if (ompt_enabled.enabled) {
1589 /* OMPT state update */
1590 ti.state = prev_state;
1591 ti.wait_id = 0;
1592
1593 /* OMPT event callback */
1594 if (ompt_enabled.ompt_callback_mutex_acquired) {
1595 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1596 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1597 }
1598 }
1599 #endif
1600
1601 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1602 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1603 } // __kmpc_critical_with_hint
1604
1605 #endif // KMP_USE_DYNAMIC_LOCK
1606
1607 /*!
1608 @ingroup WORK_SHARING
1609 @param loc source location information.
1610 @param global_tid global thread number.
1611 @param crit identity of the critical section. This could be a pointer to a lock
1612 associated with the critical section, or some other suitably unique value.
1613
1614 Leave a critical section, releasing any lock that was held during its execution.
1615 */
1616 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1617 kmp_critical_name *crit) {
1618 kmp_user_lock_p lck;
1619
1620 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1621
1622 #if KMP_USE_DYNAMIC_LOCK
1623 int locktag = KMP_EXTRACT_D_TAG(crit);
1624 if (locktag) {
1625 lck = (kmp_user_lock_p)crit;
1626 KMP_ASSERT(lck != NULL);
1627 if (__kmp_env_consistency_check) {
1628 __kmp_pop_sync(global_tid, ct_critical, loc);
1629 }
1630 #if USE_ITT_BUILD
1631 __kmp_itt_critical_releasing(lck);
1632 #endif
1633 #if KMP_USE_INLINED_TAS
1634 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1635 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1636 } else
1637 #elif KMP_USE_INLINED_FUTEX
1638 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1639 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1640 } else
1641 #endif
1642 {
1643 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1644 }
1645 } else {
1646 kmp_indirect_lock_t *ilk =
1647 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1648 KMP_ASSERT(ilk != NULL);
1649 lck = ilk->lock;
1650 if (__kmp_env_consistency_check) {
1651 __kmp_pop_sync(global_tid, ct_critical, loc);
1652 }
1653 #if USE_ITT_BUILD
1654 __kmp_itt_critical_releasing(lck);
1655 #endif
1656 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1657 }
1658
1659 #else // KMP_USE_DYNAMIC_LOCK
1660
1661 if ((__kmp_user_lock_kind == lk_tas) &&
1662 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1663 lck = (kmp_user_lock_p)crit;
1664 }
1665 #if KMP_USE_FUTEX
1666 else if ((__kmp_user_lock_kind == lk_futex) &&
1667 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1668 lck = (kmp_user_lock_p)crit;
1669 }
1670 #endif
1671 else { // ticket, queuing or drdpa
1672 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1673 }
1674
1675 KMP_ASSERT(lck != NULL);
1676
1677 if (__kmp_env_consistency_check)
1678 __kmp_pop_sync(global_tid, ct_critical, loc);
1679
1680 #if USE_ITT_BUILD
1681 __kmp_itt_critical_releasing(lck);
1682 #endif /* USE_ITT_BUILD */
1683 // Value of 'crit' should be good for using as a critical_id of the critical
1684 // section directive.
1685 __kmp_release_user_lock_with_checks(lck, global_tid);
1686
1687 #endif // KMP_USE_DYNAMIC_LOCK
1688
1689 #if OMPT_SUPPORT && OMPT_OPTIONAL
1690 /* OMPT release event triggers after lock is released; place here to trigger
1691 * for all #if branches */
1692 OMPT_STORE_RETURN_ADDRESS(global_tid);
1693 if (ompt_enabled.ompt_callback_mutex_released) {
1694 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1695 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1696 OMPT_LOAD_RETURN_ADDRESS(0));
1697 }
1698 #endif
1699
1700 KMP_POP_PARTITIONED_TIMER();
1701 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1702 }
1703
1704 /*!
1705 @ingroup SYNCHRONIZATION
1706 @param loc source location information
1707 @param global_tid thread id.
1708 @return one if the thread should execute the master block, zero otherwise
1709
1710 Start execution of a combined barrier and master. The barrier is executed inside
1711 this function.
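Sketch of the calling pattern a compiler might emit (illustrative only; loc and
gtid are placeholders):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  // ... code executed by the primary thread only ...
  __kmpc_end_barrier_master(&loc, gtid);
}
@endcode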
1712 */
1713 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1714 int status;
1715 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1716 __kmp_assert_valid_gtid(global_tid);
1717
1718 if (!TCR_4(__kmp_init_parallel))
1719 __kmp_parallel_initialize();
1720
1721 __kmp_resume_if_soft_paused();
1722
1723 if (__kmp_env_consistency_check)
1724 __kmp_check_barrier(global_tid, ct_barrier, loc);
1725
1726 #if OMPT_SUPPORT
1727 ompt_frame_t *ompt_frame;
1728 if (ompt_enabled.enabled) {
1729 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1730 if (ompt_frame->enter_frame.ptr == NULL)
1731 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1732 }
1733 OMPT_STORE_RETURN_ADDRESS(global_tid);
1734 #endif
1735 #if USE_ITT_NOTIFY
1736 __kmp_threads[global_tid]->th.th_ident = loc;
1737 #endif
1738 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1739 #if OMPT_SUPPORT && OMPT_OPTIONAL
1740 if (ompt_enabled.enabled) {
1741 ompt_frame->enter_frame = ompt_data_none;
1742 }
1743 #endif
1744
1745 return (status != 0) ? 0 : 1;
1746 }
1747
1748 /*!
1749 @ingroup SYNCHRONIZATION
1750 @param loc source location information
1751 @param global_tid thread id.
1752
1753 Complete the execution of a combined barrier and master. This function should
1754 only be called at the completion of the <tt>master</tt> code. Other threads will
1755 still be waiting at the barrier and this call releases them.
1756 */
1757 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1758 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1759 __kmp_assert_valid_gtid(global_tid);
1760 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1761 }
1762
1763 /*!
1764 @ingroup SYNCHRONIZATION
1765 @param loc source location information
1766 @param global_tid thread id.
1767 @return one if the thread should execute the master block, zero otherwise
1768
1769 Start execution of a combined barrier and master(nowait) construct.
1770 The barrier is executed inside this function.
1771 There is no equivalent "end" function, since the barrier completes inside this
call and the master block that follows needs no matching release of other threads.
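Sketch of the calling pattern a compiler might emit (illustrative only; loc and
gtid are placeholders):
@code
if (__kmpc_barrier_master_nowait(&loc, gtid)) {
  // ... code executed by the primary thread only; no "end" call follows ...
}
@endcode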
1772 */
1773 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1774 kmp_int32 ret;
1775 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1776 __kmp_assert_valid_gtid(global_tid);
1777
1778 if (!TCR_4(__kmp_init_parallel))
1779 __kmp_parallel_initialize();
1780
1781 __kmp_resume_if_soft_paused();
1782
1783 if (__kmp_env_consistency_check) {
1784 if (loc == 0) {
1785 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1786 }
1787 __kmp_check_barrier(global_tid, ct_barrier, loc);
1788 }
1789
1790 #if OMPT_SUPPORT
1791 ompt_frame_t *ompt_frame;
1792 if (ompt_enabled.enabled) {
1793 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1794 if (ompt_frame->enter_frame.ptr == NULL)
1795 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1796 }
1797 OMPT_STORE_RETURN_ADDRESS(global_tid);
1798 #endif
1799 #if USE_ITT_NOTIFY
1800 __kmp_threads[global_tid]->th.th_ident = loc;
1801 #endif
1802 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1803 #if OMPT_SUPPORT && OMPT_OPTIONAL
1804 if (ompt_enabled.enabled) {
1805 ompt_frame->enter_frame = ompt_data_none;
1806 }
1807 #endif
1808
1809 ret = __kmpc_master(loc, global_tid);
1810
1811 if (__kmp_env_consistency_check) {
1812 /* there's no __kmpc_end_master called; so the (stats) */
1813 /* actions of __kmpc_end_master are done here */
1814 if (ret) {
1815 /* only one thread should do the pop since only */
1816 /* one did the push (see __kmpc_master()) */
1817 __kmp_pop_sync(global_tid, ct_master, loc);
1818 }
1819 }
1820
1821 return (ret);
1822 }
1823
1824 /* The BARRIER for a SINGLE process section is always explicit */
1825 /*!
1826 @ingroup WORK_SHARING
1827 @param loc source location information
1828 @param global_tid global thread number
1829 @return One if this thread should execute the single construct, zero otherwise.
1830
1831 Test whether to execute a <tt>single</tt> construct.
1832 There are no implicit barriers in the two "single" calls; rather, the compiler
1833 should introduce an explicit barrier if it is required.
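Sketch of the calling pattern a compiler might emit for a single construct
without a nowait clause (illustrative only; loc and gtid are placeholders):
@code
if (__kmpc_single(&loc, gtid)) {
  // ... code executed by exactly one thread ...
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); // explicit barrier, since none is implied here
@endcode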
1834 */
1835
1836 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1837 __kmp_assert_valid_gtid(global_tid);
1838 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1839
1840 if (rc) {
1841 // We are going to execute the single statement, so we should count it.
1842 KMP_COUNT_BLOCK(OMP_SINGLE);
1843 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1844 }
1845
1846 #if OMPT_SUPPORT && OMPT_OPTIONAL
1847 kmp_info_t *this_thr = __kmp_threads[global_tid];
1848 kmp_team_t *team = this_thr->th.th_team;
1849 int tid = __kmp_tid_from_gtid(global_tid);
1850
1851 if (ompt_enabled.enabled) {
1852 if (rc) {
1853 if (ompt_enabled.ompt_callback_work) {
1854 ompt_callbacks.ompt_callback(ompt_callback_work)(
1855 ompt_work_single_executor, ompt_scope_begin,
1856 &(team->t.ompt_team_info.parallel_data),
1857 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1858 1, OMPT_GET_RETURN_ADDRESS(0));
1859 }
1860 } else {
1861 if (ompt_enabled.ompt_callback_work) {
1862 ompt_callbacks.ompt_callback(ompt_callback_work)(
1863 ompt_work_single_other, ompt_scope_begin,
1864 &(team->t.ompt_team_info.parallel_data),
1865 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1866 1, OMPT_GET_RETURN_ADDRESS(0));
1867 ompt_callbacks.ompt_callback(ompt_callback_work)(
1868 ompt_work_single_other, ompt_scope_end,
1869 &(team->t.ompt_team_info.parallel_data),
1870 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1871 1, OMPT_GET_RETURN_ADDRESS(0));
1872 }
1873 }
1874 }
1875 #endif
1876
1877 return rc;
1878 }
1879
1880 /*!
1881 @ingroup WORK_SHARING
1882 @param loc source location information
1883 @param global_tid global thread number
1884
1885 Mark the end of a <tt>single</tt> construct. This function should
1886 only be called by the thread that executed the block of code protected
1887 by the <tt>single</tt> construct.
1888 */
1889 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1890 __kmp_assert_valid_gtid(global_tid);
1891 __kmp_exit_single(global_tid);
1892 KMP_POP_PARTITIONED_TIMER();
1893
1894 #if OMPT_SUPPORT && OMPT_OPTIONAL
1895 kmp_info_t *this_thr = __kmp_threads[global_tid];
1896 kmp_team_t *team = this_thr->th.th_team;
1897 int tid = __kmp_tid_from_gtid(global_tid);
1898
1899 if (ompt_enabled.ompt_callback_work) {
1900 ompt_callbacks.ompt_callback(ompt_callback_work)(
1901 ompt_work_single_executor, ompt_scope_end,
1902 &(team->t.ompt_team_info.parallel_data),
1903 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1904 OMPT_GET_RETURN_ADDRESS(0));
1905 }
1906 #endif
1907 }
1908
1909 /*!
1910 @ingroup WORK_SHARING
1911 @param loc Source location
1912 @param global_tid Global thread id
1913
1914 Mark the end of a statically scheduled loop.
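Sketch of the pairing with the corresponding init call (illustrative only; the
arguments to __kmpc_for_static_init_4 are elided here, and loc and gtid are
placeholders):
@code
__kmpc_for_static_init_4(&loc, gtid, /* schedule, bounds, stride, ... */);
// ... execute the statically assigned chunk of iterations ...
__kmpc_for_static_fini(&loc, gtid);
@endcode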
1915 */
1916 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1917 KMP_POP_PARTITIONED_TIMER();
1918 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1919
1920 #if OMPT_SUPPORT && OMPT_OPTIONAL
1921 if (ompt_enabled.ompt_callback_work) {
1922 ompt_work_t ompt_work_type = ompt_work_loop;
1923 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1924 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1925 // Determine workshare type
1926 if (loc != NULL) {
1927 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1928 ompt_work_type = ompt_work_loop;
1929 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1930 ompt_work_type = ompt_work_sections;
1931 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1932 ompt_work_type = ompt_work_distribute;
1933 } else {
1934 // use default set above.
1935 // a warning about this case is provided in __kmpc_for_static_init
1936 }
1937 KMP_DEBUG_ASSERT(ompt_work_type);
1938 }
1939 ompt_callbacks.ompt_callback(ompt_callback_work)(
1940 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1941 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1942 }
1943 #endif
1944 if (__kmp_env_consistency_check)
1945 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1946 }
1947
1948 // User routines which take C-style arguments (call by value)
1949 // different from the Fortran equivalent routines
1950
1951 void ompc_set_num_threads(int arg) {
1952 // !!!!! TODO: check the per-task binding
1953 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1954 }
1955
1956 void ompc_set_dynamic(int flag) {
1957 kmp_info_t *thread;
1958
1959 /* For the thread-private implementation of the internal controls */
1960 thread = __kmp_entry_thread();
1961
1962 __kmp_save_internal_controls(thread);
1963
1964 set__dynamic(thread, flag ? true : false);
1965 }
1966
1967 void ompc_set_nested(int flag) {
1968 kmp_info_t *thread;
1969
1970 /* For the thread-private internal controls implementation */
1971 thread = __kmp_entry_thread();
1972
1973 __kmp_save_internal_controls(thread);
1974
1975 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1976 }
1977
1978 void ompc_set_max_active_levels(int max_active_levels) {
1979 /* TO DO */
1980 /* we want per-task implementation of this internal control */
1981
1982 /* For the per-thread internal controls implementation */
1983 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1984 }
1985
1986 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1987 // !!!!! TODO: check the per-task binding
1988 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1989 }
1990
1991 int ompc_get_ancestor_thread_num(int level) {
1992 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1993 }
1994
1995 int ompc_get_team_size(int level) {
1996 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1997 }
1998
1999 /* OpenMP 5.0 Affinity Format API */
2000 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2001 if (!__kmp_init_serial) {
2002 __kmp_serial_initialize();
2003 }
2004 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2005 format, KMP_STRLEN(format) + 1);
2006 }
2007
2008 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2009 size_t format_size;
2010 if (!__kmp_init_serial) {
2011 __kmp_serial_initialize();
2012 }
2013 format_size = KMP_STRLEN(__kmp_affinity_format);
2014 if (buffer && size) {
2015 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2016 format_size + 1);
2017 }
2018 return format_size;
2019 }
2020
2021 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2022 int gtid;
2023 if (!TCR_4(__kmp_init_middle)) {
2024 __kmp_middle_initialize();
2025 }
2026 __kmp_assign_root_init_mask();
2027 gtid = __kmp_get_gtid();
2028 __kmp_aux_display_affinity(gtid, format);
2029 }
2030
2031 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2032 char const *format) {
2033 int gtid;
2034 size_t num_required;
2035 kmp_str_buf_t capture_buf;
2036 if (!TCR_4(__kmp_init_middle)) {
2037 __kmp_middle_initialize();
2038 }
2039 __kmp_assign_root_init_mask();
2040 gtid = __kmp_get_gtid();
2041 __kmp_str_buf_init(&capture_buf);
2042 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2043 if (buffer && buf_size) {
2044 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2045 capture_buf.used + 1);
2046 }
2047 __kmp_str_buf_free(&capture_buf);
2048 return num_required;
2049 }
2050
2051 void kmpc_set_stacksize(int arg) {
2052 // __kmp_aux_set_stacksize initializes the library if needed
2053 __kmp_aux_set_stacksize(arg);
2054 }
2055
2056 void kmpc_set_stacksize_s(size_t arg) {
2057 // __kmp_aux_set_stacksize initializes the library if needed
2058 __kmp_aux_set_stacksize(arg);
2059 }
2060
2061 void kmpc_set_blocktime(int arg) {
2062 int gtid, tid;
2063 kmp_info_t *thread;
2064
2065 gtid = __kmp_entry_gtid();
2066 tid = __kmp_tid_from_gtid(gtid);
2067 thread = __kmp_thread_from_gtid(gtid);
2068
2069 __kmp_aux_set_blocktime(arg, thread, tid);
2070 }
2071
2072 void kmpc_set_library(int arg) {
2073 // __kmp_user_set_library initializes the library if needed
2074 __kmp_user_set_library((enum library_type)arg);
2075 }
2076
2077 void kmpc_set_defaults(char const *str) {
2078 // __kmp_aux_set_defaults initializes the library if needed
2079 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2080 }
2081
2082 void kmpc_set_disp_num_buffers(int arg) {
2083 // ignore after initialization because some teams have already
2084 // allocated dispatch buffers
2085 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2086 arg <= KMP_MAX_DISP_NUM_BUFF) {
2087 __kmp_dispatch_num_buffers = arg;
2088 }
2089 }
2090
2091 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2092 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2093 return -1;
2094 #else
2095 if (!TCR_4(__kmp_init_middle)) {
2096 __kmp_middle_initialize();
2097 }
2098 __kmp_assign_root_init_mask();
2099 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2100 #endif
2101 }
2102
2103 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2104 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2105 return -1;
2106 #else
2107 if (!TCR_4(__kmp_init_middle)) {
2108 __kmp_middle_initialize();
2109 }
2110 __kmp_assign_root_init_mask();
2111 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2112 #endif
2113 }
2114
2115 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2116 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2117 return -1;
2118 #else
2119 if (!TCR_4(__kmp_init_middle)) {
2120 __kmp_middle_initialize();
2121 }
2122 __kmp_assign_root_init_mask();
2123 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2124 #endif
2125 }
2126
2127 /* -------------------------------------------------------------------------- */
2128 /*!
2129 @ingroup THREADPRIVATE
2130 @param loc source location information
2131 @param gtid global thread number
2132 @param cpy_size size of the cpy_data buffer
2133 @param cpy_data pointer to data to be copied
2134 @param cpy_func helper function to call for copying data
2135 @param didit flag variable: 1=single thread; 0=not single thread
2136
2137 __kmpc_copyprivate implements the interface for the private data broadcast
2138 needed for the copyprivate clause associated with a single region in an
2139 OpenMP<sup>*</sup> program (both C and Fortran).
2140 All threads participating in the parallel region call this routine.
2141 One of the threads (called the single thread) should have the <tt>didit</tt>
2142 variable set to 1 and all other threads should have that variable set to 0.
2143 All threads pass a pointer to a data buffer (cpy_data) that they have built.
2144
2145 The OpenMP specification forbids the use of nowait on the single region when a
2146 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2147 barrier internally to avoid race conditions, so the code generation for the
2148 single region should avoid generating a barrier after the call to @ref
2149 __kmpc_copyprivate.
2150
2151 The <tt>gtid</tt> parameter is the global thread id for the current thread.
2152 The <tt>loc</tt> parameter is a pointer to source location information.
2153
2154 Internal implementation: The single thread will first copy its descriptor
2155 address (cpy_data) to a team-private location, then the other threads will each
2156 call the function pointed to by the parameter cpy_func, which carries out the
2157 copy by copying the data using the cpy_data buffer.
2158
2159 The cpy_func routine used for the copy and the contents of the data area defined
2160 by cpy_data and cpy_size may be built in any fashion that will allow the copy
2161 to be done. For instance, the cpy_data buffer can hold the actual data to be
2162 copied or it may hold a list of pointers to the data. The cpy_func routine must
2163 interpret the cpy_data buffer appropriately.
2164
2165 The interface to cpy_func is as follows:
2166 @code
2167 void cpy_func( void *destination, void *source )
2168 @endcode
2169 where void *destination is the cpy_data pointer for the thread being copied to
2170 and void *source is the cpy_data pointer for the thread being copied from.
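A minimal usage sketch (illustrative only; my_data, my_copy_func, loc and gtid
are placeholders for what the compiler would normally generate for a single
construct with a copyprivate clause):
@code
int didit = 0;
if (__kmpc_single(&loc, gtid)) {
  // ... produce the value(s) to broadcast into my_data ...
  didit = 1;
  __kmpc_end_single(&loc, gtid);
}
__kmpc_copyprivate(&loc, gtid, sizeof(my_data), &my_data, my_copy_func, didit);
@endcode
where my_copy_func copies from the source buffer into the destination buffer.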
2171 */
2172 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2173 void *cpy_data, void (*cpy_func)(void *, void *),
2174 kmp_int32 didit) {
2175 void **data_ptr;
2176 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2177 __kmp_assert_valid_gtid(gtid);
2178
2179 KMP_MB();
2180
2181 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2182
2183 if (__kmp_env_consistency_check) {
2184 if (loc == 0) {
2185 KMP_WARNING(ConstructIdentInvalid);
2186 }
2187 }
2188
2189 // ToDo: Optimize the following two barriers into some kind of split barrier
2190
2191 if (didit)
2192 *data_ptr = cpy_data;
2193
2194 #if OMPT_SUPPORT
2195 ompt_frame_t *ompt_frame;
2196 if (ompt_enabled.enabled) {
2197 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2198 if (ompt_frame->enter_frame.ptr == NULL)
2199 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2200 }
2201 OMPT_STORE_RETURN_ADDRESS(gtid);
2202 #endif
2203 /* This barrier is not a barrier region boundary */
2204 #if USE_ITT_NOTIFY
2205 __kmp_threads[gtid]->th.th_ident = loc;
2206 #endif
2207 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2208
2209 if (!didit)
2210 (*cpy_func)(cpy_data, *data_ptr);
2211
2212 // Consider next barrier a user-visible barrier for barrier region boundaries
2213 // Nesting checks are already handled by the single construct checks
2214 {
2215 #if OMPT_SUPPORT
2216 OMPT_STORE_RETURN_ADDRESS(gtid);
2217 #endif
2218 #if USE_ITT_NOTIFY
2219 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2220 // tasks can overwrite the location)
2221 #endif
2222 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2223 #if OMPT_SUPPORT && OMPT_OPTIONAL
2224 if (ompt_enabled.enabled) {
2225 ompt_frame->enter_frame = ompt_data_none;
2226 }
2227 #endif
2228 }
2229 }
2230
2231 /* -------------------------------------------------------------------------- */
2232
2233 #define INIT_LOCK __kmp_init_user_lock_with_checks
2234 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2235 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2236 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2237 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2238 #define ACQUIRE_NESTED_LOCK_TIMED \
2239 __kmp_acquire_nested_user_lock_with_checks_timed
2240 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2241 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2242 #define TEST_LOCK __kmp_test_user_lock_with_checks
2243 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2244 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2245 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2246
2247 // TODO: Make check abort messages use location info & pass it into
2248 // with_checks routines
2249
2250 #if KMP_USE_DYNAMIC_LOCK
2251
2252 // internal lock initializer
2253 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2254 kmp_dyna_lockseq_t seq) {
2255 if (KMP_IS_D_LOCK(seq)) {
2256 KMP_INIT_D_LOCK(lock, seq);
2257 #if USE_ITT_BUILD
2258 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2259 #endif
2260 } else {
2261 KMP_INIT_I_LOCK(lock, seq);
2262 #if USE_ITT_BUILD
2263 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2264 __kmp_itt_lock_creating(ilk->lock, loc);
2265 #endif
2266 }
2267 }
2268
2269 // internal nest lock initializer
2270 static __forceinline void
2271 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2272 kmp_dyna_lockseq_t seq) {
2273 #if KMP_USE_TSX
2274 // Don't have nested lock implementation for speculative locks
2275 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2276 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2277 seq = __kmp_user_lock_seq;
2278 #endif
2279 switch (seq) {
2280 case lockseq_tas:
2281 seq = lockseq_nested_tas;
2282 break;
2283 #if KMP_USE_FUTEX
2284 case lockseq_futex:
2285 seq = lockseq_nested_futex;
2286 break;
2287 #endif
2288 case lockseq_ticket:
2289 seq = lockseq_nested_ticket;
2290 break;
2291 case lockseq_queuing:
2292 seq = lockseq_nested_queuing;
2293 break;
2294 case lockseq_drdpa:
2295 seq = lockseq_nested_drdpa;
2296 break;
2297 default:
2298 seq = lockseq_nested_queuing;
2299 }
2300 KMP_INIT_I_LOCK(lock, seq);
2301 #if USE_ITT_BUILD
2302 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2303 __kmp_itt_lock_creating(ilk->lock, loc);
2304 #endif
2305 }
2306
2307 /* initialize the lock with a hint */
2308 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2309 uintptr_t hint) {
2310 KMP_DEBUG_ASSERT(__kmp_init_serial);
2311 if (__kmp_env_consistency_check && user_lock == NULL) {
2312 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2313 }
2314
2315 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2316
2317 #if OMPT_SUPPORT && OMPT_OPTIONAL
2318 // This is the case, if called from omp_init_lock_with_hint:
2319 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2320 if (!codeptr)
2321 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2322 if (ompt_enabled.ompt_callback_lock_init) {
2323 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2324 ompt_mutex_lock, (omp_lock_hint_t)hint,
2325 __ompt_get_mutex_impl_type(user_lock),
2326 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2327 }
2328 #endif
2329 }
2330
2331 /* initialize the lock with a hint */
2332 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2333 void **user_lock, uintptr_t hint) {
2334 KMP_DEBUG_ASSERT(__kmp_init_serial);
2335 if (__kmp_env_consistency_check && user_lock == NULL) {
2336 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2337 }
2338
2339 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2340
2341 #if OMPT_SUPPORT && OMPT_OPTIONAL
2342 // This is the case, if called from omp_init_lock_with_hint:
2343 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2344 if (!codeptr)
2345 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2346 if (ompt_enabled.ompt_callback_lock_init) {
2347 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2348 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2349 __ompt_get_mutex_impl_type(user_lock),
2350 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2351 }
2352 #endif
2353 }
2354
2355 #endif // KMP_USE_DYNAMIC_LOCK
2356
2357 /* initialize the lock */
2358 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2359 #if KMP_USE_DYNAMIC_LOCK
2360
2361 KMP_DEBUG_ASSERT(__kmp_init_serial);
2362 if (__kmp_env_consistency_check && user_lock == NULL) {
2363 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2364 }
2365 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2366
2367 #if OMPT_SUPPORT && OMPT_OPTIONAL
2368 // This is the case, if called from omp_init_lock_with_hint:
2369 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2370 if (!codeptr)
2371 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2372 if (ompt_enabled.ompt_callback_lock_init) {
2373 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2374 ompt_mutex_lock, omp_lock_hint_none,
2375 __ompt_get_mutex_impl_type(user_lock),
2376 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2377 }
2378 #endif
2379
2380 #else // KMP_USE_DYNAMIC_LOCK
2381
2382 static char const *const func = "omp_init_lock";
2383 kmp_user_lock_p lck;
2384 KMP_DEBUG_ASSERT(__kmp_init_serial);
2385
2386 if (__kmp_env_consistency_check) {
2387 if (user_lock == NULL) {
2388 KMP_FATAL(LockIsUninitialized, func);
2389 }
2390 }
2391
2392 KMP_CHECK_USER_LOCK_INIT();
2393
2394 if ((__kmp_user_lock_kind == lk_tas) &&
2395 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2396 lck = (kmp_user_lock_p)user_lock;
2397 }
2398 #if KMP_USE_FUTEX
2399 else if ((__kmp_user_lock_kind == lk_futex) &&
2400 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2401 lck = (kmp_user_lock_p)user_lock;
2402 }
2403 #endif
2404 else {
2405 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2406 }
2407 INIT_LOCK(lck);
2408 __kmp_set_user_lock_location(lck, loc);
2409
2410 #if OMPT_SUPPORT && OMPT_OPTIONAL
2411 // This is the case, if called from omp_init_lock_with_hint:
2412 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2413 if (!codeptr)
2414 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2415 if (ompt_enabled.ompt_callback_lock_init) {
2416 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2417 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2418 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2419 }
2420 #endif
2421
2422 #if USE_ITT_BUILD
2423 __kmp_itt_lock_creating(lck);
2424 #endif /* USE_ITT_BUILD */
2425
2426 #endif // KMP_USE_DYNAMIC_LOCK
2427 } // __kmpc_init_lock
2428
2429 /* initialize the lock */
2430 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2431 #if KMP_USE_DYNAMIC_LOCK
2432
2433 KMP_DEBUG_ASSERT(__kmp_init_serial);
2434 if (__kmp_env_consistency_check && user_lock == NULL) {
2435 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2436 }
2437 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2438
2439 #if OMPT_SUPPORT && OMPT_OPTIONAL
2440 // This is the case, if called from omp_init_lock_with_hint:
2441 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2442 if (!codeptr)
2443 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2444 if (ompt_enabled.ompt_callback_lock_init) {
2445 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2446 ompt_mutex_nest_lock, omp_lock_hint_none,
2447 __ompt_get_mutex_impl_type(user_lock),
2448 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2449 }
2450 #endif
2451
2452 #else // KMP_USE_DYNAMIC_LOCK
2453
2454 static char const *const func = "omp_init_nest_lock";
2455 kmp_user_lock_p lck;
2456 KMP_DEBUG_ASSERT(__kmp_init_serial);
2457
2458 if (__kmp_env_consistency_check) {
2459 if (user_lock == NULL) {
2460 KMP_FATAL(LockIsUninitialized, func);
2461 }
2462 }
2463
2464 KMP_CHECK_USER_LOCK_INIT();
2465
2466 if ((__kmp_user_lock_kind == lk_tas) &&
2467 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2468 OMP_NEST_LOCK_T_SIZE)) {
2469 lck = (kmp_user_lock_p)user_lock;
2470 }
2471 #if KMP_USE_FUTEX
2472 else if ((__kmp_user_lock_kind == lk_futex) &&
2473 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2474 OMP_NEST_LOCK_T_SIZE)) {
2475 lck = (kmp_user_lock_p)user_lock;
2476 }
2477 #endif
2478 else {
2479 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2480 }
2481
2482 INIT_NESTED_LOCK(lck);
2483 __kmp_set_user_lock_location(lck, loc);
2484
2485 #if OMPT_SUPPORT && OMPT_OPTIONAL
2486 // This is the case, if called from omp_init_lock_with_hint:
2487 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2488 if (!codeptr)
2489 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2490 if (ompt_enabled.ompt_callback_lock_init) {
2491 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2492 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2493 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2494 }
2495 #endif
2496
2497 #if USE_ITT_BUILD
2498 __kmp_itt_lock_creating(lck);
2499 #endif /* USE_ITT_BUILD */
2500
2501 #endif // KMP_USE_DYNAMIC_LOCK
2502 } // __kmpc_init_nest_lock
2503
2504 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2505 #if KMP_USE_DYNAMIC_LOCK
2506
2507 #if USE_ITT_BUILD
2508 kmp_user_lock_p lck;
2509 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2510 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2511 } else {
2512 lck = (kmp_user_lock_p)user_lock;
2513 }
2514 __kmp_itt_lock_destroyed(lck);
2515 #endif
2516 #if OMPT_SUPPORT && OMPT_OPTIONAL
2517 // This is the case, if called from omp_init_lock_with_hint:
2518 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2519 if (!codeptr)
2520 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2521 if (ompt_enabled.ompt_callback_lock_destroy) {
2522 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2523 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2524 }
2525 #endif
2526 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2527 #else
2528 kmp_user_lock_p lck;
2529
2530 if ((__kmp_user_lock_kind == lk_tas) &&
2531 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2532 lck = (kmp_user_lock_p)user_lock;
2533 }
2534 #if KMP_USE_FUTEX
2535 else if ((__kmp_user_lock_kind == lk_futex) &&
2536 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2537 lck = (kmp_user_lock_p)user_lock;
2538 }
2539 #endif
2540 else {
2541 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2542 }
2543
2544 #if OMPT_SUPPORT && OMPT_OPTIONAL
2545 // This is the case, if called from omp_init_lock_with_hint:
2546 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2547 if (!codeptr)
2548 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2549 if (ompt_enabled.ompt_callback_lock_destroy) {
2550 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2551 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2552 }
2553 #endif
2554
2555 #if USE_ITT_BUILD
2556 __kmp_itt_lock_destroyed(lck);
2557 #endif /* USE_ITT_BUILD */
2558 DESTROY_LOCK(lck);
2559
2560 if ((__kmp_user_lock_kind == lk_tas) &&
2561 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2562 ;
2563 }
2564 #if KMP_USE_FUTEX
2565 else if ((__kmp_user_lock_kind == lk_futex) &&
2566 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2567 ;
2568 }
2569 #endif
2570 else {
2571 __kmp_user_lock_free(user_lock, gtid, lck);
2572 }
2573 #endif // KMP_USE_DYNAMIC_LOCK
2574 } // __kmpc_destroy_lock
2575
2576 /* destroy the lock */
2577 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2578 #if KMP_USE_DYNAMIC_LOCK
2579
2580 #if USE_ITT_BUILD
2581 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2582 __kmp_itt_lock_destroyed(ilk->lock);
2583 #endif
2584 #if OMPT_SUPPORT && OMPT_OPTIONAL
2585 // This is the case, if called from omp_init_lock_with_hint:
2586 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2587 if (!codeptr)
2588 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2589 if (ompt_enabled.ompt_callback_lock_destroy) {
2590 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2591 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2592 }
2593 #endif
2594 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2595
2596 #else // KMP_USE_DYNAMIC_LOCK
2597
2598 kmp_user_lock_p lck;
2599
2600 if ((__kmp_user_lock_kind == lk_tas) &&
2601 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2602 OMP_NEST_LOCK_T_SIZE)) {
2603 lck = (kmp_user_lock_p)user_lock;
2604 }
2605 #if KMP_USE_FUTEX
2606 else if ((__kmp_user_lock_kind == lk_futex) &&
2607 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2608 OMP_NEST_LOCK_T_SIZE)) {
2609 lck = (kmp_user_lock_p)user_lock;
2610 }
2611 #endif
2612 else {
2613 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2614 }
2615
2616 #if OMPT_SUPPORT && OMPT_OPTIONAL
2617 // This is the case, if called from omp_init_lock_with_hint:
2618 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2619 if (!codeptr)
2620 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2621 if (ompt_enabled.ompt_callback_lock_destroy) {
2622 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2623 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2624 }
2625 #endif
2626
2627 #if USE_ITT_BUILD
2628 __kmp_itt_lock_destroyed(lck);
2629 #endif /* USE_ITT_BUILD */
2630
2631 DESTROY_NESTED_LOCK(lck);
2632
2633 if ((__kmp_user_lock_kind == lk_tas) &&
2634 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2635 OMP_NEST_LOCK_T_SIZE)) {
2636 ;
2637 }
2638 #if KMP_USE_FUTEX
2639 else if ((__kmp_user_lock_kind == lk_futex) &&
2640 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2641 OMP_NEST_LOCK_T_SIZE)) {
2642 ;
2643 }
2644 #endif
2645 else {
2646 __kmp_user_lock_free(user_lock, gtid, lck);
2647 }
2648 #endif // KMP_USE_DYNAMIC_LOCK
2649 } // __kmpc_destroy_nest_lock
2650
2651 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2652 KMP_COUNT_BLOCK(OMP_set_lock);
2653 #if KMP_USE_DYNAMIC_LOCK
2654 int tag = KMP_EXTRACT_D_TAG(user_lock);
2655 #if USE_ITT_BUILD
2656 __kmp_itt_lock_acquiring(
2657 (kmp_user_lock_p)
2658 user_lock); // itt function will get to the right lock object.
2659 #endif
2660 #if OMPT_SUPPORT && OMPT_OPTIONAL
2661 // This is the case, if called from omp_init_lock_with_hint:
2662 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2663 if (!codeptr)
2664 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2665 if (ompt_enabled.ompt_callback_mutex_acquire) {
2666 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2667 ompt_mutex_lock, omp_lock_hint_none,
2668 __ompt_get_mutex_impl_type(user_lock),
2669 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2670 }
2671 #endif
2672 #if KMP_USE_INLINED_TAS
2673 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2674 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2675 } else
2676 #elif KMP_USE_INLINED_FUTEX
2677 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2678 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2679 } else
2680 #endif
2681 {
2682 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2683 }
2684 #if USE_ITT_BUILD
2685 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2686 #endif
2687 #if OMPT_SUPPORT && OMPT_OPTIONAL
2688 if (ompt_enabled.ompt_callback_mutex_acquired) {
2689 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2690 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2691 }
2692 #endif
2693
2694 #else // KMP_USE_DYNAMIC_LOCK
2695
2696 kmp_user_lock_p lck;
2697
2698 if ((__kmp_user_lock_kind == lk_tas) &&
2699 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2700 lck = (kmp_user_lock_p)user_lock;
2701 }
2702 #if KMP_USE_FUTEX
2703 else if ((__kmp_user_lock_kind == lk_futex) &&
2704 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2705 lck = (kmp_user_lock_p)user_lock;
2706 }
2707 #endif
2708 else {
2709 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2710 }
2711
2712 #if USE_ITT_BUILD
2713 __kmp_itt_lock_acquiring(lck);
2714 #endif /* USE_ITT_BUILD */
2715 #if OMPT_SUPPORT && OMPT_OPTIONAL
2716 // This is the case, if called from omp_init_lock_with_hint:
2717 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2718 if (!codeptr)
2719 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2720 if (ompt_enabled.ompt_callback_mutex_acquire) {
2721 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2722 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2723 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2724 }
2725 #endif
2726
2727 ACQUIRE_LOCK(lck, gtid);
2728
2729 #if USE_ITT_BUILD
2730 __kmp_itt_lock_acquired(lck);
2731 #endif /* USE_ITT_BUILD */
2732
2733 #if OMPT_SUPPORT && OMPT_OPTIONAL
2734 if (ompt_enabled.ompt_callback_mutex_acquired) {
2735 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2736 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2737 }
2738 #endif
2739
2740 #endif // KMP_USE_DYNAMIC_LOCK
2741 }
2742
2743 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2744 #if KMP_USE_DYNAMIC_LOCK
2745
2746 #if USE_ITT_BUILD
2747 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2748 #endif
2749 #if OMPT_SUPPORT && OMPT_OPTIONAL
2750 // This is the case, if called from omp_init_lock_with_hint:
2751 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2752 if (!codeptr)
2753 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2754 if (ompt_enabled.enabled) {
2755 if (ompt_enabled.ompt_callback_mutex_acquire) {
2756 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2757 ompt_mutex_nest_lock, omp_lock_hint_none,
2758 __ompt_get_mutex_impl_type(user_lock),
2759 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2760 }
2761 }
2762 #endif
2763 int acquire_status =
2764 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2765 (void)acquire_status;
2766 #if USE_ITT_BUILD
2767 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2768 #endif
2769
2770 #if OMPT_SUPPORT && OMPT_OPTIONAL
2771 if (ompt_enabled.enabled) {
2772 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2773 if (ompt_enabled.ompt_callback_mutex_acquired) {
2774 // lock_first
2775 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2776 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2777 codeptr);
2778 }
2779 } else {
2780 if (ompt_enabled.ompt_callback_nest_lock) {
2781 // lock_next
2782 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2783 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2784 }
2785 }
2786 }
2787 #endif
2788
2789 #else // KMP_USE_DYNAMIC_LOCK
2790 int acquire_status;
2791 kmp_user_lock_p lck;
2792
2793 if ((__kmp_user_lock_kind == lk_tas) &&
2794 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2795 OMP_NEST_LOCK_T_SIZE)) {
2796 lck = (kmp_user_lock_p)user_lock;
2797 }
2798 #if KMP_USE_FUTEX
2799 else if ((__kmp_user_lock_kind == lk_futex) &&
2800 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2801 OMP_NEST_LOCK_T_SIZE)) {
2802 lck = (kmp_user_lock_p)user_lock;
2803 }
2804 #endif
2805 else {
2806 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2807 }
2808
2809 #if USE_ITT_BUILD
2810 __kmp_itt_lock_acquiring(lck);
2811 #endif /* USE_ITT_BUILD */
2812 #if OMPT_SUPPORT && OMPT_OPTIONAL
2813 // This is the case, if called from omp_init_lock_with_hint:
2814 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2815 if (!codeptr)
2816 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2817 if (ompt_enabled.enabled) {
2818 if (ompt_enabled.ompt_callback_mutex_acquire) {
2819 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2820 ompt_mutex_nest_lock, omp_lock_hint_none,
2821 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2822 codeptr);
2823 }
2824 }
2825 #endif
2826
2827 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2828
2829 #if USE_ITT_BUILD
2830 __kmp_itt_lock_acquired(lck);
2831 #endif /* USE_ITT_BUILD */
2832
2833 #if OMPT_SUPPORT && OMPT_OPTIONAL
2834 if (ompt_enabled.enabled) {
2835 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2836 if (ompt_enabled.ompt_callback_mutex_acquired) {
2837 // lock_first
2838 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2839 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2840 }
2841 } else {
2842 if (ompt_enabled.ompt_callback_nest_lock) {
2843 // lock_next
2844 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2845 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2846 }
2847 }
2848 }
2849 #endif
2850
2851 #endif // KMP_USE_DYNAMIC_LOCK
2852 }
2853
2854 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2855 #if KMP_USE_DYNAMIC_LOCK
2856
2857 int tag = KMP_EXTRACT_D_TAG(user_lock);
2858 #if USE_ITT_BUILD
2859 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2860 #endif
2861 #if KMP_USE_INLINED_TAS
2862 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2863 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2864 } else
2865 #elif KMP_USE_INLINED_FUTEX
2866 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2867 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2868 } else
2869 #endif
2870 {
2871 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2872 }
2873
2874 #if OMPT_SUPPORT && OMPT_OPTIONAL
2875 // This is the case, if called from omp_init_lock_with_hint:
2876 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2877 if (!codeptr)
2878 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2879 if (ompt_enabled.ompt_callback_mutex_released) {
2880 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2881 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2882 }
2883 #endif
2884
2885 #else // KMP_USE_DYNAMIC_LOCK
2886
2887 kmp_user_lock_p lck;
2888
2889 /* Can't use serial interval since not block structured */
2890 /* release the lock */
2891
2892 if ((__kmp_user_lock_kind == lk_tas) &&
2893 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2894 #if KMP_OS_LINUX && \
2895 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2896 // "fast" path implemented to fix customer performance issue
2897 #if USE_ITT_BUILD
2898 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2899 #endif /* USE_ITT_BUILD */
2900 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2901 KMP_MB();
2902
2903 #if OMPT_SUPPORT && OMPT_OPTIONAL
2904 // This is the case, if called from omp_init_lock_with_hint:
2905 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2906 if (!codeptr)
2907 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2908 if (ompt_enabled.ompt_callback_mutex_released) {
2909 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2910 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2911 }
2912 #endif
2913
2914 return;
2915 #else
2916 lck = (kmp_user_lock_p)user_lock;
2917 #endif
2918 }
2919 #if KMP_USE_FUTEX
2920 else if ((__kmp_user_lock_kind == lk_futex) &&
2921 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2922 lck = (kmp_user_lock_p)user_lock;
2923 }
2924 #endif
2925 else {
2926 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2927 }
2928
2929 #if USE_ITT_BUILD
2930 __kmp_itt_lock_releasing(lck);
2931 #endif /* USE_ITT_BUILD */
2932
2933 RELEASE_LOCK(lck, gtid);
2934
2935 #if OMPT_SUPPORT && OMPT_OPTIONAL
2936 // This is the case, if called from omp_init_lock_with_hint:
2937 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2938 if (!codeptr)
2939 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2940 if (ompt_enabled.ompt_callback_mutex_released) {
2941 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2942 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2943 }
2944 #endif
2945
2946 #endif // KMP_USE_DYNAMIC_LOCK
2947 }
2948
2949 /* release the lock */
2950 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2951 #if KMP_USE_DYNAMIC_LOCK
2952
2953 #if USE_ITT_BUILD
2954 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2955 #endif
2956 int release_status =
2957 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2958 (void)release_status;
2959
2960 #if OMPT_SUPPORT && OMPT_OPTIONAL
2961 // This is the case, if called from omp_init_lock_with_hint:
2962 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2963 if (!codeptr)
2964 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2965 if (ompt_enabled.enabled) {
2966 if (release_status == KMP_LOCK_RELEASED) {
2967 if (ompt_enabled.ompt_callback_mutex_released) {
2968 // release_lock_last
2969 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2970 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2971 codeptr);
2972 }
2973 } else if (ompt_enabled.ompt_callback_nest_lock) {
2974 // release_lock_prev
2975 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2976 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2977 }
2978 }
2979 #endif
2980
2981 #else // KMP_USE_DYNAMIC_LOCK
2982
2983 kmp_user_lock_p lck;
2984
2985 /* Can't use serial interval since not block structured */
2986
2987 if ((__kmp_user_lock_kind == lk_tas) &&
2988 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2989 OMP_NEST_LOCK_T_SIZE)) {
2990 #if KMP_OS_LINUX && \
2991 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2992 // "fast" path implemented to fix customer performance issue
2993 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2994 #if USE_ITT_BUILD
2995 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2996 #endif /* USE_ITT_BUILD */
2997
2998 #if OMPT_SUPPORT && OMPT_OPTIONAL
2999 int release_status = KMP_LOCK_STILL_HELD;
3000 #endif
3001
3002 if (--(tl->lk.depth_locked) == 0) {
3003 TCW_4(tl->lk.poll, 0);
3004 #if OMPT_SUPPORT && OMPT_OPTIONAL
3005 release_status = KMP_LOCK_RELEASED;
3006 #endif
3007 }
3008 KMP_MB();
3009
3010 #if OMPT_SUPPORT && OMPT_OPTIONAL
3011 // This is the case, if called from omp_init_lock_with_hint:
3012 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3013 if (!codeptr)
3014 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3015 if (ompt_enabled.enabled) {
3016 if (release_status == KMP_LOCK_RELEASED) {
3017 if (ompt_enabled.ompt_callback_mutex_released) {
3018 // release_lock_last
3019 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3020 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3021 }
3022 } else if (ompt_enabled.ompt_callback_nest_lock) {
3023 // release_lock_previous
3024 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3025 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3026 }
3027 }
3028 #endif
3029
3030 return;
3031 #else
3032 lck = (kmp_user_lock_p)user_lock;
3033 #endif
3034 }
3035 #if KMP_USE_FUTEX
3036 else if ((__kmp_user_lock_kind == lk_futex) &&
3037 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3038 OMP_NEST_LOCK_T_SIZE)) {
3039 lck = (kmp_user_lock_p)user_lock;
3040 }
3041 #endif
3042 else {
3043 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3044 }
3045
3046 #if USE_ITT_BUILD
3047 __kmp_itt_lock_releasing(lck);
3048 #endif /* USE_ITT_BUILD */
3049
3050 int release_status;
3051 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3052 #if OMPT_SUPPORT && OMPT_OPTIONAL
3053 // This is the case, if called from omp_init_lock_with_hint:
3054 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3055 if (!codeptr)
3056 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3057 if (ompt_enabled.enabled) {
3058 if (release_status == KMP_LOCK_RELEASED) {
3059 if (ompt_enabled.ompt_callback_mutex_released) {
3060 // release_lock_last
3061 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3062 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3063 }
3064 } else if (ompt_enabled.ompt_callback_nest_lock) {
3065 // release_lock_previous
3066 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3067 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3068 }
3069 }
3070 #endif
3071
3072 #endif // KMP_USE_DYNAMIC_LOCK
3073 }
3074
3075 /* try to acquire the lock */
3076 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3077 KMP_COUNT_BLOCK(OMP_test_lock);
3078
3079 #if KMP_USE_DYNAMIC_LOCK
3080 int rc;
3081 int tag = KMP_EXTRACT_D_TAG(user_lock);
3082 #if USE_ITT_BUILD
3083 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3084 #endif
3085 #if OMPT_SUPPORT && OMPT_OPTIONAL
3086 // This is the case, if called from omp_init_lock_with_hint:
3087 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3088 if (!codeptr)
3089 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3090 if (ompt_enabled.ompt_callback_mutex_acquire) {
3091 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3092 ompt_mutex_lock, omp_lock_hint_none,
3093 __ompt_get_mutex_impl_type(user_lock),
3094 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3095 }
3096 #endif
3097 #if KMP_USE_INLINED_TAS
3098 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3099 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3100 } else
3101 #elif KMP_USE_INLINED_FUTEX
3102 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3103 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3104 } else
3105 #endif
3106 {
3107 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3108 }
3109 if (rc) {
3110 #if USE_ITT_BUILD
3111 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3112 #endif
3113 #if OMPT_SUPPORT && OMPT_OPTIONAL
3114 if (ompt_enabled.ompt_callback_mutex_acquired) {
3115 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3116 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3117 }
3118 #endif
3119 return FTN_TRUE;
3120 } else {
3121 #if USE_ITT_BUILD
3122 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3123 #endif
3124 return FTN_FALSE;
3125 }
3126
3127 #else // KMP_USE_DYNAMIC_LOCK
3128
3129 kmp_user_lock_p lck;
3130 int rc;
3131
3132 if ((__kmp_user_lock_kind == lk_tas) &&
3133 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3134 lck = (kmp_user_lock_p)user_lock;
3135 }
3136 #if KMP_USE_FUTEX
3137 else if ((__kmp_user_lock_kind == lk_futex) &&
3138 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3139 lck = (kmp_user_lock_p)user_lock;
3140 }
3141 #endif
3142 else {
3143 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3144 }
3145
3146 #if USE_ITT_BUILD
3147 __kmp_itt_lock_acquiring(lck);
3148 #endif /* USE_ITT_BUILD */
3149 #if OMPT_SUPPORT && OMPT_OPTIONAL
3150 // This is the case, if called from omp_init_lock_with_hint:
3151 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3152 if (!codeptr)
3153 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3154 if (ompt_enabled.ompt_callback_mutex_acquire) {
3155 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3156 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3157 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3158 }
3159 #endif
3160
3161 rc = TEST_LOCK(lck, gtid);
3162 #if USE_ITT_BUILD
3163 if (rc) {
3164 __kmp_itt_lock_acquired(lck);
3165 } else {
3166 __kmp_itt_lock_cancelled(lck);
3167 }
3168 #endif /* USE_ITT_BUILD */
3169 #if OMPT_SUPPORT && OMPT_OPTIONAL
3170 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3171 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3172 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3173 }
3174 #endif
3175
3176 return (rc ? FTN_TRUE : FTN_FALSE);
3177
3178 /* Can't use serial interval since not block structured */
3179
3180 #endif // KMP_USE_DYNAMIC_LOCK
3181 }
3182
3183 /* try to acquire the lock */
3184 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3185 #if KMP_USE_DYNAMIC_LOCK
3186 int rc;
3187 #if USE_ITT_BUILD
3188 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3189 #endif
3190 #if OMPT_SUPPORT && OMPT_OPTIONAL
3191 // This is the case, if called from omp_init_lock_with_hint:
3192 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3193 if (!codeptr)
3194 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3195 if (ompt_enabled.ompt_callback_mutex_acquire) {
3196 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3197 ompt_mutex_nest_lock, omp_lock_hint_none,
3198 __ompt_get_mutex_impl_type(user_lock),
3199 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3200 }
3201 #endif
3202 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3203 #if USE_ITT_BUILD
3204 if (rc) {
3205 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3206 } else {
3207 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3208 }
3209 #endif
3210 #if OMPT_SUPPORT && OMPT_OPTIONAL
3211 if (ompt_enabled.enabled && rc) {
3212 if (rc == 1) {
3213 if (ompt_enabled.ompt_callback_mutex_acquired) {
3214 // lock_first
3215 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3216 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3217 codeptr);
3218 }
3219 } else {
3220 if (ompt_enabled.ompt_callback_nest_lock) {
3221 // lock_next
3222 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3223 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3224 }
3225 }
3226 }
3227 #endif
3228 return rc;
3229
3230 #else // KMP_USE_DYNAMIC_LOCK
3231
3232 kmp_user_lock_p lck;
3233 int rc;
3234
3235 if ((__kmp_user_lock_kind == lk_tas) &&
3236 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3237 OMP_NEST_LOCK_T_SIZE)) {
3238 lck = (kmp_user_lock_p)user_lock;
3239 }
3240 #if KMP_USE_FUTEX
3241 else if ((__kmp_user_lock_kind == lk_futex) &&
3242 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3243 OMP_NEST_LOCK_T_SIZE)) {
3244 lck = (kmp_user_lock_p)user_lock;
3245 }
3246 #endif
3247 else {
3248 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3249 }
3250
3251 #if USE_ITT_BUILD
3252 __kmp_itt_lock_acquiring(lck);
3253 #endif /* USE_ITT_BUILD */
3254
3255 #if OMPT_SUPPORT && OMPT_OPTIONAL
3256 // This is the case if called from omp_init_lock_with_hint:
3257 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3258 if (!codeptr)
3259 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3260 if (ompt_enabled.enabled &&
3261 ompt_enabled.ompt_callback_mutex_acquire) {
3262 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3263 ompt_mutex_nest_lock, omp_lock_hint_none,
3264 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3265 codeptr);
3266 }
3267 #endif
3268
3269 rc = TEST_NESTED_LOCK(lck, gtid);
3270 #if USE_ITT_BUILD
3271 if (rc) {
3272 __kmp_itt_lock_acquired(lck);
3273 } else {
3274 __kmp_itt_lock_cancelled(lck);
3275 }
3276 #endif /* USE_ITT_BUILD */
3277 #if OMPT_SUPPORT && OMPT_OPTIONAL
3278 if (ompt_enabled.enabled && rc) {
3279 if (rc == 1) {
3280 if (ompt_enabled.ompt_callback_mutex_acquired) {
3281 // lock_first
3282 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3283 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3284 }
3285 } else {
3286 if (ompt_enabled.ompt_callback_nest_lock) {
3287 // lock_next
3288 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3289 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3290 }
3291 }
3292 }
3293 #endif
3294 return rc;
3295
3296 /* Can't use serial interval since not block structured */
3297
3298 #endif // KMP_USE_DYNAMIC_LOCK
3299 }
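
// Illustrative usage sketch (not part of the runtime): on success the
// nestable test returns the new nesting depth (1 for the first acquisition,
// >1 for re-acquisition by the owner), and 0 on failure; that is exactly what
// the lock_first/lock_next OMPT branching above keys on. Names below are
// hypothetical, and omp_test_nest_lock() is assumed to reach this entry point
// through the wrappers in kmp_ftn_entry.h.
//
//   omp_nest_lock_t nlock;
//   omp_init_nest_lock(&nlock);
//   if (omp_test_nest_lock(&nlock)) {   // returns 1: first acquisition
//     (void)omp_test_nest_lock(&nlock); // returns 2: nested re-acquisition
//     omp_unset_nest_lock(&nlock);
//     omp_unset_nest_lock(&nlock);
//   }
//   omp_destroy_nest_lock(&nlock);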
3300
3301 // Interface to fast scalable reduce methods routines
3302
3303 // Keep the selected method in a thread-local structure for cross-function
3304 // use: it is read back by the __kmp_end_reduce* functions.
3305 // An alternative would be to re-determine the method once more in the
3306 // __kmp_end_reduce* functions (which would require a new prototype).
3307 // AT: which solution is better?
3308 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3309 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3310
3311 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3312 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
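
// Usage sketch: the entry points below pair these macros on the same thread,
// e.g.
//
//   __KMP_SET_REDUCTION_METHOD(gtid, method);  // in __kmpc_reduce[_nowait]()
//   ...
//   method = __KMP_GET_REDUCTION_METHOD(gtid); // in __kmpc_end_reduce[_nowait]()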
3313
3314 // description of the packed_reduction_method variable: look at the macros in
3315 // kmp.h
3316
3317 // used in a critical section reduce block
3318 static __forceinline void
3319 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3320 kmp_critical_name *crit) {
3321
3322 // This lock was visible to a customer and to the threading profile tool as
3323 // a serial overhead span (although it is used for an internal purpose only).
3324 // Why was it visible in the previous implementation?
3325 // Should we keep it visible in the new reduce block?
3326 kmp_user_lock_p lck;
3327
3328 #if KMP_USE_DYNAMIC_LOCK
3329
3330 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3331 // Check if it is initialized.
3332 if (*lk == 0) {
3333 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3334 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3335 KMP_GET_D_TAG(__kmp_user_lock_seq));
3336 } else {
3337 __kmp_init_indirect_csptr(crit, loc, global_tid,
3338 KMP_GET_I_TAG(__kmp_user_lock_seq));
3339 }
3340 }
3341 // Branch for accessing the actual lock object and set operation. This
3342 // branching is inevitable since this lock initialization does not follow the
3343 // normal dispatch path (lock table is not used).
3344 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3345 lck = (kmp_user_lock_p)lk;
3346 KMP_DEBUG_ASSERT(lck != NULL);
3347 if (__kmp_env_consistency_check) {
3348 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3349 }
3350 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3351 } else {
3352 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3353 lck = ilk->lock;
3354 KMP_DEBUG_ASSERT(lck != NULL);
3355 if (__kmp_env_consistency_check) {
3356 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3357 }
3358 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3359 }
3360
3361 #else // KMP_USE_DYNAMIC_LOCK
3362
3363 // We know that the fast reduction code is only emitted by Intel compilers
3364 // with 32 byte critical sections. If there isn't enough space, then we
3365 // have to use a pointer.
3366 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3367 lck = (kmp_user_lock_p)crit;
3368 } else {
3369 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3370 }
3371 KMP_DEBUG_ASSERT(lck != NULL);
3372
3373 if (__kmp_env_consistency_check)
3374 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3375
3376 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3377
3378 #endif // KMP_USE_DYNAMIC_LOCK
3379 }
3380
3381 // used in a critical section reduce block
3382 static __forceinline void
3383 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3384 kmp_critical_name *crit) {
3385
3386 kmp_user_lock_p lck;
3387
3388 #if KMP_USE_DYNAMIC_LOCK
3389
3390 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3391 lck = (kmp_user_lock_p)crit;
3392 if (__kmp_env_consistency_check)
3393 __kmp_pop_sync(global_tid, ct_critical, loc);
3394 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3395 } else {
3396 kmp_indirect_lock_t *ilk =
3397 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3398 if (__kmp_env_consistency_check)
3399 __kmp_pop_sync(global_tid, ct_critical, loc);
3400 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3401 }
3402
3403 #else // KMP_USE_DYNAMIC_LOCK
3404
3405 // We know that the fast reduction code is only emitted by Intel compilers
3406 // with 32 byte critical sections. If there isn't enough space, then we have
3407 // to use a pointer.
3408 if (__kmp_base_user_lock_size > 32) {
3409 lck = *((kmp_user_lock_p *)crit);
3410 KMP_ASSERT(lck != NULL);
3411 } else {
3412 lck = (kmp_user_lock_p)crit;
3413 }
3414
3415 if (__kmp_env_consistency_check)
3416 __kmp_pop_sync(global_tid, ct_critical, loc);
3417
3418 __kmp_release_user_lock_with_checks(lck, global_tid);
3419
3420 #endif // KMP_USE_DYNAMIC_LOCK
3421 } // __kmp_end_critical_section_reduce_block
3422
3423 static __forceinline int
3424 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3425 int *task_state) {
3426 kmp_team_t *team;
3427
3428 // Check if we are inside a teams construct.
3429 if (th->th.th_teams_microtask) {
3430 *team_p = team = th->th.th_team;
3431 if (team->t.t_level == th->th.th_teams_level) {
3432 // This is reduction at teams construct.
3433 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3434 // Let's swap teams temporarily for the reduction.
3435 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3436 th->th.th_team = team->t.t_parent;
3437 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3438 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3439 *task_state = th->th.th_task_state;
3440 th->th.th_task_state = 0;
3441
3442 return 1;
3443 }
3444 }
3445 return 0;
3446 }
3447
3448 static __forceinline void
3449 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3450 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3451 th->th.th_info.ds.ds_tid = 0;
3452 th->th.th_team = team;
3453 th->th.th_team_nproc = team->t.t_nproc;
3454 th->th.th_task_team = team->t.t_task_team[task_state];
3455 __kmp_type_convert(task_state, &(th->th.th_task_state));
3456 }
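
// Usage sketch: the two helpers above always bracket a reduction that may be
// executed inside a teams construct, as done in __kmpc_reduce_nowait() below:
//
//   kmp_team_t *team;
//   int task_state;
//   int swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
//   /* ... perform the reduction ... */
//   if (swapped)
//     __kmp_restore_swapped_teams(th, team, task_state);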
3457
3458 /* 2.a.i. Reduce Block without a terminating barrier */
3459 /*!
3460 @ingroup SYNCHRONIZATION
3461 @param loc source location information
3462 @param global_tid global thread number
3463 @param num_vars number of items (variables) to be reduced
3464 @param reduce_size size of data in bytes to be reduced
3465 @param reduce_data pointer to data to be reduced
3466 @param reduce_func callback function providing reduction operation on two
3467 operands and returning result of reduction in lhs_data
3468 @param lck pointer to the unique lock data structure
3469 @result 1 for the primary thread, 0 for all other team threads, 2 for all team
3470 threads if atomic reduction needed
3471
3472 The nowait version is used for a reduce clause with the nowait argument.
3473 */
3474 kmp_int32
3475 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3476 size_t reduce_size, void *reduce_data,
3477 void (*reduce_func)(void *lhs_data, void *rhs_data),
3478 kmp_critical_name *lck) {
3479
3480 KMP_COUNT_BLOCK(REDUCE_nowait);
3481 int retval = 0;
3482 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3483 kmp_info_t *th;
3484 kmp_team_t *team;
3485 int teams_swapped = 0, task_state;
3486 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3487 __kmp_assert_valid_gtid(global_tid);
3488
3489 // why do we need this initialization here at all?
3490 // A reduction clause cannot be used as a stand-alone directive.
3491
3492 // do not call __kmp_serial_initialize(), it will be called by
3493 // __kmp_parallel_initialize() if needed
3494 // possible detection of false-positive race by the threadchecker ???
3495 if (!TCR_4(__kmp_init_parallel))
3496 __kmp_parallel_initialize();
3497
3498 __kmp_resume_if_soft_paused();
3499
3500 // check correctness of reduce block nesting
3501 #if KMP_USE_DYNAMIC_LOCK
3502 if (__kmp_env_consistency_check)
3503 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3504 #else
3505 if (__kmp_env_consistency_check)
3506 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3507 #endif
3508
3509 th = __kmp_thread_from_gtid(global_tid);
3510 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3511
3512 // The packed_reduction_method value will be reused by the __kmp_end_reduce*
3513 // function, so it must be kept in a variable.
3514 // That variable should be a construct-specific or thread-specific property,
3515 // not a team-specific property
3516 // (a thread can reach the next reduce block on the next construct, and the
3517 // reduce method may differ on the next construct).
3518 // An ident_t "loc" parameter could be used as a construct-specific property
3519 // (but what if loc == 0?)
3520 // (and if both construct-specific and team-specific variables were shared,
3521 // unnecessary extra syncs would be needed).
3522 // A thread-specific variable is better with regard to both issues above (next
3523 // construct and extra syncs);
3524 // a thread-specific "th_local.reduction_method" variable is used currently.
3525 // Each thread executes the 'determine' and 'set' lines (there is no need to
3526 // execute them by one thread only, which would add unnecessary extra syncs).
3527
3528 packed_reduction_method = __kmp_determine_reduction_method(
3529 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3530 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3531
3532 OMPT_REDUCTION_DECL(th, global_tid);
3533 if (packed_reduction_method == critical_reduce_block) {
3534
3535 OMPT_REDUCTION_BEGIN;
3536
3537 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3538 retval = 1;
3539
3540 } else if (packed_reduction_method == empty_reduce_block) {
3541
3542 OMPT_REDUCTION_BEGIN;
3543
3544 // usage: if team size == 1, no synchronization is required ( Intel
3545 // platforms only )
3546 retval = 1;
3547
3548 } else if (packed_reduction_method == atomic_reduce_block) {
3549
3550 retval = 2;
3551
3552 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3553 // won't be called by the code gen)
3554 // (this is not ideal: the checking block has already been closed by this
3555 // 'pop', but the atomic operation has not been executed yet; it will be
3556 // executed slightly later, literally on the next instruction)
3558 if (__kmp_env_consistency_check)
3559 __kmp_pop_sync(global_tid, ct_reduce, loc);
3560
3561 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3562 tree_reduce_block)) {
3563
3564 // AT: performance issue: a real barrier here
3565 // AT: (if primary thread is slow, other threads are blocked here waiting for
3566 // the primary thread to come and release them)
3567 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3568 // AT: (specifying NOWAIT won't result in an improvement of performance; it'll
3569 // be confusing to a customer)
3570 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3571 // might go faster and be more in line with the sense of NOWAIT
3572 // AT: TO DO: run the EPCC test and compare times
3573
3574 // this barrier should be invisible to a customer and to the threading profile
3575 // tool (it's neither a terminating barrier nor customer's code, it's
3576 // used for an internal purpose)
3577 #if OMPT_SUPPORT
3578 // JP: can this barrier potentially lead to task scheduling?
3579 // JP: as long as there is a barrier in the implementation, OMPT should and
3580 // will provide the barrier events, so we set up the necessary frame/return
3581 // addresses.
3582 ompt_frame_t *ompt_frame;
3583 if (ompt_enabled.enabled) {
3584 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3585 if (ompt_frame->enter_frame.ptr == NULL)
3586 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3587 }
3588 OMPT_STORE_RETURN_ADDRESS(global_tid);
3589 #endif
3590 #if USE_ITT_NOTIFY
3591 __kmp_threads[global_tid]->th.th_ident = loc;
3592 #endif
3593 retval =
3594 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3595 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3596 retval = (retval != 0) ? (0) : (1);
3597 #if OMPT_SUPPORT && OMPT_OPTIONAL
3598 if (ompt_enabled.enabled) {
3599 ompt_frame->enter_frame = ompt_data_none;
3600 }
3601 #endif
3602
3603 // all workers except the primary thread should do this pop here
3604 // (none of the other workers will get to __kmpc_end_reduce_nowait())
3605 if (__kmp_env_consistency_check) {
3606 if (retval == 0) {
3607 __kmp_pop_sync(global_tid, ct_reduce, loc);
3608 }
3609 }
3610
3611 } else {
3612
3613 // should never reach this block
3614 KMP_ASSERT(0); // "unexpected method"
3615 }
3616 if (teams_swapped) {
3617 __kmp_restore_swapped_teams(th, team, task_state);
3618 }
3619 KA_TRACE(
3620 10,
3621 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3622 global_tid, packed_reduction_method, retval));
3623
3624 return retval;
3625 }
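
// Illustrative code-generation sketch (an assumption, not the mandated
// scheme): a compiler lowering `reduction(+ : sum)` combined with nowait
// might switch on the return value above; loc, gtid, crit, reduce_data,
// reduce_func and partial_sum are placeholders.
//
//   switch (__kmpc_reduce_nowait(&loc, gtid, /*num_vars=*/1, sizeof(sum),
//                                &reduce_data, reduce_func, &crit)) {
//   case 1: // combine non-atomically, then close the reduce region
//     sum += partial_sum;
//     __kmpc_end_reduce_nowait(&loc, gtid, &crit);
//     break;
//   case 2: // atomic fallback: every thread updates atomically, no end call
//     /* #pragma omp atomic equivalent: sum += partial_sum */
//     break;
//   default: // 0: this thread does not participate in the combination
//     break;
//   }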
3626
3627 /*!
3628 @ingroup SYNCHRONIZATION
3629 @param loc source location information
3630 @param global_tid global thread id.
3631 @param lck pointer to the unique lock data structure
3632
3633 Finish the execution of a reduce nowait.
3634 */
3635 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3636 kmp_critical_name *lck) {
3637
3638 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3639
3640 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3641 __kmp_assert_valid_gtid(global_tid);
3642
3643 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3644
3645 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3646
3647 if (packed_reduction_method == critical_reduce_block) {
3648
3649 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3650 OMPT_REDUCTION_END;
3651
3652 } else if (packed_reduction_method == empty_reduce_block) {
3653
3654 // usage: if team size == 1, no synchronization is required ( on Intel
3655 // platforms only )
3656
3657 OMPT_REDUCTION_END;
3658
3659 } else if (packed_reduction_method == atomic_reduce_block) {
3660
3661 // neither the primary thread nor the other workers should get here
3662 // (code gen does not generate this call in case 2: atomic reduce block)
3663 // actually it would be better to remove this else-if entirely;
3664 // after removal this value would be caught by the 'else' and would assert
3665
3666 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3667 tree_reduce_block)) {
3668
3669 // only primary thread gets here
3670 // OMPT: tree reduction is annotated in the barrier code
3671
3672 } else {
3673
3674 // should never reach this block
3675 KMP_ASSERT(0); // "unexpected method"
3676 }
3677
3678 if (__kmp_env_consistency_check)
3679 __kmp_pop_sync(global_tid, ct_reduce, loc);
3680
3681 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3682 global_tid, packed_reduction_method));
3683
3684 return;
3685 }
3686
3687 /* 2.a.ii. Reduce Block with a terminating barrier */
3688
3689 /*!
3690 @ingroup SYNCHRONIZATION
3691 @param loc source location information
3692 @param global_tid global thread number
3693 @param num_vars number of items (variables) to be reduced
3694 @param reduce_size size of data in bytes to be reduced
3695 @param reduce_data pointer to data to be reduced
3696 @param reduce_func callback function providing reduction operation on two
3697 operands and returning result of reduction in lhs_data
3698 @param lck pointer to the unique lock data structure
3699 @result 1 for the primary thread, 0 for all other team threads, 2 for all team
3700 threads if atomic reduction needed
3701
3702 A blocking reduce that includes an implicit barrier.
3703 */
3704 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3705 size_t reduce_size, void *reduce_data,
3706 void (*reduce_func)(void *lhs_data, void *rhs_data),
3707 kmp_critical_name *lck) {
3708 KMP_COUNT_BLOCK(REDUCE_wait);
3709 int retval = 0;
3710 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3711 kmp_info_t *th;
3712 kmp_team_t *team;
3713 int teams_swapped = 0, task_state;
3714
3715 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3716 __kmp_assert_valid_gtid(global_tid);
3717
3718 // why do we need this initialization here at all?
3719 // A reduction clause cannot be a stand-alone directive.
3720
3721 // do not call __kmp_serial_initialize(), it will be called by
3722 // __kmp_parallel_initialize() if needed
3723 // possible detection of false-positive race by the threadchecker ???
3724 if (!TCR_4(__kmp_init_parallel))
3725 __kmp_parallel_initialize();
3726
3727 __kmp_resume_if_soft_paused();
3728
3729 // check correctness of reduce block nesting
3730 #if KMP_USE_DYNAMIC_LOCK
3731 if (__kmp_env_consistency_check)
3732 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3733 #else
3734 if (__kmp_env_consistency_check)
3735 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3736 #endif
3737
3738 th = __kmp_thread_from_gtid(global_tid);
3739 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3740
3741 packed_reduction_method = __kmp_determine_reduction_method(
3742 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3743 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3744
3745 OMPT_REDUCTION_DECL(th, global_tid);
3746
3747 if (packed_reduction_method == critical_reduce_block) {
3748
3749 OMPT_REDUCTION_BEGIN;
3750 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3751 retval = 1;
3752
3753 } else if (packed_reduction_method == empty_reduce_block) {
3754
3755 OMPT_REDUCTION_BEGIN;
3756 // usage: if team size == 1, no synchronization is required ( Intel
3757 // platforms only )
3758 retval = 1;
3759
3760 } else if (packed_reduction_method == atomic_reduce_block) {
3761
3762 retval = 2;
3763
3764 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3765 tree_reduce_block)) {
3766
3767 // case tree_reduce_block:
3768 // this barrier should be visible to a customer and to the threading profile
3769 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3770 #if OMPT_SUPPORT
3771 ompt_frame_t *ompt_frame;
3772 if (ompt_enabled.enabled) {
3773 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3774 if (ompt_frame->enter_frame.ptr == NULL)
3775 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3776 }
3777 OMPT_STORE_RETURN_ADDRESS(global_tid);
3778 #endif
3779 #if USE_ITT_NOTIFY
3780 __kmp_threads[global_tid]->th.th_ident =
3781 loc; // needed for correct notification of frames
3782 #endif
3783 retval =
3784 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3785 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3786 retval = (retval != 0) ? (0) : (1);
3787 #if OMPT_SUPPORT && OMPT_OPTIONAL
3788 if (ompt_enabled.enabled) {
3789 ompt_frame->enter_frame = ompt_data_none;
3790 }
3791 #endif
3792
3793 // all workers except the primary thread should do this pop here
3794 // (none of the workers other than the primary will enter __kmpc_end_reduce())
3795 if (__kmp_env_consistency_check) {
3796 if (retval == 0) { // 0: all other workers; 1: primary thread
3797 __kmp_pop_sync(global_tid, ct_reduce, loc);
3798 }
3799 }
3800
3801 } else {
3802
3803 // should never reach this block
3804 KMP_ASSERT(0); // "unexpected method"
3805 }
3806 if (teams_swapped) {
3807 __kmp_restore_swapped_teams(th, team, task_state);
3808 }
3809
3810 KA_TRACE(10,
3811 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3812 global_tid, packed_reduction_method, retval));
3813 return retval;
3814 }
3815
3816 /*!
3817 @ingroup SYNCHRONIZATION
3818 @param loc source location information
3819 @param global_tid global thread id.
3820 @param lck pointer to the unique lock data structure
3821
3822 Finish the execution of a blocking reduce.
3823 The <tt>lck</tt> pointer must be the same as that used in the corresponding
3824 start function.
3825 */
3826 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3827 kmp_critical_name *lck) {
3828
3829 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3830 kmp_info_t *th;
3831 kmp_team_t *team;
3832 int teams_swapped = 0, task_state;
3833
3834 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3835 __kmp_assert_valid_gtid(global_tid);
3836
3837 th = __kmp_thread_from_gtid(global_tid);
3838 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3839
3840 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3841
3842 // this barrier should be visible to a customer and to the threading profile
3843 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3844 OMPT_REDUCTION_DECL(th, global_tid);
3845
3846 if (packed_reduction_method == critical_reduce_block) {
3847 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3848
3849 OMPT_REDUCTION_END;
3850
3851 // TODO: implicit barrier: should be exposed
3852 #if OMPT_SUPPORT
3853 ompt_frame_t *ompt_frame;
3854 if (ompt_enabled.enabled) {
3855 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3856 if (ompt_frame->enter_frame.ptr == NULL)
3857 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3858 }
3859 OMPT_STORE_RETURN_ADDRESS(global_tid);
3860 #endif
3861 #if USE_ITT_NOTIFY
3862 __kmp_threads[global_tid]->th.th_ident = loc;
3863 #endif
3864 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3865 #if OMPT_SUPPORT && OMPT_OPTIONAL
3866 if (ompt_enabled.enabled) {
3867 ompt_frame->enter_frame = ompt_data_none;
3868 }
3869 #endif
3870
3871 } else if (packed_reduction_method == empty_reduce_block) {
3872
3873 OMPT_REDUCTION_END;
3874
3875 // usage: if team size==1, no synchronization is required (Intel platforms only)
3876
3877 // TODO: implicit barrier: should be exposed
3878 #if OMPT_SUPPORT
3879 ompt_frame_t *ompt_frame;
3880 if (ompt_enabled.enabled) {
3881 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3882 if (ompt_frame->enter_frame.ptr == NULL)
3883 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3884 }
3885 OMPT_STORE_RETURN_ADDRESS(global_tid);
3886 #endif
3887 #if USE_ITT_NOTIFY
3888 __kmp_threads[global_tid]->th.th_ident = loc;
3889 #endif
3890 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3891 #if OMPT_SUPPORT && OMPT_OPTIONAL
3892 if (ompt_enabled.enabled) {
3893 ompt_frame->enter_frame = ompt_data_none;
3894 }
3895 #endif
3896
3897 } else if (packed_reduction_method == atomic_reduce_block) {
3898
3899 #if OMPT_SUPPORT
3900 ompt_frame_t *ompt_frame;
3901 if (ompt_enabled.enabled) {
3902 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3903 if (ompt_frame->enter_frame.ptr == NULL)
3904 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3905 }
3906 OMPT_STORE_RETURN_ADDRESS(global_tid);
3907 #endif
3908 // TODO: implicit barrier: should be exposed
3909 #if USE_ITT_NOTIFY
3910 __kmp_threads[global_tid]->th.th_ident = loc;
3911 #endif
3912 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3913 #if OMPT_SUPPORT && OMPT_OPTIONAL
3914 if (ompt_enabled.enabled) {
3915 ompt_frame->enter_frame = ompt_data_none;
3916 }
3917 #endif
3918
3919 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3920 tree_reduce_block)) {
3921
3922 // only primary thread executes here (primary releases all other workers)
3923 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3924 global_tid);
3925
3926 } else {
3927
3928 // should never reach this block
3929 KMP_ASSERT(0); // "unexpected method"
3930 }
3931 if (teams_swapped) {
3932 __kmp_restore_swapped_teams(th, team, task_state);
3933 }
3934
3935 if (__kmp_env_consistency_check)
3936 __kmp_pop_sync(global_tid, ct_reduce, loc);
3937
3938 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3939 global_tid, packed_reduction_method));
3940
3941 return;
3942 }
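
// Illustrative code-generation sketch (an assumption, not the mandated
// scheme): the blocking pair is driven like the nowait pair, except that
// __kmpc_end_reduce() is also expected in the atomic case so that the
// terminating barrier above is executed; placeholders as before.
//
//   switch (__kmpc_reduce(&loc, gtid, /*num_vars=*/1, sizeof(sum),
//                         &reduce_data, reduce_func, &crit)) {
//   case 1:
//     sum += partial_sum;
//     __kmpc_end_reduce(&loc, gtid, &crit);
//     break;
//   case 2:
//     /* atomic update of sum */
//     __kmpc_end_reduce(&loc, gtid, &crit); // provides the barrier
//     break;
//   default:
//     break;
//   }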
3943
3944 #undef __KMP_GET_REDUCTION_METHOD
3945 #undef __KMP_SET_REDUCTION_METHOD
3946
3947 /* end of interface to fast scalable reduce routines */
3948
3949 kmp_uint64 __kmpc_get_taskid() {
3950
3951 kmp_int32 gtid;
3952 kmp_info_t *thread;
3953
3954 gtid = __kmp_get_gtid();
3955 if (gtid < 0) {
3956 return 0;
3957 }
3958 thread = __kmp_thread_from_gtid(gtid);
3959 return thread->th.th_current_task->td_task_id;
3960
3961 } // __kmpc_get_taskid
3962
3963 kmp_uint64 __kmpc_get_parent_taskid() {
3964
3965 kmp_int32 gtid;
3966 kmp_info_t *thread;
3967 kmp_taskdata_t *parent_task;
3968
3969 gtid = __kmp_get_gtid();
3970 if (gtid < 0) {
3971 return 0;
3972 }
3973 thread = __kmp_thread_from_gtid(gtid);
3974 parent_task = thread->th.th_current_task->td_parent;
3975 return (parent_task == NULL ? 0 : parent_task->td_task_id);
3976
3977 } // __kmpc_get_parent_taskid
3978
3979 /*!
3980 @ingroup WORK_SHARING
3981 @param loc source location information.
3982 @param gtid global thread number.
3983 @param num_dims number of associated doacross loops.
3984 @param dims info on loops bounds.
3985
3986 Initialize doacross loop information.
3987 The compiler is expected to send us inclusive bounds,
3988 e.g. for the loop for(i=2;i<9;i+=2): lo=2, up=8, st=2.
3989 */
3990 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3991 const struct kmp_dim *dims) {
3992 __kmp_assert_valid_gtid(gtid);
3993 int j, idx;
3994 kmp_int64 last, trace_count;
3995 kmp_info_t *th = __kmp_threads[gtid];
3996 kmp_team_t *team = th->th.th_team;
3997 kmp_uint32 *flags;
3998 kmp_disp_t *pr_buf = th->th.th_dispatch;
3999 dispatch_shared_info_t *sh_buf;
4000
4001 KA_TRACE(
4002 20,
4003 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4004 gtid, num_dims, !team->t.t_serialized));
4005 KMP_DEBUG_ASSERT(dims != NULL);
4006 KMP_DEBUG_ASSERT(num_dims > 0);
4007
4008 if (team->t.t_serialized) {
4009 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4010 return; // no dependencies if team is serialized
4011 }
4012 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4013 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4014 // the next loop
4015 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4016
4017 // Save bounds info into allocated private buffer
4018 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4019 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4020 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4021 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4022 pr_buf->th_doacross_info[0] =
4023 (kmp_int64)num_dims; // first element is number of dimensions
4024 // Also save the address of num_done so it can be accessed later without
4025 // knowing the buffer index
4026 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4027 pr_buf->th_doacross_info[2] = dims[0].lo;
4028 pr_buf->th_doacross_info[3] = dims[0].up;
4029 pr_buf->th_doacross_info[4] = dims[0].st;
4030 last = 5;
4031 for (j = 1; j < num_dims; ++j) {
4032 kmp_int64
4033 range_length; // To keep ranges of all dimensions but the first dims[0]
4034 if (dims[j].st == 1) { // most common case
4035 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4036 range_length = dims[j].up - dims[j].lo + 1;
4037 } else {
4038 if (dims[j].st > 0) {
4039 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4040 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4041 } else { // negative increment
4042 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4043 range_length =
4044 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4045 }
4046 }
4047 pr_buf->th_doacross_info[last++] = range_length;
4048 pr_buf->th_doacross_info[last++] = dims[j].lo;
4049 pr_buf->th_doacross_info[last++] = dims[j].up;
4050 pr_buf->th_doacross_info[last++] = dims[j].st;
4051 }
4052
4053 // Compute total trip count.
4054 // Start with range of dims[0] which we don't need to keep in the buffer.
4055 if (dims[0].st == 1) { // most common case
4056 trace_count = dims[0].up - dims[0].lo + 1;
4057 } else if (dims[0].st > 0) {
4058 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4059 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4060 } else { // negative increment
4061 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4062 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4063 }
4064 for (j = 1; j < num_dims; ++j) {
4065 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4066 }
4067 KMP_DEBUG_ASSERT(trace_count > 0);
4068
4069 // Check if the shared buffer is still occupied by another loop (idx -
4070 // __kmp_dispatch_num_buffers)
4071 if (idx != sh_buf->doacross_buf_idx) {
4072 // Shared buffer is occupied, wait for it to be free
4073 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4074 __kmp_eq_4, NULL);
4075 }
4076 #if KMP_32_BIT_ARCH
4077 // Check if we are the first thread. After the CAS the first thread gets 0,
4078 // others get 1 if initialization is in progress, allocated pointer otherwise.
4079 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4080 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4081 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4082 #else
4083 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4084 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4085 #endif
4086 if (flags == NULL) {
4087 // we are the first thread, allocate the array of flags
4088 size_t size =
4089 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4090 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4091 KMP_MB();
4092 sh_buf->doacross_flags = flags;
4093 } else if (flags == (kmp_uint32 *)1) {
4094 #if KMP_32_BIT_ARCH
4095 // initialization is still in progress, need to wait
4096 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4097 #else
4098 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4099 #endif
4100 KMP_YIELD(TRUE);
4101 KMP_MB();
4102 } else {
4103 KMP_MB();
4104 }
4105 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4106 pr_buf->th_doacross_flags =
4107 sh_buf->doacross_flags; // save private copy in order to not
4108 // touch shared buffer on each iteration
4109 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4110 }
4111
4112 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4113 __kmp_assert_valid_gtid(gtid);
4114 kmp_int64 shft;
4115 size_t num_dims, i;
4116 kmp_uint32 flag;
4117 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4118 kmp_info_t *th = __kmp_threads[gtid];
4119 kmp_team_t *team = th->th.th_team;
4120 kmp_disp_t *pr_buf;
4121 kmp_int64 lo, up, st;
4122
4123 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4124 if (team->t.t_serialized) {
4125 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4126 return; // no dependencies if team is serialized
4127 }
4128
4129 // calculate sequential iteration number and check out-of-bounds condition
4130 pr_buf = th->th.th_dispatch;
4131 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4132 num_dims = (size_t)pr_buf->th_doacross_info[0];
4133 lo = pr_buf->th_doacross_info[2];
4134 up = pr_buf->th_doacross_info[3];
4135 st = pr_buf->th_doacross_info[4];
4136 #if OMPT_SUPPORT && OMPT_OPTIONAL
4137 ompt_dependence_t deps[num_dims];
4138 #endif
4139 if (st == 1) { // most common case
4140 if (vec[0] < lo || vec[0] > up) {
4141 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4142 "bounds [%lld,%lld]\n",
4143 gtid, vec[0], lo, up));
4144 return;
4145 }
4146 iter_number = vec[0] - lo;
4147 } else if (st > 0) {
4148 if (vec[0] < lo || vec[0] > up) {
4149 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4150 "bounds [%lld,%lld]\n",
4151 gtid, vec[0], lo, up));
4152 return;
4153 }
4154 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4155 } else { // negative increment
4156 if (vec[0] > lo || vec[0] < up) {
4157 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4158 "bounds [%lld,%lld]\n",
4159 gtid, vec[0], lo, up));
4160 return;
4161 }
4162 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4163 }
4164 #if OMPT_SUPPORT && OMPT_OPTIONAL
4165 deps[0].variable.value = iter_number;
4166 deps[0].dependence_type = ompt_dependence_type_sink;
4167 #endif
4168 for (i = 1; i < num_dims; ++i) {
4169 kmp_int64 iter, ln;
4170 size_t j = i * 4;
4171 ln = pr_buf->th_doacross_info[j + 1];
4172 lo = pr_buf->th_doacross_info[j + 2];
4173 up = pr_buf->th_doacross_info[j + 3];
4174 st = pr_buf->th_doacross_info[j + 4];
4175 if (st == 1) {
4176 if (vec[i] < lo || vec[i] > up) {
4177 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4178 "bounds [%lld,%lld]\n",
4179 gtid, vec[i], lo, up));
4180 return;
4181 }
4182 iter = vec[i] - lo;
4183 } else if (st > 0) {
4184 if (vec[i] < lo || vec[i] > up) {
4185 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4186 "bounds [%lld,%lld]\n",
4187 gtid, vec[i], lo, up));
4188 return;
4189 }
4190 iter = (kmp_uint64)(vec[i] - lo) / st;
4191 } else { // st < 0
4192 if (vec[i] > lo || vec[i] < up) {
4193 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4194 "bounds [%lld,%lld]\n",
4195 gtid, vec[i], lo, up));
4196 return;
4197 }
4198 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4199 }
4200 iter_number = iter + ln * iter_number;
4201 #if OMPT_SUPPORT && OMPT_OPTIONAL
4202 deps[i].variable.value = iter;
4203 deps[i].dependence_type = ompt_dependence_type_sink;
4204 #endif
4205 }
4206 shft = iter_number % 32; // use 32-bit granularity
4207 iter_number >>= 5; // divided by 32
4208 flag = 1 << shft;
4209 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4210 KMP_YIELD(TRUE);
4211 }
4212 KMP_MB();
4213 #if OMPT_SUPPORT && OMPT_OPTIONAL
4214 if (ompt_enabled.ompt_callback_dependences) {
4215 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4216 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4217 }
4218 #endif
4219 KA_TRACE(20,
4220 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4221 gtid, (iter_number << 5) + shft));
4222 }
4223
4224 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4225 __kmp_assert_valid_gtid(gtid);
4226 kmp_int64 shft;
4227 size_t num_dims, i;
4228 kmp_uint32 flag;
4229 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4230 kmp_info_t *th = __kmp_threads[gtid];
4231 kmp_team_t *team = th->th.th_team;
4232 kmp_disp_t *pr_buf;
4233 kmp_int64 lo, st;
4234
4235 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4236 if (team->t.t_serialized) {
4237 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4238 return; // no dependencies if team is serialized
4239 }
4240
4241 // calculate sequential iteration number (same as in "wait" but no
4242 // out-of-bounds checks)
4243 pr_buf = th->th.th_dispatch;
4244 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4245 num_dims = (size_t)pr_buf->th_doacross_info[0];
4246 lo = pr_buf->th_doacross_info[2];
4247 st = pr_buf->th_doacross_info[4];
4248 #if OMPT_SUPPORT && OMPT_OPTIONAL
4249 ompt_dependence_t deps[num_dims];
4250 #endif
4251 if (st == 1) { // most common case
4252 iter_number = vec[0] - lo;
4253 } else if (st > 0) {
4254 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4255 } else { // negative increment
4256 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4257 }
4258 #if OMPT_SUPPORT && OMPT_OPTIONAL
4259 deps[0].variable.value = iter_number;
4260 deps[0].dependence_type = ompt_dependence_type_source;
4261 #endif
4262 for (i = 1; i < num_dims; ++i) {
4263 kmp_int64 iter, ln;
4264 size_t j = i * 4;
4265 ln = pr_buf->th_doacross_info[j + 1];
4266 lo = pr_buf->th_doacross_info[j + 2];
4267 st = pr_buf->th_doacross_info[j + 4];
4268 if (st == 1) {
4269 iter = vec[i] - lo;
4270 } else if (st > 0) {
4271 iter = (kmp_uint64)(vec[i] - lo) / st;
4272 } else { // st < 0
4273 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4274 }
4275 iter_number = iter + ln * iter_number;
4276 #if OMPT_SUPPORT && OMPT_OPTIONAL
4277 deps[i].variable.value = iter;
4278 deps[i].dependence_type = ompt_dependence_type_source;
4279 #endif
4280 }
4281 #if OMPT_SUPPORT && OMPT_OPTIONAL
4282 if (ompt_enabled.ompt_callback_dependences) {
4283 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4284 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4285 }
4286 #endif
4287 shft = iter_number % 32; // use 32-bit granularity
4288 iter_number >>= 5; // divided by 32
4289 flag = 1 << shft;
4290 KMP_MB();
4291 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4292 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4293 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4294 (iter_number << 5) + shft));
4295 }
4296
4297 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4298 __kmp_assert_valid_gtid(gtid);
4299 kmp_int32 num_done;
4300 kmp_info_t *th = __kmp_threads[gtid];
4301 kmp_team_t *team = th->th.th_team;
4302 kmp_disp_t *pr_buf = th->th.th_dispatch;
4303
4304 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4305 if (team->t.t_serialized) {
4306 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4307 return; // nothing to do
4308 }
4309 num_done =
4310 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4311 if (num_done == th->th.th_team_nproc) {
4312 // we are the last thread, need to free shared resources
4313 int idx = pr_buf->th_doacross_buf_idx - 1;
4314 dispatch_shared_info_t *sh_buf =
4315 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4316 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4317 (kmp_int64)&sh_buf->doacross_num_done);
4318 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4319 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4320 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4321 sh_buf->doacross_flags = NULL;
4322 sh_buf->doacross_num_done = 0;
4323 sh_buf->doacross_buf_idx +=
4324 __kmp_dispatch_num_buffers; // free buffer for future re-use
4325 }
4326 // free private resources (need to keep buffer index forever)
4327 pr_buf->th_doacross_flags = NULL;
4328 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4329 pr_buf->th_doacross_info = NULL;
4330 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4331 }
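
// Illustrative code-generation sketch (an assumption, not the mandated
// scheme): a compiler lowering
//
//   #pragma omp for ordered(1)
//   for (i = lo; i <= up; i++) {
//     #pragma omp ordered depend(sink : i - 1)
//     /* ... consume result of iteration i-1 ... */
//     #pragma omp ordered depend(source)
//   }
//
// might drive the four entry points above roughly as follows (chunk_lo and
// chunk_up are the bounds of this thread's chunk; all names are placeholders):
//
//   struct kmp_dim dim;
//   dim.lo = lo; dim.up = up; dim.st = 1; // inclusive bounds, see init above
//   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, &dim);
//   for (kmp_int64 i = chunk_lo; i <= chunk_up; ++i) {
//     kmp_int64 vec = i - 1;
//     __kmpc_doacross_wait(&loc, gtid, &vec); // wait for iteration i-1
//     /* ... loop body ... */
//     vec = i;
//     __kmpc_doacross_post(&loc, gtid, &vec); // mark iteration i as done
//   }
//   __kmpc_doacross_fini(&loc, gtid);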
4332
4333 /* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */
4334 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4335 return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4336 }
4337
4338 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4339 return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
4340 }
4341
4342 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4343 omp_allocator_handle_t free_allocator) {
4344 return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4345 free_allocator);
4346 }
4347
4348 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4349 __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4350 }
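
// Usage sketch: these wrappers simply forward to the __kmpc_* allocation entry
// points with the calling thread's gtid, e.g.
//
//   double *buf =
//       (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
//   if (buf != NULL) {
//     /* ... use buf ... */
//     omp_free(buf, omp_default_mem_alloc);
//   }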
4351
4352 int __kmpc_get_target_offload(void) {
4353 if (!__kmp_init_serial) {
4354 __kmp_serial_initialize();
4355 }
4356 return __kmp_target_offload;
4357 }
4358
4359 int __kmpc_pause_resource(kmp_pause_status_t level) {
4360 if (!__kmp_init_serial) {
4361 return 1; // Can't pause if runtime is not initialized
4362 }
4363 return __kmp_pause_resource(level);
4364 }
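
// Usage sketch (assuming the kmp_soft_paused/kmp_hard_paused enumerators from
// kmp.h): callers should check the return value, since pausing fails when the
// runtime has not been initialized or the requested level is not supported.
//
//   if (__kmpc_pause_resource(kmp_soft_paused) != 0) {
//     /* could not pause; the runtime keeps running normally */
//   }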
4365
4366 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4367 if (!__kmp_init_serial)
4368 __kmp_serial_initialize();
4369
4370 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4371
4372 #if OMPT_SUPPORT
4373 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4374 ompt_callbacks.ompt_callback(ompt_callback_error)(
4375 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4376 OMPT_GET_RETURN_ADDRESS(0));
4377 }
4378 #endif // OMPT_SUPPORT
4379
4380 char *src_loc;
4381 if (loc && loc->psource) {
4382 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4383 src_loc =
4384 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
4385 __kmp_str_loc_free(&str_loc);
4386 } else {
4387 src_loc = __kmp_str_format("unknown");
4388 }
4389
4390 if (severity == severity_warning)
4391 KMP_WARNING(UserDirectedWarning, src_loc, message);
4392 else
4393 KMP_FATAL(UserDirectedError, src_loc, message);
4394
4395 __kmp_str_free(&src_loc);
4396 }
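
// Illustrative code-generation sketch (an assumption, not the mandated
// scheme): for a directive such as
//
//   #pragma omp error at(execution) severity(warning) message("check input")
//
// a compiler might emit
//
//   __kmpc_error(&loc, severity_warning, "check input");
//
// with &loc describing the directive's source position.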
4397
4398 #ifdef KMP_USE_VERSION_SYMBOLS
4399 // For GOMP compatibility there are two versions of each omp_* API.
4400 // One is the plain C symbol and one is the Fortran symbol with an appended
4401 // underscore. When we implement a specific ompc_* version of an omp_*
4402 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4403 // instead of the Fortran versions in kmp_ftn_entry.h
4404 extern "C" {
4405 // Have to undef these from omp.h so they aren't translated into
4406 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4407 #ifdef omp_set_affinity_format
4408 #undef omp_set_affinity_format
4409 #endif
4410 #ifdef omp_get_affinity_format
4411 #undef omp_get_affinity_format
4412 #endif
4413 #ifdef omp_display_affinity
4414 #undef omp_display_affinity
4415 #endif
4416 #ifdef omp_capture_affinity
4417 #undef omp_capture_affinity
4418 #endif
4419 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4420 "OMP_5.0");
4421 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4422 "OMP_5.0");
4423 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4424 "OMP_5.0");
4425 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4426 "OMP_5.0");
4427 } // extern "C"
4428 #endif
4429