1 /*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22
23 #define MAX_MESSAGE 512
24
25 // flags will be used in future, e.g. to implement openmp_strict library
26 // restrictions
27
28 /*!
29 * @ingroup STARTUP_SHUTDOWN
30 * @param loc in source location information
31 * @param flags in for future use (currently ignored)
32 *
33 * Initialize the runtime library. This call is optional; if it is not made then
34 * it will be implicitly called by attempts to use other library functions.
35 */
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37 // By default __kmpc_begin() is no-op.
38 char *env;
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 __kmp_assign_root_init_mask();
43 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44 } else if (__kmp_ignore_mppbeg() == FALSE) {
45 // By default __kmp_ignore_mppbeg() returns TRUE.
46 __kmp_internal_begin();
47 KC_TRACE(10, ("__kmpc_begin: called\n"));
48 }
49 }
50
51 /*!
52 * @ingroup STARTUP_SHUTDOWN
53 * @param loc source location information
54 *
55 Shut down the runtime library. This is also optional, and even if called it will
56 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
57 * zero.
58 */
59 void __kmpc_end(ident_t *loc) {
60 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63 // returns FALSE and __kmpc_end() will unregister this root (it can cause
64 // library shut down).
65 if (__kmp_ignore_mppend() == FALSE) {
66 KC_TRACE(10, ("__kmpc_end: called\n"));
67 KA_TRACE(30, ("__kmpc_end\n"));
68
69 __kmp_internal_end_thread(-1);
70 }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72 // Normal exit process on Windows does not allow worker threads of the final
73 // parallel region to finish reporting their events, so shutting down the
74 // library here fixes the issue at least for the cases where __kmpc_end() is
75 // placed properly.
76 if (ompt_enabled.enabled)
77 __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
80
81 /*!
82 @ingroup THREAD_STATES
83 @param loc Source location information.
84 @return The global thread index of the active thread.
85
86 This function can be called in any context.
87
88 If the runtime has only been entered at the outermost level from a
89 single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
90 that which would be returned by omp_get_thread_num() in the outermost
91 active parallel construct. (Or zero if there is no active parallel
92 construct, since the primary thread is necessarily thread zero).
93
94 If multiple non-OpenMP threads all enter an OpenMP construct then this
95 will be a unique thread identifier among all the threads created by
96 the OpenMP runtime (but the value cannot be defined in terms of
97 OpenMP thread ids returned by omp_get_thread_num()).
98 */
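/* Illustrative sketch only (hypothetical names): a foreign, non-OpenMP thread
   can ask for its global thread id before making other __kmpc_* calls:

     // loc: compiler-emitted ident_t for the call site (contents omitted)
     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     // gtid is unique among all threads known to the runtime, but it is not
     // the same numbering that omp_get_thread_num() returns.
*/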
99 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104 return gtid;
105 }
106
107 /*!
108 @ingroup THREAD_STATES
109 @param loc Source location information.
110 @return The number of threads under control of the OpenMP<sup>*</sup> runtime
111
112 This function can be called in any context.
113 It returns the total number of threads under the control of the OpenMP runtime.
114 That is not a number that can be determined by any OpenMP standard calls, since
115 the library may be called from more than one non-OpenMP thread, and this
116 reflects the total over all such calls. Similarly, the runtime maintains
117 underlying threads even when they are not active (since the cost of creating
118 and destroying OS threads is high), so this call counts all such threads even if
119 they are not waiting for work.
120 */
121 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122 KC_TRACE(10,
123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125 return TCR_4(__kmp_all_nth);
126 }
127
128 /*!
129 @ingroup THREAD_STATES
130 @param loc Source location information.
131 @return The thread number of the calling thread in the innermost active parallel
132 construct.
133 */
134 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136 return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138
139 /*!
140 @ingroup THREAD_STATES
141 @param loc Source location information.
142 @return The number of threads in the innermost active parallel construct.
143 */
144 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147 return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149
150 /*!
151 * @ingroup DEPRECATED
152 * @param loc location description
153 *
154 * This function need not be called. It always returns TRUE.
155 */
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158
159 return TRUE;
160
161 #else
162
163 const char *semi2;
164 const char *semi3;
165 int line_no;
166
167 if (__kmp_par_range == 0) {
168 return TRUE;
169 }
170 semi2 = loc->psource;
171 if (semi2 == NULL) {
172 return TRUE;
173 }
174 semi2 = strchr(semi2, ';');
175 if (semi2 == NULL) {
176 return TRUE;
177 }
178 semi2 = strchr(semi2 + 1, ';');
179 if (semi2 == NULL) {
180 return TRUE;
181 }
182 if (__kmp_par_range_filename[0]) {
183 const char *name = semi2 - 1;
184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185 name--;
186 }
187 if ((*name == '/') || (*name == ';')) {
188 name++;
189 }
190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191 return __kmp_par_range < 0;
192 }
193 }
194 semi3 = strchr(semi2 + 1, ';');
195 if (__kmp_par_range_routine[0]) {
196 if ((semi3 != NULL) && (semi3 > semi2) &&
197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198 return __kmp_par_range < 0;
199 }
200 }
201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203 return __kmp_par_range > 0;
204 }
205 return __kmp_par_range < 0;
206 }
207 return TRUE;
208
209 #endif /* KMP_DEBUG */
210 }
211
212 /*!
213 @ingroup THREAD_STATES
214 @param loc Source location information.
215 @return 1 if this thread is executing inside an active parallel region, zero if
216 not.
217 */
218 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219 return __kmp_entry_thread()->th.th_root->r.r_active;
220 }
221
222 /*!
223 @ingroup PARALLEL
224 @param loc source location information
225 @param global_tid global thread number
226 @param num_threads number of threads requested for this parallel construct
227
228 Set the number of threads to be used by the next fork spawned by this thread.
229 This call is only required if the parallel construct has a `num_threads` clause.
230 */
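/* Illustrative sketch only (hypothetical names): a `num_threads` clause is
   lowered to a __kmpc_push_num_threads() call immediately before the matching
   fork, e.g. for `#pragma omp parallel num_threads(4)`:

     // loc: compiler-emitted ident_t for this construct (contents omitted)
     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_threads(&loc, gtid, 4);
     __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);
*/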
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232 kmp_int32 num_threads) {
233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234 global_tid, num_threads));
235 __kmp_assert_valid_gtid(global_tid);
236 __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241 /* the num_threads are automatically popped */
242 }
243
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245 kmp_int32 proc_bind) {
246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247 proc_bind));
248 __kmp_assert_valid_gtid(global_tid);
249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
251
252 /*!
253 @ingroup PARALLEL
254 @param loc source location information
255 @param argc total number of arguments in the ellipsis
256 @param microtask pointer to callback routine consisting of outlined parallel
257 construct
258 @param ... pointers to shared variables that aren't global
259
260 Do the actual fork and call the microtask in the relevant number of threads.
261 */
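/* Illustrative sketch only (hypothetical names, not code emitted verbatim by
   any particular compiler): an outlined parallel region receives its global
   and bound thread ids by pointer, followed by the pass-through shared
   arguments, and the construct itself becomes a single __kmpc_fork_call().

     static void outlined_body(kmp_int32 *gtid, kmp_int32 *btid, int *shared_x) {
       // ... body of the parallel region, executed by every thread in the team ...
     }

     void user_code(void) {
       int x = 0;
       static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0,
                             ";unknown;unknown;0;0;;"}; // assumed layout
       __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_body, &x);
       // The primary thread returns here only after the implicit join.
     }
*/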
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263 int gtid = __kmp_entry_gtid();
264
265 #if (KMP_STATS_ENABLED)
266 // If we were in a serial region, then stop the serial timer, record
267 // the event, and start parallel region timer
268 stats_state_e previous_state = KMP_GET_THREAD_STATE();
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 } else {
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273 }
274 int inParallel = __kmpc_in_parallel(loc);
275 if (inParallel) {
276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277 } else {
278 KMP_COUNT_BLOCK(OMP_PARALLEL);
279 }
280 #endif
281
282 // maybe saving thr_state is enough here
283 {
284 va_list ap;
285 va_start(ap, microtask);
286
287 #if OMPT_SUPPORT
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293 }
294 OMPT_STORE_RETURN_ADDRESS(gtid);
295 #endif
296
297 #if INCLUDE_SSC_MARKS
298 SSC_MARK_FORKING();
299 #endif
300 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303 kmp_va_addr_of(ap));
304 #if INCLUDE_SSC_MARKS
305 SSC_MARK_JOINING();
306 #endif
307 __kmp_join_call(loc, gtid
308 #if OMPT_SUPPORT
309 ,
310 fork_context_intel
311 #endif
312 );
313
314 va_end(ap);
315
316 #if OMPT_SUPPORT
317 if (ompt_enabled.enabled) {
318 ompt_frame->enter_frame = ompt_data_none;
319 }
320 #endif
321 }
322
323 #if KMP_STATS_ENABLED
324 if (previous_state == stats_state_e::SERIAL_REGION) {
325 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326 KMP_SET_THREAD_STATE(previous_state);
327 } else {
328 KMP_POP_PARTITIONED_TIMER();
329 }
330 #endif // KMP_STATS_ENABLED
331 }
332
333 /*!
334 @ingroup PARALLEL
335 @param loc source location information
336 @param global_tid global thread number
337 @param num_teams number of teams requested for the teams construct
338 @param num_threads number of threads per team requested for the teams construct
339
340 Set the number of teams to be used by the teams construct.
341 This call is only required if the teams construct has a `num_teams` clause
342 or a `thread_limit` clause (or both).
343 */
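/* Illustrative sketch only (hypothetical names): for
   `#pragma omp teams num_teams(8) thread_limit(4)` the clause values are
   pushed right before the teams fork:

     // loc: compiler-emitted ident_t for this construct (contents omitted)
     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_teams(&loc, gtid, 8, 4);
     __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_body);
*/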
344 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
345 kmp_int32 num_teams, kmp_int32 num_threads) {
346 KA_TRACE(20,
347 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
348 global_tid, num_teams, num_threads));
349 __kmp_assert_valid_gtid(global_tid);
350 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
351 }
352
353 /*!
354 @ingroup PARALLEL
355 @param loc source location information
356 @param global_tid global thread number
357 @param num_teams_lb lower bound on number of teams requested for the teams
358 construct
359 @param num_teams_ub upper bound on number of teams requested for the teams
360 construct
361 @param num_threads number of threads per team requested for the teams construct
362
363 Set the number of teams to be used by the teams construct. The number of initial
364 teams created will be greater than or equal to the lower bound and less than or
365 equal to the upper bound.
366 This call is only required if the teams construct has a `num_teams` clause
367 or a `thread_limit` clause (or both).
368 */
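/* Illustrative sketch only (hypothetical names): the OpenMP 5.1 range form
   `num_teams(4:16)` maps onto this entry point, e.g. with `thread_limit(2)`:

     __kmpc_push_num_teams_51(&loc, gtid, 4, 16, 2); // lb=4, ub=16, threads=2
*/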
369 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
370 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
371 kmp_int32 num_threads) {
372 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
373 " num_teams_ub=%d num_threads=%d\n",
374 global_tid, num_teams_lb, num_teams_ub, num_threads));
375 __kmp_assert_valid_gtid(global_tid);
376 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
377 num_threads);
378 }
379
380 /*!
381 @ingroup PARALLEL
382 @param loc source location information
383 @param argc total number of arguments in the ellipsis
384 @param microtask pointer to callback routine consisting of outlined teams
385 construct
386 @param ... pointers to shared variables that aren't global
387
388 Do the actual fork and call the microtask in the relevant number of threads.
389 */
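/* Illustrative sketch only (hypothetical names): the teams region is outlined
   with the same microtask signature as a parallel region and handed to
   __kmpc_fork_teams(); the runtime then runs the body once per team via
   __kmp_teams_master / __kmp_invoke_teams_master (the "wrapped" task below).

     static void outlined_teams_body(kmp_int32 *gtid, kmp_int32 *btid) {
       // ... body of the teams region, run by each team's primary thread ...
     }

     __kmpc_push_num_teams(&loc, gtid, 8, 0); // optional, from num_teams(8)
     __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_body);
*/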
390 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
391 ...) {
392 int gtid = __kmp_entry_gtid();
393 kmp_info_t *this_thr = __kmp_threads[gtid];
394 va_list ap;
395 va_start(ap, microtask);
396
397 #if KMP_STATS_ENABLED
398 KMP_COUNT_BLOCK(OMP_TEAMS);
399 stats_state_e previous_state = KMP_GET_THREAD_STATE();
400 if (previous_state == stats_state_e::SERIAL_REGION) {
401 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
402 } else {
403 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
404 }
405 #endif
406
407 // remember teams entry point and nesting level
408 this_thr->th.th_teams_microtask = microtask;
409 this_thr->th.th_teams_level =
410 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
411
412 #if OMPT_SUPPORT
413 kmp_team_t *parent_team = this_thr->th.th_team;
414 int tid = __kmp_tid_from_gtid(gtid);
415 if (ompt_enabled.enabled) {
416 parent_team->t.t_implicit_task_taskdata[tid]
417 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
418 }
419 OMPT_STORE_RETURN_ADDRESS(gtid);
420 #endif
421
422 // check if __kmpc_push_num_teams was called; set the default number of teams
423 // otherwise
424 if (this_thr->th.th_teams_size.nteams == 0) {
425 __kmp_push_num_teams(loc, gtid, 0, 0);
426 }
427 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
428 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
429 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
430
431 __kmp_fork_call(
432 loc, gtid, fork_context_intel, argc,
433 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
434 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
435 __kmp_join_call(loc, gtid
436 #if OMPT_SUPPORT
437 ,
438 fork_context_intel
439 #endif
440 );
441
442 // Pop current CG root off list
443 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
444 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
445 this_thr->th.th_cg_roots = tmp->up;
446 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
447 " to node %p. cg_nthreads was %d\n",
448 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
449 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
450 int i = tmp->cg_nthreads--;
451 if (i == 1) { // check if we are the last thread in CG (not always the case)
452 __kmp_free(tmp);
453 }
454 // Restore current task's thread_limit from CG root
455 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
456 this_thr->th.th_current_task->td_icvs.thread_limit =
457 this_thr->th.th_cg_roots->cg_thread_limit;
458
459 this_thr->th.th_teams_microtask = NULL;
460 this_thr->th.th_teams_level = 0;
461 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
462 va_end(ap);
463 #if KMP_STATS_ENABLED
464 if (previous_state == stats_state_e::SERIAL_REGION) {
465 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
466 KMP_SET_THREAD_STATE(previous_state);
467 } else {
468 KMP_POP_PARTITIONED_TIMER();
469 }
470 #endif // KMP_STATS_ENABLED
471 }
472
473 // I don't think this function should ever have been exported.
474 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
475 // openmp code ever called it, but it's been exported from the RTL for so
476 // long that I'm afraid to remove the definition.
477 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
478
479 /*!
480 @ingroup PARALLEL
481 @param loc source location information
482 @param global_tid global thread number
483
484 Enter a serialized parallel construct. This interface is used to handle a
485 conditional parallel region, like this,
486 @code
487 #pragma omp parallel if (condition)
488 @endcode
489 when the condition is false.
490 */
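/* Illustrative sketch only (hypothetical names; the exact argument passing of
   the serialized call is an assumption): with `#pragma omp parallel if (cond)`
   the compiler can branch between the real fork and the serialized fallback:

     if (cond) {
       __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);
     } else {
       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       kmp_int32 zero = 0;
       __kmpc_serialized_parallel(&loc, gtid);
       outlined_body(&gtid, &zero); // run the region body on this thread
       __kmpc_end_serialized_parallel(&loc, gtid);
     }
*/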
491 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
492 // The implementation is now in kmp_runtime.cpp so that it can share static
493 // functions with kmp_fork_call since the tasks to be done are similar in
494 // each case.
495 __kmp_assert_valid_gtid(global_tid);
496 #if OMPT_SUPPORT
497 OMPT_STORE_RETURN_ADDRESS(global_tid);
498 #endif
499 __kmp_serialized_parallel(loc, global_tid);
500 }
501
502 /*!
503 @ingroup PARALLEL
504 @param loc source location information
505 @param global_tid global thread number
506
507 Leave a serialized parallel construct.
508 */
509 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
510 kmp_internal_control_t *top;
511 kmp_info_t *this_thr;
512 kmp_team_t *serial_team;
513
514 KC_TRACE(10,
515 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
516
517 /* skip all this code for autopar serialized loops since it results in
518 unacceptable overhead */
519 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
520 return;
521
522 // Not autopar code
523 __kmp_assert_valid_gtid(global_tid);
524 if (!TCR_4(__kmp_init_parallel))
525 __kmp_parallel_initialize();
526
527 __kmp_resume_if_soft_paused();
528
529 this_thr = __kmp_threads[global_tid];
530 serial_team = this_thr->th.th_serial_team;
531
532 kmp_task_team_t *task_team = this_thr->th.th_task_team;
533 // we need to wait for the proxy tasks before finishing the thread
534 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
535 task_team->tt.tt_hidden_helper_task_encountered))
536 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
537
538 KMP_MB();
539 KMP_DEBUG_ASSERT(serial_team);
540 KMP_ASSERT(serial_team->t.t_serialized);
541 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
542 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
543 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
544 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
545
546 #if OMPT_SUPPORT
547 if (ompt_enabled.enabled &&
548 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
549 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
550 if (ompt_enabled.ompt_callback_implicit_task) {
551 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
552 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
553 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
554 }
555
556 // clear the task id only after unlinking the task
557 ompt_data_t *parent_task_data;
558 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
559
560 if (ompt_enabled.ompt_callback_parallel_end) {
561 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
562 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
563 ompt_parallel_invoker_program | ompt_parallel_team,
564 OMPT_LOAD_RETURN_ADDRESS(global_tid));
565 }
566 __ompt_lw_taskteam_unlink(this_thr);
567 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
568 }
569 #endif
570
571 /* If necessary, pop the internal control stack values and replace the team
572 * values */
573 top = serial_team->t.t_control_stack_top;
574 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
575 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
576 serial_team->t.t_control_stack_top = top->next;
577 __kmp_free(top);
578 }
579
580 /* pop dispatch buffers stack */
581 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
582 {
583 dispatch_private_info_t *disp_buffer =
584 serial_team->t.t_dispatch->th_disp_buffer;
585 serial_team->t.t_dispatch->th_disp_buffer =
586 serial_team->t.t_dispatch->th_disp_buffer->next;
587 __kmp_free(disp_buffer);
588 }
589 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
590
591 --serial_team->t.t_serialized;
592 if (serial_team->t.t_serialized == 0) {
593
594 /* return to the parallel section */
595
596 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
597 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
598 __kmp_clear_x87_fpu_status_word();
599 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
600 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
601 }
602 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
603
604 __kmp_pop_current_task_from_thread(this_thr);
605 #if OMPD_SUPPORT
606 if (ompd_state & OMPD_ENABLE_BP)
607 ompd_bp_parallel_end();
608 #endif
609
610 this_thr->th.th_team = serial_team->t.t_parent;
611 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
612
613 /* restore values cached in the thread */
614 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
615 this_thr->th.th_team_master =
616 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
617 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
618
619 /* TODO the below shouldn't need to be adjusted for serialized teams */
620 this_thr->th.th_dispatch =
621 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
622
623 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
624 this_thr->th.th_current_task->td_flags.executing = 1;
625
626 if (__kmp_tasking_mode != tskm_immediate_exec) {
627 // Copy the task team from the new child / old parent team to the thread.
628 this_thr->th.th_task_team =
629 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
630 KA_TRACE(20,
631 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
632 "team %p\n",
633 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
634 }
635 #if KMP_AFFINITY_SUPPORTED
636 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
637 __kmp_reset_root_init_mask(global_tid);
638 }
639 #endif
640 } else {
641 if (__kmp_tasking_mode != tskm_immediate_exec) {
642 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
643 "depth of serial team %p to %d\n",
644 global_tid, serial_team, serial_team->t.t_serialized));
645 }
646 }
647
648 serial_team->t.t_level--;
649 if (__kmp_env_consistency_check)
650 __kmp_pop_parallel(global_tid, NULL);
651 #if OMPT_SUPPORT
652 if (ompt_enabled.enabled)
653 this_thr->th.ompt_thread_info.state =
654 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
655 : ompt_state_work_parallel);
656 #endif
657 }
658
659 /*!
660 @ingroup SYNCHRONIZATION
661 @param loc source location information.
662
663 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
664 depending on the memory ordering convention obeyed by the compiler
665 even that may not be necessary).
666 */
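/* Illustrative sketch only: `#pragma omp flush` lowers to a single call, with
   `loc` being the compiler-emitted ident_t for the directive:

     __kmpc_flush(&loc);
*/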
667 void __kmpc_flush(ident_t *loc) {
668 KC_TRACE(10, ("__kmpc_flush: called\n"));
669
670 /* need an explicit __mf() here since the library uses volatile instead */
671 KMP_MB(); /* Flush all pending memory write invalidates. */
672
673 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
674 #if KMP_MIC
675 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
676 // We shouldn't need it, though, since the ABI rules require that
677 // * If the compiler generates NGO stores it also generates the fence
678 // * If users hand-code NGO stores they should insert the fence
679 // therefore no incomplete unordered stores should be visible.
680 #else
681 // C74404
682 // This is to address non-temporal store instructions (sfence needed).
683 // The clflush instruction is also addressed (mfence needed).
684 // Probably the non-temporal load movntdqa instruction should also be
685 // addressed.
686 // mfence is an SSE2 instruction. Do not execute it if the CPU does not support SSE2.
687 if (!__kmp_cpuinfo.initialized) {
688 __kmp_query_cpuid(&__kmp_cpuinfo);
689 }
690 if (!__kmp_cpuinfo.flags.sse2) {
691 // CPU cannot execute SSE2 instructions.
692 } else {
693 #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
694 _mm_mfence();
695 #elif KMP_COMPILER_MSVC
696 MemoryBarrier();
697 #else
698 __sync_synchronize();
699 #endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
700 }
701 #endif // KMP_MIC
702 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
703 KMP_ARCH_RISCV64)
704 // Nothing to see here move along
705 #elif KMP_ARCH_PPC64
706 // Nothing needed here (we have a real MB above).
707 #else
708 #error Unknown or unsupported architecture
709 #endif
710
711 #if OMPT_SUPPORT && OMPT_OPTIONAL
712 if (ompt_enabled.ompt_callback_flush) {
713 ompt_callbacks.ompt_callback(ompt_callback_flush)(
714 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
715 }
716 #endif
717 }
718
719 /* -------------------------------------------------------------------------- */
720 /*!
721 @ingroup SYNCHRONIZATION
722 @param loc source location information
723 @param global_tid thread id.
724
725 Execute a barrier.
726 */
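/* Illustrative sketch only (hypothetical names): an explicit
   `#pragma omp barrier` becomes:

     // loc: compiler-emitted ident_t for the directive (contents omitted)
     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_barrier(&loc, gtid);
*/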
727 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
728 KMP_COUNT_BLOCK(OMP_BARRIER);
729 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
730 __kmp_assert_valid_gtid(global_tid);
731
732 if (!TCR_4(__kmp_init_parallel))
733 __kmp_parallel_initialize();
734
735 __kmp_resume_if_soft_paused();
736
737 if (__kmp_env_consistency_check) {
738 if (loc == 0) {
739 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
740 }
741 __kmp_check_barrier(global_tid, ct_barrier, loc);
742 }
743
744 #if OMPT_SUPPORT
745 ompt_frame_t *ompt_frame;
746 if (ompt_enabled.enabled) {
747 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
748 if (ompt_frame->enter_frame.ptr == NULL)
749 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
750 }
751 OMPT_STORE_RETURN_ADDRESS(global_tid);
752 #endif
753 __kmp_threads[global_tid]->th.th_ident = loc;
754 // TODO: explicit barrier_wait_id:
755 // this function is called when 'barrier' directive is present or
756 // implicit barrier at the end of a worksharing construct.
757 // 1) better to add a per-thread barrier counter to a thread data structure
758 // 2) set to 0 when a new team is created
759 // 3) no sync is required
760
761 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
762 #if OMPT_SUPPORT && OMPT_OPTIONAL
763 if (ompt_enabled.enabled) {
764 ompt_frame->enter_frame = ompt_data_none;
765 }
766 #endif
767 }
768
769 /* The BARRIER for a MASTER section is always explicit */
770 /*!
771 @ingroup WORK_SHARING
772 @param loc source location information.
773 @param global_tid global thread number.
774 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
775 */
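/* Illustrative sketch only (hypothetical names): the master region is guarded
   by the return value, and __kmpc_end_master() is reached only by the thread
   that entered the block:

     if (__kmpc_master(&loc, gtid)) {
       // ... body of #pragma omp master ...
       __kmpc_end_master(&loc, gtid);
     }
*/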
776 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
777 int status = 0;
778
779 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
780 __kmp_assert_valid_gtid(global_tid);
781
782 if (!TCR_4(__kmp_init_parallel))
783 __kmp_parallel_initialize();
784
785 __kmp_resume_if_soft_paused();
786
787 if (KMP_MASTER_GTID(global_tid)) {
788 KMP_COUNT_BLOCK(OMP_MASTER);
789 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
790 status = 1;
791 }
792
793 #if OMPT_SUPPORT && OMPT_OPTIONAL
794 if (status) {
795 if (ompt_enabled.ompt_callback_masked) {
796 kmp_info_t *this_thr = __kmp_threads[global_tid];
797 kmp_team_t *team = this_thr->th.th_team;
798
799 int tid = __kmp_tid_from_gtid(global_tid);
800 ompt_callbacks.ompt_callback(ompt_callback_masked)(
801 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
802 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
803 OMPT_GET_RETURN_ADDRESS(0));
804 }
805 }
806 #endif
807
808 if (__kmp_env_consistency_check) {
809 #if KMP_USE_DYNAMIC_LOCK
810 if (status)
811 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
812 else
813 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
814 #else
815 if (status)
816 __kmp_push_sync(global_tid, ct_master, loc, NULL);
817 else
818 __kmp_check_sync(global_tid, ct_master, loc, NULL);
819 #endif
820 }
821
822 return status;
823 }
824
825 /*!
826 @ingroup WORK_SHARING
827 @param loc source location information.
828 @param global_tid global thread number.
829
830 Mark the end of a <tt>master</tt> region. This should only be called by the
831 thread that executes the <tt>master</tt> region.
832 */
833 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
834 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
835 __kmp_assert_valid_gtid(global_tid);
836 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
837 KMP_POP_PARTITIONED_TIMER();
838
839 #if OMPT_SUPPORT && OMPT_OPTIONAL
840 kmp_info_t *this_thr = __kmp_threads[global_tid];
841 kmp_team_t *team = this_thr->th.th_team;
842 if (ompt_enabled.ompt_callback_masked) {
843 int tid = __kmp_tid_from_gtid(global_tid);
844 ompt_callbacks.ompt_callback(ompt_callback_masked)(
845 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
846 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
847 OMPT_GET_RETURN_ADDRESS(0));
848 }
849 #endif
850
851 if (__kmp_env_consistency_check) {
852 if (KMP_MASTER_GTID(global_tid))
853 __kmp_pop_sync(global_tid, ct_master, loc);
854 }
855 }
856
857 /*!
858 @ingroup WORK_SHARING
859 @param loc source location information.
860 @param global_tid global thread number.
861 @param filter result of evaluating filter clause on thread global_tid, or zero
862 if no filter clause present
863 @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
864 */
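/* Illustrative sketch only (hypothetical names): `#pragma omp masked filter(2)`
   guards the block with the result of the filter comparison:

     if (__kmpc_masked(&loc, gtid, 2)) {
       // ... body of the masked region ...
       __kmpc_end_masked(&loc, gtid);
     }
*/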
865 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
866 int status = 0;
867 int tid;
868 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
869 __kmp_assert_valid_gtid(global_tid);
870
871 if (!TCR_4(__kmp_init_parallel))
872 __kmp_parallel_initialize();
873
874 __kmp_resume_if_soft_paused();
875
876 tid = __kmp_tid_from_gtid(global_tid);
877 if (tid == filter) {
878 KMP_COUNT_BLOCK(OMP_MASKED);
879 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
880 status = 1;
881 }
882
883 #if OMPT_SUPPORT && OMPT_OPTIONAL
884 if (status) {
885 if (ompt_enabled.ompt_callback_masked) {
886 kmp_info_t *this_thr = __kmp_threads[global_tid];
887 kmp_team_t *team = this_thr->th.th_team;
888 ompt_callbacks.ompt_callback(ompt_callback_masked)(
889 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
890 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
891 OMPT_GET_RETURN_ADDRESS(0));
892 }
893 }
894 #endif
895
896 if (__kmp_env_consistency_check) {
897 #if KMP_USE_DYNAMIC_LOCK
898 if (status)
899 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
900 else
901 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
902 #else
903 if (status)
904 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
905 else
906 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
907 #endif
908 }
909
910 return status;
911 }
912
913 /*!
914 @ingroup WORK_SHARING
915 @param loc source location information.
916 @param global_tid global thread number.
917
918 Mark the end of a <tt>masked</tt> region. This should only be called by the
919 thread that executes the <tt>masked</tt> region.
920 */
921 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
922 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
923 __kmp_assert_valid_gtid(global_tid);
924 KMP_POP_PARTITIONED_TIMER();
925
926 #if OMPT_SUPPORT && OMPT_OPTIONAL
927 kmp_info_t *this_thr = __kmp_threads[global_tid];
928 kmp_team_t *team = this_thr->th.th_team;
929 if (ompt_enabled.ompt_callback_masked) {
930 int tid = __kmp_tid_from_gtid(global_tid);
931 ompt_callbacks.ompt_callback(ompt_callback_masked)(
932 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
933 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
934 OMPT_GET_RETURN_ADDRESS(0));
935 }
936 #endif
937
938 if (__kmp_env_consistency_check) {
939 __kmp_pop_sync(global_tid, ct_masked, loc);
940 }
941 }
942
943 /*!
944 @ingroup WORK_SHARING
945 @param loc source location information.
946 @param gtid global thread number.
947
948 Start execution of an <tt>ordered</tt> construct.
949 */
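/* Illustrative sketch only (hypothetical names): inside a worksharing loop
   with an `ordered` clause, each `#pragma omp ordered` block is bracketed so
   that iterations execute it in sequential order:

     // inside the loop body generated for the ordered worksharing loop:
     __kmpc_ordered(&loc, gtid);
     // ... ordered block ...
     __kmpc_end_ordered(&loc, gtid);
*/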
950 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
951 int cid = 0;
952 kmp_info_t *th;
953 KMP_DEBUG_ASSERT(__kmp_init_serial);
954
955 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
956 __kmp_assert_valid_gtid(gtid);
957
958 if (!TCR_4(__kmp_init_parallel))
959 __kmp_parallel_initialize();
960
961 __kmp_resume_if_soft_paused();
962
963 #if USE_ITT_BUILD
964 __kmp_itt_ordered_prep(gtid);
965 // TODO: ordered_wait_id
966 #endif /* USE_ITT_BUILD */
967
968 th = __kmp_threads[gtid];
969
970 #if OMPT_SUPPORT && OMPT_OPTIONAL
971 kmp_team_t *team;
972 ompt_wait_id_t lck;
973 void *codeptr_ra;
974 OMPT_STORE_RETURN_ADDRESS(gtid);
975 if (ompt_enabled.enabled) {
976 team = __kmp_team_from_gtid(gtid);
977 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
978 /* OMPT state update */
979 th->th.ompt_thread_info.wait_id = lck;
980 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
981
982 /* OMPT event callback */
983 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
984 if (ompt_enabled.ompt_callback_mutex_acquire) {
985 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
986 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
987 codeptr_ra);
988 }
989 }
990 #endif
991
992 if (th->th.th_dispatch->th_deo_fcn != 0)
993 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
994 else
995 __kmp_parallel_deo(&gtid, &cid, loc);
996
997 #if OMPT_SUPPORT && OMPT_OPTIONAL
998 if (ompt_enabled.enabled) {
999 /* OMPT state update */
1000 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1001 th->th.ompt_thread_info.wait_id = 0;
1002
1003 /* OMPT event callback */
1004 if (ompt_enabled.ompt_callback_mutex_acquired) {
1005 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1006 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1007 }
1008 }
1009 #endif
1010
1011 #if USE_ITT_BUILD
1012 __kmp_itt_ordered_start(gtid);
1013 #endif /* USE_ITT_BUILD */
1014 }
1015
1016 /*!
1017 @ingroup WORK_SHARING
1018 @param loc source location information.
1019 @param gtid global thread number.
1020
1021 End execution of an <tt>ordered</tt> construct.
1022 */
1023 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1024 int cid = 0;
1025 kmp_info_t *th;
1026
1027 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1028 __kmp_assert_valid_gtid(gtid);
1029
1030 #if USE_ITT_BUILD
1031 __kmp_itt_ordered_end(gtid);
1032 // TODO: ordered_wait_id
1033 #endif /* USE_ITT_BUILD */
1034
1035 th = __kmp_threads[gtid];
1036
1037 if (th->th.th_dispatch->th_dxo_fcn != 0)
1038 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1039 else
1040 __kmp_parallel_dxo(&gtid, &cid, loc);
1041
1042 #if OMPT_SUPPORT && OMPT_OPTIONAL
1043 OMPT_STORE_RETURN_ADDRESS(gtid);
1044 if (ompt_enabled.ompt_callback_mutex_released) {
1045 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1046 ompt_mutex_ordered,
1047 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1048 ->t.t_ordered.dt.t_value,
1049 OMPT_LOAD_RETURN_ADDRESS(gtid));
1050 }
1051 #endif
1052 }
1053
1054 #if KMP_USE_DYNAMIC_LOCK
1055
1056 static __forceinline void
1057 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1058 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1059 // Pointer to the allocated indirect lock is written to crit, while indexing
1060 // is ignored.
1061 void *idx;
1062 kmp_indirect_lock_t **lck;
1063 lck = (kmp_indirect_lock_t **)crit;
1064 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1065 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1066 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1067 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1068 KA_TRACE(20,
1069 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1070 #if USE_ITT_BUILD
1071 __kmp_itt_critical_creating(ilk->lock, loc);
1072 #endif
1073 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1074 if (status == 0) {
1075 #if USE_ITT_BUILD
1076 __kmp_itt_critical_destroyed(ilk->lock);
1077 #endif
1078 // We don't really need to destroy the unclaimed lock here since it will be
1079 // cleaned up at program exit.
1080 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1081 }
1082 KMP_DEBUG_ASSERT(*lck != NULL);
1083 }
1084
1085 // Fast-path acquire tas lock
1086 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1087 { \
1088 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1089 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1090 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1091 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1092 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1093 kmp_uint32 spins; \
1094 KMP_FSYNC_PREPARE(l); \
1095 KMP_INIT_YIELD(spins); \
1096 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1097 do { \
1098 if (TCR_4(__kmp_nth) > \
1099 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1100 KMP_YIELD(TRUE); \
1101 } else { \
1102 KMP_YIELD_SPIN(spins); \
1103 } \
1104 __kmp_spin_backoff(&backoff); \
1105 } while ( \
1106 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1107 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1108 } \
1109 KMP_FSYNC_ACQUIRED(l); \
1110 }
1111
1112 // Fast-path test tas lock
1113 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1114 { \
1115 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1116 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1117 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1118 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1119 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1120 }
1121
1122 // Fast-path release tas lock
1123 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1124 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1125
1126 #if KMP_USE_FUTEX
1127
1128 #include <sys/syscall.h>
1129 #include <unistd.h>
1130 #ifndef FUTEX_WAIT
1131 #define FUTEX_WAIT 0
1132 #endif
1133 #ifndef FUTEX_WAKE
1134 #define FUTEX_WAKE 1
1135 #endif
1136
1137 // Fast-path acquire futex lock
1138 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1139 { \
1140 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1141 kmp_int32 gtid_code = (gtid + 1) << 1; \
1142 KMP_MB(); \
1143 KMP_FSYNC_PREPARE(ftx); \
1144 kmp_int32 poll_val; \
1145 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1146 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1147 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1148 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1149 if (!cond) { \
1150 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1151 poll_val | \
1152 KMP_LOCK_BUSY(1, futex))) { \
1153 continue; \
1154 } \
1155 poll_val |= KMP_LOCK_BUSY(1, futex); \
1156 } \
1157 kmp_int32 rc; \
1158 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1159 NULL, NULL, 0)) != 0) { \
1160 continue; \
1161 } \
1162 gtid_code |= 1; \
1163 } \
1164 KMP_FSYNC_ACQUIRED(ftx); \
1165 }
1166
1167 // Fast-path test futex lock
1168 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1169 { \
1170 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1171 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1172 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1173 KMP_FSYNC_ACQUIRED(ftx); \
1174 rc = TRUE; \
1175 } else { \
1176 rc = FALSE; \
1177 } \
1178 }
1179
1180 // Fast-path release futex lock
1181 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1182 { \
1183 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1184 KMP_MB(); \
1185 KMP_FSYNC_RELEASING(ftx); \
1186 kmp_int32 poll_val = \
1187 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1188 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1189 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1190 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1191 } \
1192 KMP_MB(); \
1193 KMP_YIELD_OVERSUB(); \
1194 }
1195
1196 #endif // KMP_USE_FUTEX
1197
1198 #else // KMP_USE_DYNAMIC_LOCK
1199
1200 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1201 ident_t const *loc,
1202 kmp_int32 gtid) {
1203 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1204
1205 // Because of the double-check, the following load doesn't need to be volatile
1206 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1207
1208 if (lck == NULL) {
1209 void *idx;
1210
1211 // Allocate & initialize the lock.
1212 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1213 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1214 __kmp_init_user_lock_with_checks(lck);
1215 __kmp_set_user_lock_location(lck, loc);
1216 #if USE_ITT_BUILD
1217 __kmp_itt_critical_creating(lck);
1218 // __kmp_itt_critical_creating() should be called *before* the first usage
1219 // of the underlying lock. It is the only place where we can guarantee it. There
1220 // is a chance the lock will be destroyed without ever being used, but that is not a
1221 // problem, because this is not a real event seen by the user but rather the setting
1222 // of a name for the object (lock). See kmp_itt.h for more details.
1223 #endif /* USE_ITT_BUILD */
1224
1225 // Use a cmpxchg instruction to slam the start of the critical section with
1226 // the lock pointer. If another thread beat us to it, deallocate the lock,
1227 // and use the lock that the other thread allocated.
1228 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1229
1230 if (status == 0) {
1231 // Deallocate the lock and reload the value.
1232 #if USE_ITT_BUILD
1233 __kmp_itt_critical_destroyed(lck);
1234 // Let ITT know the lock is destroyed and the same memory location may be reused
1235 // for another purpose.
1236 #endif /* USE_ITT_BUILD */
1237 __kmp_destroy_user_lock_with_checks(lck);
1238 __kmp_user_lock_free(&idx, gtid, lck);
1239 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1240 KMP_DEBUG_ASSERT(lck != NULL);
1241 }
1242 }
1243 return lck;
1244 }
1245
1246 #endif // KMP_USE_DYNAMIC_LOCK
1247
1248 /*!
1249 @ingroup WORK_SHARING
1250 @param loc source location information.
1251 @param global_tid global thread number.
1252 @param crit identity of the critical section. This could be a pointer to a lock
1253 associated with the critical section, or some other suitably unique value.
1254
1255 Enter code protected by a `critical` construct.
1256 This function blocks until the executing thread can enter the critical section.
1257 */
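/* Illustrative sketch only (hypothetical names): the compiler emits one
   static, zero-initialized kmp_critical_name per critical section name and
   brackets the region with enter/exit calls:

     static kmp_critical_name crit_lock; // shared by all uses of this name
     ...
     __kmpc_critical(&loc, gtid, &crit_lock);
     // ... critical section ...
     __kmpc_end_critical(&loc, gtid, &crit_lock);
*/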
1258 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1259 kmp_critical_name *crit) {
1260 #if KMP_USE_DYNAMIC_LOCK
1261 #if OMPT_SUPPORT && OMPT_OPTIONAL
1262 OMPT_STORE_RETURN_ADDRESS(global_tid);
1263 #endif // OMPT_SUPPORT
1264 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1265 #else
1266 KMP_COUNT_BLOCK(OMP_CRITICAL);
1267 #if OMPT_SUPPORT && OMPT_OPTIONAL
1268 ompt_state_t prev_state = ompt_state_undefined;
1269 ompt_thread_info_t ti;
1270 #endif
1271 kmp_user_lock_p lck;
1272
1273 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1274 __kmp_assert_valid_gtid(global_tid);
1275
1276 // TODO: add THR_OVHD_STATE
1277
1278 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1279 KMP_CHECK_USER_LOCK_INIT();
1280
1281 if ((__kmp_user_lock_kind == lk_tas) &&
1282 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1283 lck = (kmp_user_lock_p)crit;
1284 }
1285 #if KMP_USE_FUTEX
1286 else if ((__kmp_user_lock_kind == lk_futex) &&
1287 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1288 lck = (kmp_user_lock_p)crit;
1289 }
1290 #endif
1291 else { // ticket, queuing or drdpa
1292 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1293 }
1294
1295 if (__kmp_env_consistency_check)
1296 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1297
1298 // Since the critical directive binds to all threads, not just the current
1299 // team, we have to check this even if we are in a serialized team.
1300 // Also, even if we are the uber thread, we still have to acquire the lock,
1301 // as we have to contend with sibling threads.
1302
1303 #if USE_ITT_BUILD
1304 __kmp_itt_critical_acquiring(lck);
1305 #endif /* USE_ITT_BUILD */
1306 #if OMPT_SUPPORT && OMPT_OPTIONAL
1307 OMPT_STORE_RETURN_ADDRESS(global_tid);
1308 void *codeptr_ra = NULL;
1309 if (ompt_enabled.enabled) {
1310 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1311 /* OMPT state update */
1312 prev_state = ti.state;
1313 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1314 ti.state = ompt_state_wait_critical;
1315
1316 /* OMPT event callback */
1317 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1318 if (ompt_enabled.ompt_callback_mutex_acquire) {
1319 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1320 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1321 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1322 }
1323 }
1324 #endif
1325 // Value of 'crit' should be good for using as a critical_id of the critical
1326 // section directive.
1327 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1328
1329 #if USE_ITT_BUILD
1330 __kmp_itt_critical_acquired(lck);
1331 #endif /* USE_ITT_BUILD */
1332 #if OMPT_SUPPORT && OMPT_OPTIONAL
1333 if (ompt_enabled.enabled) {
1334 /* OMPT state update */
1335 ti.state = prev_state;
1336 ti.wait_id = 0;
1337
1338 /* OMPT event callback */
1339 if (ompt_enabled.ompt_callback_mutex_acquired) {
1340 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1341 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1342 }
1343 }
1344 #endif
1345 KMP_POP_PARTITIONED_TIMER();
1346
1347 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1348 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1349 #endif // KMP_USE_DYNAMIC_LOCK
1350 }
1351
1352 #if KMP_USE_DYNAMIC_LOCK
1353
1354 // Converts the given hint to an internal lock implementation
1355 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1356 #if KMP_USE_TSX
1357 #define KMP_TSX_LOCK(seq) lockseq_##seq
1358 #else
1359 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1360 #endif
1361
1362 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1363 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1364 #else
1365 #define KMP_CPUINFO_RTM 0
1366 #endif
1367
1368 // Hints that do not require further logic
1369 if (hint & kmp_lock_hint_hle)
1370 return KMP_TSX_LOCK(hle);
1371 if (hint & kmp_lock_hint_rtm)
1372 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1373 if (hint & kmp_lock_hint_adaptive)
1374 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1375
1376 // Rule out conflicting hints first by returning the default lock
1377 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1378 return __kmp_user_lock_seq;
1379 if ((hint & omp_lock_hint_speculative) &&
1380 (hint & omp_lock_hint_nonspeculative))
1381 return __kmp_user_lock_seq;
1382
1383 // Do not even consider speculation when it appears to be contended
1384 if (hint & omp_lock_hint_contended)
1385 return lockseq_queuing;
1386
1387 // Uncontended lock without speculation
1388 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1389 return lockseq_tas;
1390
1391 // Use RTM lock for speculation
1392 if (hint & omp_lock_hint_speculative)
1393 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1394
1395 return __kmp_user_lock_seq;
1396 }
1397
1398 #if OMPT_SUPPORT && OMPT_OPTIONAL
1399 #if KMP_USE_DYNAMIC_LOCK
1400 static kmp_mutex_impl_t
1401 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1402 if (user_lock) {
1403 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1404 case 0:
1405 break;
1406 #if KMP_USE_FUTEX
1407 case locktag_futex:
1408 return kmp_mutex_impl_queuing;
1409 #endif
1410 case locktag_tas:
1411 return kmp_mutex_impl_spin;
1412 #if KMP_USE_TSX
1413 case locktag_hle:
1414 case locktag_rtm_spin:
1415 return kmp_mutex_impl_speculative;
1416 #endif
1417 default:
1418 return kmp_mutex_impl_none;
1419 }
1420 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1421 }
1422 KMP_ASSERT(ilock);
1423 switch (ilock->type) {
1424 #if KMP_USE_TSX
1425 case locktag_adaptive:
1426 case locktag_rtm_queuing:
1427 return kmp_mutex_impl_speculative;
1428 #endif
1429 case locktag_nested_tas:
1430 return kmp_mutex_impl_spin;
1431 #if KMP_USE_FUTEX
1432 case locktag_nested_futex:
1433 #endif
1434 case locktag_ticket:
1435 case locktag_queuing:
1436 case locktag_drdpa:
1437 case locktag_nested_ticket:
1438 case locktag_nested_queuing:
1439 case locktag_nested_drdpa:
1440 return kmp_mutex_impl_queuing;
1441 default:
1442 return kmp_mutex_impl_none;
1443 }
1444 }
1445 #else
1446 // For locks without dynamic binding
1447 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1448 switch (__kmp_user_lock_kind) {
1449 case lk_tas:
1450 return kmp_mutex_impl_spin;
1451 #if KMP_USE_FUTEX
1452 case lk_futex:
1453 #endif
1454 case lk_ticket:
1455 case lk_queuing:
1456 case lk_drdpa:
1457 return kmp_mutex_impl_queuing;
1458 #if KMP_USE_TSX
1459 case lk_hle:
1460 case lk_rtm_queuing:
1461 case lk_rtm_spin:
1462 case lk_adaptive:
1463 return kmp_mutex_impl_speculative;
1464 #endif
1465 default:
1466 return kmp_mutex_impl_none;
1467 }
1468 }
1469 #endif // KMP_USE_DYNAMIC_LOCK
1470 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1471
1472 /*!
1473 @ingroup WORK_SHARING
1474 @param loc source location information.
1475 @param global_tid global thread number.
1476 @param crit identity of the critical section. This could be a pointer to a lock
1477 associated with the critical section, or some other suitably unique value.
1478 @param hint the lock hint.
1479
1480 Enter code protected by a `critical` construct with a hint. The hint value is
1481 used to suggest a lock implementation. This function blocks until the executing
1482 thread can enter the critical section unless the hint suggests use of
1483 speculative execution and the hardware supports it.
1484 */
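/* Illustrative sketch only (hypothetical names): a `hint` clause on the
   critical construct routes through this entry point instead of
   __kmpc_critical(), e.g. `#pragma omp critical (name) hint(omp_sync_hint_speculative)`:

     static kmp_critical_name crit_lock;
     __kmpc_critical_with_hint(&loc, gtid, &crit_lock, omp_sync_hint_speculative);
     // ... critical section ...
     __kmpc_end_critical(&loc, gtid, &crit_lock);
*/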
1485 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1486 kmp_critical_name *crit, uint32_t hint) {
1487 KMP_COUNT_BLOCK(OMP_CRITICAL);
1488 kmp_user_lock_p lck;
1489 #if OMPT_SUPPORT && OMPT_OPTIONAL
1490 ompt_state_t prev_state = ompt_state_undefined;
1491 ompt_thread_info_t ti;
1492 // This is the case if called from __kmpc_critical:
1493 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1494 if (!codeptr)
1495 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1496 #endif
1497
1498 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1499 __kmp_assert_valid_gtid(global_tid);
1500
1501 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1502 // Check if it is initialized.
1503 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1504 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1505 if (*lk == 0) {
1506 if (KMP_IS_D_LOCK(lockseq)) {
1507 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1508 KMP_GET_D_TAG(lockseq));
1509 } else {
1510 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1511 }
1512 }
1513 // Branch for accessing the actual lock object and set operation. This
1514 // branching is inevitable since this lock initialization does not follow the
1515 // normal dispatch path (lock table is not used).
1516 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1517 lck = (kmp_user_lock_p)lk;
1518 if (__kmp_env_consistency_check) {
1519 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1520 __kmp_map_hint_to_lock(hint));
1521 }
1522 #if USE_ITT_BUILD
1523 __kmp_itt_critical_acquiring(lck);
1524 #endif
1525 #if OMPT_SUPPORT && OMPT_OPTIONAL
1526 if (ompt_enabled.enabled) {
1527 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1528 /* OMPT state update */
1529 prev_state = ti.state;
1530 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1531 ti.state = ompt_state_wait_critical;
1532
1533 /* OMPT event callback */
1534 if (ompt_enabled.ompt_callback_mutex_acquire) {
1535 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1536 ompt_mutex_critical, (unsigned int)hint,
1537 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1538 codeptr);
1539 }
1540 }
1541 #endif
1542 #if KMP_USE_INLINED_TAS
1543 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1544 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1545 } else
1546 #elif KMP_USE_INLINED_FUTEX
1547 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1548 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1549 } else
1550 #endif
1551 {
1552 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1553 }
1554 } else {
1555 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1556 lck = ilk->lock;
1557 if (__kmp_env_consistency_check) {
1558 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1559 __kmp_map_hint_to_lock(hint));
1560 }
1561 #if USE_ITT_BUILD
1562 __kmp_itt_critical_acquiring(lck);
1563 #endif
1564 #if OMPT_SUPPORT && OMPT_OPTIONAL
1565 if (ompt_enabled.enabled) {
1566 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1567 /* OMPT state update */
1568 prev_state = ti.state;
1569 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1570 ti.state = ompt_state_wait_critical;
1571
1572 /* OMPT event callback */
1573 if (ompt_enabled.ompt_callback_mutex_acquire) {
1574 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1575 ompt_mutex_critical, (unsigned int)hint,
1576 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1577 codeptr);
1578 }
1579 }
1580 #endif
1581 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1582 }
1583 KMP_POP_PARTITIONED_TIMER();
1584
1585 #if USE_ITT_BUILD
1586 __kmp_itt_critical_acquired(lck);
1587 #endif /* USE_ITT_BUILD */
1588 #if OMPT_SUPPORT && OMPT_OPTIONAL
1589 if (ompt_enabled.enabled) {
1590 /* OMPT state update */
1591 ti.state = prev_state;
1592 ti.wait_id = 0;
1593
1594 /* OMPT event callback */
1595 if (ompt_enabled.ompt_callback_mutex_acquired) {
1596 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1597 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1598 }
1599 }
1600 #endif
1601
1602 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1603 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1604 } // __kmpc_critical_with_hint
1605
1606 #endif // KMP_USE_DYNAMIC_LOCK
1607
1608 /*!
1609 @ingroup WORK_SHARING
1610 @param loc source location information.
1611 @param global_tid global thread number.
1612 @param crit identity of the critical section. This could be a pointer to a lock
1613 associated with the critical section, or some other suitably unique value.
1614
1615 Leave a critical section, releasing any lock that was held during its execution.
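
A minimal illustrative sketch of how a compiler might pair the two calls for a
<tt>critical</tt> construct (the variable names here are hypothetical, not taken
from any particular compiler):
@code
static kmp_critical_name crit_name; // zero-initialized, shared by all call sites
__kmpc_critical(&loc, global_tid, &crit_name);
// ... code protected by the critical section ...
__kmpc_end_critical(&loc, global_tid, &crit_name);
@endcode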
1616 */
1617 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1618 kmp_critical_name *crit) {
1619 kmp_user_lock_p lck;
1620
1621 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1622
1623 #if KMP_USE_DYNAMIC_LOCK
1624 int locktag = KMP_EXTRACT_D_TAG(crit);
1625 if (locktag) {
1626 lck = (kmp_user_lock_p)crit;
1627 KMP_ASSERT(lck != NULL);
1628 if (__kmp_env_consistency_check) {
1629 __kmp_pop_sync(global_tid, ct_critical, loc);
1630 }
1631 #if USE_ITT_BUILD
1632 __kmp_itt_critical_releasing(lck);
1633 #endif
1634 #if KMP_USE_INLINED_TAS
1635 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1636 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1637 } else
1638 #elif KMP_USE_INLINED_FUTEX
1639 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1640 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1641 } else
1642 #endif
1643 {
1644 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1645 }
1646 } else {
1647 kmp_indirect_lock_t *ilk =
1648 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1649 KMP_ASSERT(ilk != NULL);
1650 lck = ilk->lock;
1651 if (__kmp_env_consistency_check) {
1652 __kmp_pop_sync(global_tid, ct_critical, loc);
1653 }
1654 #if USE_ITT_BUILD
1655 __kmp_itt_critical_releasing(lck);
1656 #endif
1657 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1658 }
1659
1660 #else // KMP_USE_DYNAMIC_LOCK
1661
1662 if ((__kmp_user_lock_kind == lk_tas) &&
1663 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1664 lck = (kmp_user_lock_p)crit;
1665 }
1666 #if KMP_USE_FUTEX
1667 else if ((__kmp_user_lock_kind == lk_futex) &&
1668 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1669 lck = (kmp_user_lock_p)crit;
1670 }
1671 #endif
1672 else { // ticket, queuing or drdpa
1673 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1674 }
1675
1676 KMP_ASSERT(lck != NULL);
1677
1678 if (__kmp_env_consistency_check)
1679 __kmp_pop_sync(global_tid, ct_critical, loc);
1680
1681 #if USE_ITT_BUILD
1682 __kmp_itt_critical_releasing(lck);
1683 #endif /* USE_ITT_BUILD */
1684 // Value of 'crit' should be good for using as a critical_id of the critical
1685 // section directive.
1686 __kmp_release_user_lock_with_checks(lck, global_tid);
1687
1688 #endif // KMP_USE_DYNAMIC_LOCK
1689
1690 #if OMPT_SUPPORT && OMPT_OPTIONAL
1691 /* OMPT release event triggers after lock is released; place here to trigger
1692 * for all #if branches */
1693 OMPT_STORE_RETURN_ADDRESS(global_tid);
1694 if (ompt_enabled.ompt_callback_mutex_released) {
1695 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1696 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1697 OMPT_LOAD_RETURN_ADDRESS(0));
1698 }
1699 #endif
1700
1701 KMP_POP_PARTITIONED_TIMER();
1702 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1703 }
1704
1705 /*!
1706 @ingroup SYNCHRONIZATION
1707 @param loc source location information
1708 @param global_tid thread id.
1709 @return one if the thread should execute the master block, zero otherwise
1710
1711 Start execution of a combined barrier and master. The barrier is executed inside
1712 this function.
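
An illustrative usage pattern (the variable names are hypothetical):
@code
if (__kmpc_barrier_master(&loc, global_tid)) {
  // code executed only by the primary thread
  __kmpc_end_barrier_master(&loc, global_tid); // releases the waiting threads
}
@endcode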
1713 */
1714 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1715 int status;
1716 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1717 __kmp_assert_valid_gtid(global_tid);
1718
1719 if (!TCR_4(__kmp_init_parallel))
1720 __kmp_parallel_initialize();
1721
1722 __kmp_resume_if_soft_paused();
1723
1724 if (__kmp_env_consistency_check)
1725 __kmp_check_barrier(global_tid, ct_barrier, loc);
1726
1727 #if OMPT_SUPPORT
1728 ompt_frame_t *ompt_frame;
1729 if (ompt_enabled.enabled) {
1730 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1731 if (ompt_frame->enter_frame.ptr == NULL)
1732 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1733 }
1734 OMPT_STORE_RETURN_ADDRESS(global_tid);
1735 #endif
1736 #if USE_ITT_NOTIFY
1737 __kmp_threads[global_tid]->th.th_ident = loc;
1738 #endif
1739 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1740 #if OMPT_SUPPORT && OMPT_OPTIONAL
1741 if (ompt_enabled.enabled) {
1742 ompt_frame->enter_frame = ompt_data_none;
1743 }
1744 #endif
1745
1746 return (status != 0) ? 0 : 1;
1747 }
1748
1749 /*!
1750 @ingroup SYNCHRONIZATION
1751 @param loc source location information
1752 @param global_tid thread id.
1753
1754 Complete the execution of a combined barrier and master. This function should
1755 only be called at the completion of the <tt>master</tt> code. Other threads will
1756 still be waiting at the barrier and this call releases them.
1757 */
1758 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1759 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1760 __kmp_assert_valid_gtid(global_tid);
1761 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1762 }
1763
1764 /*!
1765 @ingroup SYNCHRONIZATION
1766 @param loc source location information
1767 @param global_tid thread id.
1768 @return one if the thread should execute the master block, zero otherwise
1769
1770 Start execution of a combined barrier and master(nowait) construct.
1771 The barrier is executed inside this function.
1772 There is no equivalent "end" function, since the other threads do not wait for the thread executing the master block (the construct is "nowait").
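
An illustrative usage pattern (the variable names are hypothetical):
@code
if (__kmpc_barrier_master_nowait(&loc, global_tid)) {
  // code executed only by the primary thread; there is no matching "end" call
  // and the other threads do not wait for this block to finish
}
@endcode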
1773 */
1774 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1775 kmp_int32 ret;
1776 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1777 __kmp_assert_valid_gtid(global_tid);
1778
1779 if (!TCR_4(__kmp_init_parallel))
1780 __kmp_parallel_initialize();
1781
1782 __kmp_resume_if_soft_paused();
1783
1784 if (__kmp_env_consistency_check) {
1785 if (loc == 0) {
1786 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1787 }
1788 __kmp_check_barrier(global_tid, ct_barrier, loc);
1789 }
1790
1791 #if OMPT_SUPPORT
1792 ompt_frame_t *ompt_frame;
1793 if (ompt_enabled.enabled) {
1794 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1795 if (ompt_frame->enter_frame.ptr == NULL)
1796 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1797 }
1798 OMPT_STORE_RETURN_ADDRESS(global_tid);
1799 #endif
1800 #if USE_ITT_NOTIFY
1801 __kmp_threads[global_tid]->th.th_ident = loc;
1802 #endif
1803 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1804 #if OMPT_SUPPORT && OMPT_OPTIONAL
1805 if (ompt_enabled.enabled) {
1806 ompt_frame->enter_frame = ompt_data_none;
1807 }
1808 #endif
1809
1810 ret = __kmpc_master(loc, global_tid);
1811
1812 if (__kmp_env_consistency_check) {
1813 /* there's no __kmpc_end_master called; so the (stats) */
1814 /* actions of __kmpc_end_master are done here */
1815 if (ret) {
1816 /* only one thread should do the pop since only */
1817 /* one did the push (see __kmpc_master()) */
1818 __kmp_pop_sync(global_tid, ct_master, loc);
1819 }
1820 }
1821
1822 return (ret);
1823 }
1824
1825 /* The BARRIER for a SINGLE process section is always explicit */
1826 /*!
1827 @ingroup WORK_SHARING
1828 @param loc source location information
1829 @param global_tid global thread number
1830 @return One if this thread should execute the single construct, zero otherwise.
1831
1832 Test whether to execute a <tt>single</tt> construct.
1833 There are no implicit barriers in the two "single" calls; rather, the compiler
1834 should introduce an explicit barrier if it is required.
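
An illustrative lowering of a <tt>single</tt> construct without a nowait clause
(a sketch only; the variable names are hypothetical):
@code
if (__kmpc_single(&loc, global_tid)) {
  // body of the single region
  __kmpc_end_single(&loc, global_tid);
}
__kmpc_barrier(&loc, global_tid); // explicit barrier introduced by the compiler
@endcode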
1835 */
1836
1837 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1838 __kmp_assert_valid_gtid(global_tid);
1839 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1840
1841 if (rc) {
1842 // We are going to execute the single statement, so we should count it.
1843 KMP_COUNT_BLOCK(OMP_SINGLE);
1844 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1845 }
1846
1847 #if OMPT_SUPPORT && OMPT_OPTIONAL
1848 kmp_info_t *this_thr = __kmp_threads[global_tid];
1849 kmp_team_t *team = this_thr->th.th_team;
1850 int tid = __kmp_tid_from_gtid(global_tid);
1851
1852 if (ompt_enabled.enabled) {
1853 if (rc) {
1854 if (ompt_enabled.ompt_callback_work) {
1855 ompt_callbacks.ompt_callback(ompt_callback_work)(
1856 ompt_work_single_executor, ompt_scope_begin,
1857 &(team->t.ompt_team_info.parallel_data),
1858 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1859 1, OMPT_GET_RETURN_ADDRESS(0));
1860 }
1861 } else {
1862 if (ompt_enabled.ompt_callback_work) {
1863 ompt_callbacks.ompt_callback(ompt_callback_work)(
1864 ompt_work_single_other, ompt_scope_begin,
1865 &(team->t.ompt_team_info.parallel_data),
1866 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1867 1, OMPT_GET_RETURN_ADDRESS(0));
1868 ompt_callbacks.ompt_callback(ompt_callback_work)(
1869 ompt_work_single_other, ompt_scope_end,
1870 &(team->t.ompt_team_info.parallel_data),
1871 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1872 1, OMPT_GET_RETURN_ADDRESS(0));
1873 }
1874 }
1875 }
1876 #endif
1877
1878 return rc;
1879 }
1880
1881 /*!
1882 @ingroup WORK_SHARING
1883 @param loc source location information
1884 @param global_tid global thread number
1885
1886 Mark the end of a <tt>single</tt> construct. This function should
1887 only be called by the thread that executed the block of code protected
1888 by the `single` construct.
1889 */
1890 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1891 __kmp_assert_valid_gtid(global_tid);
1892 __kmp_exit_single(global_tid);
1893 KMP_POP_PARTITIONED_TIMER();
1894
1895 #if OMPT_SUPPORT && OMPT_OPTIONAL
1896 kmp_info_t *this_thr = __kmp_threads[global_tid];
1897 kmp_team_t *team = this_thr->th.th_team;
1898 int tid = __kmp_tid_from_gtid(global_tid);
1899
1900 if (ompt_enabled.ompt_callback_work) {
1901 ompt_callbacks.ompt_callback(ompt_callback_work)(
1902 ompt_work_single_executor, ompt_scope_end,
1903 &(team->t.ompt_team_info.parallel_data),
1904 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1905 OMPT_GET_RETURN_ADDRESS(0));
1906 }
1907 #endif
1908 }
1909
1910 /*!
1911 @ingroup WORK_SHARING
1912 @param loc Source location
1913 @param global_tid Global thread id
1914
1915 Mark the end of a statically scheduled loop.
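
An illustrative pairing with a static-init entry point (a sketch assuming the
__kmpc_for_static_init_4 interface; the variable names are hypothetical):
@code
kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
__kmpc_for_static_init_4(&loc, global_tid, kmp_sch_static, &last, &lower,
                         &upper, &stride, 1, 0);
for (kmp_int32 i = lower; i <= upper; ++i) {
  // loop body for this thread's chunk
}
__kmpc_for_static_fini(&loc, global_tid);
@endcode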
1916 */
1917 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1918 KMP_POP_PARTITIONED_TIMER();
1919 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1920
1921 #if OMPT_SUPPORT && OMPT_OPTIONAL
1922 if (ompt_enabled.ompt_callback_work) {
1923 ompt_work_t ompt_work_type = ompt_work_loop;
1924 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1925 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1926 // Determine workshare type
1927 if (loc != NULL) {
1928 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1929 ompt_work_type = ompt_work_loop;
1930 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1931 ompt_work_type = ompt_work_sections;
1932 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1933 ompt_work_type = ompt_work_distribute;
1934 } else {
1935 // use default set above.
1936 // a warning about this case is provided in __kmpc_for_static_init
1937 }
1938 KMP_DEBUG_ASSERT(ompt_work_type);
1939 }
1940 ompt_callbacks.ompt_callback(ompt_callback_work)(
1941 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1942 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1943 }
1944 #endif
1945 if (__kmp_env_consistency_check)
1946 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1947 }
1948
1949 // User routines which take C-style arguments (call by value)
1950 // different from the Fortran equivalent routines
1951
1952 void ompc_set_num_threads(int arg) {
1953 // !!!!! TODO: check the per-task binding
1954 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1955 }
1956
1957 void ompc_set_dynamic(int flag) {
1958 kmp_info_t *thread;
1959
1960 /* For the thread-private implementation of the internal controls */
1961 thread = __kmp_entry_thread();
1962
1963 __kmp_save_internal_controls(thread);
1964
1965 set__dynamic(thread, flag ? true : false);
1966 }
1967
1968 void ompc_set_nested(int flag) {
1969 kmp_info_t *thread;
1970
1971 /* For the thread-private internal controls implementation */
1972 thread = __kmp_entry_thread();
1973
1974 __kmp_save_internal_controls(thread);
1975
1976 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1977 }
1978
1979 void ompc_set_max_active_levels(int max_active_levels) {
1980 /* TO DO */
1981 /* we want per-task implementation of this internal control */
1982
1983 /* For the per-thread internal controls implementation */
1984 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1985 }
1986
1987 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1988 // !!!!! TODO: check the per-task binding
1989 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1990 }
1991
1992 int ompc_get_ancestor_thread_num(int level) {
1993 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1994 }
1995
1996 int ompc_get_team_size(int level) {
1997 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1998 }
1999
2000 /* OpenMP 5.0 Affinity Format API */
2001 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2002 if (!__kmp_init_serial) {
2003 __kmp_serial_initialize();
2004 }
2005 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2006 format, KMP_STRLEN(format) + 1);
2007 }
2008
2009 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2010 size_t format_size;
2011 if (!__kmp_init_serial) {
2012 __kmp_serial_initialize();
2013 }
2014 format_size = KMP_STRLEN(__kmp_affinity_format);
2015 if (buffer && size) {
2016 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2017 format_size + 1);
2018 }
2019 return format_size;
2020 }
2021
2022 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2023 int gtid;
2024 if (!TCR_4(__kmp_init_middle)) {
2025 __kmp_middle_initialize();
2026 }
2027 __kmp_assign_root_init_mask();
2028 gtid = __kmp_get_gtid();
2029 #if KMP_AFFINITY_SUPPORTED
2030 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
2031 __kmp_reset_root_init_mask(gtid);
2032 }
2033 #endif
2034 __kmp_aux_display_affinity(gtid, format);
2035 }
2036
2037 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2038 char const *format) {
2039 int gtid;
2040 size_t num_required;
2041 kmp_str_buf_t capture_buf;
2042 if (!TCR_4(__kmp_init_middle)) {
2043 __kmp_middle_initialize();
2044 }
2045 __kmp_assign_root_init_mask();
2046 gtid = __kmp_get_gtid();
2047 #if KMP_AFFINITY_SUPPORTED
2048 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
2049 __kmp_reset_root_init_mask(gtid);
2050 }
2051 #endif
2052 __kmp_str_buf_init(&capture_buf);
2053 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2054 if (buffer && buf_size) {
2055 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2056 capture_buf.used + 1);
2057 }
2058 __kmp_str_buf_free(&capture_buf);
2059 return num_required;
2060 }
2061
2062 void kmpc_set_stacksize(int arg) {
2063 // __kmp_aux_set_stacksize initializes the library if needed
2064 __kmp_aux_set_stacksize(arg);
2065 }
2066
2067 void kmpc_set_stacksize_s(size_t arg) {
2068 // __kmp_aux_set_stacksize initializes the library if needed
2069 __kmp_aux_set_stacksize(arg);
2070 }
2071
2072 void kmpc_set_blocktime(int arg) {
2073 int gtid, tid;
2074 kmp_info_t *thread;
2075
2076 gtid = __kmp_entry_gtid();
2077 tid = __kmp_tid_from_gtid(gtid);
2078 thread = __kmp_thread_from_gtid(gtid);
2079
2080 __kmp_aux_set_blocktime(arg, thread, tid);
2081 }
2082
2083 void kmpc_set_library(int arg) {
2084 // __kmp_user_set_library initializes the library if needed
2085 __kmp_user_set_library((enum library_type)arg);
2086 }
2087
2088 void kmpc_set_defaults(char const *str) {
2089 // __kmp_aux_set_defaults initializes the library if needed
2090 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2091 }
2092
2093 void kmpc_set_disp_num_buffers(int arg) {
2094 // ignore after initialization because some teams have already
2095 // allocated dispatch buffers
2096 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2097 arg <= KMP_MAX_DISP_NUM_BUFF) {
2098 __kmp_dispatch_num_buffers = arg;
2099 }
2100 }
2101
2102 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2103 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2104 return -1;
2105 #else
2106 if (!TCR_4(__kmp_init_middle)) {
2107 __kmp_middle_initialize();
2108 }
2109 __kmp_assign_root_init_mask();
2110 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2111 #endif
2112 }
2113
2114 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2115 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2116 return -1;
2117 #else
2118 if (!TCR_4(__kmp_init_middle)) {
2119 __kmp_middle_initialize();
2120 }
2121 __kmp_assign_root_init_mask();
2122 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2123 #endif
2124 }
2125
2126 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2127 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2128 return -1;
2129 #else
2130 if (!TCR_4(__kmp_init_middle)) {
2131 __kmp_middle_initialize();
2132 }
2133 __kmp_assign_root_init_mask();
2134 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2135 #endif
2136 }
2137
2138 /* -------------------------------------------------------------------------- */
2139 /*!
2140 @ingroup THREADPRIVATE
2141 @param loc source location information
2142 @param gtid global thread number
2143 @param cpy_size size of the cpy_data buffer
2144 @param cpy_data pointer to data to be copied
2145 @param cpy_func helper function to call for copying data
2146 @param didit flag variable: 1=single thread; 0=not single thread
2147
2148 __kmpc_copyprivate implements the interface for the private data broadcast
2149 needed for the copyprivate clause associated with a single region in an
2150 OpenMP<sup>*</sup> program (both C and Fortran).
2151 All threads participating in the parallel region call this routine.
2152 One of the threads (called the single thread) should have the <tt>didit</tt>
2153 variable set to 1 and all other threads should have that variable set to 0.
2154 All threads pass a pointer to a data buffer (cpy_data) that they have built.
2155
2156 The OpenMP specification forbids the use of nowait on the single region when a
2157 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2158 barrier internally to avoid race conditions, so the code generation for the
2159 single region should avoid generating a barrier after the call to @ref
2160 __kmpc_copyprivate.
2161
2162 The <tt>gtid</tt> parameter is the global thread id for the current thread.
2163 The <tt>loc</tt> parameter is a pointer to source location information.
2164
2165 Internal implementation: The single thread will first copy its descriptor
2166 address (cpy_data) to a team-private location, then the other threads will each
2167 call the function pointed to by the parameter cpy_func, which carries out the
2168 copy by copying the data using the cpy_data buffer.
2169
2170 The cpy_func routine used for the copy and the contents of the data area defined
2171 by cpy_data and cpy_size may be built in any fashion that will allow the copy
2172 to be done. For instance, the cpy_data buffer can hold the actual data to be
2173 copied or it may hold a list of pointers to the data. The cpy_func routine must
2174 interpret the cpy_data buffer appropriately.
2175
2176 The interface to cpy_func is as follows:
2177 @code
2178 void cpy_func( void *destination, void *source )
2179 @endcode
2180 where void *destination is the cpy_data pointer for the thread being copied to
2181 and void *source is the cpy_data pointer for the thread being copied from.
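
An illustrative sketch of the calls a compiler might generate for a single
region with a copyprivate clause (data and copy_helper are hypothetical names):
@code
kmp_int32 didit = 0;
if (__kmpc_single(&loc, gtid)) {
  // compute the value of data that is to be broadcast
  didit = 1;
  __kmpc_end_single(&loc, gtid);
}
__kmpc_copyprivate(&loc, gtid, sizeof(data), &data, copy_helper, didit);
@endcode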
2182 */
2183 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2184 void *cpy_data, void (*cpy_func)(void *, void *),
2185 kmp_int32 didit) {
2186 void **data_ptr;
2187 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2188 __kmp_assert_valid_gtid(gtid);
2189
2190 KMP_MB();
2191
2192 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2193
2194 if (__kmp_env_consistency_check) {
2195 if (loc == 0) {
2196 KMP_WARNING(ConstructIdentInvalid);
2197 }
2198 }
2199
2200 // ToDo: Optimize the following two barriers into some kind of split barrier
2201
2202 if (didit)
2203 *data_ptr = cpy_data;
2204
2205 #if OMPT_SUPPORT
2206 ompt_frame_t *ompt_frame;
2207 if (ompt_enabled.enabled) {
2208 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2209 if (ompt_frame->enter_frame.ptr == NULL)
2210 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2211 }
2212 OMPT_STORE_RETURN_ADDRESS(gtid);
2213 #endif
2214 /* This barrier is not a barrier region boundary */
2215 #if USE_ITT_NOTIFY
2216 __kmp_threads[gtid]->th.th_ident = loc;
2217 #endif
2218 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2219
2220 if (!didit)
2221 (*cpy_func)(cpy_data, *data_ptr);
2222
2223 // Consider next barrier a user-visible barrier for barrier region boundaries
2224 // Nesting checks are already handled by the single construct checks
2225 {
2226 #if OMPT_SUPPORT
2227 OMPT_STORE_RETURN_ADDRESS(gtid);
2228 #endif
2229 #if USE_ITT_NOTIFY
2230 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2231 // tasks can overwrite the location)
2232 #endif
2233 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2234 #if OMPT_SUPPORT && OMPT_OPTIONAL
2235 if (ompt_enabled.enabled) {
2236 ompt_frame->enter_frame = ompt_data_none;
2237 }
2238 #endif
2239 }
2240 }
2241
2242 /* --------------------------------------------------------------------------*/
2243 /*!
2244 @ingroup THREADPRIVATE
2245 @param loc source location information
2246 @param gtid global thread number
2247 @param cpy_data pointer to the data to be saved/copied or 0
2248 @return the saved pointer to the data
2249
2250 __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: it only
2251 saves the pointer it is given (when that pointer is non-zero, i.e. when it
2252 comes from the thread that executed the single region) and returns that pointer
2253 from all calls (the single thread itself does not need it). This version does
2254 not do any actual data copying; the copying has to be done elsewhere, e.g.
2255 inline in the generated code. Because of this, unlike __kmpc_copyprivate, this
2256 function does not end with a barrier after the copy, so the generated code must
2257 insert a barrier once all of the data has been copied.
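
An illustrative sketch of generated code using the light variant, with the
required trailing barrier emitted by the compiler (hypothetical names):
@code
void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? &data : NULL);
if (!didit)
  memcpy(&data, src, sizeof(data)); // copying done inline in the generated code
__kmpc_barrier(&loc, gtid); // barrier after all copying is complete
@endcode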
2258 */
2259 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2260 void **data_ptr;
2261
2262 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2263
2264 KMP_MB();
2265
2266 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2267
2268 if (__kmp_env_consistency_check) {
2269 if (loc == 0) {
2270 KMP_WARNING(ConstructIdentInvalid);
2271 }
2272 }
2273
2274 // ToDo: Optimize the following barrier
2275
2276 if (cpy_data)
2277 *data_ptr = cpy_data;
2278
2279 #if OMPT_SUPPORT
2280 ompt_frame_t *ompt_frame;
2281 if (ompt_enabled.enabled) {
2282 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2283 if (ompt_frame->enter_frame.ptr == NULL)
2284 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2285 OMPT_STORE_RETURN_ADDRESS(gtid);
2286 }
2287 #endif
2288 /* This barrier is not a barrier region boundary */
2289 #if USE_ITT_NOTIFY
2290 __kmp_threads[gtid]->th.th_ident = loc;
2291 #endif
2292 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2293
2294 return *data_ptr;
2295 }
2296
2297 /* -------------------------------------------------------------------------- */
2298
2299 #define INIT_LOCK __kmp_init_user_lock_with_checks
2300 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2301 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2302 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2303 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2304 #define ACQUIRE_NESTED_LOCK_TIMED \
2305 __kmp_acquire_nested_user_lock_with_checks_timed
2306 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2307 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2308 #define TEST_LOCK __kmp_test_user_lock_with_checks
2309 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2310 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2311 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2312
2313 // TODO: Make check abort messages use location info & pass it into
2314 // with_checks routines
2315
2316 #if KMP_USE_DYNAMIC_LOCK
2317
2318 // internal lock initializer
2319 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2320 kmp_dyna_lockseq_t seq) {
2321 if (KMP_IS_D_LOCK(seq)) {
2322 KMP_INIT_D_LOCK(lock, seq);
2323 #if USE_ITT_BUILD
2324 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2325 #endif
2326 } else {
2327 KMP_INIT_I_LOCK(lock, seq);
2328 #if USE_ITT_BUILD
2329 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2330 __kmp_itt_lock_creating(ilk->lock, loc);
2331 #endif
2332 }
2333 }
2334
2335 // internal nest lock initializer
2336 static __forceinline void
2337 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2338 kmp_dyna_lockseq_t seq) {
2339 #if KMP_USE_TSX
2340 // Don't have nested lock implementation for speculative locks
2341 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2342 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2343 seq = __kmp_user_lock_seq;
2344 #endif
2345 switch (seq) {
2346 case lockseq_tas:
2347 seq = lockseq_nested_tas;
2348 break;
2349 #if KMP_USE_FUTEX
2350 case lockseq_futex:
2351 seq = lockseq_nested_futex;
2352 break;
2353 #endif
2354 case lockseq_ticket:
2355 seq = lockseq_nested_ticket;
2356 break;
2357 case lockseq_queuing:
2358 seq = lockseq_nested_queuing;
2359 break;
2360 case lockseq_drdpa:
2361 seq = lockseq_nested_drdpa;
2362 break;
2363 default:
2364 seq = lockseq_nested_queuing;
2365 }
2366 KMP_INIT_I_LOCK(lock, seq);
2367 #if USE_ITT_BUILD
2368 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2369 __kmp_itt_lock_creating(ilk->lock, loc);
2370 #endif
2371 }
2372
2373 /* initialize the lock with a hint */
2374 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2375 uintptr_t hint) {
2376 KMP_DEBUG_ASSERT(__kmp_init_serial);
2377 if (__kmp_env_consistency_check && user_lock == NULL) {
2378 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2379 }
2380
2381 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2382
2383 #if OMPT_SUPPORT && OMPT_OPTIONAL
2384 // This is the case, if called from omp_init_lock_with_hint:
2385 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2386 if (!codeptr)
2387 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2388 if (ompt_enabled.ompt_callback_lock_init) {
2389 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2390 ompt_mutex_lock, (omp_lock_hint_t)hint,
2391 __ompt_get_mutex_impl_type(user_lock),
2392 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2393 }
2394 #endif
2395 }
2396
2397 /* initialize the lock with a hint */
2398 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2399 void **user_lock, uintptr_t hint) {
2400 KMP_DEBUG_ASSERT(__kmp_init_serial);
2401 if (__kmp_env_consistency_check && user_lock == NULL) {
2402 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2403 }
2404
2405 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2406
2407 #if OMPT_SUPPORT && OMPT_OPTIONAL
2408 // This is the case, if called from omp_init_lock_with_hint:
2409 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2410 if (!codeptr)
2411 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2412 if (ompt_enabled.ompt_callback_lock_init) {
2413 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2414 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2415 __ompt_get_mutex_impl_type(user_lock),
2416 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2417 }
2418 #endif
2419 }
2420
2421 #endif // KMP_USE_DYNAMIC_LOCK
2422
2423 /* initialize the lock */
2424 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2425 #if KMP_USE_DYNAMIC_LOCK
2426
2427 KMP_DEBUG_ASSERT(__kmp_init_serial);
2428 if (__kmp_env_consistency_check && user_lock == NULL) {
2429 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2430 }
2431 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2432
2433 #if OMPT_SUPPORT && OMPT_OPTIONAL
2434 // This is the case, if called from omp_init_lock_with_hint:
2435 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2436 if (!codeptr)
2437 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2438 if (ompt_enabled.ompt_callback_lock_init) {
2439 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2440 ompt_mutex_lock, omp_lock_hint_none,
2441 __ompt_get_mutex_impl_type(user_lock),
2442 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2443 }
2444 #endif
2445
2446 #else // KMP_USE_DYNAMIC_LOCK
2447
2448 static char const *const func = "omp_init_lock";
2449 kmp_user_lock_p lck;
2450 KMP_DEBUG_ASSERT(__kmp_init_serial);
2451
2452 if (__kmp_env_consistency_check) {
2453 if (user_lock == NULL) {
2454 KMP_FATAL(LockIsUninitialized, func);
2455 }
2456 }
2457
2458 KMP_CHECK_USER_LOCK_INIT();
2459
2460 if ((__kmp_user_lock_kind == lk_tas) &&
2461 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2462 lck = (kmp_user_lock_p)user_lock;
2463 }
2464 #if KMP_USE_FUTEX
2465 else if ((__kmp_user_lock_kind == lk_futex) &&
2466 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2467 lck = (kmp_user_lock_p)user_lock;
2468 }
2469 #endif
2470 else {
2471 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2472 }
2473 INIT_LOCK(lck);
2474 __kmp_set_user_lock_location(lck, loc);
2475
2476 #if OMPT_SUPPORT && OMPT_OPTIONAL
2477 // This is the case, if called from omp_init_lock_with_hint:
2478 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2479 if (!codeptr)
2480 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2481 if (ompt_enabled.ompt_callback_lock_init) {
2482 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2483 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2484 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2485 }
2486 #endif
2487
2488 #if USE_ITT_BUILD
2489 __kmp_itt_lock_creating(lck);
2490 #endif /* USE_ITT_BUILD */
2491
2492 #endif // KMP_USE_DYNAMIC_LOCK
2493 } // __kmpc_init_lock
2494
2495 /* initialize the lock */
2496 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2497 #if KMP_USE_DYNAMIC_LOCK
2498
2499 KMP_DEBUG_ASSERT(__kmp_init_serial);
2500 if (__kmp_env_consistency_check && user_lock == NULL) {
2501 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2502 }
2503 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2504
2505 #if OMPT_SUPPORT && OMPT_OPTIONAL
2506 // This is the case, if called from omp_init_lock_with_hint:
2507 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2508 if (!codeptr)
2509 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2510 if (ompt_enabled.ompt_callback_lock_init) {
2511 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2512 ompt_mutex_nest_lock, omp_lock_hint_none,
2513 __ompt_get_mutex_impl_type(user_lock),
2514 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2515 }
2516 #endif
2517
2518 #else // KMP_USE_DYNAMIC_LOCK
2519
2520 static char const *const func = "omp_init_nest_lock";
2521 kmp_user_lock_p lck;
2522 KMP_DEBUG_ASSERT(__kmp_init_serial);
2523
2524 if (__kmp_env_consistency_check) {
2525 if (user_lock == NULL) {
2526 KMP_FATAL(LockIsUninitialized, func);
2527 }
2528 }
2529
2530 KMP_CHECK_USER_LOCK_INIT();
2531
2532 if ((__kmp_user_lock_kind == lk_tas) &&
2533 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2534 OMP_NEST_LOCK_T_SIZE)) {
2535 lck = (kmp_user_lock_p)user_lock;
2536 }
2537 #if KMP_USE_FUTEX
2538 else if ((__kmp_user_lock_kind == lk_futex) &&
2539 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2540 OMP_NEST_LOCK_T_SIZE)) {
2541 lck = (kmp_user_lock_p)user_lock;
2542 }
2543 #endif
2544 else {
2545 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2546 }
2547
2548 INIT_NESTED_LOCK(lck);
2549 __kmp_set_user_lock_location(lck, loc);
2550
2551 #if OMPT_SUPPORT && OMPT_OPTIONAL
2552 // This is the case, if called from omp_init_lock_with_hint:
2553 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2554 if (!codeptr)
2555 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2556 if (ompt_enabled.ompt_callback_lock_init) {
2557 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2558 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2559 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2560 }
2561 #endif
2562
2563 #if USE_ITT_BUILD
2564 __kmp_itt_lock_creating(lck);
2565 #endif /* USE_ITT_BUILD */
2566
2567 #endif // KMP_USE_DYNAMIC_LOCK
2568 } // __kmpc_init_nest_lock
2569
2570 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2571 #if KMP_USE_DYNAMIC_LOCK
2572
2573 #if USE_ITT_BUILD
2574 kmp_user_lock_p lck;
2575 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2576 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2577 } else {
2578 lck = (kmp_user_lock_p)user_lock;
2579 }
2580 __kmp_itt_lock_destroyed(lck);
2581 #endif
2582 #if OMPT_SUPPORT && OMPT_OPTIONAL
2583 // This is the case, if called from omp_init_lock_with_hint:
2584 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2585 if (!codeptr)
2586 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2587 if (ompt_enabled.ompt_callback_lock_destroy) {
2588 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2589 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2590 }
2591 #endif
2592 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2593 #else
2594 kmp_user_lock_p lck;
2595
2596 if ((__kmp_user_lock_kind == lk_tas) &&
2597 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2598 lck = (kmp_user_lock_p)user_lock;
2599 }
2600 #if KMP_USE_FUTEX
2601 else if ((__kmp_user_lock_kind == lk_futex) &&
2602 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2603 lck = (kmp_user_lock_p)user_lock;
2604 }
2605 #endif
2606 else {
2607 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2608 }
2609
2610 #if OMPT_SUPPORT && OMPT_OPTIONAL
2611 // This is the case, if called from omp_init_lock_with_hint:
2612 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2613 if (!codeptr)
2614 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2615 if (ompt_enabled.ompt_callback_lock_destroy) {
2616 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2617 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2618 }
2619 #endif
2620
2621 #if USE_ITT_BUILD
2622 __kmp_itt_lock_destroyed(lck);
2623 #endif /* USE_ITT_BUILD */
2624 DESTROY_LOCK(lck);
2625
2626 if ((__kmp_user_lock_kind == lk_tas) &&
2627 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2628 ;
2629 }
2630 #if KMP_USE_FUTEX
2631 else if ((__kmp_user_lock_kind == lk_futex) &&
2632 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2633 ;
2634 }
2635 #endif
2636 else {
2637 __kmp_user_lock_free(user_lock, gtid, lck);
2638 }
2639 #endif // KMP_USE_DYNAMIC_LOCK
2640 } // __kmpc_destroy_lock
2641
2642 /* destroy the lock */
2643 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2644 #if KMP_USE_DYNAMIC_LOCK
2645
2646 #if USE_ITT_BUILD
2647 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2648 __kmp_itt_lock_destroyed(ilk->lock);
2649 #endif
2650 #if OMPT_SUPPORT && OMPT_OPTIONAL
2651 // This is the case, if called from omp_init_lock_with_hint:
2652 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2653 if (!codeptr)
2654 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2655 if (ompt_enabled.ompt_callback_lock_destroy) {
2656 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2657 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2658 }
2659 #endif
2660 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2661
2662 #else // KMP_USE_DYNAMIC_LOCK
2663
2664 kmp_user_lock_p lck;
2665
2666 if ((__kmp_user_lock_kind == lk_tas) &&
2667 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2668 OMP_NEST_LOCK_T_SIZE)) {
2669 lck = (kmp_user_lock_p)user_lock;
2670 }
2671 #if KMP_USE_FUTEX
2672 else if ((__kmp_user_lock_kind == lk_futex) &&
2673 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2674 OMP_NEST_LOCK_T_SIZE)) {
2675 lck = (kmp_user_lock_p)user_lock;
2676 }
2677 #endif
2678 else {
2679 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2680 }
2681
2682 #if OMPT_SUPPORT && OMPT_OPTIONAL
2683 // This is the case, if called from omp_init_lock_with_hint:
2684 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2685 if (!codeptr)
2686 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2687 if (ompt_enabled.ompt_callback_lock_destroy) {
2688 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2689 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2690 }
2691 #endif
2692
2693 #if USE_ITT_BUILD
2694 __kmp_itt_lock_destroyed(lck);
2695 #endif /* USE_ITT_BUILD */
2696
2697 DESTROY_NESTED_LOCK(lck);
2698
2699 if ((__kmp_user_lock_kind == lk_tas) &&
2700 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2701 OMP_NEST_LOCK_T_SIZE)) {
2702 ;
2703 }
2704 #if KMP_USE_FUTEX
2705 else if ((__kmp_user_lock_kind == lk_futex) &&
2706 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2707 OMP_NEST_LOCK_T_SIZE)) {
2708 ;
2709 }
2710 #endif
2711 else {
2712 __kmp_user_lock_free(user_lock, gtid, lck);
2713 }
2714 #endif // KMP_USE_DYNAMIC_LOCK
2715 } // __kmpc_destroy_nest_lock
2716
2717 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2718 KMP_COUNT_BLOCK(OMP_set_lock);
2719 #if KMP_USE_DYNAMIC_LOCK
2720 int tag = KMP_EXTRACT_D_TAG(user_lock);
2721 #if USE_ITT_BUILD
2722 __kmp_itt_lock_acquiring(
2723 (kmp_user_lock_p)
2724 user_lock); // itt function will get to the right lock object.
2725 #endif
2726 #if OMPT_SUPPORT && OMPT_OPTIONAL
2727 // This is the case, if called from omp_init_lock_with_hint:
2728 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2729 if (!codeptr)
2730 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2731 if (ompt_enabled.ompt_callback_mutex_acquire) {
2732 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2733 ompt_mutex_lock, omp_lock_hint_none,
2734 __ompt_get_mutex_impl_type(user_lock),
2735 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2736 }
2737 #endif
2738 #if KMP_USE_INLINED_TAS
2739 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2740 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2741 } else
2742 #elif KMP_USE_INLINED_FUTEX
2743 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2744 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2745 } else
2746 #endif
2747 {
2748 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2749 }
2750 #if USE_ITT_BUILD
2751 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2752 #endif
2753 #if OMPT_SUPPORT && OMPT_OPTIONAL
2754 if (ompt_enabled.ompt_callback_mutex_acquired) {
2755 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2756 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2757 }
2758 #endif
2759
2760 #else // KMP_USE_DYNAMIC_LOCK
2761
2762 kmp_user_lock_p lck;
2763
2764 if ((__kmp_user_lock_kind == lk_tas) &&
2765 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2766 lck = (kmp_user_lock_p)user_lock;
2767 }
2768 #if KMP_USE_FUTEX
2769 else if ((__kmp_user_lock_kind == lk_futex) &&
2770 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2771 lck = (kmp_user_lock_p)user_lock;
2772 }
2773 #endif
2774 else {
2775 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2776 }
2777
2778 #if USE_ITT_BUILD
2779 __kmp_itt_lock_acquiring(lck);
2780 #endif /* USE_ITT_BUILD */
2781 #if OMPT_SUPPORT && OMPT_OPTIONAL
2782 // This is the case, if called from omp_init_lock_with_hint:
2783 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2784 if (!codeptr)
2785 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2786 if (ompt_enabled.ompt_callback_mutex_acquire) {
2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2788 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2789 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2790 }
2791 #endif
2792
2793 ACQUIRE_LOCK(lck, gtid);
2794
2795 #if USE_ITT_BUILD
2796 __kmp_itt_lock_acquired(lck);
2797 #endif /* USE_ITT_BUILD */
2798
2799 #if OMPT_SUPPORT && OMPT_OPTIONAL
2800 if (ompt_enabled.ompt_callback_mutex_acquired) {
2801 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2802 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2803 }
2804 #endif
2805
2806 #endif // KMP_USE_DYNAMIC_LOCK
2807 }
2808
2809 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2810 #if KMP_USE_DYNAMIC_LOCK
2811
2812 #if USE_ITT_BUILD
2813 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2814 #endif
2815 #if OMPT_SUPPORT && OMPT_OPTIONAL
2816 // This is the case, if called from omp_init_lock_with_hint:
2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818 if (!codeptr)
2819 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820 if (ompt_enabled.enabled) {
2821 if (ompt_enabled.ompt_callback_mutex_acquire) {
2822 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2823 ompt_mutex_nest_lock, omp_lock_hint_none,
2824 __ompt_get_mutex_impl_type(user_lock),
2825 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2826 }
2827 }
2828 #endif
2829 int acquire_status =
2830 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2831 (void)acquire_status;
2832 #if USE_ITT_BUILD
2833 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2834 #endif
2835
2836 #if OMPT_SUPPORT && OMPT_OPTIONAL
2837 if (ompt_enabled.enabled) {
2838 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2839 if (ompt_enabled.ompt_callback_mutex_acquired) {
2840 // lock_first
2841 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2842 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2843 codeptr);
2844 }
2845 } else {
2846 if (ompt_enabled.ompt_callback_nest_lock) {
2847 // lock_next
2848 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2849 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2850 }
2851 }
2852 }
2853 #endif
2854
2855 #else // KMP_USE_DYNAMIC_LOCK
2856 int acquire_status;
2857 kmp_user_lock_p lck;
2858
2859 if ((__kmp_user_lock_kind == lk_tas) &&
2860 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2861 OMP_NEST_LOCK_T_SIZE)) {
2862 lck = (kmp_user_lock_p)user_lock;
2863 }
2864 #if KMP_USE_FUTEX
2865 else if ((__kmp_user_lock_kind == lk_futex) &&
2866 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2867 OMP_NEST_LOCK_T_SIZE)) {
2868 lck = (kmp_user_lock_p)user_lock;
2869 }
2870 #endif
2871 else {
2872 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2873 }
2874
2875 #if USE_ITT_BUILD
2876 __kmp_itt_lock_acquiring(lck);
2877 #endif /* USE_ITT_BUILD */
2878 #if OMPT_SUPPORT && OMPT_OPTIONAL
2879 // This is the case, if called from omp_init_lock_with_hint:
2880 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2881 if (!codeptr)
2882 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2883 if (ompt_enabled.enabled) {
2884 if (ompt_enabled.ompt_callback_mutex_acquire) {
2885 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2886 ompt_mutex_nest_lock, omp_lock_hint_none,
2887 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2888 codeptr);
2889 }
2890 }
2891 #endif
2892
2893 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2894
2895 #if USE_ITT_BUILD
2896 __kmp_itt_lock_acquired(lck);
2897 #endif /* USE_ITT_BUILD */
2898
2899 #if OMPT_SUPPORT && OMPT_OPTIONAL
2900 if (ompt_enabled.enabled) {
2901 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2902 if (ompt_enabled.ompt_callback_mutex_acquired) {
2903 // lock_first
2904 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2905 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2906 }
2907 } else {
2908 if (ompt_enabled.ompt_callback_nest_lock) {
2909 // lock_next
2910 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2911 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2912 }
2913 }
2914 }
2915 #endif
2916
2917 #endif // KMP_USE_DYNAMIC_LOCK
2918 }
2919
2920 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2921 #if KMP_USE_DYNAMIC_LOCK
2922
2923 int tag = KMP_EXTRACT_D_TAG(user_lock);
2924 #if USE_ITT_BUILD
2925 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2926 #endif
2927 #if KMP_USE_INLINED_TAS
2928 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2929 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2930 } else
2931 #elif KMP_USE_INLINED_FUTEX
2932 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2933 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2934 } else
2935 #endif
2936 {
2937 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2938 }
2939
2940 #if OMPT_SUPPORT && OMPT_OPTIONAL
2941 // This is the case, if called from omp_init_lock_with_hint:
2942 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2943 if (!codeptr)
2944 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2945 if (ompt_enabled.ompt_callback_mutex_released) {
2946 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2947 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2948 }
2949 #endif
2950
2951 #else // KMP_USE_DYNAMIC_LOCK
2952
2953 kmp_user_lock_p lck;
2954
2955 /* Can't use serial interval since not block structured */
2956 /* release the lock */
2957
2958 if ((__kmp_user_lock_kind == lk_tas) &&
2959 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2960 #if KMP_OS_LINUX && \
2961 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2962 // "fast" path implemented to fix customer performance issue
2963 #if USE_ITT_BUILD
2964 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2965 #endif /* USE_ITT_BUILD */
2966 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2967 KMP_MB();
2968
2969 #if OMPT_SUPPORT && OMPT_OPTIONAL
2970 // This is the case, if called from omp_init_lock_with_hint:
2971 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2972 if (!codeptr)
2973 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2974 if (ompt_enabled.ompt_callback_mutex_released) {
2975 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2976 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2977 }
2978 #endif
2979
2980 return;
2981 #else
2982 lck = (kmp_user_lock_p)user_lock;
2983 #endif
2984 }
2985 #if KMP_USE_FUTEX
2986 else if ((__kmp_user_lock_kind == lk_futex) &&
2987 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2988 lck = (kmp_user_lock_p)user_lock;
2989 }
2990 #endif
2991 else {
2992 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2993 }
2994
2995 #if USE_ITT_BUILD
2996 __kmp_itt_lock_releasing(lck);
2997 #endif /* USE_ITT_BUILD */
2998
2999 RELEASE_LOCK(lck, gtid);
3000
3001 #if OMPT_SUPPORT && OMPT_OPTIONAL
3002 // This is the case, if called from omp_init_lock_with_hint:
3003 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3004 if (!codeptr)
3005 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3006 if (ompt_enabled.ompt_callback_mutex_released) {
3007 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3008 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3009 }
3010 #endif
3011
3012 #endif // KMP_USE_DYNAMIC_LOCK
3013 }
3014
3015 /* release the lock */
3016 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3017 #if KMP_USE_DYNAMIC_LOCK
3018
3019 #if USE_ITT_BUILD
3020 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3021 #endif
3022 int release_status =
3023 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3024 (void)release_status;
3025
3026 #if OMPT_SUPPORT && OMPT_OPTIONAL
3027 // This is the case, if called from omp_init_lock_with_hint:
3028 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3029 if (!codeptr)
3030 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3031 if (ompt_enabled.enabled) {
3032 if (release_status == KMP_LOCK_RELEASED) {
3033 if (ompt_enabled.ompt_callback_mutex_released) {
3034 // release_lock_last
3035 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3036 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3037 codeptr);
3038 }
3039 } else if (ompt_enabled.ompt_callback_nest_lock) {
3040 // release_lock_prev
3041 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3042 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3043 }
3044 }
3045 #endif
3046
3047 #else // KMP_USE_DYNAMIC_LOCK
3048
3049 kmp_user_lock_p lck;
3050
3051 /* Can't use serial interval since not block structured */
3052
3053 if ((__kmp_user_lock_kind == lk_tas) &&
3054 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3055 OMP_NEST_LOCK_T_SIZE)) {
3056 #if KMP_OS_LINUX && \
3057 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3058 // "fast" path implemented to fix customer performance issue
3059 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3060 #if USE_ITT_BUILD
3061 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3062 #endif /* USE_ITT_BUILD */
3063
3064 #if OMPT_SUPPORT && OMPT_OPTIONAL
3065 int release_status = KMP_LOCK_STILL_HELD;
3066 #endif
3067
3068 if (--(tl->lk.depth_locked) == 0) {
3069 TCW_4(tl->lk.poll, 0);
3070 #if OMPT_SUPPORT && OMPT_OPTIONAL
3071 release_status = KMP_LOCK_RELEASED;
3072 #endif
3073 }
3074 KMP_MB();
3075
3076 #if OMPT_SUPPORT && OMPT_OPTIONAL
3077 // This is the case, if called from omp_init_lock_with_hint:
3078 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3079 if (!codeptr)
3080 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3081 if (ompt_enabled.enabled) {
3082 if (release_status == KMP_LOCK_RELEASED) {
3083 if (ompt_enabled.ompt_callback_mutex_released) {
3084 // release_lock_last
3085 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3086 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3087 }
3088 } else if (ompt_enabled.ompt_callback_nest_lock) {
3089 // release_lock_previous
3090 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3091 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3092 }
3093 }
3094 #endif
3095
3096 return;
3097 #else
3098 lck = (kmp_user_lock_p)user_lock;
3099 #endif
3100 }
3101 #if KMP_USE_FUTEX
3102 else if ((__kmp_user_lock_kind == lk_futex) &&
3103 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3104 OMP_NEST_LOCK_T_SIZE)) {
3105 lck = (kmp_user_lock_p)user_lock;
3106 }
3107 #endif
3108 else {
3109 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3110 }
3111
3112 #if USE_ITT_BUILD
3113 __kmp_itt_lock_releasing(lck);
3114 #endif /* USE_ITT_BUILD */
3115
3116 int release_status;
3117 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3118 #if OMPT_SUPPORT && OMPT_OPTIONAL
3119 // This is the case, if called from omp_init_lock_with_hint:
3120 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3121 if (!codeptr)
3122 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3123 if (ompt_enabled.enabled) {
3124 if (release_status == KMP_LOCK_RELEASED) {
3125 if (ompt_enabled.ompt_callback_mutex_released) {
3126 // release_lock_last
3127 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3128 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3129 }
3130 } else if (ompt_enabled.ompt_callback_nest_lock) {
3131 // release_lock_previous
3132 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3133 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3134 }
3135 }
3136 #endif
3137
3138 #endif // KMP_USE_DYNAMIC_LOCK
3139 }
3140
3141 /* try to acquire the lock */
3142 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3143 KMP_COUNT_BLOCK(OMP_test_lock);
3144
3145 #if KMP_USE_DYNAMIC_LOCK
3146 int rc;
3147 int tag = KMP_EXTRACT_D_TAG(user_lock);
3148 #if USE_ITT_BUILD
3149 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3150 #endif
3151 #if OMPT_SUPPORT && OMPT_OPTIONAL
3152 // This is the case, if called from omp_init_lock_with_hint:
3153 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3154 if (!codeptr)
3155 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3156 if (ompt_enabled.ompt_callback_mutex_acquire) {
3157 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3158 ompt_mutex_lock, omp_lock_hint_none,
3159 __ompt_get_mutex_impl_type(user_lock),
3160 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3161 }
3162 #endif
3163 #if KMP_USE_INLINED_TAS
3164 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3165 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3166 } else
3167 #elif KMP_USE_INLINED_FUTEX
3168 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3169 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3170 } else
3171 #endif
3172 {
3173 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3174 }
3175 if (rc) {
3176 #if USE_ITT_BUILD
3177 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3178 #endif
3179 #if OMPT_SUPPORT && OMPT_OPTIONAL
3180 if (ompt_enabled.ompt_callback_mutex_acquired) {
3181 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3182 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3183 }
3184 #endif
3185 return FTN_TRUE;
3186 } else {
3187 #if USE_ITT_BUILD
3188 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3189 #endif
3190 return FTN_FALSE;
3191 }
3192
3193 #else // KMP_USE_DYNAMIC_LOCK
3194
3195 kmp_user_lock_p lck;
3196 int rc;
3197
3198 if ((__kmp_user_lock_kind == lk_tas) &&
3199 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3200 lck = (kmp_user_lock_p)user_lock;
3201 }
3202 #if KMP_USE_FUTEX
3203 else if ((__kmp_user_lock_kind == lk_futex) &&
3204 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3205 lck = (kmp_user_lock_p)user_lock;
3206 }
3207 #endif
3208 else {
3209 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3210 }
3211
3212 #if USE_ITT_BUILD
3213 __kmp_itt_lock_acquiring(lck);
3214 #endif /* USE_ITT_BUILD */
3215 #if OMPT_SUPPORT && OMPT_OPTIONAL
3216 // This is the case, if called from omp_init_lock_with_hint:
3217 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3218 if (!codeptr)
3219 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3220 if (ompt_enabled.ompt_callback_mutex_acquire) {
3221 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3222 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3223 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3224 }
3225 #endif
3226
3227 rc = TEST_LOCK(lck, gtid);
3228 #if USE_ITT_BUILD
3229 if (rc) {
3230 __kmp_itt_lock_acquired(lck);
3231 } else {
3232 __kmp_itt_lock_cancelled(lck);
3233 }
3234 #endif /* USE_ITT_BUILD */
3235 #if OMPT_SUPPORT && OMPT_OPTIONAL
3236 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3237 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3238 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3239 }
3240 #endif
3241
3242 return (rc ? FTN_TRUE : FTN_FALSE);
3243
3244 /* Can't use serial interval since not block structured */
3245
3246 #endif // KMP_USE_DYNAMIC_LOCK
3247 }
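
/* Illustrative usage sketch (not part of the runtime): the user-level
   omp_test_lock() pattern that is expected to reach __kmpc_test_lock() above
   through the API wrappers. Only standard OpenMP calls are used here.

   #include <omp.h>
   #include <stdio.h>

   int main(void) {
     omp_lock_t lock;
     omp_init_lock(&lock);
   #pragma omp parallel num_threads(4)
     {
       // Non-blocking attempt: nonzero on success, zero if the lock is held.
       if (omp_test_lock(&lock)) {
         printf("thread %d got the lock\n", omp_get_thread_num());
         omp_unset_lock(&lock);
       }
     }
     omp_destroy_lock(&lock);
     return 0;
   }
*/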
3248
3249 /* try to acquire the lock */
3250 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3251 #if KMP_USE_DYNAMIC_LOCK
3252 int rc;
3253 #if USE_ITT_BUILD
3254 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3255 #endif
3256 #if OMPT_SUPPORT && OMPT_OPTIONAL
3257 // This is the case, if called from omp_init_lock_with_hint:
3258 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3259 if (!codeptr)
3260 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3261 if (ompt_enabled.ompt_callback_mutex_acquire) {
3262 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3263 ompt_mutex_nest_lock, omp_lock_hint_none,
3264 __ompt_get_mutex_impl_type(user_lock),
3265 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3266 }
3267 #endif
3268 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3269 #if USE_ITT_BUILD
3270 if (rc) {
3271 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3272 } else {
3273 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3274 }
3275 #endif
3276 #if OMPT_SUPPORT && OMPT_OPTIONAL
3277 if (ompt_enabled.enabled && rc) {
3278 if (rc == 1) {
3279 if (ompt_enabled.ompt_callback_mutex_acquired) {
3280 // lock_first
3281 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3282 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3283 codeptr);
3284 }
3285 } else {
3286 if (ompt_enabled.ompt_callback_nest_lock) {
3287 // lock_next
3288 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3289 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3290 }
3291 }
3292 }
3293 #endif
3294 return rc;
3295
3296 #else // KMP_USE_DYNAMIC_LOCK
3297
3298 kmp_user_lock_p lck;
3299 int rc;
3300
3301 if ((__kmp_user_lock_kind == lk_tas) &&
3302 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3303 OMP_NEST_LOCK_T_SIZE)) {
3304 lck = (kmp_user_lock_p)user_lock;
3305 }
3306 #if KMP_USE_FUTEX
3307 else if ((__kmp_user_lock_kind == lk_futex) &&
3308 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3309 OMP_NEST_LOCK_T_SIZE)) {
3310 lck = (kmp_user_lock_p)user_lock;
3311 }
3312 #endif
3313 else {
3314 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3315 }
3316
3317 #if USE_ITT_BUILD
3318 __kmp_itt_lock_acquiring(lck);
3319 #endif /* USE_ITT_BUILD */
3320
3321 #if OMPT_SUPPORT && OMPT_OPTIONAL
3322 // This is the case, if called from omp_init_lock_with_hint:
3323 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3324 if (!codeptr)
3325 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3326 if (ompt_enabled.enabled &&
3327 ompt_enabled.ompt_callback_mutex_acquire) {
3328 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3329 ompt_mutex_nest_lock, omp_lock_hint_none,
3330 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3331 codeptr);
3332 }
3333 #endif
3334
3335 rc = TEST_NESTED_LOCK(lck, gtid);
3336 #if USE_ITT_BUILD
3337 if (rc) {
3338 __kmp_itt_lock_acquired(lck);
3339 } else {
3340 __kmp_itt_lock_cancelled(lck);
3341 }
3342 #endif /* USE_ITT_BUILD */
3343 #if OMPT_SUPPORT && OMPT_OPTIONAL
3344 if (ompt_enabled.enabled && rc) {
3345 if (rc == 1) {
3346 if (ompt_enabled.ompt_callback_mutex_acquired) {
3347 // lock_first
3348 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3349 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3350 }
3351 } else {
3352 if (ompt_enabled.ompt_callback_nest_lock) {
3353 // lock_next
3354 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3355 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3356 }
3357 }
3358 }
3359 #endif
3360 return rc;
3361
3362 /* Can't use serial interval since not block structured */
3363
3364 #endif // KMP_USE_DYNAMIC_LOCK
3365 }
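
/* Illustrative usage sketch (standard OpenMP API only): omp_test_nest_lock()
   returns the new nesting count on success and 0 on failure, which is the
   convention __kmpc_test_nest_lock() implements above (rc == 1 for the first
   acquisition, rc > 1 for a re-acquisition by the owning thread).

   #include <omp.h>
   #include <stdio.h>

   int main(void) {
     omp_nest_lock_t nlock;
     omp_init_nest_lock(&nlock);
     int d1 = omp_test_nest_lock(&nlock); // first acquisition -> 1
     int d2 = omp_test_nest_lock(&nlock); // same thread again  -> 2
     printf("%d %d\n", d1, d2);
     omp_unset_nest_lock(&nlock); // back to depth 1
     omp_unset_nest_lock(&nlock); // fully released
     omp_destroy_nest_lock(&nlock);
     return 0;
   }
*/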
3366
3367 // Interface to fast scalable reduce methods routines
3368
3369 // keep the selected method in a thread local structure for cross-function
3370 // usage: will be used in __kmpc_end_reduce* functions;
3371 // another solution: to re-determine the method one more time in
3372 // __kmpc_end_reduce* functions (new prototype required then)
3373 // AT: which solution is better?
3374 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3375 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3376
3377 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3378 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3379
3380 // description of the packed_reduction_method variable: look at the macros in
3381 // kmp.h
3382
3383 // used in a critical section reduce block
3384 static __forceinline void
3385 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3386 kmp_critical_name *crit) {
3387
3388 // this lock was visible to a customer and to the threading profile tool as a
3389 // serial overhead span (although it's used for an internal purpose only)
3390 // why was it visible in previous implementation?
3391 // should we keep it visible in new reduce block?
3392 kmp_user_lock_p lck;
3393
3394 #if KMP_USE_DYNAMIC_LOCK
3395
3396 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3397 // Check if it is initialized.
3398 if (*lk == 0) {
3399 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3400 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3401 KMP_GET_D_TAG(__kmp_user_lock_seq));
3402 } else {
3403 __kmp_init_indirect_csptr(crit, loc, global_tid,
3404 KMP_GET_I_TAG(__kmp_user_lock_seq));
3405 }
3406 }
3407 // Branch for accessing the actual lock object and set operation. This
3408 // branching is inevitable since this lock initialization does not follow the
3409 // normal dispatch path (lock table is not used).
3410 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3411 lck = (kmp_user_lock_p)lk;
3412 KMP_DEBUG_ASSERT(lck != NULL);
3413 if (__kmp_env_consistency_check) {
3414 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3415 }
3416 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3417 } else {
3418 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3419 lck = ilk->lock;
3420 KMP_DEBUG_ASSERT(lck != NULL);
3421 if (__kmp_env_consistency_check) {
3422 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3423 }
3424 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3425 }
3426
3427 #else // KMP_USE_DYNAMIC_LOCK
3428
3429 // We know that the fast reduction code is only emitted by Intel compilers
3430 // with 32 byte critical sections. If there isn't enough space, then we
3431 // have to use a pointer.
3432 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3433 lck = (kmp_user_lock_p)crit;
3434 } else {
3435 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3436 }
3437 KMP_DEBUG_ASSERT(lck != NULL);
3438
3439 if (__kmp_env_consistency_check)
3440 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3441
3442 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3443
3444 #endif // KMP_USE_DYNAMIC_LOCK
3445 }
3446
3447 // used in a critical section reduce block
3448 static __forceinline void
3449 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3450 kmp_critical_name *crit) {
3451
3452 kmp_user_lock_p lck;
3453
3454 #if KMP_USE_DYNAMIC_LOCK
3455
3456 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3457 lck = (kmp_user_lock_p)crit;
3458 if (__kmp_env_consistency_check)
3459 __kmp_pop_sync(global_tid, ct_critical, loc);
3460 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3461 } else {
3462 kmp_indirect_lock_t *ilk =
3463 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3464 if (__kmp_env_consistency_check)
3465 __kmp_pop_sync(global_tid, ct_critical, loc);
3466 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3467 }
3468
3469 #else // KMP_USE_DYNAMIC_LOCK
3470
3471 // We know that the fast reduction code is only emitted by Intel compilers
3472 // with 32 byte critical sections. If there isn't enough space, then we have
3473 // to use a pointer.
3474 if (__kmp_base_user_lock_size > 32) {
3475 lck = *((kmp_user_lock_p *)crit);
3476 KMP_ASSERT(lck != NULL);
3477 } else {
3478 lck = (kmp_user_lock_p)crit;
3479 }
3480
3481 if (__kmp_env_consistency_check)
3482 __kmp_pop_sync(global_tid, ct_critical, loc);
3483
3484 __kmp_release_user_lock_with_checks(lck, global_tid);
3485
3486 #endif // KMP_USE_DYNAMIC_LOCK
3487 } // __kmp_end_critical_section_reduce_block
3488
3489 static __forceinline int
3490 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3491 int *task_state) {
3492 kmp_team_t *team;
3493
3494 // Check if we are inside the teams construct?
3495 if (th->th.th_teams_microtask) {
3496 *team_p = team = th->th.th_team;
3497 if (team->t.t_level == th->th.th_teams_level) {
3498 // This is reduction at teams construct.
3499 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3500 // Let's swap teams temporarily for the reduction.
3501 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3502 th->th.th_team = team->t.t_parent;
3503 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3504 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3505 *task_state = th->th.th_task_state;
3506 th->th.th_task_state = 0;
3507
3508 return 1;
3509 }
3510 }
3511 return 0;
3512 }
3513
3514 static __forceinline void
3515 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3516 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3517 th->th.th_info.ds.ds_tid = 0;
3518 th->th.th_team = team;
3519 th->th.th_team_nproc = team->t.t_nproc;
3520 th->th.th_task_team = team->t.t_task_team[task_state];
3521 __kmp_type_convert(task_state, &(th->th.th_task_state));
3522 }
3523
3524 /* 2.a.i. Reduce Block without a terminating barrier */
3525 /*!
3526 @ingroup SYNCHRONIZATION
3527 @param loc source location information
3528 @param global_tid global thread number
3529 @param num_vars number of items (variables) to be reduced
3530 @param reduce_size size of data in bytes to be reduced
3531 @param reduce_data pointer to data to be reduced
3532 @param reduce_func callback function providing reduction operation on two
3533 operands and returning result of reduction in lhs_data
3534 @param lck pointer to the unique lock data structure
3535 @result 1 for the primary thread, 0 for all other team threads, 2 for all team
3536 threads if atomic reduction needed
3537
3538 The nowait version is used for a reduce clause with the nowait argument.
3539 */
3540 kmp_int32
3541 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3542 size_t reduce_size, void *reduce_data,
3543 void (*reduce_func)(void *lhs_data, void *rhs_data),
3544 kmp_critical_name *lck) {
3545
3546 KMP_COUNT_BLOCK(REDUCE_nowait);
3547 int retval = 0;
3548 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3549 kmp_info_t *th;
3550 kmp_team_t *team;
3551 int teams_swapped = 0, task_state;
3552 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3553 __kmp_assert_valid_gtid(global_tid);
3554
3555 // why do we need this initialization here at all?
3556 // A reduction clause cannot be used as a stand-alone directive.
3557
3558 // do not call __kmp_serial_initialize(), it will be called by
3559 // __kmp_parallel_initialize() if needed
3560 // possible detection of false-positive race by the threadchecker ???
3561 if (!TCR_4(__kmp_init_parallel))
3562 __kmp_parallel_initialize();
3563
3564 __kmp_resume_if_soft_paused();
3565
3566 // check correctness of reduce block nesting
3567 #if KMP_USE_DYNAMIC_LOCK
3568 if (__kmp_env_consistency_check)
3569 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3570 #else
3571 if (__kmp_env_consistency_check)
3572 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3573 #endif
3574
3575 th = __kmp_thread_from_gtid(global_tid);
3576 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3577
3578 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3579 // the value should be kept in a variable
3580 // the variable should be either a construct-specific or thread-specific
3581 // property, not a team specific property
3582 // (a thread can reach the next reduce block on the next construct, reduce
3583 // method may differ on the next construct)
3584 // an ident_t "loc" parameter could be used as a construct-specific property
3585 // (what if loc == 0?)
3586 // (if both construct-specific and team-specific variables were shared,
3587 // then unnecessary extra syncs would be needed)
3588 // a thread-specific variable is better regarding two issues above (next
3589 // construct and extra syncs)
3590 // a thread-specific "th_local.reduction_method" variable is used currently
3591 // each thread executes the 'determine' and 'set' lines (no need to restrict
3592 // this to one thread; doing it per thread avoids unnecessary extra syncs)
3593
3594 packed_reduction_method = __kmp_determine_reduction_method(
3595 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3596 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3597
3598 OMPT_REDUCTION_DECL(th, global_tid);
3599 if (packed_reduction_method == critical_reduce_block) {
3600
3601 OMPT_REDUCTION_BEGIN;
3602
3603 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3604 retval = 1;
3605
3606 } else if (packed_reduction_method == empty_reduce_block) {
3607
3608 OMPT_REDUCTION_BEGIN;
3609
3610 // usage: if team size == 1, no synchronization is required ( Intel
3611 // platforms only )
3612 retval = 1;
3613
3614 } else if (packed_reduction_method == atomic_reduce_block) {
3615
3616 retval = 2;
3617
3618 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3619 // won't be called by the code gen)
3620 // (this is not ideal, because the checking block has been closed by
3621 // this 'pop',
3622 // but the atomic operation has not been executed yet; it will be executed
3623 // slightly later, literally on the next instruction)
3624 if (__kmp_env_consistency_check)
3625 __kmp_pop_sync(global_tid, ct_reduce, loc);
3626
3627 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3628 tree_reduce_block)) {
3629
3630 // AT: performance issue: a real barrier here
3631 // AT: (if primary thread is slow, other threads are blocked here waiting for
3632 // the primary thread to come and release them)
3633 // AT: (it's not what a customer might expect specifying NOWAIT clause)
3634 // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3635 // be confusing to a customer)
3636 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3637 // might go faster and be more in line with the sense of NOWAIT
3638 // AT: TO DO: do epcc test and compare times
3639
3640 // this barrier should be invisible to a customer and to the threading profile
3641 // tool (it's neither a terminating barrier nor customer's code, it's
3642 // used for an internal purpose)
3643 #if OMPT_SUPPORT
3644 // JP: can this barrier potentially lead to task scheduling?
3645 // JP: as long as there is a barrier in the implementation, OMPT should and
3646 // will provide the barrier events
3647 // so we set-up the necessary frame/return addresses.
3648 ompt_frame_t *ompt_frame;
3649 if (ompt_enabled.enabled) {
3650 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3651 if (ompt_frame->enter_frame.ptr == NULL)
3652 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3653 }
3654 OMPT_STORE_RETURN_ADDRESS(global_tid);
3655 #endif
3656 #if USE_ITT_NOTIFY
3657 __kmp_threads[global_tid]->th.th_ident = loc;
3658 #endif
3659 retval =
3660 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3661 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3662 retval = (retval != 0) ? (0) : (1);
3663 #if OMPT_SUPPORT && OMPT_OPTIONAL
3664 if (ompt_enabled.enabled) {
3665 ompt_frame->enter_frame = ompt_data_none;
3666 }
3667 #endif
3668
3669 // all other workers except primary thread should do this pop here
3670 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3671 if (__kmp_env_consistency_check) {
3672 if (retval == 0) {
3673 __kmp_pop_sync(global_tid, ct_reduce, loc);
3674 }
3675 }
3676
3677 } else {
3678
3679 // should never reach this block
3680 KMP_ASSERT(0); // "unexpected method"
3681 }
3682 if (teams_swapped) {
3683 __kmp_restore_swapped_teams(th, team, task_state);
3684 }
3685 KA_TRACE(
3686 10,
3687 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3688 global_tid, packed_reduction_method, retval));
3689
3690 return retval;
3691 }
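
/* Minimal sketch of the code shape a compiler might emit around
   __kmpc_reduce_nowait() for something like
       #pragma omp for reduction(+ : sum) nowait
   The names partial, sum, crit and the body of reduce_func are illustrative
   assumptions, not the actual lowering of any particular compiler.

   static void reduce_func(void *lhs, void *rhs) { // combine two partials
     *(double *)lhs += *(double *)rhs;
   }
   ...
   double partial = ...;            // thread-private partial result
   static kmp_critical_name crit;   // zero-initialized name/lock storage
   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(double), &partial,
                                reduce_func, &crit)) {
   case 1: // this thread folds its partial into the shared variable
     sum += partial;
     __kmpc_end_reduce_nowait(loc, gtid, &crit);
     break;
   case 2: // atomic path: every thread updates the shared variable atomically;
           // __kmpc_end_reduce_nowait() is not called on this path (see the
           // atomic_reduce_block comments in this file)
     // ... atomic update of sum by partial ...
     break;
   default: // 0: nothing to do for this thread
     break;
   }
*/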
3692
3693 /*!
3694 @ingroup SYNCHRONIZATION
3695 @param loc source location information
3696 @param global_tid global thread id.
3697 @param lck pointer to the unique lock data structure
3698
3699 Finish the execution of a reduce nowait.
3700 */
3701 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3702 kmp_critical_name *lck) {
3703
3704 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3705
3706 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3707 __kmp_assert_valid_gtid(global_tid);
3708
3709 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3710
3711 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3712
3713 if (packed_reduction_method == critical_reduce_block) {
3714
3715 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3716 OMPT_REDUCTION_END;
3717
3718 } else if (packed_reduction_method == empty_reduce_block) {
3719
3720 // usage: if team size == 1, no synchronization is required ( on Intel
3721 // platforms only )
3722
3723 OMPT_REDUCTION_END;
3724
3725 } else if (packed_reduction_method == atomic_reduce_block) {
3726
3727 // neither primary thread nor other workers should get here
3728 // (code gen does not generate this call in case 2: atomic reduce block)
3729 // actually it would be better to remove this else-if entirely;
3730 // after removal this value would be caught by the 'else' branch and assert
3731
3732 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3733 tree_reduce_block)) {
3734
3735 // only primary thread gets here
3736 // OMPT: tree reduction is annotated in the barrier code
3737
3738 } else {
3739
3740 // should never reach this block
3741 KMP_ASSERT(0); // "unexpected method"
3742 }
3743
3744 if (__kmp_env_consistency_check)
3745 __kmp_pop_sync(global_tid, ct_reduce, loc);
3746
3747 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3748 global_tid, packed_reduction_method));
3749
3750 return;
3751 }
3752
3753 /* 2.a.ii. Reduce Block with a terminating barrier */
3754
3755 /*!
3756 @ingroup SYNCHRONIZATION
3757 @param loc source location information
3758 @param global_tid global thread number
3759 @param num_vars number of items (variables) to be reduced
3760 @param reduce_size size of data in bytes to be reduced
3761 @param reduce_data pointer to data to be reduced
3762 @param reduce_func callback function providing reduction operation on two
3763 operands and returning result of reduction in lhs_data
3764 @param lck pointer to the unique lock data structure
3765 @result 1 for the primary thread, 0 for all other team threads, 2 for all team
3766 threads if atomic reduction needed
3767
3768 A blocking reduce that includes an implicit barrier.
3769 */
3770 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3771 size_t reduce_size, void *reduce_data,
3772 void (*reduce_func)(void *lhs_data, void *rhs_data),
3773 kmp_critical_name *lck) {
3774 KMP_COUNT_BLOCK(REDUCE_wait);
3775 int retval = 0;
3776 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3777 kmp_info_t *th;
3778 kmp_team_t *team;
3779 int teams_swapped = 0, task_state;
3780
3781 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3782 __kmp_assert_valid_gtid(global_tid);
3783
3784 // why do we need this initialization here at all?
3785 // A reduction clause cannot be a stand-alone directive.
3786
3787 // do not call __kmp_serial_initialize(), it will be called by
3788 // __kmp_parallel_initialize() if needed
3789 // possible detection of false-positive race by the threadchecker ???
3790 if (!TCR_4(__kmp_init_parallel))
3791 __kmp_parallel_initialize();
3792
3793 __kmp_resume_if_soft_paused();
3794
3795 // check correctness of reduce block nesting
3796 #if KMP_USE_DYNAMIC_LOCK
3797 if (__kmp_env_consistency_check)
3798 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3799 #else
3800 if (__kmp_env_consistency_check)
3801 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3802 #endif
3803
3804 th = __kmp_thread_from_gtid(global_tid);
3805 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3806
3807 packed_reduction_method = __kmp_determine_reduction_method(
3808 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3809 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3810
3811 OMPT_REDUCTION_DECL(th, global_tid);
3812
3813 if (packed_reduction_method == critical_reduce_block) {
3814
3815 OMPT_REDUCTION_BEGIN;
3816 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3817 retval = 1;
3818
3819 } else if (packed_reduction_method == empty_reduce_block) {
3820
3821 OMPT_REDUCTION_BEGIN;
3822 // usage: if team size == 1, no synchronization is required ( Intel
3823 // platforms only )
3824 retval = 1;
3825
3826 } else if (packed_reduction_method == atomic_reduce_block) {
3827
3828 retval = 2;
3829
3830 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3831 tree_reduce_block)) {
3832
3833 // case tree_reduce_block:
3834 // this barrier should be visible to a customer and to the threading profile
3835 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3836 #if OMPT_SUPPORT
3837 ompt_frame_t *ompt_frame;
3838 if (ompt_enabled.enabled) {
3839 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3840 if (ompt_frame->enter_frame.ptr == NULL)
3841 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3842 }
3843 OMPT_STORE_RETURN_ADDRESS(global_tid);
3844 #endif
3845 #if USE_ITT_NOTIFY
3846 __kmp_threads[global_tid]->th.th_ident =
3847 loc; // needed for correct notification of frames
3848 #endif
3849 retval =
3850 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3851 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3852 retval = (retval != 0) ? (0) : (1);
3853 #if OMPT_SUPPORT && OMPT_OPTIONAL
3854 if (ompt_enabled.enabled) {
3855 ompt_frame->enter_frame = ompt_data_none;
3856 }
3857 #endif
3858
3859 // all other workers except primary thread should do this pop here
3860 // (none of other workers except primary will enter __kmpc_end_reduce())
3861 if (__kmp_env_consistency_check) {
3862 if (retval == 0) { // 0: all other workers; 1: primary thread
3863 __kmp_pop_sync(global_tid, ct_reduce, loc);
3864 }
3865 }
3866
3867 } else {
3868
3869 // should never reach this block
3870 KMP_ASSERT(0); // "unexpected method"
3871 }
3872 if (teams_swapped) {
3873 __kmp_restore_swapped_teams(th, team, task_state);
3874 }
3875
3876 KA_TRACE(10,
3877 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3878 global_tid, packed_reduction_method, retval));
3879 return retval;
3880 }
3881
3882 /*!
3883 @ingroup SYNCHRONIZATION
3884 @param loc source location information
3885 @param global_tid global thread id.
3886 @param lck pointer to the unique lock data structure
3887
3888 Finish the execution of a blocking reduce.
3889 The <tt>lck</tt> pointer must be the same as that used in the corresponding
3890 start function.
3891 */
3892 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3893 kmp_critical_name *lck) {
3894
3895 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3896 kmp_info_t *th;
3897 kmp_team_t *team;
3898 int teams_swapped = 0, task_state;
3899
3900 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3901 __kmp_assert_valid_gtid(global_tid);
3902
3903 th = __kmp_thread_from_gtid(global_tid);
3904 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3905
3906 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3907
3908 // this barrier should be visible to a customer and to the threading profile
3909 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3910 OMPT_REDUCTION_DECL(th, global_tid);
3911
3912 if (packed_reduction_method == critical_reduce_block) {
3913 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3914
3915 OMPT_REDUCTION_END;
3916
3917 // TODO: implicit barrier: should be exposed
3918 #if OMPT_SUPPORT
3919 ompt_frame_t *ompt_frame;
3920 if (ompt_enabled.enabled) {
3921 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3922 if (ompt_frame->enter_frame.ptr == NULL)
3923 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3924 }
3925 OMPT_STORE_RETURN_ADDRESS(global_tid);
3926 #endif
3927 #if USE_ITT_NOTIFY
3928 __kmp_threads[global_tid]->th.th_ident = loc;
3929 #endif
3930 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3931 #if OMPT_SUPPORT && OMPT_OPTIONAL
3932 if (ompt_enabled.enabled) {
3933 ompt_frame->enter_frame = ompt_data_none;
3934 }
3935 #endif
3936
3937 } else if (packed_reduction_method == empty_reduce_block) {
3938
3939 OMPT_REDUCTION_END;
3940
3941 // usage: if team size==1, no synchronization is required (Intel platforms only)
3942
3943 // TODO: implicit barrier: should be exposed
3944 #if OMPT_SUPPORT
3945 ompt_frame_t *ompt_frame;
3946 if (ompt_enabled.enabled) {
3947 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3948 if (ompt_frame->enter_frame.ptr == NULL)
3949 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3950 }
3951 OMPT_STORE_RETURN_ADDRESS(global_tid);
3952 #endif
3953 #if USE_ITT_NOTIFY
3954 __kmp_threads[global_tid]->th.th_ident = loc;
3955 #endif
3956 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3957 #if OMPT_SUPPORT && OMPT_OPTIONAL
3958 if (ompt_enabled.enabled) {
3959 ompt_frame->enter_frame = ompt_data_none;
3960 }
3961 #endif
3962
3963 } else if (packed_reduction_method == atomic_reduce_block) {
3964
3965 #if OMPT_SUPPORT
3966 ompt_frame_t *ompt_frame;
3967 if (ompt_enabled.enabled) {
3968 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3969 if (ompt_frame->enter_frame.ptr == NULL)
3970 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3971 }
3972 OMPT_STORE_RETURN_ADDRESS(global_tid);
3973 #endif
3974 // TODO: implicit barrier: should be exposed
3975 #if USE_ITT_NOTIFY
3976 __kmp_threads[global_tid]->th.th_ident = loc;
3977 #endif
3978 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3979 #if OMPT_SUPPORT && OMPT_OPTIONAL
3980 if (ompt_enabled.enabled) {
3981 ompt_frame->enter_frame = ompt_data_none;
3982 }
3983 #endif
3984
3985 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3986 tree_reduce_block)) {
3987
3988 // only primary thread executes here (primary releases all other workers)
3989 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3990 global_tid);
3991
3992 } else {
3993
3994 // should never reach this block
3995 KMP_ASSERT(0); // "unexpected method"
3996 }
3997 if (teams_swapped) {
3998 __kmp_restore_swapped_teams(th, team, task_state);
3999 }
4000
4001 if (__kmp_env_consistency_check)
4002 __kmp_pop_sync(global_tid, ct_reduce, loc);
4003
4004 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4005 global_tid, packed_reduction_method));
4006
4007 return;
4008 }
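
/* Hedged sketch for the blocking form: assuming the same general lowering
   shape as the nowait case above, but using __kmpc_reduce()/__kmpc_end_reduce().
   Here the atomic path (return value 2) also calls __kmpc_end_reduce() so the
   terminating barrier in the atomic_reduce_block branch above is reached; only
   workers that get 0 back from the tree-reduce path skip the end call.

   switch (__kmpc_reduce(loc, gtid, 1, sizeof(double), &partial,
                         reduce_func, &crit)) {
   case 1: sum += partial; __kmpc_end_reduce(loc, gtid, &crit); break;
   case 2: // atomic update of sum, then:
           __kmpc_end_reduce(loc, gtid, &crit); break;
   default: break; // 0: barrier already taken inside __kmpc_reduce()
   }
*/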
4009
4010 #undef __KMP_GET_REDUCTION_METHOD
4011 #undef __KMP_SET_REDUCTION_METHOD
4012
4013 /* end of interface to fast scalable reduce routines */
4014
4015 kmp_uint64 __kmpc_get_taskid() {
4016
4017 kmp_int32 gtid;
4018 kmp_info_t *thread;
4019
4020 gtid = __kmp_get_gtid();
4021 if (gtid < 0) {
4022 return 0;
4023 }
4024 thread = __kmp_thread_from_gtid(gtid);
4025 return thread->th.th_current_task->td_task_id;
4026
4027 } // __kmpc_get_taskid
4028
4029 kmp_uint64 __kmpc_get_parent_taskid() {
4030
4031 kmp_int32 gtid;
4032 kmp_info_t *thread;
4033 kmp_taskdata_t *parent_task;
4034
4035 gtid = __kmp_get_gtid();
4036 if (gtid < 0) {
4037 return 0;
4038 }
4039 thread = __kmp_thread_from_gtid(gtid);
4040 parent_task = thread->th.th_current_task->td_parent;
4041 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4042
4043 } // __kmpc_get_parent_taskid
4044
4045 /*!
4046 @ingroup WORK_SHARING
4047 @param loc source location information.
4048 @param gtid global thread number.
4049 @param num_dims number of associated doacross loops.
4050 @param dims info on loops bounds.
4051
4052 Initialize doacross loop information.
4053 Expect the compiler to send us inclusive bounds,
4054 e.g. for (i = 2; i < 9; i += 2) this gives lo = 2, up = 8, st = 2.
4055 */
4056 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4057 const struct kmp_dim *dims) {
4058 __kmp_assert_valid_gtid(gtid);
4059 int j, idx;
4060 kmp_int64 last, trace_count;
4061 kmp_info_t *th = __kmp_threads[gtid];
4062 kmp_team_t *team = th->th.th_team;
4063 kmp_uint32 *flags;
4064 kmp_disp_t *pr_buf = th->th.th_dispatch;
4065 dispatch_shared_info_t *sh_buf;
4066
4067 KA_TRACE(
4068 20,
4069 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4070 gtid, num_dims, !team->t.t_serialized));
4071 KMP_DEBUG_ASSERT(dims != NULL);
4072 KMP_DEBUG_ASSERT(num_dims > 0);
4073
4074 if (team->t.t_serialized) {
4075 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4076 return; // no dependencies if team is serialized
4077 }
4078 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4079 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4080 // the next loop
4081 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4082
4083 // Save bounds info into allocated private buffer
4084 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4085 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4086 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4087 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4088 pr_buf->th_doacross_info[0] =
4089 (kmp_int64)num_dims; // first element is number of dimensions
4090 // Also save the address of num_done in order to access it later without
4091 // knowing the buffer index
4092 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4093 pr_buf->th_doacross_info[2] = dims[0].lo;
4094 pr_buf->th_doacross_info[3] = dims[0].up;
4095 pr_buf->th_doacross_info[4] = dims[0].st;
4096 last = 5;
4097 for (j = 1; j < num_dims; ++j) {
4098 kmp_int64
4099 range_length; // To keep ranges of all dimensions but the first dims[0]
4100 if (dims[j].st == 1) { // most common case
4101 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4102 range_length = dims[j].up - dims[j].lo + 1;
4103 } else {
4104 if (dims[j].st > 0) {
4105 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4106 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4107 } else { // negative increment
4108 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4109 range_length =
4110 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4111 }
4112 }
4113 pr_buf->th_doacross_info[last++] = range_length;
4114 pr_buf->th_doacross_info[last++] = dims[j].lo;
4115 pr_buf->th_doacross_info[last++] = dims[j].up;
4116 pr_buf->th_doacross_info[last++] = dims[j].st;
4117 }
4118
4119 // Compute total trip count.
4120 // Start with range of dims[0] which we don't need to keep in the buffer.
4121 if (dims[0].st == 1) { // most common case
4122 trace_count = dims[0].up - dims[0].lo + 1;
4123 } else if (dims[0].st > 0) {
4124 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4125 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4126 } else { // negative increment
4127 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4128 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4129 }
4130 for (j = 1; j < num_dims; ++j) {
4131 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4132 }
4133 KMP_DEBUG_ASSERT(trace_count > 0);
4134
4135 // Check if shared buffer is not occupied by other loop (idx -
4136 // __kmp_dispatch_num_buffers)
4137 if (idx != sh_buf->doacross_buf_idx) {
4138 // Shared buffer is occupied, wait for it to be free
4139 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4140 __kmp_eq_4, NULL);
4141 }
4142 #if KMP_32_BIT_ARCH
4143 // Check if we are the first thread. After the CAS the first thread gets 0,
4144 // others get 1 if initialization is in progress, allocated pointer otherwise.
4145 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4146 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4147 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4148 #else
4149 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4150 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4151 #endif
4152 if (flags == NULL) {
4153 // we are the first thread, allocate the array of flags
4154 size_t size =
4155 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4156 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4157 KMP_MB();
4158 sh_buf->doacross_flags = flags;
4159 } else if (flags == (kmp_uint32 *)1) {
4160 #if KMP_32_BIT_ARCH
4161 // initialization is still in progress, need to wait
4162 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4163 #else
4164 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4165 #endif
4166 KMP_YIELD(TRUE);
4167 KMP_MB();
4168 } else {
4169 KMP_MB();
4170 }
4171 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4172 pr_buf->th_doacross_flags =
4173 sh_buf->doacross_flags; // save private copy in order to not
4174 // touch shared buffer on each iteration
4175 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4176 }
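
/* Illustrative sketch of the init call, assuming the kmp_dim layout declared in
   kmp.h (inclusive lo/up plus stride st, as used above). For the loop from the
   comment above,
       for (i = 2; i < 9; i += 2)
   a compiler could fill in one dimension like this before entering the loop:

   struct kmp_dim dims[1];
   dims[0].lo = 2; // first iteration
   dims[0].up = 8; // last iteration actually executed (inclusive)
   dims[0].st = 2; // stride
   __kmpc_doacross_init(loc, gtid, 1, dims);
*/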
4177
4178 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4179 __kmp_assert_valid_gtid(gtid);
4180 kmp_int64 shft;
4181 size_t num_dims, i;
4182 kmp_uint32 flag;
4183 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4184 kmp_info_t *th = __kmp_threads[gtid];
4185 kmp_team_t *team = th->th.th_team;
4186 kmp_disp_t *pr_buf;
4187 kmp_int64 lo, up, st;
4188
4189 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4190 if (team->t.t_serialized) {
4191 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4192 return; // no dependencies if team is serialized
4193 }
4194
4195 // calculate sequential iteration number and check out-of-bounds condition
4196 pr_buf = th->th.th_dispatch;
4197 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4198 num_dims = (size_t)pr_buf->th_doacross_info[0];
4199 lo = pr_buf->th_doacross_info[2];
4200 up = pr_buf->th_doacross_info[3];
4201 st = pr_buf->th_doacross_info[4];
4202 #if OMPT_SUPPORT && OMPT_OPTIONAL
4203 ompt_dependence_t deps[num_dims];
4204 #endif
4205 if (st == 1) { // most common case
4206 if (vec[0] < lo || vec[0] > up) {
4207 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4208 "bounds [%lld,%lld]\n",
4209 gtid, vec[0], lo, up));
4210 return;
4211 }
4212 iter_number = vec[0] - lo;
4213 } else if (st > 0) {
4214 if (vec[0] < lo || vec[0] > up) {
4215 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4216 "bounds [%lld,%lld]\n",
4217 gtid, vec[0], lo, up));
4218 return;
4219 }
4220 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4221 } else { // negative increment
4222 if (vec[0] > lo || vec[0] < up) {
4223 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4224 "bounds [%lld,%lld]\n",
4225 gtid, vec[0], lo, up));
4226 return;
4227 }
4228 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4229 }
4230 #if OMPT_SUPPORT && OMPT_OPTIONAL
4231 deps[0].variable.value = iter_number;
4232 deps[0].dependence_type = ompt_dependence_type_sink;
4233 #endif
4234 for (i = 1; i < num_dims; ++i) {
4235 kmp_int64 iter, ln;
4236 size_t j = i * 4;
4237 ln = pr_buf->th_doacross_info[j + 1];
4238 lo = pr_buf->th_doacross_info[j + 2];
4239 up = pr_buf->th_doacross_info[j + 3];
4240 st = pr_buf->th_doacross_info[j + 4];
4241 if (st == 1) {
4242 if (vec[i] < lo || vec[i] > up) {
4243 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4244 "bounds [%lld,%lld]\n",
4245 gtid, vec[i], lo, up));
4246 return;
4247 }
4248 iter = vec[i] - lo;
4249 } else if (st > 0) {
4250 if (vec[i] < lo || vec[i] > up) {
4251 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4252 "bounds [%lld,%lld]\n",
4253 gtid, vec[i], lo, up));
4254 return;
4255 }
4256 iter = (kmp_uint64)(vec[i] - lo) / st;
4257 } else { // st < 0
4258 if (vec[i] > lo || vec[i] < up) {
4259 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4260 "bounds [%lld,%lld]\n",
4261 gtid, vec[i], lo, up));
4262 return;
4263 }
4264 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4265 }
4266 iter_number = iter + ln * iter_number;
4267 #if OMPT_SUPPORT && OMPT_OPTIONAL
4268 deps[i].variable.value = iter;
4269 deps[i].dependence_type = ompt_dependence_type_sink;
4270 #endif
4271 }
4272 shft = iter_number % 32; // use 32-bit granularity
4273 iter_number >>= 5; // divided by 32
4274 flag = 1 << shft;
4275 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4276 KMP_YIELD(TRUE);
4277 }
4278 KMP_MB();
4279 #if OMPT_SUPPORT && OMPT_OPTIONAL
4280 if (ompt_enabled.ompt_callback_dependences) {
4281 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4282 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4283 }
4284 #endif
4285 KA_TRACE(20,
4286 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4287 gtid, (iter_number << 5) + shft));
4288 }
4289
4290 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4291 __kmp_assert_valid_gtid(gtid);
4292 kmp_int64 shft;
4293 size_t num_dims, i;
4294 kmp_uint32 flag;
4295 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4296 kmp_info_t *th = __kmp_threads[gtid];
4297 kmp_team_t *team = th->th.th_team;
4298 kmp_disp_t *pr_buf;
4299 kmp_int64 lo, st;
4300
4301 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4302 if (team->t.t_serialized) {
4303 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4304 return; // no dependencies if team is serialized
4305 }
4306
4307 // calculate sequential iteration number (same as in "wait" but no
4308 // out-of-bounds checks)
4309 pr_buf = th->th.th_dispatch;
4310 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4311 num_dims = (size_t)pr_buf->th_doacross_info[0];
4312 lo = pr_buf->th_doacross_info[2];
4313 st = pr_buf->th_doacross_info[4];
4314 #if OMPT_SUPPORT && OMPT_OPTIONAL
4315 ompt_dependence_t deps[num_dims];
4316 #endif
4317 if (st == 1) { // most common case
4318 iter_number = vec[0] - lo;
4319 } else if (st > 0) {
4320 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4321 } else { // negative increment
4322 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4323 }
4324 #if OMPT_SUPPORT && OMPT_OPTIONAL
4325 deps[0].variable.value = iter_number;
4326 deps[0].dependence_type = ompt_dependence_type_source;
4327 #endif
4328 for (i = 1; i < num_dims; ++i) {
4329 kmp_int64 iter, ln;
4330 size_t j = i * 4;
4331 ln = pr_buf->th_doacross_info[j + 1];
4332 lo = pr_buf->th_doacross_info[j + 2];
4333 st = pr_buf->th_doacross_info[j + 4];
4334 if (st == 1) {
4335 iter = vec[i] - lo;
4336 } else if (st > 0) {
4337 iter = (kmp_uint64)(vec[i] - lo) / st;
4338 } else { // st < 0
4339 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4340 }
4341 iter_number = iter + ln * iter_number;
4342 #if OMPT_SUPPORT && OMPT_OPTIONAL
4343 deps[i].variable.value = iter;
4344 deps[i].dependence_type = ompt_dependence_type_source;
4345 #endif
4346 }
4347 #if OMPT_SUPPORT && OMPT_OPTIONAL
4348 if (ompt_enabled.ompt_callback_dependences) {
4349 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4350 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4351 }
4352 #endif
4353 shft = iter_number % 32; // use 32-bit granularity
4354 iter_number >>= 5; // divided by 32
4355 flag = 1 << shft;
4356 KMP_MB();
4357 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4358 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4359 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4360 (iter_number << 5) + shft));
4361 }
4362
4363 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4364 __kmp_assert_valid_gtid(gtid);
4365 kmp_int32 num_done;
4366 kmp_info_t *th = __kmp_threads[gtid];
4367 kmp_team_t *team = th->th.th_team;
4368 kmp_disp_t *pr_buf = th->th.th_dispatch;
4369
4370 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4371 if (team->t.t_serialized) {
4372 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4373 return; // nothing to do
4374 }
4375 num_done =
4376 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4377 if (num_done == th->th.th_team_nproc) {
4378 // we are the last thread, need to free shared resources
4379 int idx = pr_buf->th_doacross_buf_idx - 1;
4380 dispatch_shared_info_t *sh_buf =
4381 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4382 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4383 (kmp_int64)&sh_buf->doacross_num_done);
4384 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4385 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4386 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4387 sh_buf->doacross_flags = NULL;
4388 sh_buf->doacross_num_done = 0;
4389 sh_buf->doacross_buf_idx +=
4390 __kmp_dispatch_num_buffers; // free buffer for future re-use
4391 }
4392 // free private resources (need to keep buffer index forever)
4393 pr_buf->th_doacross_flags = NULL;
4394 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4395 pr_buf->th_doacross_info = NULL;
4396 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4397 }
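
/* Hedged sketch of how the four doacross entry points above fit together for a
   single-dimension ordered loop with cross-iteration dependences, e.g.
       #pragma omp for ordered(1)
       for (i = 2; i < 9; i += 2) {
         #pragma omp ordered depend(sink : i - 2)
         ...                                // work that depends on iteration i - 2
         #pragma omp ordered depend(source)
       }
   One plausible lowering (vectors carry one kmp_int64 per dimension):

   __kmpc_doacross_init(loc, gtid, 1, dims);    // once per thread, before the loop
   for each assigned iteration i {
     kmp_int64 sink_vec[1] = {i - 2};
     __kmpc_doacross_wait(loc, gtid, sink_vec); // block until i - 2 is posted
     // ... loop body ...
     kmp_int64 src_vec[1] = {i};
     __kmpc_doacross_post(loc, gtid, src_vec);  // mark iteration i complete
   }
   __kmpc_doacross_fini(loc, gtid);             // once per thread, after the loop
*/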
4398
4399 /* OpenMP 5.1 Memory Management routines */
4400 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4401 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4402 }
4403
4404 void *omp_aligned_alloc(size_t align, size_t size,
4405 omp_allocator_handle_t allocator) {
4406 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4407 }
4408
4409 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4410 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4411 }
4412
4413 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4414 omp_allocator_handle_t allocator) {
4415 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4416 }
4417
4418 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4419 omp_allocator_handle_t free_allocator) {
4420 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4421 free_allocator);
4422 }
4423
4424 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4425 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4426 }
4427 /* end of OpenMP 5.1 Memory Management routines */
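
/* Example of the user-facing allocator API served by the thin wrappers above
   (standard OpenMP 5.x calls with a predefined allocator):

   #include <omp.h>

   double *buf = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
                                             omp_default_mem_alloc);
   if (buf != NULL) {
     // ... use buf ...
     omp_free(buf, omp_default_mem_alloc);
   }
*/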
4428
4429 int __kmpc_get_target_offload(void) {
4430 if (!__kmp_init_serial) {
4431 __kmp_serial_initialize();
4432 }
4433 return __kmp_target_offload;
4434 }
4435
4436 int __kmpc_pause_resource(kmp_pause_status_t level) {
4437 if (!__kmp_init_serial) {
4438 return 1; // Can't pause if runtime is not initialized
4439 }
4440 return __kmp_pause_resource(level);
4441 }
4442
4443 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4444 if (!__kmp_init_serial)
4445 __kmp_serial_initialize();
4446
4447 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4448
4449 #if OMPT_SUPPORT
4450 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4451 ompt_callbacks.ompt_callback(ompt_callback_error)(
4452 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4453 OMPT_GET_RETURN_ADDRESS(0));
4454 }
4455 #endif // OMPT_SUPPORT
4456
4457 char *src_loc;
4458 if (loc && loc->psource) {
4459 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4460 src_loc =
4461 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4462 __kmp_str_loc_free(&str_loc);
4463 } else {
4464 src_loc = __kmp_str_format("unknown");
4465 }
4466
4467 if (severity == severity_warning)
4468 KMP_WARNING(UserDirectedWarning, src_loc, message);
4469 else
4470 KMP_FATAL(UserDirectedError, src_loc, message);
4471
4472 __kmp_str_free(&src_loc);
4473 }
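
/* Hedged lowering sketch: the OpenMP 5.1 `error` directive with execution-time
   semantics is expected to reach __kmpc_error(); e.g.
       #pragma omp error at(execution) severity(warning) message("bad input")
   would become a call roughly of the form
       __kmpc_error(&loc, severity_warning, "bad input");
   with `loc` describing the directive's source position.
*/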
4474
4475 // Mark begin of scope directive.
4476 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4477 // reserved is for extension of scope directive and not used.
4478 #if OMPT_SUPPORT && OMPT_OPTIONAL
4479 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4480 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4481 int tid = __kmp_tid_from_gtid(gtid);
4482 ompt_callbacks.ompt_callback(ompt_callback_work)(
4483 ompt_work_scope, ompt_scope_begin,
4484 &(team->t.ompt_team_info.parallel_data),
4485 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4486 OMPT_GET_RETURN_ADDRESS(0));
4487 }
4488 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4489 }
4490
4491 // Mark end of scope directive
4492 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4493 // reserved is for extension of scope directive and not used.
4494 #if OMPT_SUPPORT && OMPT_OPTIONAL
4495 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4496 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4497 int tid = __kmp_tid_from_gtid(gtid);
4498 ompt_callbacks.ompt_callback(ompt_callback_work)(
4499 ompt_work_scope, ompt_scope_end,
4500 &(team->t.ompt_team_info.parallel_data),
4501 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4502 OMPT_GET_RETURN_ADDRESS(0));
4503 }
4504 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4505 }
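
/* Hedged lowering sketch for the OpenMP 5.1 `scope` construct: a compiler may
   bracket the structured block with these two entry points so that the OMPT
   work-scope begin/end events above can be raised, e.g.
       __kmpc_scope(&loc, gtid, NULL);
       // ... structured block of #pragma omp scope ...
       __kmpc_end_scope(&loc, gtid, NULL);
*/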
4506
4507 #ifdef KMP_USE_VERSION_SYMBOLS
4508 // For GOMP compatibility there are two versions of each omp_* API.
4509 // One is the plain C symbol and one is the Fortran symbol with an appended
4510 // underscore. When we implement a specific ompc_* version of an omp_*
4511 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4512 // instead of the Fortran versions in kmp_ftn_entry.h
4513 extern "C" {
4514 // Have to undef these from omp.h so they aren't translated into
4515 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4516 #ifdef omp_set_affinity_format
4517 #undef omp_set_affinity_format
4518 #endif
4519 #ifdef omp_get_affinity_format
4520 #undef omp_get_affinity_format
4521 #endif
4522 #ifdef omp_display_affinity
4523 #undef omp_display_affinity
4524 #endif
4525 #ifdef omp_capture_affinity
4526 #undef omp_capture_affinity
4527 #endif
4528 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4529 "OMP_5.0");
4530 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4531 "OMP_5.0");
4532 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4533 "OMP_5.0");
4534 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4535 "OMP_5.0");
4536 } // extern "C"
4537 #endif
4538