/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems on
// different Linux* OS distributions that either require that you include (or
// break when you try to include) <pci/types.h>. Since all we need is the two
// macros below (which are part of the kernel ABI, so can't change) we just
// define the constants here and don't include <futex.h>
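//
// A sketch of the kernel contract these macros rely on (illustration only,
// not additional runtime code):
//
//   // Block only if *addr still equals expected_val (atomic check-and-sleep):
//   syscall(__NR_futex, addr, FUTEX_WAIT, expected_val, NULL, NULL, 0);
//   // Wake up to n threads blocked in FUTEX_WAIT on addr:
//   syscall(__NR_futex, addr, FUTEX_WAKE, n, NULL, NULL, 0);
//
// The futex lock acquire and release paths below follow exactly this pattern.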
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */
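
// The bakery (ticket) idea in miniature (an illustrative sketch only; the
// real implementation is the ticket lock further below, which adds waiter
// yielding, consistency checks, and nesting support):
//
//   std::atomic<kmp_uint32> next_ticket{0}, now_serving{0};
//
//   void lock() {
//     kmp_uint32 my_ticket = next_ticket.fetch_add(1); // take a number
//     while (now_serving.load() != my_ticket)          // wait for my turn
//       ;                                              // spin
//   }
//   void unlock() { now_serving.fetch_add(1); }        // admit the next one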

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic wraps properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }
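
  // For example, once x has wrapped past zero while y has not (x == 1,
  // y == 0xFFFFFFFF), x - y is still 2. The ticket lock below depends on this
  // when it computes next_ticket - now_serving across a wraparound.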

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
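//
// Encoding of the TAS poll word, as used throughout this section:
//   poll == KMP_LOCK_FREE(tas)            -> lock is free
//   poll == KMP_LOCK_BUSY(gtid + 1, tas)  -> lock is held by thread gtid
// KMP_LOCK_STRIP removes any tag bits, so __kmp_get_tas_lock_owner returns
// the stripped poll value minus 1, and -1 therefore means "no owner".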

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);

  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  kmp_uint64 time;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);
  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  do {
#if !KMP_HAVE_UMWAIT
    __kmp_spin_backoff(&backoff);
#else
    if (!__kmp_tpause_enabled)
      __kmp_spin_backoff(&backoff);
#endif
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
  return retval;
}

static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_tas_lock(lck, gtid);
}

int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_tas_lock(lck, gtid);
}

int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_FSYNC_RELEASING(lck);
  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
      (__kmp_get_tas_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_tas_lock(lck, gtid);
}

void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
  lck->lk.poll = KMP_LOCK_FREE(tas);
}

void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_tas_lock(lck);
}

// nested test and set locks
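//
// Nested (omp_nest_lock_t) semantics, as implemented below: a re-acquisition
// by the owning thread just bumps depth_locked; the underlying TAS lock is
// only released when the depth returns to zero. Illustrative call sequence
// (hypothetical caller, not part of this file):
//
//   __kmp_acquire_nested_tas_lock(lck, gtid); // depth 1, lock acquired
//   __kmp_acquire_nested_tas_lock(lck, gtid); // depth 2, owner fast path
//   __kmp_release_nested_tas_lock(lck, gtid); // depth 1, KMP_LOCK_STILL_HELD
//   __kmp_release_nested_tas_lock(lck, gtid); // depth 0, KMP_LOCK_RELEASED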

int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_tas_lock_timed_template(lck, gtid);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_tas_lock(lck, gtid);
}

int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_tas_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_tas_lock(lck, gtid);
}

int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_tas_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_tas_lock(lck, gtid);
}

void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_destroy_tas_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention. They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).
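//
// The poll word packs the owner and a waiter flag: 0 means free,
// (gtid + 1) << 1 means held by thread gtid, and bit 0 set means at least one
// thread has blocked in FUTEX_WAIT on the lock, so the releasing thread must
// issue a FUTEX_WAKE.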

static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
}

static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 gtid_code = (gtid + 1) << 1;

  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid_code))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  KMP_FSYNC_PREPARE(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  kmp_int32 poll_val;

  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
              &(lck->lk.poll), KMP_LOCK_FREE(futex),
              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {

    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
         lck, gtid, poll_val, cond));

    // NOTE: if you try to use the following condition for this branch
    //
    // if ( poll_val & 1 == 0 )
    //
    // Then the 12.0 compiler has a bug where the following block will
    // always be skipped, regardless of the value of the LSB of poll_val.
    if (!cond) {
      // Try to set the lsb in the poll to indicate to the owner
      // thread that they need to wake this thread up.
      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
        KA_TRACE(
            1000,
            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
             lck, lck->lk.poll, gtid));
        continue;
      }
      poll_val |= KMP_LOCK_BUSY(1, futex);

      KA_TRACE(1000,
               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
                lck->lk.poll, gtid));
    }

    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
         lck, gtid, poll_val));

    long rc;
    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
                      NULL, 0)) != 0) {
      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
                      "failed (rc=%ld errno=%d)\n",
                      lck, gtid, poll_val, rc, errno));
      continue;
    }

    KA_TRACE(1000,
             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and was entered on
    // the OS futex queue. We must now perform a futex wake call when releasing
    // the lock, as we have no idea how many other threads are in the queue.
    gtid_code |= 1;
  }

  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
  return retval;
}

static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_futex_lock(lck, gtid);
}

int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_futex_lock(lck, gtid);
}

int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  KMP_FSYNC_RELEASING(lck);

  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));

  KA_TRACE(1000,
           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
            lck, gtid, poll_val));

  if (KMP_LOCK_STRIP(poll_val) & 1) {
    KA_TRACE(1000,
             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
              lck, gtid));
    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
            NULL, NULL, 0);
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
      (__kmp_get_futex_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_futex_lock(lck, gtid);
}

void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
}

void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_futex_lock(lck);
}

// nested futex locks

int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_futex_lock_timed_template(lck, gtid);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_futex_lock(lck, gtid);
}

int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_futex_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_futex_lock(lck, gtid);
}

int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_futex_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_futex_lock(lck, gtid);
}

void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_destroy_futex_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_futex_lock(lck);
}

#endif // KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.owner_id,
                                   std::memory_order_relaxed) -
         1;
}

static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.depth_locked,
                                   std::memory_order_relaxed) != -1;
}

static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
                                   std::memory_order_acquire) == my_ticket;
}
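
// __kmp_bakery_check is the spin predicate handed to KMP_WAIT_PTR in the
// acquire path below: a waiter is held up until now_serving reaches the
// ticket it drew from next_ticket.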

__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);

#ifdef USE_LOCK_PROFILE
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) != my_ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_acquire) == my_ticket) {
    return KMP_LOCK_ACQUIRED_FIRST;
  }
  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
  return retval;
}

static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_set_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_ticket_lock(lck, gtid);

  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                             std::memory_order_relaxed);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                   std::memory_order_relaxed);

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) == my_ticket) {
    kmp_uint32 next_ticket = my_ticket + 1;
    if (std::atomic_compare_exchange_strong_explicit(
            &lck->lk.next_ticket, &my_ticket, next_ticket,
            std::memory_order_acquire, std::memory_order_acquire)) {
      return TRUE;
    }
  }
  return FALSE;
}

static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_test_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_ticket_lock(lck, gtid);

  if (retval) {
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
  }
  return retval;
}

int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                  std::memory_order_relaxed) -
                        std::atomic_load_explicit(&lck->lk.now_serving,
                                                  std::memory_order_relaxed);

  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
                                 std::memory_order_release);

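  // distance approximates how many threads are still waiting for tickets. If
  // it exceeds the number of available processors, some waiters cannot be
  // running, so yield on the way out instead of burning the time slice.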
7233041982dSJonathan Peyton KMP_YIELD(distance >
7243041982dSJonathan Peyton (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
7258d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
7265e8470afSJim Cownie }
7275e8470afSJim Cownie
__kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)7283041982dSJonathan Peyton static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
7293041982dSJonathan Peyton kmp_int32 gtid) {
7305e8470afSJim Cownie char const *const func = "omp_unset_lock";
731f7cc6affSPaul Osmialowski
7323041982dSJonathan Peyton if (!std::atomic_load_explicit(&lck->lk.initialized,
7333041982dSJonathan Peyton std::memory_order_relaxed)) {
734f7cc6affSPaul Osmialowski KMP_FATAL(LockIsUninitialized, func);
735f7cc6affSPaul Osmialowski }
736f7cc6affSPaul Osmialowski if (lck->lk.self != lck) {
7375e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
7385e8470afSJim Cownie }
7395e8470afSJim Cownie if (__kmp_is_ticket_lock_nestable(lck)) {
7405e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
7415e8470afSJim Cownie }
7425e8470afSJim Cownie if (__kmp_get_ticket_lock_owner(lck) == -1) {
7435e8470afSJim Cownie KMP_FATAL(LockUnsettingFree, func);
7445e8470afSJim Cownie }
7453041982dSJonathan Peyton if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
7463041982dSJonathan Peyton (__kmp_get_ticket_lock_owner(lck) != gtid)) {
7475e8470afSJim Cownie KMP_FATAL(LockUnsettingSetByAnother, func);
7485e8470afSJim Cownie }
749f7cc6affSPaul Osmialowski std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
7508d09facaSAndrey Churbanov return __kmp_release_ticket_lock(lck, gtid);
7515e8470afSJim Cownie }
7525e8470afSJim Cownie
__kmp_init_ticket_lock(kmp_ticket_lock_t * lck)7533041982dSJonathan Peyton void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
7545e8470afSJim Cownie lck->lk.location = NULL;
755f7cc6affSPaul Osmialowski lck->lk.self = lck;
7563041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
7573041982dSJonathan Peyton std::memory_order_relaxed);
7583041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.now_serving, 0U,
7593041982dSJonathan Peyton std::memory_order_relaxed);
7603041982dSJonathan Peyton std::atomic_store_explicit(
7613041982dSJonathan Peyton &lck->lk.owner_id, 0,
7623041982dSJonathan Peyton std::memory_order_relaxed); // no thread owns the lock.
7633041982dSJonathan Peyton std::atomic_store_explicit(
7643041982dSJonathan Peyton &lck->lk.depth_locked, -1,
7653041982dSJonathan Peyton std::memory_order_relaxed); // -1 => not a nested lock.
7663041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.initialized, true,
7673041982dSJonathan Peyton std::memory_order_release);
7685e8470afSJim Cownie }
7695e8470afSJim Cownie
__kmp_destroy_ticket_lock(kmp_ticket_lock_t * lck)7703041982dSJonathan Peyton void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
7713041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.initialized, false,
7723041982dSJonathan Peyton std::memory_order_release);
773f7cc6affSPaul Osmialowski lck->lk.self = NULL;
7745e8470afSJim Cownie lck->lk.location = NULL;
7753041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
7763041982dSJonathan Peyton std::memory_order_relaxed);
7773041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.now_serving, 0U,
7783041982dSJonathan Peyton std::memory_order_relaxed);
779f7cc6affSPaul Osmialowski std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
7803041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.depth_locked, -1,
7813041982dSJonathan Peyton std::memory_order_relaxed);
7825e8470afSJim Cownie }
7835e8470afSJim Cownie
__kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t * lck)7843041982dSJonathan Peyton static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
7855e8470afSJim Cownie char const *const func = "omp_destroy_lock";
786f7cc6affSPaul Osmialowski
7873041982dSJonathan Peyton if (!std::atomic_load_explicit(&lck->lk.initialized,
7883041982dSJonathan Peyton std::memory_order_relaxed)) {
789f7cc6affSPaul Osmialowski KMP_FATAL(LockIsUninitialized, func);
790f7cc6affSPaul Osmialowski }
791f7cc6affSPaul Osmialowski if (lck->lk.self != lck) {
7925e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
7935e8470afSJim Cownie }
7945e8470afSJim Cownie if (__kmp_is_ticket_lock_nestable(lck)) {
7955e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
7965e8470afSJim Cownie }
7975e8470afSJim Cownie if (__kmp_get_ticket_lock_owner(lck) != -1) {
7985e8470afSJim Cownie KMP_FATAL(LockStillOwned, func);
7995e8470afSJim Cownie }
8005e8470afSJim Cownie __kmp_destroy_ticket_lock(lck);
8015e8470afSJim Cownie }
8025e8470afSJim Cownie
8035e8470afSJim Cownie // nested ticket locks
8045e8470afSJim Cownie
__kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t * lck,kmp_int32 gtid)8053041982dSJonathan Peyton int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
8065e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
8075e8470afSJim Cownie
8085e8470afSJim Cownie if (__kmp_get_ticket_lock_owner(lck) == gtid) {
8093041982dSJonathan Peyton std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
8103041982dSJonathan Peyton std::memory_order_relaxed);
8110e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_NEXT;
8123041982dSJonathan Peyton } else {
8135e8470afSJim Cownie __kmp_acquire_ticket_lock_timed_template(lck, gtid);
8143041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.depth_locked, 1,
8153041982dSJonathan Peyton std::memory_order_relaxed);
8163041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
8173041982dSJonathan Peyton std::memory_order_relaxed);
8180e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_FIRST;
8195e8470afSJim Cownie }
8205e8470afSJim Cownie }
8215e8470afSJim Cownie
__kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)8223041982dSJonathan Peyton static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
8233041982dSJonathan Peyton kmp_int32 gtid) {
8245e8470afSJim Cownie char const *const func = "omp_set_nest_lock";
825f7cc6affSPaul Osmialowski
8263041982dSJonathan Peyton if (!std::atomic_load_explicit(&lck->lk.initialized,
8273041982dSJonathan Peyton std::memory_order_relaxed)) {
828f7cc6affSPaul Osmialowski KMP_FATAL(LockIsUninitialized, func);
829f7cc6affSPaul Osmialowski }
830f7cc6affSPaul Osmialowski if (lck->lk.self != lck) {
8315e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
8325e8470afSJim Cownie }
8335e8470afSJim Cownie if (!__kmp_is_ticket_lock_nestable(lck)) {
8345e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
8355e8470afSJim Cownie }
8360e6d4577SJonathan Peyton return __kmp_acquire_nested_ticket_lock(lck, gtid);
8375e8470afSJim Cownie }
8385e8470afSJim Cownie
__kmp_test_nested_ticket_lock(kmp_ticket_lock_t * lck,kmp_int32 gtid)8393041982dSJonathan Peyton int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
8405e8470afSJim Cownie int retval;
8415e8470afSJim Cownie
8425e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
8435e8470afSJim Cownie
8445e8470afSJim Cownie if (__kmp_get_ticket_lock_owner(lck) == gtid) {
8453041982dSJonathan Peyton retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
8463041982dSJonathan Peyton std::memory_order_relaxed) +
8473041982dSJonathan Peyton 1;
8483041982dSJonathan Peyton } else if (!__kmp_test_ticket_lock(lck, gtid)) {
8495e8470afSJim Cownie retval = 0;
8503041982dSJonathan Peyton } else {
8513041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.depth_locked, 1,
8523041982dSJonathan Peyton std::memory_order_relaxed);
8533041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
8543041982dSJonathan Peyton std::memory_order_relaxed);
855f7cc6affSPaul Osmialowski retval = 1;
8565e8470afSJim Cownie }
8575e8470afSJim Cownie return retval;
8585e8470afSJim Cownie }
8595e8470afSJim Cownie
__kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)8603041982dSJonathan Peyton static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
8613041982dSJonathan Peyton kmp_int32 gtid) {
8625e8470afSJim Cownie char const *const func = "omp_test_nest_lock";
863f7cc6affSPaul Osmialowski
8643041982dSJonathan Peyton if (!std::atomic_load_explicit(&lck->lk.initialized,
8653041982dSJonathan Peyton std::memory_order_relaxed)) {
866f7cc6affSPaul Osmialowski KMP_FATAL(LockIsUninitialized, func);
867f7cc6affSPaul Osmialowski }
868f7cc6affSPaul Osmialowski if (lck->lk.self != lck) {
8695e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
8705e8470afSJim Cownie }
8715e8470afSJim Cownie if (!__kmp_is_ticket_lock_nestable(lck)) {
8725e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
8735e8470afSJim Cownie }
8745e8470afSJim Cownie return __kmp_test_nested_ticket_lock(lck, gtid);
8755e8470afSJim Cownie }
8765e8470afSJim Cownie
__kmp_release_nested_ticket_lock(kmp_ticket_lock_t * lck,kmp_int32 gtid)8773041982dSJonathan Peyton int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
8785e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
8795e8470afSJim Cownie
8803041982dSJonathan Peyton if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
8813041982dSJonathan Peyton std::memory_order_relaxed) -
8823041982dSJonathan Peyton 1) == 0) {
883f7cc6affSPaul Osmialowski std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
8845e8470afSJim Cownie __kmp_release_ticket_lock(lck, gtid);
8858d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
8865e8470afSJim Cownie }
8878d09facaSAndrey Churbanov return KMP_LOCK_STILL_HELD;
8885e8470afSJim Cownie }
8895e8470afSJim Cownie
__kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)8903041982dSJonathan Peyton static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
8913041982dSJonathan Peyton kmp_int32 gtid) {
8925e8470afSJim Cownie char const *const func = "omp_unset_nest_lock";
893f7cc6affSPaul Osmialowski
8943041982dSJonathan Peyton if (!std::atomic_load_explicit(&lck->lk.initialized,
8953041982dSJonathan Peyton std::memory_order_relaxed)) {
896f7cc6affSPaul Osmialowski KMP_FATAL(LockIsUninitialized, func);
897f7cc6affSPaul Osmialowski }
898f7cc6affSPaul Osmialowski if (lck->lk.self != lck) {
8995e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
9005e8470afSJim Cownie }
9015e8470afSJim Cownie if (!__kmp_is_ticket_lock_nestable(lck)) {
9025e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
9035e8470afSJim Cownie }
9045e8470afSJim Cownie if (__kmp_get_ticket_lock_owner(lck) == -1) {
9055e8470afSJim Cownie KMP_FATAL(LockUnsettingFree, func);
9065e8470afSJim Cownie }
9075e8470afSJim Cownie if (__kmp_get_ticket_lock_owner(lck) != gtid) {
9085e8470afSJim Cownie KMP_FATAL(LockUnsettingSetByAnother, func);
9095e8470afSJim Cownie }
9108d09facaSAndrey Churbanov return __kmp_release_nested_ticket_lock(lck, gtid);
9115e8470afSJim Cownie }
9125e8470afSJim Cownie
__kmp_init_nested_ticket_lock(kmp_ticket_lock_t * lck)9133041982dSJonathan Peyton void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
9145e8470afSJim Cownie __kmp_init_ticket_lock(lck);
9153041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916c47afcd9SAndrey Churbanov std::memory_order_relaxed);
917c47afcd9SAndrey Churbanov // >= 0 for nestable locks, -1 for simple locks
9185e8470afSJim Cownie }
9195e8470afSJim Cownie
__kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t * lck)9203041982dSJonathan Peyton void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
9215e8470afSJim Cownie __kmp_destroy_ticket_lock(lck);
9223041982dSJonathan Peyton std::atomic_store_explicit(&lck->lk.depth_locked, 0,
9233041982dSJonathan Peyton std::memory_order_relaxed);
9245e8470afSJim Cownie }
9255e8470afSJim Cownie
9265e8470afSJim Cownie static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t * lck)9273041982dSJonathan Peyton __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
9285e8470afSJim Cownie char const *const func = "omp_destroy_nest_lock";
929f7cc6affSPaul Osmialowski
9303041982dSJonathan Peyton if (!std::atomic_load_explicit(&lck->lk.initialized,
9313041982dSJonathan Peyton std::memory_order_relaxed)) {
932f7cc6affSPaul Osmialowski KMP_FATAL(LockIsUninitialized, func);
933f7cc6affSPaul Osmialowski }
934f7cc6affSPaul Osmialowski if (lck->lk.self != lck) {
9355e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
9365e8470afSJim Cownie }
9375e8470afSJim Cownie if (!__kmp_is_ticket_lock_nestable(lck)) {
9385e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
9395e8470afSJim Cownie }
9405e8470afSJim Cownie if (__kmp_get_ticket_lock_owner(lck) != -1) {
9415e8470afSJim Cownie KMP_FATAL(LockStillOwned, func);
9425e8470afSJim Cownie }
9435e8470afSJim Cownie __kmp_destroy_nested_ticket_lock(lck);
9445e8470afSJim Cownie }
9455e8470afSJim Cownie
9465e8470afSJim Cownie // access functions to fields which don't exist for all lock kinds.
9475e8470afSJim Cownie
__kmp_get_ticket_lock_location(kmp_ticket_lock_t * lck)9483041982dSJonathan Peyton static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
9495e8470afSJim Cownie return lck->lk.location;
9505e8470afSJim Cownie }
9515e8470afSJim Cownie
__kmp_set_ticket_lock_location(kmp_ticket_lock_t * lck,const ident_t * loc)9523041982dSJonathan Peyton static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
9533041982dSJonathan Peyton const ident_t *loc) {
9545e8470afSJim Cownie lck->lk.location = loc;
9555e8470afSJim Cownie }
9565e8470afSJim Cownie
__kmp_get_ticket_lock_flags(kmp_ticket_lock_t * lck)9573041982dSJonathan Peyton static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
9585e8470afSJim Cownie return lck->lk.flags;
9595e8470afSJim Cownie }
9605e8470afSJim Cownie
__kmp_set_ticket_lock_flags(kmp_ticket_lock_t * lck,kmp_lock_flags_t flags)9613041982dSJonathan Peyton static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
9623041982dSJonathan Peyton kmp_lock_flags_t flags) {
9635e8470afSJim Cownie lck->lk.flags = flags;
9645e8470afSJim Cownie }
9655e8470afSJim Cownie
9665e8470afSJim Cownie /* ------------------------------------------------------------------------ */
9675e8470afSJim Cownie /* queuing locks */
9685e8470afSJim Cownie
9693041982dSJonathan Peyton /* First the states
9703041982dSJonathan Peyton (head,tail) = 0, 0 means lock is unheld, nobody on queue
9713041982dSJonathan Peyton UINT_MAX or -1, 0 means lock is held, nobody on queue
9723041982dSJonathan Peyton h, h means lock held or about to transition,
9733041982dSJonathan Peyton 1 element on queue
9743041982dSJonathan Peyton h, t h <> t, means lock is held or about to
9753041982dSJonathan Peyton transition, >1 elements on queue
9763041982dSJonathan Peyton
9773041982dSJonathan Peyton Now the transitions
9783041982dSJonathan Peyton Acquire(0,0) = -1 ,0
9793041982dSJonathan Peyton Release(0,0) = Error
9803041982dSJonathan Peyton Acquire(-1,0) = h ,h h > 0
9813041982dSJonathan Peyton Release(-1,0) = 0 ,0
9823041982dSJonathan Peyton Acquire(h,h) = h ,t h > 0, t > 0, h <> t
9833041982dSJonathan Peyton Release(h,h) = -1 ,0 h > 0
9843041982dSJonathan Peyton Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
9853041982dSJonathan Peyton Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
9863041982dSJonathan Peyton
9873041982dSJonathan Peyton And pictorially
9883041982dSJonathan Peyton
9893041982dSJonathan Peyton +-----+
9903041982dSJonathan Peyton | 0, 0|------- release -------> Error
9913041982dSJonathan Peyton +-----+
9923041982dSJonathan Peyton | ^
9933041982dSJonathan Peyton acquire| |release
9943041982dSJonathan Peyton | |
9953041982dSJonathan Peyton | |
9963041982dSJonathan Peyton v |
9973041982dSJonathan Peyton +-----+
9983041982dSJonathan Peyton |-1, 0|
9993041982dSJonathan Peyton +-----+
10003041982dSJonathan Peyton | ^
10013041982dSJonathan Peyton acquire| |release
10023041982dSJonathan Peyton | |
10033041982dSJonathan Peyton | |
10043041982dSJonathan Peyton v |
10053041982dSJonathan Peyton +-----+
10063041982dSJonathan Peyton | h, h|
10073041982dSJonathan Peyton +-----+
10083041982dSJonathan Peyton | ^
10093041982dSJonathan Peyton acquire| |release
10103041982dSJonathan Peyton | |
10113041982dSJonathan Peyton | |
10123041982dSJonathan Peyton v |
10133041982dSJonathan Peyton +-----+
10143041982dSJonathan Peyton | h, t|----- acquire, release loopback ---+
10153041982dSJonathan Peyton +-----+ |
10163041982dSJonathan Peyton ^ |
10173041982dSJonathan Peyton | |
10183041982dSJonathan Peyton +------------------------------------+
10195e8470afSJim Cownie */
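/* Editor's note: a concrete interleaving of the transitions above (an
   illustrative sketch only; queue ids are gtid+1, so threads T0 and T1
   appear as 1 and 2):
     T0 acquires:            (0,0)  -> (-1,0)   T0 holds, queue empty
     T1 acquires and blocks: (-1,0) -> (2,2)    T1 is both head and tail
     T0 releases:            (2,2)  -> (-1,0)   T1 is dequeued and now holds
     T1 releases:            (-1,0) -> (0,0)    lock free again */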
10205e8470afSJim Cownie
10215e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
10225e8470afSJim Cownie
10235e8470afSJim Cownie /* Stuff for circular trace buffer */
10245e8470afSJim Cownie #define TRACE_BUF_ELE 1024
10253041982dSJonathan Peyton static char traces[TRACE_BUF_ELE][128] = {0};
10265e8470afSJim Cownie static int tc = 0;
10273041982dSJonathan Peyton #define TRACE_LOCK(X, Y) \
10283041982dSJonathan Peyton KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
10293041982dSJonathan Peyton #define TRACE_LOCK_T(X, Y, Z) \
10303041982dSJonathan Peyton KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
10313041982dSJonathan Peyton #define TRACE_LOCK_HT(X, Y, Z, Q) \
10323041982dSJonathan Peyton KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
10333041982dSJonathan Peyton Z, Q);
10345e8470afSJim Cownie
10353041982dSJonathan Peyton static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
10363041982dSJonathan Peyton kmp_queuing_lock_t *lck, kmp_int32 head_id,
10373041982dSJonathan Peyton kmp_int32 tail_id) {
10385e8470afSJim Cownie kmp_int32 t, i;
10395e8470afSJim Cownie
10405e8470afSJim Cownie __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
10415e8470afSJim Cownie
10425e8470afSJim Cownie i = tc % TRACE_BUF_ELE;
10435e8470afSJim Cownie __kmp_printf_no_lock("%s\n", traces[i]);
10445e8470afSJim Cownie i = (i + 1) % TRACE_BUF_ELE;
10455e8470afSJim Cownie while (i != (tc % TRACE_BUF_ELE)) {
10465e8470afSJim Cownie __kmp_printf_no_lock("%s", traces[i]);
10475e8470afSJim Cownie i = (i + 1) % TRACE_BUF_ELE;
10485e8470afSJim Cownie }
10495e8470afSJim Cownie __kmp_printf_no_lock("\n");
10505e8470afSJim Cownie
10513041982dSJonathan Peyton __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
10523041982dSJonathan Peyton "next_wait:%d, head_id:%d, tail_id:%d\n",
10533041982dSJonathan Peyton gtid + 1, this_thr->th.th_spin_here,
10543041982dSJonathan Peyton this_thr->th.th_next_waiting, head_id, tail_id);
10555e8470afSJim Cownie
10565e8470afSJim Cownie __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
10575e8470afSJim Cownie
10585e8470afSJim Cownie if (lck->lk.head_id >= 1) {
10595e8470afSJim Cownie t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
10605e8470afSJim Cownie while (t > 0) {
10615e8470afSJim Cownie __kmp_printf_no_lock("-> %d ", t);
10625e8470afSJim Cownie t = __kmp_threads[t - 1]->th.th_next_waiting;
10635e8470afSJim Cownie }
10645e8470afSJim Cownie }
10655e8470afSJim Cownie __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
10665e8470afSJim Cownie __kmp_printf_no_lock("\n\n");
10675e8470afSJim Cownie }
10685e8470afSJim Cownie
10695e8470afSJim Cownie #endif /* DEBUG_QUEUING_LOCKS */
10705e8470afSJim Cownie
10713041982dSJonathan Peyton static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
10725e8470afSJim Cownie return TCR_4(lck->lk.owner_id) - 1;
10735e8470afSJim Cownie }
10745e8470afSJim Cownie
10753041982dSJonathan Peyton static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
10765e8470afSJim Cownie return lck->lk.depth_locked != -1;
10775e8470afSJim Cownie }
10785e8470afSJim Cownie
10795e8470afSJim Cownie /* Acquire a lock using the queuing lock implementation */
10805e8470afSJim Cownie template <bool takeTime>
10813041982dSJonathan Peyton /* [TLW] The unused template above is left behind because of what BEB believes
10823041982dSJonathan Peyton is a potential compiler problem with __forceinline. */
10830e6d4577SJonathan Peyton __forceinline static int
10845e8470afSJim Cownie __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
10853041982dSJonathan Peyton kmp_int32 gtid) {
1086414544c9SEd Maste kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
10875e8470afSJim Cownie volatile kmp_int32 *head_id_p = &lck->lk.head_id;
10885e8470afSJim Cownie volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
10895e8470afSJim Cownie volatile kmp_uint32 *spin_here_p;
10905e8470afSJim Cownie
1091d7d088f8SAndrey Churbanov #if OMPT_SUPPORT
10920e0d6cddSJoachim Protze ompt_state_t prev_state = ompt_state_undefined;
1093d7d088f8SAndrey Churbanov #endif
1094d7d088f8SAndrey Churbanov
10953041982dSJonathan Peyton KA_TRACE(1000,
10963041982dSJonathan Peyton ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
10975e8470afSJim Cownie
10985e8470afSJim Cownie KMP_FSYNC_PREPARE(lck);
10995e8470afSJim Cownie KMP_DEBUG_ASSERT(this_thr != NULL);
11005e8470afSJim Cownie spin_here_p = &this_thr->th.th_spin_here;
11015e8470afSJim Cownie
11025e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
11035e8470afSJim Cownie TRACE_LOCK(gtid + 1, "acq ent");
11045e8470afSJim Cownie if (*spin_here_p)
11055e8470afSJim Cownie __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
11065e8470afSJim Cownie if (this_thr->th.th_next_waiting != 0)
11075e8470afSJim Cownie __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
11085e8470afSJim Cownie #endif
11095e8470afSJim Cownie KMP_DEBUG_ASSERT(!*spin_here_p);
11105e8470afSJim Cownie KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
11115e8470afSJim Cownie
11123041982dSJonathan Peyton /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
11133041982dSJonathan Peyton head_id_p that may follow, not just in execution order, but also in
11143041982dSJonathan Peyton visibility order. This way, when a releasing thread observes the changes to
11153041982dSJonathan Peyton the queue by this thread, it can rightly assume that spin_here_p has
11163041982dSJonathan Peyton already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
11173041982dSJonathan Peyton not premature. If the releasing thread sets spin_here_p to FALSE before
11183041982dSJonathan Peyton this thread sets it to TRUE, this thread will hang. */
11195e8470afSJim Cownie *spin_here_p = TRUE; /* before enqueuing to prevent race */
11205e8470afSJim Cownie
11215e8470afSJim Cownie while (1) {
11225e8470afSJim Cownie kmp_int32 enqueued;
11235e8470afSJim Cownie kmp_int32 head;
11245e8470afSJim Cownie kmp_int32 tail;
11255e8470afSJim Cownie
11265e8470afSJim Cownie head = *head_id_p;
11275e8470afSJim Cownie
11285e8470afSJim Cownie switch (head) {
11295e8470afSJim Cownie
11303041982dSJonathan Peyton case -1: {
11315e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
11325e8470afSJim Cownie tail = *tail_id_p;
11335e8470afSJim Cownie TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
11345e8470afSJim Cownie #endif
11353041982dSJonathan Peyton tail = 0; /* to make sure next link asynchronously read is not set
11363041982dSJonathan Peyton accidentally; this assignment prevents us from entering the
11373041982dSJonathan Peyton if ( t > 0 ) condition in the enqueued case below, which is not
11383041982dSJonathan Peyton necessary for this state transition */
11395e8470afSJim Cownie
11405e8470afSJim Cownie /* try (-1,0)->(tid,tid) */
11415e8470afSJim Cownie enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
11425e8470afSJim Cownie KMP_PACK_64(-1, 0),
11435e8470afSJim Cownie KMP_PACK_64(gtid + 1, gtid + 1));
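      /* Editor's note: this 8-byte CAS updates the tail_id/head_id pair as a
         single unit (KMP_PACK_64(-1, 0) is the packed "held, queue empty"
         state), which relies on the two 4-byte fields being adjacent and
         8-byte aligned in kmp_base_queuing_lock. */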
11445e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
11453041982dSJonathan Peyton if (enqueued)
11463041982dSJonathan Peyton TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
11475e8470afSJim Cownie #endif
11483041982dSJonathan Peyton } break;
11495e8470afSJim Cownie
11503041982dSJonathan Peyton default: {
11515e8470afSJim Cownie tail = *tail_id_p;
11525e8470afSJim Cownie KMP_DEBUG_ASSERT(tail != gtid + 1);
11535e8470afSJim Cownie
11545e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
11555e8470afSJim Cownie TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
11565e8470afSJim Cownie #endif
11575e8470afSJim Cownie
11585e8470afSJim Cownie if (tail == 0) {
11595e8470afSJim Cownie enqueued = FALSE;
11603041982dSJonathan Peyton } else {
11615e8470afSJim Cownie /* try (h,t) or (h,h)->(h,tid) */
11625e8470afSJim Cownie enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
11635e8470afSJim Cownie
11645e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
11653041982dSJonathan Peyton if (enqueued)
11663041982dSJonathan Peyton TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
11675e8470afSJim Cownie #endif
11685e8470afSJim Cownie }
11693041982dSJonathan Peyton } break;
11705e8470afSJim Cownie
11715e8470afSJim Cownie case 0: /* empty queue */
11725e8470afSJim Cownie {
11735e8470afSJim Cownie kmp_int32 grabbed_lock;
11745e8470afSJim Cownie
11755e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
11765e8470afSJim Cownie tail = *tail_id_p;
11775e8470afSJim Cownie TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
11785e8470afSJim Cownie #endif
11795e8470afSJim Cownie /* try (0,0)->(-1,0) */
11805e8470afSJim Cownie
11813041982dSJonathan Peyton /* only legal transition out of head = 0 is head = -1 with no change to
11823041982dSJonathan Peyton * tail */
11835e8470afSJim Cownie grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
11845e8470afSJim Cownie
11855e8470afSJim Cownie if (grabbed_lock) {
11865e8470afSJim Cownie
11875e8470afSJim Cownie *spin_here_p = FALSE;
11885e8470afSJim Cownie
11893041982dSJonathan Peyton KA_TRACE(
11903041982dSJonathan Peyton 1000,
11913041982dSJonathan Peyton ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
11925e8470afSJim Cownie lck, gtid));
11935e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
11945e8470afSJim Cownie TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
11955e8470afSJim Cownie #endif
1196d7d088f8SAndrey Churbanov
1197d7d088f8SAndrey Churbanov #if OMPT_SUPPORT
11980e0d6cddSJoachim Protze if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1199d7d088f8SAndrey Churbanov /* change the state before clearing wait_id */
1200d7d088f8SAndrey Churbanov this_thr->th.ompt_thread_info.state = prev_state;
1201d7d088f8SAndrey Churbanov this_thr->th.ompt_thread_info.wait_id = 0;
1202d7d088f8SAndrey Churbanov }
1203d7d088f8SAndrey Churbanov #endif
1204d7d088f8SAndrey Churbanov
12055e8470afSJim Cownie KMP_FSYNC_ACQUIRED(lck);
12060e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
12075e8470afSJim Cownie }
12085e8470afSJim Cownie enqueued = FALSE;
12093041982dSJonathan Peyton } break;
12105e8470afSJim Cownie }
12115e8470afSJim Cownie
1212d7d088f8SAndrey Churbanov #if OMPT_SUPPORT
12130e0d6cddSJoachim Protze if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1214d7d088f8SAndrey Churbanov /* this thread will spin; set wait_id before entering wait state */
1215d7d088f8SAndrey Churbanov prev_state = this_thr->th.ompt_thread_info.state;
1216d7d088f8SAndrey Churbanov this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
12170e0d6cddSJoachim Protze this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1218d7d088f8SAndrey Churbanov }
1219d7d088f8SAndrey Churbanov #endif
1220d7d088f8SAndrey Churbanov
12215e8470afSJim Cownie if (enqueued) {
12225e8470afSJim Cownie if (tail > 0) {
12235e8470afSJim Cownie kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
12245e8470afSJim Cownie KMP_ASSERT(tail_thr != NULL);
12255e8470afSJim Cownie tail_thr->th.th_next_waiting = gtid + 1;
12265e8470afSJim Cownie /* corresponding wait for this write in release code */
12275e8470afSJim Cownie }
12283041982dSJonathan Peyton KA_TRACE(1000,
12293041982dSJonathan Peyton ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
12303041982dSJonathan Peyton lck, gtid));
12315e8470afSJim Cownie
12325e8470afSJim Cownie KMP_MB();
1233e47d32f1SJonathan Peyton // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1234e47d32f1SJonathan Peyton KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1235236ac68fSHenry Kao // Synchronize writes to both runtime thread structures
1236236ac68fSHenry Kao // and writes in user code.
1237236ac68fSHenry Kao KMP_MB();
12385e8470afSJim Cownie
12395e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
12405e8470afSJim Cownie TRACE_LOCK(gtid + 1, "acq spin");
12415e8470afSJim Cownie
12425e8470afSJim Cownie if (this_thr->th.th_next_waiting != 0)
12435e8470afSJim Cownie __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
12445e8470afSJim Cownie #endif
12455e8470afSJim Cownie KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
12463041982dSJonathan Peyton KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
12473041982dSJonathan Peyton "waiting on queue\n",
12485e8470afSJim Cownie lck, gtid));
12495e8470afSJim Cownie
12505e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
12515e8470afSJim Cownie TRACE_LOCK(gtid + 1, "acq exit 2");
12525e8470afSJim Cownie #endif
1253d7d088f8SAndrey Churbanov
1254d7d088f8SAndrey Churbanov #if OMPT_SUPPORT
1255d7d088f8SAndrey Churbanov /* change the state before clearing wait_id */
1256d7d088f8SAndrey Churbanov this_thr->th.ompt_thread_info.state = prev_state;
1257d7d088f8SAndrey Churbanov this_thr->th.ompt_thread_info.wait_id = 0;
1258d7d088f8SAndrey Churbanov #endif
1259d7d088f8SAndrey Churbanov
12605e8470afSJim Cownie /* got lock, we were dequeued by the thread that released lock */
12610e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_FIRST;
12625e8470afSJim Cownie }
12635e8470afSJim Cownie
12645e8470afSJim Cownie /* Yield if number of threads > number of logical processors */
12655e8470afSJim Cownie /* ToDo: Not sure why this should only be in oversubscription case,
12665e8470afSJim Cownie maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1267e47d32f1SJonathan Peyton KMP_YIELD_OVERSUB();
1268e47d32f1SJonathan Peyton
12695e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
12705e8470afSJim Cownie TRACE_LOCK(gtid + 1, "acq retry");
12715e8470afSJim Cownie #endif
12725e8470afSJim Cownie }
12735e8470afSJim Cownie KMP_ASSERT2(0, "should not get here");
12740e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_FIRST;
12755e8470afSJim Cownie }
12765e8470afSJim Cownie
12773041982dSJonathan Peyton int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
12785e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
12795e8470afSJim Cownie
128050fed047SJonas Hahnfeld int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
128150fed047SJonas Hahnfeld return retval;
12825e8470afSJim Cownie }
12835e8470afSJim Cownie
12843041982dSJonathan Peyton static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
12853041982dSJonathan Peyton kmp_int32 gtid) {
12865e8470afSJim Cownie char const *const func = "omp_set_lock";
12875e8470afSJim Cownie if (lck->lk.initialized != lck) {
12885e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
12895e8470afSJim Cownie }
12905e8470afSJim Cownie if (__kmp_is_queuing_lock_nestable(lck)) {
12915e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
12925e8470afSJim Cownie }
12935e8470afSJim Cownie if (__kmp_get_queuing_lock_owner(lck) == gtid) {
12945e8470afSJim Cownie KMP_FATAL(LockIsAlreadyOwned, func);
12955e8470afSJim Cownie }
12965e8470afSJim Cownie
12975e8470afSJim Cownie __kmp_acquire_queuing_lock(lck, gtid);
12985e8470afSJim Cownie
12995e8470afSJim Cownie lck->lk.owner_id = gtid + 1;
13000e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_FIRST;
13015e8470afSJim Cownie }
13025e8470afSJim Cownie
13033041982dSJonathan Peyton int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
13045e8470afSJim Cownie volatile kmp_int32 *head_id_p = &lck->lk.head_id;
13055e8470afSJim Cownie kmp_int32 head;
13065e8470afSJim Cownie #ifdef KMP_DEBUG
13075e8470afSJim Cownie kmp_info_t *this_thr;
13085e8470afSJim Cownie #endif
13095e8470afSJim Cownie
13105e8470afSJim Cownie KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
13115e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
13125e8470afSJim Cownie #ifdef KMP_DEBUG
13135e8470afSJim Cownie this_thr = __kmp_thread_from_gtid(gtid);
13145e8470afSJim Cownie KMP_DEBUG_ASSERT(this_thr != NULL);
13155e8470afSJim Cownie KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
13165e8470afSJim Cownie #endif
13175e8470afSJim Cownie
13185e8470afSJim Cownie head = *head_id_p;
13195e8470afSJim Cownie
13205e8470afSJim Cownie if (head == 0) { /* nobody on queue, nobody holding */
13215e8470afSJim Cownie /* try (0,0)->(-1,0) */
13225e8470afSJim Cownie if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
13233041982dSJonathan Peyton KA_TRACE(1000,
13243041982dSJonathan Peyton ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
13255e8470afSJim Cownie KMP_FSYNC_ACQUIRED(lck);
13265e8470afSJim Cownie return TRUE;
13275e8470afSJim Cownie }
13285e8470afSJim Cownie }
13295e8470afSJim Cownie
13303041982dSJonathan Peyton KA_TRACE(1000,
13313041982dSJonathan Peyton ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
13325e8470afSJim Cownie return FALSE;
13335e8470afSJim Cownie }
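/* Editor's note: unlike the acquire path, the test above never enqueues the
   caller: it succeeds only on the uncontended (0,0) -> (-1,0) transition and
   otherwise returns FALSE immediately, giving the try-lock semantics that
   omp_test_lock() requires. */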
13345e8470afSJim Cownie
13353041982dSJonathan Peyton static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
13363041982dSJonathan Peyton kmp_int32 gtid) {
13375e8470afSJim Cownie char const *const func = "omp_test_lock";
13385e8470afSJim Cownie if (lck->lk.initialized != lck) {
13395e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
13405e8470afSJim Cownie }
13415e8470afSJim Cownie if (__kmp_is_queuing_lock_nestable(lck)) {
13425e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
13435e8470afSJim Cownie }
13445e8470afSJim Cownie
13455e8470afSJim Cownie int retval = __kmp_test_queuing_lock(lck, gtid);
13465e8470afSJim Cownie
13474cc4bb4cSJim Cownie if (retval) {
13485e8470afSJim Cownie lck->lk.owner_id = gtid + 1;
13495e8470afSJim Cownie }
13505e8470afSJim Cownie return retval;
13515e8470afSJim Cownie }
13525e8470afSJim Cownie
13533041982dSJonathan Peyton int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
13545e8470afSJim Cownie volatile kmp_int32 *head_id_p = &lck->lk.head_id;
13555e8470afSJim Cownie volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
13565e8470afSJim Cownie
13573041982dSJonathan Peyton KA_TRACE(1000,
13583041982dSJonathan Peyton ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
13595e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
13608b81524cSAndreyChurbanov #if KMP_DEBUG || DEBUG_QUEUING_LOCKS
13618b81524cSAndreyChurbanov kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
13628b81524cSAndreyChurbanov #endif
13635e8470afSJim Cownie KMP_DEBUG_ASSERT(this_thr != NULL);
13645e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
13655e8470afSJim Cownie TRACE_LOCK(gtid + 1, "rel ent");
13665e8470afSJim Cownie
13675e8470afSJim Cownie if (this_thr->th.th_spin_here)
13685e8470afSJim Cownie __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
13695e8470afSJim Cownie if (this_thr->th.th_next_waiting != 0)
13705e8470afSJim Cownie __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
13715e8470afSJim Cownie #endif
13725e8470afSJim Cownie KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
13735e8470afSJim Cownie KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
13745e8470afSJim Cownie
13755e8470afSJim Cownie KMP_FSYNC_RELEASING(lck);
13765e8470afSJim Cownie
13775e8470afSJim Cownie while (1) {
13785e8470afSJim Cownie kmp_int32 dequeued;
13795e8470afSJim Cownie kmp_int32 head;
13805e8470afSJim Cownie kmp_int32 tail;
13815e8470afSJim Cownie
13825e8470afSJim Cownie head = *head_id_p;
13835e8470afSJim Cownie
13845e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
13855e8470afSJim Cownie tail = *tail_id_p;
13865e8470afSJim Cownie TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
13873041982dSJonathan Peyton if (head == 0)
13883041982dSJonathan Peyton __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
13895e8470afSJim Cownie #endif
13903041982dSJonathan Peyton KMP_DEBUG_ASSERT(head !=
13913041982dSJonathan Peyton 0); /* holding the lock, head must be -1 or queue head */
13925e8470afSJim Cownie
13935e8470afSJim Cownie if (head == -1) { /* nobody on queue */
13945e8470afSJim Cownie /* try (-1,0)->(0,0) */
13955e8470afSJim Cownie if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
13963041982dSJonathan Peyton KA_TRACE(
13973041982dSJonathan Peyton 1000,
13983041982dSJonathan Peyton ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
13995e8470afSJim Cownie lck, gtid));
14005e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
14015e8470afSJim Cownie TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
14025e8470afSJim Cownie #endif
1403d7d088f8SAndrey Churbanov
1404d7d088f8SAndrey Churbanov #if OMPT_SUPPORT
1405d7d088f8SAndrey Churbanov /* nothing to do - no other thread is trying to shift blame */
1406d7d088f8SAndrey Churbanov #endif
14078d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
14085e8470afSJim Cownie }
14095e8470afSJim Cownie dequeued = FALSE;
14103041982dSJonathan Peyton } else {
141186c30782SJonas Hahnfeld KMP_MB();
14125e8470afSJim Cownie tail = *tail_id_p;
14135e8470afSJim Cownie if (head == tail) { /* only one thread on the queue */
14145e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
14153041982dSJonathan Peyton if (head <= 0)
14163041982dSJonathan Peyton __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
14175e8470afSJim Cownie #endif
14185e8470afSJim Cownie KMP_DEBUG_ASSERT(head > 0);
14195e8470afSJim Cownie
14205e8470afSJim Cownie /* try (h,h)->(-1,0) */
1421c47afcd9SAndrey Churbanov dequeued = KMP_COMPARE_AND_STORE_REL64(
14225ba90c79SAndrey Churbanov RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
14235ba90c79SAndrey Churbanov KMP_PACK_64(-1, 0));
14245e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
14255e8470afSJim Cownie TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
14265e8470afSJim Cownie #endif
14275e8470afSJim Cownie
14283041982dSJonathan Peyton } else {
14295e8470afSJim Cownie volatile kmp_int32 *waiting_id_p;
14305e8470afSJim Cownie kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
14315e8470afSJim Cownie KMP_DEBUG_ASSERT(head_thr != NULL);
14325e8470afSJim Cownie waiting_id_p = &head_thr->th.th_next_waiting;
14335e8470afSJim Cownie
14345e8470afSJim Cownie /* Does this require synchronous reads? */
14355e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
14363041982dSJonathan Peyton if (head <= 0 || tail <= 0)
14373041982dSJonathan Peyton __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
14385e8470afSJim Cownie #endif
14395e8470afSJim Cownie KMP_DEBUG_ASSERT(head > 0 && tail > 0);
14405e8470afSJim Cownie
14415e8470afSJim Cownie /* try (h,t)->(h',t) or (t,t) */
14425e8470afSJim Cownie KMP_MB();
14433041982dSJonathan Peyton /* make sure enqueuing thread has time to update next waiting thread
14443041982dSJonathan Peyton * field */
1445e47d32f1SJonathan Peyton *head_id_p =
1446e47d32f1SJonathan Peyton KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
14475e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
14485e8470afSJim Cownie TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
14495e8470afSJim Cownie #endif
14505e8470afSJim Cownie dequeued = TRUE;
14515e8470afSJim Cownie }
14525e8470afSJim Cownie }
14535e8470afSJim Cownie
14545e8470afSJim Cownie if (dequeued) {
14555e8470afSJim Cownie kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
14565e8470afSJim Cownie KMP_DEBUG_ASSERT(head_thr != NULL);
14575e8470afSJim Cownie
14585e8470afSJim Cownie /* Does this require synchronous reads? */
14595e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
14603041982dSJonathan Peyton if (head <= 0 || tail <= 0)
14613041982dSJonathan Peyton __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
14625e8470afSJim Cownie #endif
14635e8470afSJim Cownie KMP_DEBUG_ASSERT(head > 0 && tail > 0);
14645e8470afSJim Cownie
14653041982dSJonathan Peyton /* For clean code only. The thread is not actually released until
14663041982dSJonathan Peyton    th_spin_here is reset below, which prevents a race with the acquire code. */
14675e8470afSJim Cownie head_thr->th.th_next_waiting = 0;
14685e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
14695e8470afSJim Cownie TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
14705e8470afSJim Cownie #endif
14715e8470afSJim Cownie
14725e8470afSJim Cownie KMP_MB();
14735e8470afSJim Cownie /* reset spin value */
14745e8470afSJim Cownie head_thr->th.th_spin_here = FALSE;
14755e8470afSJim Cownie
14763041982dSJonathan Peyton KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
14773041982dSJonathan Peyton "dequeuing\n",
14785e8470afSJim Cownie lck, gtid));
14795e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
14805e8470afSJim Cownie TRACE_LOCK(gtid + 1, "rel exit 2");
14815e8470afSJim Cownie #endif
14828d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
14835e8470afSJim Cownie }
14843041982dSJonathan Peyton /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
14853041982dSJonathan Peyton threads */
14865e8470afSJim Cownie
14875e8470afSJim Cownie #ifdef DEBUG_QUEUING_LOCKS
14885e8470afSJim Cownie TRACE_LOCK(gtid + 1, "rel retry");
14895e8470afSJim Cownie #endif
14905e8470afSJim Cownie
14915e8470afSJim Cownie } /* while */
14925e8470afSJim Cownie KMP_ASSERT2(0, "should not get here");
14938d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
14945e8470afSJim Cownie }
14955e8470afSJim Cownie
14963041982dSJonathan Peyton static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
14973041982dSJonathan Peyton kmp_int32 gtid) {
14985e8470afSJim Cownie char const *const func = "omp_unset_lock";
14995e8470afSJim Cownie KMP_MB(); /* in case another processor initialized lock */
15005e8470afSJim Cownie if (lck->lk.initialized != lck) {
15015e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
15025e8470afSJim Cownie }
15035e8470afSJim Cownie if (__kmp_is_queuing_lock_nestable(lck)) {
15045e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
15055e8470afSJim Cownie }
15065e8470afSJim Cownie if (__kmp_get_queuing_lock_owner(lck) == -1) {
15075e8470afSJim Cownie KMP_FATAL(LockUnsettingFree, func);
15085e8470afSJim Cownie }
15095e8470afSJim Cownie if (__kmp_get_queuing_lock_owner(lck) != gtid) {
15105e8470afSJim Cownie KMP_FATAL(LockUnsettingSetByAnother, func);
15115e8470afSJim Cownie }
15125e8470afSJim Cownie lck->lk.owner_id = 0;
15138d09facaSAndrey Churbanov return __kmp_release_queuing_lock(lck, gtid);
15145e8470afSJim Cownie }
15155e8470afSJim Cownie
15163041982dSJonathan Peyton void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
15175e8470afSJim Cownie lck->lk.location = NULL;
15185e8470afSJim Cownie lck->lk.head_id = 0;
15195e8470afSJim Cownie lck->lk.tail_id = 0;
15205e8470afSJim Cownie lck->lk.next_ticket = 0;
15215e8470afSJim Cownie lck->lk.now_serving = 0;
15225e8470afSJim Cownie lck->lk.owner_id = 0; // no thread owns the lock.
15235e8470afSJim Cownie lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
15245e8470afSJim Cownie lck->lk.initialized = lck;
15255e8470afSJim Cownie
15265e8470afSJim Cownie KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
15275e8470afSJim Cownie }
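/* Illustrative lifecycle of the queuing-lock entry points (a sketch only;
   real callers obtain gtid from the runtime, e.g. via __kmp_entry_gtid()):

     kmp_queuing_lock_t lck;
     __kmp_init_queuing_lock(&lck);
     __kmp_acquire_queuing_lock(&lck, gtid);
     // ... critical section ...
     __kmp_release_queuing_lock(&lck, gtid);
     __kmp_destroy_queuing_lock(&lck);
*/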
15285e8470afSJim Cownie
15293041982dSJonathan Peyton void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
15305e8470afSJim Cownie lck->lk.initialized = NULL;
15315e8470afSJim Cownie lck->lk.location = NULL;
15325e8470afSJim Cownie lck->lk.head_id = 0;
15335e8470afSJim Cownie lck->lk.tail_id = 0;
15345e8470afSJim Cownie lck->lk.next_ticket = 0;
15355e8470afSJim Cownie lck->lk.now_serving = 0;
15365e8470afSJim Cownie lck->lk.owner_id = 0;
15375e8470afSJim Cownie lck->lk.depth_locked = -1;
15385e8470afSJim Cownie }
15395e8470afSJim Cownie
15403041982dSJonathan Peyton static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
15415e8470afSJim Cownie char const *const func = "omp_destroy_lock";
15425e8470afSJim Cownie if (lck->lk.initialized != lck) {
15435e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
15445e8470afSJim Cownie }
15455e8470afSJim Cownie if (__kmp_is_queuing_lock_nestable(lck)) {
15465e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
15475e8470afSJim Cownie }
15485e8470afSJim Cownie if (__kmp_get_queuing_lock_owner(lck) != -1) {
15495e8470afSJim Cownie KMP_FATAL(LockStillOwned, func);
15505e8470afSJim Cownie }
15515e8470afSJim Cownie __kmp_destroy_queuing_lock(lck);
15525e8470afSJim Cownie }
15535e8470afSJim Cownie
15545e8470afSJim Cownie // nested queuing locks
15555e8470afSJim Cownie
15563041982dSJonathan Peyton int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
15575e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
15585e8470afSJim Cownie
15595e8470afSJim Cownie if (__kmp_get_queuing_lock_owner(lck) == gtid) {
15605e8470afSJim Cownie lck->lk.depth_locked += 1;
15610e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_NEXT;
15623041982dSJonathan Peyton } else {
15635e8470afSJim Cownie __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
15645e8470afSJim Cownie KMP_MB();
15655e8470afSJim Cownie lck->lk.depth_locked = 1;
15665e8470afSJim Cownie KMP_MB();
15675e8470afSJim Cownie lck->lk.owner_id = gtid + 1;
15680e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_FIRST;
15695e8470afSJim Cownie }
15705e8470afSJim Cownie }
15715e8470afSJim Cownie
15720e6d4577SJonathan Peyton static int
15733041982dSJonathan Peyton __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
15743041982dSJonathan Peyton kmp_int32 gtid) {
15755e8470afSJim Cownie char const *const func = "omp_set_nest_lock";
15765e8470afSJim Cownie if (lck->lk.initialized != lck) {
15775e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
15785e8470afSJim Cownie }
15795e8470afSJim Cownie if (!__kmp_is_queuing_lock_nestable(lck)) {
15805e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
15815e8470afSJim Cownie }
15820e6d4577SJonathan Peyton return __kmp_acquire_nested_queuing_lock(lck, gtid);
15835e8470afSJim Cownie }
15845e8470afSJim Cownie
15853041982dSJonathan Peyton int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
15865e8470afSJim Cownie int retval;
15875e8470afSJim Cownie
15885e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
15895e8470afSJim Cownie
15905e8470afSJim Cownie if (__kmp_get_queuing_lock_owner(lck) == gtid) {
15915e8470afSJim Cownie retval = ++lck->lk.depth_locked;
15923041982dSJonathan Peyton } else if (!__kmp_test_queuing_lock(lck, gtid)) {
15935e8470afSJim Cownie retval = 0;
15943041982dSJonathan Peyton } else {
15955e8470afSJim Cownie KMP_MB();
15965e8470afSJim Cownie retval = lck->lk.depth_locked = 1;
15975e8470afSJim Cownie KMP_MB();
15985e8470afSJim Cownie lck->lk.owner_id = gtid + 1;
15995e8470afSJim Cownie }
16005e8470afSJim Cownie return retval;
16015e8470afSJim Cownie }
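/* Editor's note: on success the function above returns the new nesting depth
   (1 for a first acquisition, 2 and up for reacquisitions by the owner) and 0
   on failure, matching the return convention of omp_test_nest_lock(). */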
16025e8470afSJim Cownie
16033041982dSJonathan Peyton static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
16043041982dSJonathan Peyton kmp_int32 gtid) {
16055e8470afSJim Cownie char const *const func = "omp_test_nest_lock";
16065e8470afSJim Cownie if (lck->lk.initialized != lck) {
16075e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
16085e8470afSJim Cownie }
16095e8470afSJim Cownie if (!__kmp_is_queuing_lock_nestable(lck)) {
16105e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
16115e8470afSJim Cownie }
16125e8470afSJim Cownie return __kmp_test_nested_queuing_lock(lck, gtid);
16135e8470afSJim Cownie }
16145e8470afSJim Cownie
16153041982dSJonathan Peyton int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
16165e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
16175e8470afSJim Cownie
16185e8470afSJim Cownie KMP_MB();
16195e8470afSJim Cownie if (--(lck->lk.depth_locked) == 0) {
16205e8470afSJim Cownie KMP_MB();
16215e8470afSJim Cownie lck->lk.owner_id = 0;
16225e8470afSJim Cownie __kmp_release_queuing_lock(lck, gtid);
16238d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
16245e8470afSJim Cownie }
16258d09facaSAndrey Churbanov return KMP_LOCK_STILL_HELD;
16265e8470afSJim Cownie }
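/* Sketch of the nesting protocol (illustrative): after two acquires by the
   same thread, depth_locked == 2; the first release returns
   KMP_LOCK_STILL_HELD (depth now 1), and only the second returns
   KMP_LOCK_RELEASED and actually unlocks. */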
16275e8470afSJim Cownie
16288d09facaSAndrey Churbanov static int
16293041982dSJonathan Peyton __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
16303041982dSJonathan Peyton kmp_int32 gtid) {
16315e8470afSJim Cownie char const *const func = "omp_unset_nest_lock";
16325e8470afSJim Cownie KMP_MB(); /* in case another processor initialized lock */
16335e8470afSJim Cownie if (lck->lk.initialized != lck) {
16345e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
16355e8470afSJim Cownie }
16365e8470afSJim Cownie if (!__kmp_is_queuing_lock_nestable(lck)) {
16375e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
16385e8470afSJim Cownie }
16395e8470afSJim Cownie if (__kmp_get_queuing_lock_owner(lck) == -1) {
16405e8470afSJim Cownie KMP_FATAL(LockUnsettingFree, func);
16415e8470afSJim Cownie }
16425e8470afSJim Cownie if (__kmp_get_queuing_lock_owner(lck) != gtid) {
16435e8470afSJim Cownie KMP_FATAL(LockUnsettingSetByAnother, func);
16445e8470afSJim Cownie }
16458d09facaSAndrey Churbanov return __kmp_release_nested_queuing_lock(lck, gtid);
16465e8470afSJim Cownie }
16475e8470afSJim Cownie
16483041982dSJonathan Peyton void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
16495e8470afSJim Cownie __kmp_init_queuing_lock(lck);
16505e8470afSJim Cownie lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
16515e8470afSJim Cownie }
16525e8470afSJim Cownie
16533041982dSJonathan Peyton void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
16545e8470afSJim Cownie __kmp_destroy_queuing_lock(lck);
16555e8470afSJim Cownie lck->lk.depth_locked = 0;
16565e8470afSJim Cownie }
16575e8470afSJim Cownie
16585e8470afSJim Cownie static void
16593041982dSJonathan Peyton __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
16605e8470afSJim Cownie char const *const func = "omp_destroy_nest_lock";
16615e8470afSJim Cownie if (lck->lk.initialized != lck) {
16625e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
16635e8470afSJim Cownie }
16645e8470afSJim Cownie if (!__kmp_is_queuing_lock_nestable(lck)) {
16655e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
16665e8470afSJim Cownie }
16675e8470afSJim Cownie if (__kmp_get_queuing_lock_owner(lck) != -1) {
16685e8470afSJim Cownie KMP_FATAL(LockStillOwned, func);
16695e8470afSJim Cownie }
16705e8470afSJim Cownie __kmp_destroy_nested_queuing_lock(lck);
16715e8470afSJim Cownie }
16725e8470afSJim Cownie
16735e8470afSJim Cownie // access functions to fields which don't exist for all lock kinds.
16745e8470afSJim Cownie
16753041982dSJonathan Peyton static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
16765e8470afSJim Cownie return lck->lk.location;
16775e8470afSJim Cownie }
16785e8470afSJim Cownie
16793041982dSJonathan Peyton static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
16803041982dSJonathan Peyton const ident_t *loc) {
16815e8470afSJim Cownie lck->lk.location = loc;
16825e8470afSJim Cownie }
16835e8470afSJim Cownie
16843041982dSJonathan Peyton static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
16855e8470afSJim Cownie return lck->lk.flags;
16865e8470afSJim Cownie }
16875e8470afSJim Cownie
16883041982dSJonathan Peyton static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
16893041982dSJonathan Peyton kmp_lock_flags_t flags) {
16905e8470afSJim Cownie lck->lk.flags = flags;
16915e8470afSJim Cownie }
16925e8470afSJim Cownie
16935e8470afSJim Cownie #if KMP_USE_ADAPTIVE_LOCKS
16945e8470afSJim Cownie
16953041982dSJonathan Peyton /* RTM Adaptive locks */
16965e8470afSJim Cownie
1697e0665a90STerry Wilmarth #if KMP_HAVE_RTM_INTRINSICS
1698a03533d3SJonathan Peyton #include <immintrin.h>
1699a03533d3SJonathan Peyton #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1700a03533d3SJonathan Peyton
1701a03533d3SJonathan Peyton #else
17025e8470afSJim Cownie
17035e8470afSJim Cownie // Values from the status register after failed speculation.
17045e8470afSJim Cownie #define _XBEGIN_STARTED (~0u)
17055e8470afSJim Cownie #define _XABORT_EXPLICIT (1 << 0)
17065e8470afSJim Cownie #define _XABORT_RETRY (1 << 1)
17075e8470afSJim Cownie #define _XABORT_CONFLICT (1 << 2)
17085e8470afSJim Cownie #define _XABORT_CAPACITY (1 << 3)
17095e8470afSJim Cownie #define _XABORT_DEBUG (1 << 4)
17105e8470afSJim Cownie #define _XABORT_NESTED (1 << 5)
17115e8470afSJim Cownie #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
17125e8470afSJim Cownie
17135e8470afSJim Cownie // Aborts for which it's worth trying again immediately
17145e8470afSJim Cownie #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
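/* Example (illustrative): a transaction aborted via _xabort(0x01) yields a
   status with _XABORT_EXPLICIT set and _XABORT_CODE(status) == 0x01; since
   _XABORT_EXPLICIT is part of SOFT_ABORT_MASK, such aborts are treated as
   worth retrying. */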
17155e8470afSJim Cownie
17165e8470afSJim Cownie #define STRINGIZE_INTERNAL(arg) #arg
17175e8470afSJim Cownie #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
17185e8470afSJim Cownie
17195e8470afSJim Cownie // Access to RTM instructions
17203041982dSJonathan Peyton /* A version of XBegin which returns -1 on speculation, and the value of EAX on
17213041982dSJonathan Peyton an abort. This is the same definition as the compiler intrinsic that will be
17223041982dSJonathan Peyton supported at some point. */
17233041982dSJonathan Peyton static __inline int _xbegin() {
17245e8470afSJim Cownie int res = -1;
17255e8470afSJim Cownie
17265e8470afSJim Cownie #if KMP_OS_WINDOWS
17275e8470afSJim Cownie #if KMP_ARCH_X86_64
17285e8470afSJim Cownie _asm {
17295e8470afSJim Cownie _emit 0xC7
17305e8470afSJim Cownie _emit 0xF8
17315e8470afSJim Cownie _emit 2
17325e8470afSJim Cownie _emit 0
17335e8470afSJim Cownie _emit 0
17345e8470afSJim Cownie _emit 0
17355e8470afSJim Cownie jmp L2
17365e8470afSJim Cownie mov res, eax
17375e8470afSJim Cownie L2:
17385e8470afSJim Cownie }
17395e8470afSJim Cownie #else /* IA32 */
17405e8470afSJim Cownie _asm {
17415e8470afSJim Cownie _emit 0xC7
17425e8470afSJim Cownie _emit 0xF8
17435e8470afSJim Cownie _emit 2
17445e8470afSJim Cownie _emit 0
17455e8470afSJim Cownie _emit 0
17465e8470afSJim Cownie _emit 0
17475e8470afSJim Cownie jmp L2
17485e8470afSJim Cownie mov res, eax
17495e8470afSJim Cownie L2:
17505e8470afSJim Cownie }
17515e8470afSJim Cownie #endif // KMP_ARCH_X86_64
17525e8470afSJim Cownie #else
17533041982dSJonathan Peyton /* Note that %eax must be noted as killed (clobbered), because the XSR is
17543041982dSJonathan Peyton returned in %eax(%rax) on abort. Other register values are restored, so
17553041982dSJonathan Peyton don't need to be killed.
17563041982dSJonathan Peyton
17573041982dSJonathan Peyton We must also mark 'res' as an input and an output, since otherwise
17583041982dSJonathan Peyton 'res=-1' may be dropped as being dead, whereas we do need the assignment on
17593041982dSJonathan Peyton the successful (i.e., non-abort) path. */
17605e8470afSJim Cownie __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
17615e8470afSJim Cownie " .long 1f-1b-6\n"
17625e8470afSJim Cownie " jmp 2f\n"
17635e8470afSJim Cownie "1: movl %%eax,%0\n"
17645e8470afSJim Cownie "2:"
17655e8470afSJim Cownie : "+r"(res)::"memory", "%eax");
17665e8470afSJim Cownie #endif // KMP_OS_WINDOWS
17675e8470afSJim Cownie return res;
17685e8470afSJim Cownie }
17695e8470afSJim Cownie
17703041982dSJonathan Peyton /* Transaction end */
17713041982dSJonathan Peyton static __inline void _xend() {
17725e8470afSJim Cownie #if KMP_OS_WINDOWS
17735e8470afSJim Cownie __asm {
17745e8470afSJim Cownie _emit 0x0f
17755e8470afSJim Cownie _emit 0x01
17765e8470afSJim Cownie _emit 0xd5
17775e8470afSJim Cownie }
17785e8470afSJim Cownie #else
17795e8470afSJim Cownie __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
17805e8470afSJim Cownie #endif
17815e8470afSJim Cownie }
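/* Canonical use of the two primitives above (a sketch; the runtime's actual
   retry policy lives in __kmp_test_adaptive_lock_only below):

     kmp_uint32 status = _xbegin();
     if (status == _XBEGIN_STARTED) {
       // ... speculative region: memory accesses are transactional ...
       _xend(); // commit
     } else {
       // status holds the abort reason; retry softly or fall back to a
       // real lock
     }
*/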
17825e8470afSJim Cownie
17833041982dSJonathan Peyton /* This is a macro, the argument must be a single byte constant which can be
17843041982dSJonathan Peyton evaluated by the inline assembler, since it is emitted as a byte into the
17853041982dSJonathan Peyton assembly code. */
17863041982dSJonathan Peyton // clang-format off
17875e8470afSJim Cownie #if KMP_OS_WINDOWS
17883041982dSJonathan Peyton #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
17895e8470afSJim Cownie #else
17905e8470afSJim Cownie #define _xabort(ARG) \
17915e8470afSJim Cownie __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
17925e8470afSJim Cownie #endif
17933041982dSJonathan Peyton // clang-format on
1794a03533d3SJonathan Peyton #endif // KMP_HAVE_RTM_INTRINSICS
1795a03533d3SJonathan Peyton
17965e8470afSJim Cownie // Statistics are collected for testing purposes
17975e8470afSJim Cownie #if KMP_DEBUG_ADAPTIVE_LOCKS
17985e8470afSJim Cownie
17993041982dSJonathan Peyton // We accumulate speculative lock statistics when the lock is destroyed. We
18003041982dSJonathan Peyton // keep locks that haven't been destroyed in the liveLocks list so that we can
18013041982dSJonathan Peyton // grab their statistics too.
18025e8470afSJim Cownie static kmp_adaptive_lock_statistics_t destroyedStats;
18035e8470afSJim Cownie
18045e8470afSJim Cownie // To hold the list of live locks.
18054cc4bb4cSJim Cownie static kmp_adaptive_lock_info_t liveLocks;
18065e8470afSJim Cownie
18075e8470afSJim Cownie // A lock so we can safely update the list of locks.
18088692e142SJonathan Peyton static kmp_bootstrap_lock_t chain_lock =
18098692e142SJonathan Peyton KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
18105e8470afSJim Cownie
18115e8470afSJim Cownie // Initialize the list of stats.
18123041982dSJonathan Peyton void __kmp_init_speculative_stats() {
18134cc4bb4cSJim Cownie kmp_adaptive_lock_info_t *lck = &liveLocks;
18145e8470afSJim Cownie
18158692e142SJonathan Peyton memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
18168692e142SJonathan Peyton sizeof(lck->stats));
18175e8470afSJim Cownie lck->stats.next = lck;
18185e8470afSJim Cownie lck->stats.prev = lck;
18195e8470afSJim Cownie
18205e8470afSJim Cownie KMP_ASSERT(lck->stats.next->stats.prev == lck);
18215e8470afSJim Cownie KMP_ASSERT(lck->stats.prev->stats.next == lck);
18225e8470afSJim Cownie
18235e8470afSJim Cownie __kmp_init_bootstrap_lock(&chain_lock);
18245e8470afSJim Cownie }
18255e8470afSJim Cownie
18265e8470afSJim Cownie // Insert the lock into the circular list
18273041982dSJonathan Peyton static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
18285e8470afSJim Cownie __kmp_acquire_bootstrap_lock(&chain_lock);
18295e8470afSJim Cownie
18305e8470afSJim Cownie lck->stats.next = liveLocks.stats.next;
18315e8470afSJim Cownie lck->stats.prev = &liveLocks;
18325e8470afSJim Cownie
18335e8470afSJim Cownie liveLocks.stats.next = lck;
18345e8470afSJim Cownie lck->stats.next->stats.prev = lck;
18355e8470afSJim Cownie
18365e8470afSJim Cownie KMP_ASSERT(lck->stats.next->stats.prev == lck);
18375e8470afSJim Cownie KMP_ASSERT(lck->stats.prev->stats.next == lck);
18385e8470afSJim Cownie
18395e8470afSJim Cownie __kmp_release_bootstrap_lock(&chain_lock);
18405e8470afSJim Cownie }
18415e8470afSJim Cownie
18423041982dSJonathan Peyton static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
18435e8470afSJim Cownie KMP_ASSERT(lck->stats.next->stats.prev == lck);
18445e8470afSJim Cownie KMP_ASSERT(lck->stats.prev->stats.next == lck);
18455e8470afSJim Cownie
18464cc4bb4cSJim Cownie kmp_adaptive_lock_info_t *n = lck->stats.next;
18474cc4bb4cSJim Cownie kmp_adaptive_lock_info_t *p = lck->stats.prev;
18485e8470afSJim Cownie
18495e8470afSJim Cownie n->stats.prev = p;
18505e8470afSJim Cownie p->stats.next = n;
18515e8470afSJim Cownie }
18525e8470afSJim Cownie
18533041982dSJonathan Peyton static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
18548692e142SJonathan Peyton memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
18558692e142SJonathan Peyton sizeof(lck->stats));
18565e8470afSJim Cownie __kmp_remember_lock(lck);
18575e8470afSJim Cownie }
18585e8470afSJim Cownie
18593041982dSJonathan Peyton static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
18603041982dSJonathan Peyton kmp_adaptive_lock_info_t *lck) {
18615e8470afSJim Cownie kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
18625e8470afSJim Cownie
18635e8470afSJim Cownie t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
18645e8470afSJim Cownie t->successfulSpeculations += s->successfulSpeculations;
18655e8470afSJim Cownie t->hardFailedSpeculations += s->hardFailedSpeculations;
18665e8470afSJim Cownie t->softFailedSpeculations += s->softFailedSpeculations;
18675e8470afSJim Cownie t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
18685e8470afSJim Cownie t->lemmingYields += s->lemmingYields;
18695e8470afSJim Cownie }
18705e8470afSJim Cownie
18713041982dSJonathan Peyton static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
18725e8470afSJim Cownie __kmp_acquire_bootstrap_lock(&chain_lock);
18735e8470afSJim Cownie
18745e8470afSJim Cownie __kmp_add_stats(&destroyedStats, lck);
18755e8470afSJim Cownie __kmp_forget_lock(lck);
18765e8470afSJim Cownie
18775e8470afSJim Cownie __kmp_release_bootstrap_lock(&chain_lock);
18785e8470afSJim Cownie }
18795e8470afSJim Cownie
18803041982dSJonathan Peyton static float percent(kmp_uint32 count, kmp_uint32 total) {
18815e8470afSJim Cownie return (total == 0) ? 0.0 : (100.0 * count) / total;
18825e8470afSJim Cownie }
18835e8470afSJim Cownie
18843041982dSJonathan Peyton void __kmp_print_speculative_stats() {
18855e8470afSJim Cownie kmp_adaptive_lock_statistics_t total = destroyedStats;
18864cc4bb4cSJim Cownie kmp_adaptive_lock_info_t *lck;
18875e8470afSJim Cownie
18885e8470afSJim Cownie for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
18895e8470afSJim Cownie __kmp_add_stats(&total, lck);
18905e8470afSJim Cownie }
18915e8470afSJim Cownie kmp_adaptive_lock_statistics_t *t = &total;
18923041982dSJonathan Peyton kmp_uint32 totalSections =
18933041982dSJonathan Peyton t->nonSpeculativeAcquires + t->successfulSpeculations;
18943041982dSJonathan Peyton kmp_uint32 totalSpeculations = t->successfulSpeculations +
18953041982dSJonathan Peyton t->hardFailedSpeculations +
18965e8470afSJim Cownie t->softFailedSpeculations;
18978692e142SJonathan Peyton if (totalSections <= 0)
18988692e142SJonathan Peyton return;
18998692e142SJonathan Peyton
19005aafdd7bSPeyton, Jonathan L kmp_safe_raii_file_t statsFile;
19015aafdd7bSPeyton, Jonathan L if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
19025aafdd7bSPeyton, Jonathan L statsFile.set_stdout();
19035aafdd7bSPeyton, Jonathan L } else {
19045aafdd7bSPeyton, Jonathan L size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
19055aafdd7bSPeyton, Jonathan L char buffer[buffLen];
19065aafdd7bSPeyton, Jonathan L KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
19075aafdd7bSPeyton, Jonathan L (kmp_int32)getpid());
19085aafdd7bSPeyton, Jonathan L statsFile.open(buffer, "w");
19095aafdd7bSPeyton, Jonathan L }
19105e8470afSJim Cownie
19115e8470afSJim Cownie fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1912309b00a4SShilei Tian fprintf(statsFile,
1913309b00a4SShilei Tian " Lock parameters: \n"
19145e8470afSJim Cownie " max_soft_retries : %10d\n"
19155e8470afSJim Cownie " max_badness : %10d\n",
19165e8470afSJim Cownie __kmp_adaptive_backoff_params.max_soft_retries,
19175e8470afSJim Cownie __kmp_adaptive_backoff_params.max_badness);
19183041982dSJonathan Peyton fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
19193041982dSJonathan Peyton t->nonSpeculativeAcquireAttempts);
19203041982dSJonathan Peyton fprintf(statsFile, " Total critical sections : %10d\n",
19213041982dSJonathan Peyton totalSections);
19225e8470afSJim Cownie fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
19233041982dSJonathan Peyton t->successfulSpeculations,
19243041982dSJonathan Peyton percent(t->successfulSpeculations, totalSections));
19255e8470afSJim Cownie fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
19263041982dSJonathan Peyton t->nonSpeculativeAcquires,
19273041982dSJonathan Peyton percent(t->nonSpeculativeAcquires, totalSections));
19283041982dSJonathan Peyton fprintf(statsFile, " Lemming yields : %10d\n\n",
19293041982dSJonathan Peyton t->lemmingYields);
19305e8470afSJim Cownie
19313041982dSJonathan Peyton fprintf(statsFile, " Speculative acquire attempts : %10d\n",
19323041982dSJonathan Peyton totalSpeculations);
19335e8470afSJim Cownie fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
19343041982dSJonathan Peyton t->successfulSpeculations,
19353041982dSJonathan Peyton percent(t->successfulSpeculations, totalSpeculations));
19365e8470afSJim Cownie fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
19373041982dSJonathan Peyton t->softFailedSpeculations,
19383041982dSJonathan Peyton percent(t->softFailedSpeculations, totalSpeculations));
19395e8470afSJim Cownie fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
19403041982dSJonathan Peyton t->hardFailedSpeculations,
19413041982dSJonathan Peyton percent(t->hardFailedSpeculations, totalSpeculations));
19425e8470afSJim Cownie }
19435e8470afSJim Cownie
19445e8470afSJim Cownie #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
19455e8470afSJim Cownie #else
19465e8470afSJim Cownie #define KMP_INC_STAT(lck, stat)
19475e8470afSJim Cownie
19485e8470afSJim Cownie #endif // KMP_DEBUG_ADAPTIVE_LOCKS
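/* Editor's note: with KMP_DEBUG_ADAPTIVE_LOCKS off, KMP_INC_STAT expands to
   nothing, so the speculative fast paths below carry no statistics
   overhead. */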
19495e8470afSJim Cownie
19503041982dSJonathan Peyton static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
19515e8470afSJim Cownie // It is enough to check that the head_id is zero.
19525e8470afSJim Cownie // We don't also need to check the tail.
19535e8470afSJim Cownie bool res = lck->lk.head_id == 0;
19545e8470afSJim Cownie
19555e8470afSJim Cownie // We need a fence here, since we must ensure that no memory operations
19565e8470afSJim Cownie // from later in this thread float above that read.
1957*1234011bSJonathan Peyton #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
19585e8470afSJim Cownie _mm_mfence();
1959181b4bb3SJim Cownie #else
1960181b4bb3SJim Cownie __sync_synchronize();
19615e8470afSJim Cownie #endif
19625e8470afSJim Cownie
19635e8470afSJim Cownie return res;
19645e8470afSJim Cownie }
19655e8470afSJim Cownie
19665e8470afSJim Cownie // Functions for manipulating the badness
19675e8470afSJim Cownie static __inline void
19683041982dSJonathan Peyton __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
19695e8470afSJim Cownie // Reset the badness to zero so we eagerly try to speculate again
19705e8470afSJim Cownie lck->lk.adaptive.badness = 0;
19715e8470afSJim Cownie KMP_INC_STAT(lck, successfulSpeculations);
19725e8470afSJim Cownie }
19735e8470afSJim Cownie
19745e8470afSJim Cownie // Create a bit mask with one more set bit.
19753041982dSJonathan Peyton static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
19765e8470afSJim Cownie kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
19775e8470afSJim Cownie if (newBadness > lck->lk.adaptive.max_badness) {
19785e8470afSJim Cownie return;
19795e8470afSJim Cownie } else {
19805e8470afSJim Cownie lck->lk.adaptive.badness = newBadness;
19815e8470afSJim Cownie }
19825e8470afSJim Cownie }
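/* Worked example (illustrative): badness grows 0 -> 1 -> 3 -> 7 -> ... as
   speculation fails. __kmp_should_speculate below tests
   (acquire_attempts & badness) == 0, so with badness == 3 only every fourth
   acquire attempt speculates; a success resets badness to 0, re-enabling
   speculation on every attempt. */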
19835e8470afSJim Cownie
19845e8470afSJim Cownie // Check whether speculation should be attempted.
1985e0665a90STerry Wilmarth KMP_ATTRIBUTE_TARGET_RTM
19863041982dSJonathan Peyton static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
19873041982dSJonathan Peyton kmp_int32 gtid) {
19885e8470afSJim Cownie kmp_uint32 badness = lck->lk.adaptive.badness;
19895e8470afSJim Cownie kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
19905e8470afSJim Cownie int res = (attempts & badness) == 0;
19915e8470afSJim Cownie return res;
19925e8470afSJim Cownie }
19935e8470afSJim Cownie
19945e8470afSJim Cownie // Attempt to acquire only the speculative lock.
19955e8470afSJim Cownie // Does not back off to the non-speculative lock.
1996e0665a90STerry Wilmarth KMP_ATTRIBUTE_TARGET_RTM
19973041982dSJonathan Peyton static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
19983041982dSJonathan Peyton kmp_int32 gtid) {
19995e8470afSJim Cownie int retries = lck->lk.adaptive.max_soft_retries;
20005e8470afSJim Cownie
20013041982dSJonathan Peyton // We don't explicitly count the start of speculation, rather we record the
20023041982dSJonathan Peyton // results (success, hard fail, soft fail). The sum of all of those is the
20033041982dSJonathan Peyton // total number of times we started speculation since all speculations must
20043041982dSJonathan Peyton // end one of those ways.
20053041982dSJonathan Peyton do {
20065e8470afSJim Cownie kmp_uint32 status = _xbegin();
20073041982dSJonathan Peyton // Switch this in to disable actual speculation but exercise at least some
20083041982dSJonathan Peyton // of the rest of the code. Useful for debugging...
20095e8470afSJim Cownie // kmp_uint32 status = _XABORT_NESTED;
20105e8470afSJim Cownie
20113041982dSJonathan Peyton if (status == _XBEGIN_STARTED) {
20123041982dSJonathan Peyton /* We have successfully started speculation. Check that no-one acquired
20133041982dSJonathan Peyton the lock for real between when we last looked and now. This also gets
20143041982dSJonathan Peyton the lock cache line into our read-set, which we need so that we'll
20153041982dSJonathan Peyton abort if anyone later claims it for real. */
20163041982dSJonathan Peyton if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
20173041982dSJonathan Peyton // Lock is now visibly acquired, so someone beat us to it. Abort the
20183041982dSJonathan Peyton // transaction so we'll restart from _xbegin with the failure status.
2019a03533d3SJonathan Peyton _xabort(0x01);
20205e8470afSJim Cownie KMP_ASSERT2(0, "should not get here");
20215e8470afSJim Cownie }
20225e8470afSJim Cownie return 1; // Lock has been acquired (speculatively)
20235e8470afSJim Cownie } else {
20245e8470afSJim Cownie // We have aborted, update the statistics
20253041982dSJonathan Peyton if (status & SOFT_ABORT_MASK) {
20265e8470afSJim Cownie KMP_INC_STAT(lck, softFailedSpeculations);
20275e8470afSJim Cownie // and loop round to retry.
20283041982dSJonathan Peyton } else {
20295e8470afSJim Cownie KMP_INC_STAT(lck, hardFailedSpeculations);
20305e8470afSJim Cownie // Give up if we had a hard failure.
20315e8470afSJim Cownie break;
20325e8470afSJim Cownie }
20335e8470afSJim Cownie }
20345e8470afSJim Cownie } while (retries--); // Loop while we have retries, and didn't fail hard.
20355e8470afSJim Cownie
20365e8470afSJim Cownie // Either we had a hard failure or we didn't succeed softly after
20375e8470afSJim Cownie // the full set of attempts, so back off the badness.
20385e8470afSJim Cownie __kmp_step_badness(lck);
20395e8470afSJim Cownie return 0;
20405e8470afSJim Cownie }
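// Note on the retry accounting above: retries starts at max_soft_retries and
// the do/while tests retries-- after each soft abort, so max_soft_retries == 2
// allows up to three _xbegin attempts before __kmp_step_badness is charged.
// A hard abort abandons the remaining retries immediately.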
20415e8470afSJim Cownie
20423041982dSJonathan Peyton // Attempt to acquire the speculative lock, or back off to the non-speculative
20433041982dSJonathan Peyton // one if the speculative lock cannot be acquired.
20445e8470afSJim Cownie // We can succeed speculatively, non-speculatively, or fail.
20453041982dSJonathan Peyton static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
20465e8470afSJim Cownie // First try to acquire the lock speculatively
20473041982dSJonathan Peyton if (__kmp_should_speculate(lck, gtid) &&
20483041982dSJonathan Peyton __kmp_test_adaptive_lock_only(lck, gtid))
20495e8470afSJim Cownie return 1;
20505e8470afSJim Cownie
20515e8470afSJim Cownie // Speculative acquisition failed, so try to acquire it non-speculatively.
20525e8470afSJim Cownie // Count the non-speculative acquire attempt
20535e8470afSJim Cownie lck->lk.adaptive.acquire_attempts++;
20545e8470afSJim Cownie
20555e8470afSJim Cownie // Use base, non-speculative lock.
20563041982dSJonathan Peyton if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
20575e8470afSJim Cownie KMP_INC_STAT(lck, nonSpeculativeAcquires);
20585e8470afSJim Cownie return 1; // Lock is acquired (non-speculatively)
20593041982dSJonathan Peyton } else {
20605e8470afSJim Cownie return 0; // Failed to acquire the lock, it's already visibly locked.
20615e8470afSJim Cownie }
20625e8470afSJim Cownie }
20635e8470afSJim Cownie
20643041982dSJonathan Peyton static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
20653041982dSJonathan Peyton kmp_int32 gtid) {
20665e8470afSJim Cownie char const *const func = "omp_test_lock";
20674cc4bb4cSJim Cownie if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
20685e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
20695e8470afSJim Cownie }
20705e8470afSJim Cownie
20715e8470afSJim Cownie int retval = __kmp_test_adaptive_lock(lck, gtid);
20725e8470afSJim Cownie
20734cc4bb4cSJim Cownie if (retval) {
20744cc4bb4cSJim Cownie lck->lk.qlk.owner_id = gtid + 1;
20755e8470afSJim Cownie }
20765e8470afSJim Cownie return retval;
20775e8470afSJim Cownie }
20785e8470afSJim Cownie
20793041982dSJonathan Peyton // Block until we can acquire a speculative, adaptive lock. We check whether we
20803041982dSJonathan Peyton // should be trying to speculate. If we should be, we check the real lock to see
20813041982dSJonathan Peyton // if it is free, and, if not, pause without attempting to acquire it until it
20823041982dSJonathan Peyton // is. Then we try the speculative acquire. This means that although we suffer
20833041982dSJonathan Peyton // from lemmings a little (because we can't acquire the lock speculatively
20843041982dSJonathan Peyton // until the queue of threads waiting has cleared), we don't get into a state
20853041982dSJonathan Peyton // where we can never acquire the lock speculatively (because we force the queue
20863041982dSJonathan Peyton // to clear by preventing new arrivals from entering the queue). This does mean
20873041982dSJonathan Peyton // that when we're trying to break lemmings, the lock is no longer fair. However
20883041982dSJonathan Peyton // OpenMP makes no guarantee that its locks are fair, so this isn't a real
20893041982dSJonathan Peyton // problem.
20903041982dSJonathan Peyton static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
20913041982dSJonathan Peyton kmp_int32 gtid) {
20923041982dSJonathan Peyton if (__kmp_should_speculate(lck, gtid)) {
20933041982dSJonathan Peyton if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
20945e8470afSJim Cownie if (__kmp_test_adaptive_lock_only(lck, gtid))
20955e8470afSJim Cownie return;
20965e8470afSJim Cownie // We tried speculation and failed, so give up.
20973041982dSJonathan Peyton } else {
20983041982dSJonathan Peyton // We can't try speculation until the lock is free, so we pause here
20993041982dSJonathan Peyton // (without suspending on the queuing lock) to allow it to drain, then
21003041982dSJonathan Peyton // try again. All other threads will also see the same result from
21013041982dSJonathan Peyton // __kmp_should_speculate, so will be doing the same if they try to claim the
21023041982dSJonathan Peyton // lock from now on.
21033041982dSJonathan Peyton while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
21045e8470afSJim Cownie KMP_INC_STAT(lck, lemmingYields);
2105e47d32f1SJonathan Peyton KMP_YIELD(TRUE);
21065e8470afSJim Cownie }
21075e8470afSJim Cownie
21085e8470afSJim Cownie if (__kmp_test_adaptive_lock_only(lck, gtid))
21095e8470afSJim Cownie return;
21105e8470afSJim Cownie }
21115e8470afSJim Cownie }
21125e8470afSJim Cownie
21135e8470afSJim Cownie // Speculative acquisition failed, so acquire it non-speculatively.
21145e8470afSJim Cownie // Count the non-speculative acquire attempt
21155e8470afSJim Cownie lck->lk.adaptive.acquire_attempts++;
21165e8470afSJim Cownie
21174cc4bb4cSJim Cownie __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
21185e8470afSJim Cownie // We have acquired the base lock, so count that.
21195e8470afSJim Cownie KMP_INC_STAT(lck, nonSpeculativeAcquires);
21205e8470afSJim Cownie }
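// A sketch of the lemming-avoidance path above: if threads T1..T3 are queued
// on the real lock, a newly arriving T4 sees head_id != 0 and spins in the
// KMP_YIELD loop rather than joining the queue. Once T1..T3 drain and the
// lock looks free, T4 and any other threads parked in that loop race on
// _xbegin, so speculation becomes possible again, at the cost of fairness.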
21215e8470afSJim Cownie
21223041982dSJonathan Peyton static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
21233041982dSJonathan Peyton kmp_int32 gtid) {
21245e8470afSJim Cownie char const *const func = "omp_set_lock";
21254cc4bb4cSJim Cownie if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
21265e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
21275e8470afSJim Cownie }
21284cc4bb4cSJim Cownie if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
21295e8470afSJim Cownie KMP_FATAL(LockIsAlreadyOwned, func);
21305e8470afSJim Cownie }
21315e8470afSJim Cownie
21325e8470afSJim Cownie __kmp_acquire_adaptive_lock(lck, gtid);
21335e8470afSJim Cownie
21344cc4bb4cSJim Cownie lck->lk.qlk.owner_id = gtid + 1;
21355e8470afSJim Cownie }
21365e8470afSJim Cownie
2137e0665a90STerry Wilmarth KMP_ATTRIBUTE_TARGET_RTM
21383041982dSJonathan Peyton static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
21393041982dSJonathan Peyton kmp_int32 gtid) {
21403041982dSJonathan Peyton if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
21413041982dSJonathan Peyton lck))) { // If the lock doesn't look claimed we must be speculating.
21425e8470afSJim Cownie // (Or the user's code is buggy and they're releasing without locking;
21435e8470afSJim Cownie // if we had XTEST we'd be able to check that case...)
21445e8470afSJim Cownie _xend(); // Exit speculation
21455e8470afSJim Cownie __kmp_update_badness_after_success(lck);
21463041982dSJonathan Peyton } else { // Since the lock *is* visibly locked we're not speculating,
21475e8470afSJim Cownie // so should use the underlying lock's release scheme.
21484cc4bb4cSJim Cownie __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
21495e8470afSJim Cownie }
21508d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
21515e8470afSJim Cownie }
21525e8470afSJim Cownie
21533041982dSJonathan Peyton static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
21543041982dSJonathan Peyton kmp_int32 gtid) {
21555e8470afSJim Cownie char const *const func = "omp_unset_lock";
21565e8470afSJim Cownie KMP_MB(); /* in case another processor initialized lock */
21574cc4bb4cSJim Cownie if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
21585e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
21595e8470afSJim Cownie }
21604cc4bb4cSJim Cownie if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
21615e8470afSJim Cownie KMP_FATAL(LockUnsettingFree, func);
21625e8470afSJim Cownie }
21634cc4bb4cSJim Cownie if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
21645e8470afSJim Cownie KMP_FATAL(LockUnsettingSetByAnother, func);
21655e8470afSJim Cownie }
21664cc4bb4cSJim Cownie lck->lk.qlk.owner_id = 0;
21675e8470afSJim Cownie __kmp_release_adaptive_lock(lck, gtid);
21688d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
21695e8470afSJim Cownie }
21705e8470afSJim Cownie
21713041982dSJonathan Peyton static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
21724cc4bb4cSJim Cownie __kmp_init_queuing_lock(GET_QLK_PTR(lck));
21735e8470afSJim Cownie lck->lk.adaptive.badness = 0;
21745e8470afSJim Cownie lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
21753041982dSJonathan Peyton lck->lk.adaptive.max_soft_retries =
21763041982dSJonathan Peyton __kmp_adaptive_backoff_params.max_soft_retries;
21775e8470afSJim Cownie lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
21785e8470afSJim Cownie #if KMP_DEBUG_ADAPTIVE_LOCKS
21795e8470afSJim Cownie __kmp_zero_speculative_stats(&lck->lk.adaptive);
21805e8470afSJim Cownie #endif
21815e8470afSJim Cownie KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
21825e8470afSJim Cownie }
21835e8470afSJim Cownie
21843041982dSJonathan Peyton static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
21855e8470afSJim Cownie #if KMP_DEBUG_ADAPTIVE_LOCKS
21865e8470afSJim Cownie __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
21875e8470afSJim Cownie #endif
21884cc4bb4cSJim Cownie __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
21895e8470afSJim Cownie // Nothing needed for the speculative part.
21905e8470afSJim Cownie }
21915e8470afSJim Cownie
21923041982dSJonathan Peyton static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
21935e8470afSJim Cownie char const *const func = "omp_destroy_lock";
21944cc4bb4cSJim Cownie if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
21955e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
21965e8470afSJim Cownie }
21974cc4bb4cSJim Cownie if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
21985e8470afSJim Cownie KMP_FATAL(LockStillOwned, func);
21995e8470afSJim Cownie }
22005e8470afSJim Cownie __kmp_destroy_adaptive_lock(lck);
22015e8470afSJim Cownie }
22025e8470afSJim Cownie
22035e8470afSJim Cownie #endif // KMP_USE_ADAPTIVE_LOCKS
22045e8470afSJim Cownie
22055e8470afSJim Cownie /* ------------------------------------------------------------------------ */
22065e8470afSJim Cownie /* DRDPA ticket locks */
22075e8470afSJim Cownie /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
22085e8470afSJim Cownie
22093041982dSJonathan Peyton static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
221037e2ef54SJonathan Peyton return lck->lk.owner_id - 1;
22115e8470afSJim Cownie }
22125e8470afSJim Cownie
22133041982dSJonathan Peyton static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
22145e8470afSJim Cownie return lck->lk.depth_locked != -1;
22155e8470afSJim Cownie }
22165e8470afSJim Cownie
22170e6d4577SJonathan Peyton __forceinline static int
22183041982dSJonathan Peyton __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
221937e2ef54SJonathan Peyton kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
222037e2ef54SJonathan Peyton kmp_uint64 mask = lck->lk.mask; // atomic load
222137e2ef54SJonathan Peyton std::atomic<kmp_uint64> *polls = lck->lk.polls;
22225e8470afSJim Cownie
22235e8470afSJim Cownie #ifdef USE_LOCK_PROFILE
222437e2ef54SJonathan Peyton if (polls[ticket & mask] != ticket)
22255e8470afSJim Cownie __kmp_printf("LOCK CONTENTION: %p\n", lck);
22265e8470afSJim Cownie /* else __kmp_printf( "." );*/
22275e8470afSJim Cownie #endif /* USE_LOCK_PROFILE */
22285e8470afSJim Cownie
22295e8470afSJim Cownie // Now spin-wait, but reload the polls pointer and mask, in case the
22305e8470afSJim Cownie // polling area has been reconfigured. Unless it is reconfigured, the
22315e8470afSJim Cownie // reloads stay in L1 cache and are cheap.
22325e8470afSJim Cownie //
2233e47d32f1SJonathan Peyton // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2234e47d32f1SJonathan Peyton // The current implementation of KMP_WAIT doesn't allow for mask
22355e8470afSJim Cownie // and poll to be re-read every spin iteration.
22365e8470afSJim Cownie kmp_uint32 spins;
22372e02579aSTerry Wilmarth kmp_uint64 time;
22385e8470afSJim Cownie KMP_FSYNC_PREPARE(lck);
22395e8470afSJim Cownie KMP_INIT_YIELD(spins);
22402e02579aSTerry Wilmarth KMP_INIT_BACKOFF(time);
224137e2ef54SJonathan Peyton while (polls[ticket & mask] < ticket) { // atomic load
22422e02579aSTerry Wilmarth KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
22435e8470afSJim Cownie // Re-read the mask and the poll pointer from the lock structure.
22445e8470afSJim Cownie //
22455e8470afSJim Cownie // Make certain that "mask" is read before "polls" !!!
22465e8470afSJim Cownie //
22473041982dSJonathan Peyton // If another thread reconfigures the polling area and updates these
22483041982dSJonathan Peyton // values, and we get the new value of mask and the old polls pointer, we
22493041982dSJonathan Peyton // could access memory beyond the end of the old polling area.
225037e2ef54SJonathan Peyton mask = lck->lk.mask; // atomic load
225137e2ef54SJonathan Peyton polls = lck->lk.polls; // atomic load
22525e8470afSJim Cownie }
22535e8470afSJim Cownie
22545e8470afSJim Cownie // Critical section starts here
22555e8470afSJim Cownie KMP_FSYNC_ACQUIRED(lck);
22565e8470afSJim Cownie KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
22575e8470afSJim Cownie ticket, lck));
22585e8470afSJim Cownie lck->lk.now_serving = ticket; // non-volatile store
22595e8470afSJim Cownie
22605e8470afSJim Cownie // Deallocate a garbage polling area if we know that we are the last
22615e8470afSJim Cownie // thread that could possibly access it.
22625e8470afSJim Cownie //
22635e8470afSJim Cownie // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
22645e8470afSJim Cownie // ticket.
22655e8470afSJim Cownie if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
226637e2ef54SJonathan Peyton __kmp_free(lck->lk.old_polls);
22675e8470afSJim Cownie lck->lk.old_polls = NULL;
22685e8470afSJim Cownie lck->lk.cleanup_ticket = 0;
22695e8470afSJim Cownie }
22705e8470afSJim Cownie
22715e8470afSJim Cownie // Check to see if we should reconfigure the polling area.
22725e8470afSJim Cownie // If there is still a garbage polling area to be deallocated from a
22735e8470afSJim Cownie // previous reconfiguration, let a later thread reconfigure it.
22745e8470afSJim Cownie if (lck->lk.old_polls == NULL) {
22755e8470afSJim Cownie bool reconfigure = false;
227637e2ef54SJonathan Peyton std::atomic<kmp_uint64> *old_polls = polls;
22775e8470afSJim Cownie kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
22785e8470afSJim Cownie
22793041982dSJonathan Peyton if (TCR_4(__kmp_nth) >
22803041982dSJonathan Peyton (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
22815e8470afSJim Cownie // We are in oversubscription mode. Contract the polling area
22825e8470afSJim Cownie // down to a single location, if that hasn't been done already.
22835e8470afSJim Cownie if (num_polls > 1) {
22845e8470afSJim Cownie reconfigure = true;
22855e8470afSJim Cownie num_polls = TCR_4(lck->lk.num_polls);
22865e8470afSJim Cownie mask = 0;
22875e8470afSJim Cownie num_polls = 1;
228837e2ef54SJonathan Peyton polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
228937e2ef54SJonathan Peyton sizeof(*polls));
229037e2ef54SJonathan Peyton polls[0] = ticket;
22915e8470afSJim Cownie }
22923041982dSJonathan Peyton } else {
22935e8470afSJim Cownie // We are in under/fully subscribed mode. Check the number of
22945e8470afSJim Cownie // threads waiting on the lock. The size of the polling area
22955e8470afSJim Cownie // should be at least the number of threads waiting.
22965e8470afSJim Cownie kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
22975e8470afSJim Cownie if (num_waiting > num_polls) {
22985e8470afSJim Cownie kmp_uint32 old_num_polls = num_polls;
22995e8470afSJim Cownie reconfigure = true;
23005e8470afSJim Cownie do {
23015e8470afSJim Cownie mask = (mask << 1) | 1;
23025e8470afSJim Cownie num_polls *= 2;
23035e8470afSJim Cownie } while (num_polls <= num_waiting);
23045e8470afSJim Cownie
23055e8470afSJim Cownie // Allocate the new polling area, and copy the relevant portion
23065e8470afSJim Cownie // of the old polling area to the new area. __kmp_allocate()
23075e8470afSJim Cownie // zeroes the memory it allocates, and most of the old area is
23085e8470afSJim Cownie // just zero padding, so we only copy the release counters.
230937e2ef54SJonathan Peyton polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
231037e2ef54SJonathan Peyton sizeof(*polls));
23115e8470afSJim Cownie kmp_uint32 i;
23125e8470afSJim Cownie for (i = 0; i < old_num_polls; i++) {
231337e2ef54SJonathan Peyton polls[i].store(old_polls[i]);
23145e8470afSJim Cownie }
23155e8470afSJim Cownie }
23165e8470afSJim Cownie }
23175e8470afSJim Cownie
23185e8470afSJim Cownie if (reconfigure) {
23195e8470afSJim Cownie // Now write the updated fields back to the lock structure.
23205e8470afSJim Cownie //
23215e8470afSJim Cownie // Make certain that "polls" is written before "mask" !!!
23225e8470afSJim Cownie //
23233041982dSJonathan Peyton // If another thread picks up the new value of mask and the old polls
23243041982dSJonathan Peyton // pointer, it could access memory beyond the end of the old polling
23253041982dSJonathan Peyton // area.
23265e8470afSJim Cownie //
23275e8470afSJim Cownie // On x86, we need memory fences.
23283041982dSJonathan Peyton KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
23293041982dSJonathan Peyton "lock %p to %d polls\n",
23305e8470afSJim Cownie ticket, lck, num_polls));
23315e8470afSJim Cownie
233237e2ef54SJonathan Peyton lck->lk.old_polls = old_polls;
233337e2ef54SJonathan Peyton lck->lk.polls = polls; // atomic store
23345e8470afSJim Cownie
23355e8470afSJim Cownie KMP_MB();
23365e8470afSJim Cownie
233737e2ef54SJonathan Peyton lck->lk.num_polls = num_polls;
233837e2ef54SJonathan Peyton lck->lk.mask = mask; // atomic store
23395e8470afSJim Cownie
23405e8470afSJim Cownie KMP_MB();
23415e8470afSJim Cownie
23425e8470afSJim Cownie // Only after the new polling area and mask have been flushed
23435e8470afSJim Cownie // to main memory can we update the cleanup ticket field.
23445e8470afSJim Cownie //
23455e8470afSJim Cownie // volatile load / non-volatile store
234637e2ef54SJonathan Peyton lck->lk.cleanup_ticket = lck->lk.next_ticket;
23475e8470afSJim Cownie }
23485e8470afSJim Cownie }
23490e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_FIRST;
23505e8470afSJim Cownie }
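// Worked example of the indexing above: with num_polls == 4 the mask is 3, so
// the holder of ticket 5 spins on polls[5 & 3] == polls[1] until the releasing
// thread stores 5 there. Growing num_polls under contention gives each waiter
// its own location to poll, which is the "dynamically reconfigurable
// distributed polling area" of the lock's name.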
23515e8470afSJim Cownie
23523041982dSJonathan Peyton int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
235350fed047SJonas Hahnfeld int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
235450fed047SJonas Hahnfeld return retval;
23555e8470afSJim Cownie }
23565e8470afSJim Cownie
23573041982dSJonathan Peyton static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
23583041982dSJonathan Peyton kmp_int32 gtid) {
23595e8470afSJim Cownie char const *const func = "omp_set_lock";
23605e8470afSJim Cownie if (lck->lk.initialized != lck) {
23615e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
23625e8470afSJim Cownie }
23635e8470afSJim Cownie if (__kmp_is_drdpa_lock_nestable(lck)) {
23645e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
23655e8470afSJim Cownie }
23665e8470afSJim Cownie if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
23675e8470afSJim Cownie KMP_FATAL(LockIsAlreadyOwned, func);
23685e8470afSJim Cownie }
23695e8470afSJim Cownie
23705e8470afSJim Cownie __kmp_acquire_drdpa_lock(lck, gtid);
23715e8470afSJim Cownie
23725e8470afSJim Cownie lck->lk.owner_id = gtid + 1;
23730e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_FIRST;
23745e8470afSJim Cownie }
23755e8470afSJim Cownie
23763041982dSJonathan Peyton int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
23775e8470afSJim Cownie // First get a ticket, then read the polls pointer and the mask.
23785e8470afSJim Cownie // The polls pointer must be read before the mask!!! (See above)
237937e2ef54SJonathan Peyton kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
238037e2ef54SJonathan Peyton std::atomic<kmp_uint64> *polls = lck->lk.polls;
238137e2ef54SJonathan Peyton kmp_uint64 mask = lck->lk.mask; // atomic load
238237e2ef54SJonathan Peyton if (polls[ticket & mask] == ticket) {
23835e8470afSJim Cownie kmp_uint64 next_ticket = ticket + 1;
238437e2ef54SJonathan Peyton if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
23853041982dSJonathan Peyton next_ticket)) {
23865e8470afSJim Cownie KMP_FSYNC_ACQUIRED(lck);
23875e8470afSJim Cownie KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
23885e8470afSJim Cownie ticket, lck));
23895e8470afSJim Cownie lck->lk.now_serving = ticket; // non-volatile store
23905e8470afSJim Cownie
23913041982dSJonathan Peyton // Since no threads are waiting, there is no possibility that we would
23923041982dSJonathan Peyton // want to reconfigure the polling area. We might have the cleanup ticket
23933041982dSJonathan Peyton // value (which says that it is now safe to deallocate old_polls), but
23943041982dSJonathan Peyton // we'll let a later thread which calls __kmp_acquire_lock do that - this
23953041982dSJonathan Peyton // routine isn't supposed to block, and we would risk blocks if we called
23963041982dSJonathan Peyton // __kmp_free() to do the deallocation.
23975e8470afSJim Cownie return TRUE;
23985e8470afSJim Cownie }
23995e8470afSJim Cownie }
24005e8470afSJim Cownie return FALSE;
24015e8470afSJim Cownie }
24025e8470afSJim Cownie
24033041982dSJonathan Peyton static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
24043041982dSJonathan Peyton kmp_int32 gtid) {
24055e8470afSJim Cownie char const *const func = "omp_test_lock";
24065e8470afSJim Cownie if (lck->lk.initialized != lck) {
24075e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
24085e8470afSJim Cownie }
24095e8470afSJim Cownie if (__kmp_is_drdpa_lock_nestable(lck)) {
24105e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
24115e8470afSJim Cownie }
24125e8470afSJim Cownie
24135e8470afSJim Cownie int retval = __kmp_test_drdpa_lock(lck, gtid);
24145e8470afSJim Cownie
24154cc4bb4cSJim Cownie if (retval) {
24165e8470afSJim Cownie lck->lk.owner_id = gtid + 1;
24175e8470afSJim Cownie }
24185e8470afSJim Cownie return retval;
24195e8470afSJim Cownie }
24205e8470afSJim Cownie
24213041982dSJonathan Peyton int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
24223041982dSJonathan Peyton // Read the ticket value from the lock data struct, then the polls pointer and
24233041982dSJonathan Peyton // the mask. The polls pointer must be read before the mask!!! (See above)
242437e2ef54SJonathan Peyton kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
242537e2ef54SJonathan Peyton std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
242637e2ef54SJonathan Peyton kmp_uint64 mask = lck->lk.mask; // atomic load
24275e8470afSJim Cownie KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
24285e8470afSJim Cownie ticket - 1, lck));
24295e8470afSJim Cownie KMP_FSYNC_RELEASING(lck);
243037e2ef54SJonathan Peyton polls[ticket & mask] = ticket; // atomic store
24318d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
24325e8470afSJim Cownie }
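// Note on the handoff above: releasing ticket t - 1 publishes t into
// polls[t & mask], and the thread holding ticket t is spinning on exactly that
// slot in __kmp_acquire_drdpa_lock_timed_template, so the single atomic store
// both releases the lock and hands it to the next waiter.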
24335e8470afSJim Cownie
24343041982dSJonathan Peyton static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
24353041982dSJonathan Peyton kmp_int32 gtid) {
24365e8470afSJim Cownie char const *const func = "omp_unset_lock";
24375e8470afSJim Cownie KMP_MB(); /* in case another processor initialized lock */
24385e8470afSJim Cownie if (lck->lk.initialized != lck) {
24395e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
24405e8470afSJim Cownie }
24415e8470afSJim Cownie if (__kmp_is_drdpa_lock_nestable(lck)) {
24425e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
24435e8470afSJim Cownie }
24445e8470afSJim Cownie if (__kmp_get_drdpa_lock_owner(lck) == -1) {
24455e8470afSJim Cownie KMP_FATAL(LockUnsettingFree, func);
24465e8470afSJim Cownie }
24473041982dSJonathan Peyton if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
24483041982dSJonathan Peyton (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
24495e8470afSJim Cownie KMP_FATAL(LockUnsettingSetByAnother, func);
24505e8470afSJim Cownie }
24515e8470afSJim Cownie lck->lk.owner_id = 0;
24528d09facaSAndrey Churbanov return __kmp_release_drdpa_lock(lck, gtid);
24535e8470afSJim Cownie }
24545e8470afSJim Cownie
24553041982dSJonathan Peyton void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
24565e8470afSJim Cownie lck->lk.location = NULL;
24575e8470afSJim Cownie lck->lk.mask = 0;
24585e8470afSJim Cownie lck->lk.num_polls = 1;
245937e2ef54SJonathan Peyton lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
24603041982dSJonathan Peyton lck->lk.num_polls * sizeof(*(lck->lk.polls)));
24615e8470afSJim Cownie lck->lk.cleanup_ticket = 0;
24625e8470afSJim Cownie lck->lk.old_polls = NULL;
24635e8470afSJim Cownie lck->lk.next_ticket = 0;
24645e8470afSJim Cownie lck->lk.now_serving = 0;
24655e8470afSJim Cownie lck->lk.owner_id = 0; // no thread owns the lock.
24665e8470afSJim Cownie lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
24675e8470afSJim Cownie lck->lk.initialized = lck;
24685e8470afSJim Cownie
24695e8470afSJim Cownie KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
24705e8470afSJim Cownie }
24715e8470afSJim Cownie
24723041982dSJonathan Peyton void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
24735e8470afSJim Cownie lck->lk.initialized = NULL;
24745e8470afSJim Cownie lck->lk.location = NULL;
247537e2ef54SJonathan Peyton if (lck->lk.polls.load() != NULL) {
247637e2ef54SJonathan Peyton __kmp_free(lck->lk.polls.load());
24775e8470afSJim Cownie lck->lk.polls = NULL;
24785e8470afSJim Cownie }
24795e8470afSJim Cownie if (lck->lk.old_polls != NULL) {
248037e2ef54SJonathan Peyton __kmp_free(lck->lk.old_polls);
24815e8470afSJim Cownie lck->lk.old_polls = NULL;
24825e8470afSJim Cownie }
24835e8470afSJim Cownie lck->lk.mask = 0;
24845e8470afSJim Cownie lck->lk.num_polls = 0;
24855e8470afSJim Cownie lck->lk.cleanup_ticket = 0;
24865e8470afSJim Cownie lck->lk.next_ticket = 0;
24875e8470afSJim Cownie lck->lk.now_serving = 0;
24885e8470afSJim Cownie lck->lk.owner_id = 0;
24895e8470afSJim Cownie lck->lk.depth_locked = -1;
24905e8470afSJim Cownie }
24915e8470afSJim Cownie
24923041982dSJonathan Peyton static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
24935e8470afSJim Cownie char const *const func = "omp_destroy_lock";
24945e8470afSJim Cownie if (lck->lk.initialized != lck) {
24955e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
24965e8470afSJim Cownie }
24975e8470afSJim Cownie if (__kmp_is_drdpa_lock_nestable(lck)) {
24985e8470afSJim Cownie KMP_FATAL(LockNestableUsedAsSimple, func);
24995e8470afSJim Cownie }
25005e8470afSJim Cownie if (__kmp_get_drdpa_lock_owner(lck) != -1) {
25015e8470afSJim Cownie KMP_FATAL(LockStillOwned, func);
25025e8470afSJim Cownie }
25035e8470afSJim Cownie __kmp_destroy_drdpa_lock(lck);
25045e8470afSJim Cownie }
25055e8470afSJim Cownie
25065e8470afSJim Cownie // nested drdpa ticket locks
25075e8470afSJim Cownie
25083041982dSJonathan Peyton int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
25095e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
25105e8470afSJim Cownie
25115e8470afSJim Cownie if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
25125e8470afSJim Cownie lck->lk.depth_locked += 1;
25130e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_NEXT;
25143041982dSJonathan Peyton } else {
25155e8470afSJim Cownie __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
25165e8470afSJim Cownie KMP_MB();
25175e8470afSJim Cownie lck->lk.depth_locked = 1;
25185e8470afSJim Cownie KMP_MB();
25195e8470afSJim Cownie lck->lk.owner_id = gtid + 1;
25200e6d4577SJonathan Peyton return KMP_LOCK_ACQUIRED_FIRST;
25215e8470afSJim Cownie }
25225e8470afSJim Cownie }
25235e8470afSJim Cownie
25243041982dSJonathan Peyton static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
25253041982dSJonathan Peyton kmp_int32 gtid) {
25265e8470afSJim Cownie char const *const func = "omp_set_nest_lock";
25275e8470afSJim Cownie if (lck->lk.initialized != lck) {
25285e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
25295e8470afSJim Cownie }
25305e8470afSJim Cownie if (!__kmp_is_drdpa_lock_nestable(lck)) {
25315e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
25325e8470afSJim Cownie }
25335e8470afSJim Cownie __kmp_acquire_nested_drdpa_lock(lck, gtid);
25345e8470afSJim Cownie }
25355e8470afSJim Cownie
25363041982dSJonathan Peyton int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
25375e8470afSJim Cownie int retval;
25385e8470afSJim Cownie
25395e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
25405e8470afSJim Cownie
25415e8470afSJim Cownie if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
25425e8470afSJim Cownie retval = ++lck->lk.depth_locked;
25433041982dSJonathan Peyton } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
25445e8470afSJim Cownie retval = 0;
25453041982dSJonathan Peyton } else {
25465e8470afSJim Cownie KMP_MB();
25475e8470afSJim Cownie retval = lck->lk.depth_locked = 1;
25485e8470afSJim Cownie KMP_MB();
25495e8470afSJim Cownie lck->lk.owner_id = gtid + 1;
25505e8470afSJim Cownie }
25515e8470afSJim Cownie return retval;
25525e8470afSJim Cownie }
25535e8470afSJim Cownie
25543041982dSJonathan Peyton static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
25553041982dSJonathan Peyton kmp_int32 gtid) {
25565e8470afSJim Cownie char const *const func = "omp_test_nest_lock";
25575e8470afSJim Cownie if (lck->lk.initialized != lck) {
25585e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
25595e8470afSJim Cownie }
25605e8470afSJim Cownie if (!__kmp_is_drdpa_lock_nestable(lck)) {
25615e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
25625e8470afSJim Cownie }
25635e8470afSJim Cownie return __kmp_test_nested_drdpa_lock(lck, gtid);
25645e8470afSJim Cownie }
25655e8470afSJim Cownie
25663041982dSJonathan Peyton int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
25675e8470afSJim Cownie KMP_DEBUG_ASSERT(gtid >= 0);
25685e8470afSJim Cownie
25695e8470afSJim Cownie KMP_MB();
25705e8470afSJim Cownie if (--(lck->lk.depth_locked) == 0) {
25715e8470afSJim Cownie KMP_MB();
25725e8470afSJim Cownie lck->lk.owner_id = 0;
25735e8470afSJim Cownie __kmp_release_drdpa_lock(lck, gtid);
25748d09facaSAndrey Churbanov return KMP_LOCK_RELEASED;
25755e8470afSJim Cownie }
25768d09facaSAndrey Churbanov return KMP_LOCK_STILL_HELD;
25775e8470afSJim Cownie }
25785e8470afSJim Cownie
25793041982dSJonathan Peyton static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
25803041982dSJonathan Peyton kmp_int32 gtid) {
25815e8470afSJim Cownie char const *const func = "omp_unset_nest_lock";
25825e8470afSJim Cownie KMP_MB(); /* in case another processor initialized lock */
25835e8470afSJim Cownie if (lck->lk.initialized != lck) {
25845e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
25855e8470afSJim Cownie }
25865e8470afSJim Cownie if (!__kmp_is_drdpa_lock_nestable(lck)) {
25875e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
25885e8470afSJim Cownie }
25895e8470afSJim Cownie if (__kmp_get_drdpa_lock_owner(lck) == -1) {
25905e8470afSJim Cownie KMP_FATAL(LockUnsettingFree, func);
25915e8470afSJim Cownie }
25925e8470afSJim Cownie if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
25935e8470afSJim Cownie KMP_FATAL(LockUnsettingSetByAnother, func);
25945e8470afSJim Cownie }
25958d09facaSAndrey Churbanov return __kmp_release_nested_drdpa_lock(lck, gtid);
25965e8470afSJim Cownie }
25975e8470afSJim Cownie
25983041982dSJonathan Peyton void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
25995e8470afSJim Cownie __kmp_init_drdpa_lock(lck);
26005e8470afSJim Cownie lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
26015e8470afSJim Cownie }
26025e8470afSJim Cownie
26033041982dSJonathan Peyton void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
26045e8470afSJim Cownie __kmp_destroy_drdpa_lock(lck);
26055e8470afSJim Cownie lck->lk.depth_locked = 0;
26065e8470afSJim Cownie }
26075e8470afSJim Cownie
26083041982dSJonathan Peyton static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
26095e8470afSJim Cownie char const *const func = "omp_destroy_nest_lock";
26105e8470afSJim Cownie if (lck->lk.initialized != lck) {
26115e8470afSJim Cownie KMP_FATAL(LockIsUninitialized, func);
26125e8470afSJim Cownie }
26135e8470afSJim Cownie if (!__kmp_is_drdpa_lock_nestable(lck)) {
26145e8470afSJim Cownie KMP_FATAL(LockSimpleUsedAsNestable, func);
26155e8470afSJim Cownie }
26165e8470afSJim Cownie if (__kmp_get_drdpa_lock_owner(lck) != -1) {
26175e8470afSJim Cownie KMP_FATAL(LockStillOwned, func);
26185e8470afSJim Cownie }
26195e8470afSJim Cownie __kmp_destroy_nested_drdpa_lock(lck);
26205e8470afSJim Cownie }
26215e8470afSJim Cownie
26225e8470afSJim Cownie // access functions to fields which don't exist for all lock kinds.
26235e8470afSJim Cownie
26243041982dSJonathan Peyton static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
26255e8470afSJim Cownie return lck->lk.location;
26265e8470afSJim Cownie }
26275e8470afSJim Cownie
26283041982dSJonathan Peyton static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
26293041982dSJonathan Peyton const ident_t *loc) {
26305e8470afSJim Cownie lck->lk.location = loc;
26315e8470afSJim Cownie }
26325e8470afSJim Cownie
26333041982dSJonathan Peyton static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
26345e8470afSJim Cownie return lck->lk.flags;
26355e8470afSJim Cownie }
26365e8470afSJim Cownie
26373041982dSJonathan Peyton static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
26383041982dSJonathan Peyton kmp_lock_flags_t flags) {
26395e8470afSJim Cownie lck->lk.flags = flags;
26405e8470afSJim Cownie }
26415e8470afSJim Cownie
2642377aa40dSJonathan Peyton // Time stamp counter
2643377aa40dSJonathan Peyton #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2644377aa40dSJonathan Peyton #define __kmp_tsc() __kmp_hardware_timestamp()
2645377aa40dSJonathan Peyton // Runtime's default backoff parameters
2646377aa40dSJonathan Peyton kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2647377aa40dSJonathan Peyton #else
2648377aa40dSJonathan Peyton // Use nanoseconds for other platforms
2649377aa40dSJonathan Peyton extern kmp_uint64 __kmp_now_nsec();
2650377aa40dSJonathan Peyton kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2651377aa40dSJonathan Peyton #define __kmp_tsc() __kmp_now_nsec()
2652377aa40dSJonathan Peyton #endif
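// Assuming the field order of kmp_backoff_t in kmp_lock.h, the initializers
// above read as {step, max_backoff, min_tick}: the current backoff step, the
// power-of-2 cap on the step, and the minimum spin time per step, measured in
// TSC ticks on x86 and in nanoseconds elsewhere.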
2653377aa40dSJonathan Peyton
2654377aa40dSJonathan Peyton // A useful predicate for dealing with timestamps that may wrap.
26553041982dSJonathan Peyton // Is a before b? Since the timestamps may wrap, this is asking whether it's
2656377aa40dSJonathan Peyton // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2657377aa40dSJonathan Peyton // Times where going clockwise is less distance than going anti-clockwise
26583041982dSJonathan Peyton // are in the future, others are in the past. E.g. with a = MAX-1 and
26593041982dSJonathan Peyton // b = MAX+1 (= 0), a > b is true but does not mean a reached b; the signed
26603041982dSJonathan Peyton // view, signed(a) = -2 versus signed(b) = 0, captures the actual ordering.
26613041982dSJonathan Peyton static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2662377aa40dSJonathan Peyton return ((kmp_int64)b - (kmp_int64)a) > 0;
2663377aa40dSJonathan Peyton }
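// Worked example, using 8-bit arithmetic for brevity: with a = 254 and b = 1
// (the counter has wrapped), a > b as unsigned values, yet
// (int8)b - (int8)a = 1 - (-2) = 3 > 0, so before(a, b) correctly reports that
// a precedes b. The predicate stays valid for distances up to half the wrap
// period.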
2664377aa40dSJonathan Peyton
2665377aa40dSJonathan Peyton // Truncated binary exponential backoff function
26663041982dSJonathan Peyton void __kmp_spin_backoff(kmp_backoff_t *boff) {
26673041982dSJonathan Peyton // We could flatten this loop, but making it a nested loop gives better results
2668377aa40dSJonathan Peyton kmp_uint32 i;
2669377aa40dSJonathan Peyton for (i = boff->step; i > 0; i--) {
2670377aa40dSJonathan Peyton kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
26712e02579aSTerry Wilmarth #if KMP_HAVE_UMWAIT
26722e02579aSTerry Wilmarth if (__kmp_umwait_enabled) {
26732e02579aSTerry Wilmarth __kmp_tpause(0, boff->min_tick);
26742e02579aSTerry Wilmarth } else {
26752e02579aSTerry Wilmarth #endif
2676377aa40dSJonathan Peyton do {
2677377aa40dSJonathan Peyton KMP_CPU_PAUSE();
2678377aa40dSJonathan Peyton } while (before(__kmp_tsc(), goal));
26792e02579aSTerry Wilmarth #if KMP_HAVE_UMWAIT
26802e02579aSTerry Wilmarth }
26812e02579aSTerry Wilmarth #endif
2682377aa40dSJonathan Peyton }
2683377aa40dSJonathan Peyton boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2684377aa40dSJonathan Peyton }
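// A sketch of the step update above, assuming max_backoff is a power of 2:
// with max_backoff == 4096 the step sequence is 1, 3, 7, 15, ... saturating at
// 4095 because of the & (max_backoff - 1) truncation, so each call roughly
// doubles the spinning done until the backoff caps out: a truncated binary
// exponential backoff, as the comment above says.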
2685377aa40dSJonathan Peyton
26865c56fb55SAndrey Churbanov #if KMP_USE_DYNAMIC_LOCK
26875c56fb55SAndrey Churbanov
26883041982dSJonathan Peyton // Direct lock initializer. It simply writes a tag to the low 8 bits of the
26893041982dSJonathan Peyton // lock word.
26903041982dSJonathan Peyton static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
26913041982dSJonathan Peyton kmp_dyna_lockseq_t seq) {
2692a03533d3SJonathan Peyton TCW_4(*lck, KMP_GET_D_TAG(seq));
26933041982dSJonathan Peyton KA_TRACE(
26943041982dSJonathan Peyton 20,
26953041982dSJonathan Peyton ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
26965c56fb55SAndrey Churbanov }
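// For illustration, assuming the KMP_GET_D_TAG definition in kmp_lock.h
// (which packs the sequence number into the low bits with the lowest bit set):
// after initialization the low byte of the lock word identifies the lock kind,
// so the runtime can dispatch through the direct-lock jump tables without any
// per-lock allocation.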
26975c56fb55SAndrey Churbanov
2698dae13d81SJonathan Peyton #if KMP_USE_TSX
26995c56fb55SAndrey Churbanov
27005c56fb55SAndrey Churbanov // HLE lock functions - imported from the testbed runtime.
27015c56fb55SAndrey Churbanov #define HLE_ACQUIRE ".byte 0xf2;"
27025c56fb55SAndrey Churbanov #define HLE_RELEASE ".byte 0xf3;"
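// 0xf2 and 0xf3 are the repne/rep instruction prefixes, which TSX-capable
// processors interpret as the XACQUIRE/XRELEASE elision hints when applied to
// compatible instructions (such as the xchg and mov used below); earlier
// processors simply ignore them, so the same code runs correctly without HLE.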
27035c56fb55SAndrey Churbanov
27043041982dSJonathan Peyton static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
27053041982dSJonathan Peyton __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
27065c56fb55SAndrey Churbanov return v;
27075c56fb55SAndrey Churbanov }
27085c56fb55SAndrey Churbanov
27093041982dSJonathan Peyton static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
27105c56fb55SAndrey Churbanov
2711baad3f60SJonathan Peyton static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2712baad3f60SJonathan Peyton TCW_4(*lck, 0);
2713baad3f60SJonathan Peyton }
2714baad3f60SJonathan Peyton
27153041982dSJonathan Peyton static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2716f2d119ffSJonathan Peyton // Use gtid for KMP_LOCK_BUSY if necessary
2717f2d119ffSJonathan Peyton if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
27185c56fb55SAndrey Churbanov int delay = 1;
27195c56fb55SAndrey Churbanov do {
2720f2d119ffSJonathan Peyton while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
27215c56fb55SAndrey Churbanov for (int i = delay; i != 0; --i)
2722a03533d3SJonathan Peyton KMP_CPU_PAUSE();
27235c56fb55SAndrey Churbanov delay = ((delay << 1) | 1) & 7;
27245c56fb55SAndrey Churbanov }
2725f2d119ffSJonathan Peyton } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
27265c56fb55SAndrey Churbanov }
27275c56fb55SAndrey Churbanov }
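// The delay sequence in the pause loop above is 1, 3, 7, 7, ... since
// ((delay << 1) | 1) & 7 saturates at 7: a small, bounded exponential backoff
// between polls while the elided lock looks busy.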
27285c56fb55SAndrey Churbanov
27293041982dSJonathan Peyton static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
27303041982dSJonathan Peyton kmp_int32 gtid) {
27315c56fb55SAndrey Churbanov __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
27325c56fb55SAndrey Churbanov }
27335c56fb55SAndrey Churbanov
27343041982dSJonathan Peyton static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
27355c56fb55SAndrey Churbanov __asm__ volatile(HLE_RELEASE "movl %1,%0"
27365c56fb55SAndrey Churbanov : "=m"(*lck)
2737f2d119ffSJonathan Peyton : "r"(KMP_LOCK_FREE(hle))
27385c56fb55SAndrey Churbanov : "memory");
2739a03533d3SJonathan Peyton return KMP_LOCK_RELEASED;
27405c56fb55SAndrey Churbanov }
27415c56fb55SAndrey Churbanov
27423041982dSJonathan Peyton static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
27433041982dSJonathan Peyton kmp_int32 gtid) {
2744a03533d3SJonathan Peyton return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
27455c56fb55SAndrey Churbanov }
27465c56fb55SAndrey Churbanov
27473041982dSJonathan Peyton static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2748f2d119ffSJonathan Peyton return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
27495c56fb55SAndrey Churbanov }
27505c56fb55SAndrey Churbanov
27513041982dSJonathan Peyton static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
27523041982dSJonathan Peyton kmp_int32 gtid) {
27535c56fb55SAndrey Churbanov return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
27545c56fb55SAndrey Churbanov }
27555c56fb55SAndrey Churbanov
2756c3b5009aSHansang Bae static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2757dae13d81SJonathan Peyton __kmp_init_queuing_lock(lck);
2758dae13d81SJonathan Peyton }
27595c56fb55SAndrey Churbanov
2760c3b5009aSHansang Bae static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2761dae13d81SJonathan Peyton __kmp_destroy_queuing_lock(lck);
2762dae13d81SJonathan Peyton }
2763dae13d81SJonathan Peyton
2764c3b5009aSHansang Bae static void
2765c3b5009aSHansang Bae __kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2766baad3f60SJonathan Peyton __kmp_destroy_queuing_lock_with_checks(lck);
2767baad3f60SJonathan Peyton }
2768baad3f60SJonathan Peyton
2769e0665a90STerry Wilmarth KMP_ATTRIBUTE_TARGET_RTM
2770c3b5009aSHansang Bae static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2771c3b5009aSHansang Bae kmp_int32 gtid) {
2772dae13d81SJonathan Peyton unsigned retries = 3, status;
2773dae13d81SJonathan Peyton do {
2774dae13d81SJonathan Peyton status = _xbegin();
2775dae13d81SJonathan Peyton if (status == _XBEGIN_STARTED) {
2776dae13d81SJonathan Peyton if (__kmp_is_unlocked_queuing_lock(lck))
2777dae13d81SJonathan Peyton return;
2778dae13d81SJonathan Peyton _xabort(0xff);
2779dae13d81SJonathan Peyton }
2780dae13d81SJonathan Peyton if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2781dae13d81SJonathan Peyton // Wait until lock becomes free
2782e47d32f1SJonathan Peyton while (!__kmp_is_unlocked_queuing_lock(lck)) {
2783e47d32f1SJonathan Peyton KMP_YIELD(TRUE);
2784e47d32f1SJonathan Peyton }
27853041982dSJonathan Peyton } else if (!(status & _XABORT_RETRY))
2786dae13d81SJonathan Peyton break;
2787dae13d81SJonathan Peyton } while (retries--);
2788dae13d81SJonathan Peyton
2789dae13d81SJonathan Peyton // Fall-back non-speculative lock (xchg)
2790dae13d81SJonathan Peyton __kmp_acquire_queuing_lock(lck, gtid);
2791dae13d81SJonathan Peyton }
2792dae13d81SJonathan Peyton
2793c3b5009aSHansang Bae static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
27943041982dSJonathan Peyton kmp_int32 gtid) {
2795c3b5009aSHansang Bae __kmp_acquire_rtm_queuing_lock(lck, gtid);
2796dae13d81SJonathan Peyton }
2797dae13d81SJonathan Peyton
2798e0665a90STerry Wilmarth KMP_ATTRIBUTE_TARGET_RTM
2799c3b5009aSHansang Bae static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2800c3b5009aSHansang Bae kmp_int32 gtid) {
2801dae13d81SJonathan Peyton if (__kmp_is_unlocked_queuing_lock(lck)) {
2802dae13d81SJonathan Peyton // Releasing from speculation
2803dae13d81SJonathan Peyton _xend();
28043041982dSJonathan Peyton } else {
2805dae13d81SJonathan Peyton // Releasing from a real lock
2806dae13d81SJonathan Peyton __kmp_release_queuing_lock(lck, gtid);
2807dae13d81SJonathan Peyton }
2808dae13d81SJonathan Peyton return KMP_LOCK_RELEASED;
2809dae13d81SJonathan Peyton }
2810dae13d81SJonathan Peyton
2811c3b5009aSHansang Bae static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
28123041982dSJonathan Peyton kmp_int32 gtid) {
2813c3b5009aSHansang Bae return __kmp_release_rtm_queuing_lock(lck, gtid);
2814dae13d81SJonathan Peyton }
2815dae13d81SJonathan Peyton
2816e0665a90STerry Wilmarth KMP_ATTRIBUTE_TARGET_RTM
2817c3b5009aSHansang Bae static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2818c3b5009aSHansang Bae kmp_int32 gtid) {
2819dae13d81SJonathan Peyton unsigned retries = 3, status;
2820dae13d81SJonathan Peyton do {
2821dae13d81SJonathan Peyton status = _xbegin();
2822dae13d81SJonathan Peyton if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2823dae13d81SJonathan Peyton return 1;
2824dae13d81SJonathan Peyton }
2825dae13d81SJonathan Peyton if (!(status & _XABORT_RETRY))
2826dae13d81SJonathan Peyton break;
2827dae13d81SJonathan Peyton } while (retries--);
2828dae13d81SJonathan Peyton
2829c3b5009aSHansang Bae return __kmp_test_queuing_lock(lck, gtid);
2830dae13d81SJonathan Peyton }
2831dae13d81SJonathan Peyton
2832c3b5009aSHansang Bae static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
28333041982dSJonathan Peyton kmp_int32 gtid) {
2834c3b5009aSHansang Bae return __kmp_test_rtm_queuing_lock(lck, gtid);
2835c3b5009aSHansang Bae }
2836c3b5009aSHansang Bae
2837c3b5009aSHansang Bae // Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin lock.
2838c3b5009aSHansang Bae typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2839c3b5009aSHansang Bae
2840c3b5009aSHansang Bae static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2841c3b5009aSHansang Bae KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2842c3b5009aSHansang Bae }
2843c3b5009aSHansang Bae
2844c3b5009aSHansang Bae static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2845c3b5009aSHansang Bae __kmp_destroy_rtm_spin_lock(lck);
2846c3b5009aSHansang Bae }
2847c3b5009aSHansang Bae
2848c3b5009aSHansang Bae KMP_ATTRIBUTE_TARGET_RTM
2849c3b5009aSHansang Bae static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2850c3b5009aSHansang Bae kmp_int32 gtid) {
2851c3b5009aSHansang Bae unsigned retries = 3, status;
2852c3b5009aSHansang Bae kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2853c3b5009aSHansang Bae kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2854c3b5009aSHansang Bae do {
2855c3b5009aSHansang Bae status = _xbegin();
2856c3b5009aSHansang Bae if (status == _XBEGIN_STARTED) {
2857c3b5009aSHansang Bae if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2858c3b5009aSHansang Bae return KMP_LOCK_ACQUIRED_FIRST;
2859c3b5009aSHansang Bae _xabort(0xff);
2860c3b5009aSHansang Bae }
2861c3b5009aSHansang Bae if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2862c3b5009aSHansang Bae // Wait until lock becomes free
2863c3b5009aSHansang Bae while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2864c3b5009aSHansang Bae KMP_YIELD(TRUE);
2865c3b5009aSHansang Bae }
2866c3b5009aSHansang Bae } else if (!(status & _XABORT_RETRY))
2867c3b5009aSHansang Bae break;
2868c3b5009aSHansang Bae } while (retries--);
2869c3b5009aSHansang Bae
2870c3b5009aSHansang Bae // Fall-back spin lock
2871c3b5009aSHansang Bae KMP_FSYNC_PREPARE(lck);
2872c3b5009aSHansang Bae kmp_backoff_t backoff = __kmp_spin_backoff_params;
2873c3b5009aSHansang Bae while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2874c3b5009aSHansang Bae !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2875c3b5009aSHansang Bae __kmp_spin_backoff(&backoff);
2876c3b5009aSHansang Bae }
2877c3b5009aSHansang Bae KMP_FSYNC_ACQUIRED(lck);
2878c3b5009aSHansang Bae return KMP_LOCK_ACQUIRED_FIRST;
2879c3b5009aSHansang Bae }
2880c3b5009aSHansang Bae
2881c3b5009aSHansang Bae static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2882c3b5009aSHansang Bae kmp_int32 gtid) {
2883c3b5009aSHansang Bae return __kmp_acquire_rtm_spin_lock(lck, gtid);
2884c3b5009aSHansang Bae }
2885c3b5009aSHansang Bae
2886c3b5009aSHansang Bae KMP_ATTRIBUTE_TARGET_RTM
2887c3b5009aSHansang Bae static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2888c3b5009aSHansang Bae kmp_int32 gtid) {
2889c3b5009aSHansang Bae if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2890c3b5009aSHansang Bae // Releasing from speculation
2891c3b5009aSHansang Bae _xend();
2892c3b5009aSHansang Bae } else {
2893c3b5009aSHansang Bae // Releasing from a real lock
2894c3b5009aSHansang Bae KMP_FSYNC_RELEASING(lck);
2895c3b5009aSHansang Bae KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2896c3b5009aSHansang Bae }
2897c3b5009aSHansang Bae return KMP_LOCK_RELEASED;
2898c3b5009aSHansang Bae }
2899c3b5009aSHansang Bae
2900c3b5009aSHansang Bae static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2901c3b5009aSHansang Bae kmp_int32 gtid) {
2902c3b5009aSHansang Bae return __kmp_release_rtm_spin_lock(lck, gtid);
2903c3b5009aSHansang Bae }
2904c3b5009aSHansang Bae
2905c3b5009aSHansang Bae KMP_ATTRIBUTE_TARGET_RTM
2906c3b5009aSHansang Bae static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2907c3b5009aSHansang Bae unsigned retries = 3, status;
2908c3b5009aSHansang Bae kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2909c3b5009aSHansang Bae kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2910c3b5009aSHansang Bae do {
2911c3b5009aSHansang Bae status = _xbegin();
2912c3b5009aSHansang Bae if (status == _XBEGIN_STARTED &&
2913c3b5009aSHansang Bae KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2914c3b5009aSHansang Bae return TRUE;
2915c3b5009aSHansang Bae }
2916c3b5009aSHansang Bae if (!(status & _XABORT_RETRY))
2917c3b5009aSHansang Bae break;
2918c3b5009aSHansang Bae } while (retries--);
2919c3b5009aSHansang Bae
2920c3b5009aSHansang Bae if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2921c3b5009aSHansang Bae __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2922c3b5009aSHansang Bae KMP_FSYNC_ACQUIRED(lck);
2923c3b5009aSHansang Bae return TRUE;
2924c3b5009aSHansang Bae }
2925c3b5009aSHansang Bae return FALSE;
2926c3b5009aSHansang Bae }
2927c3b5009aSHansang Bae
2928c3b5009aSHansang Bae static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2929c3b5009aSHansang Bae kmp_int32 gtid) {
2930c3b5009aSHansang Bae return __kmp_test_rtm_spin_lock(lck, gtid);
2931dae13d81SJonathan Peyton }
2932dae13d81SJonathan Peyton
2933dae13d81SJonathan Peyton #endif // KMP_USE_TSX
2934dae13d81SJonathan Peyton
29353041982dSJonathan Peyton // Entry functions for indirect locks (first element of direct lock jump tables)
29363041982dSJonathan Peyton static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
29373041982dSJonathan Peyton kmp_dyna_lockseq_t tag);
29385c56fb55SAndrey Churbanov static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
293982e94a59SJoachim Protze static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2940a03533d3SJonathan Peyton static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
29415c56fb55SAndrey Churbanov static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
294282e94a59SJoachim Protze static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
29433041982dSJonathan Peyton kmp_int32);
29443041982dSJonathan Peyton static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
29453041982dSJonathan Peyton kmp_int32);
29463041982dSJonathan Peyton static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
29473041982dSJonathan Peyton kmp_int32);
29485c56fb55SAndrey Churbanov
2949fce39725SJonathan Peyton // Lock function definitions for the union parameter type
2950fce39725SJonathan Peyton #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2951fce39725SJonathan Peyton
2952fce39725SJonathan Peyton #define expand1(lk, op) \
2953fce39725SJonathan Peyton static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2954fce39725SJonathan Peyton __kmp_##op##_##lk##_##lock(&lock->lk); \
2955fce39725SJonathan Peyton }
2956fce39725SJonathan Peyton #define expand2(lk, op) \
2957fce39725SJonathan Peyton static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2958fce39725SJonathan Peyton kmp_int32 gtid) { \
2959fce39725SJonathan Peyton return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
2960fce39725SJonathan Peyton }
2961fce39725SJonathan Peyton #define expand3(lk, op) \
2962fce39725SJonathan Peyton static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2963fce39725SJonathan Peyton kmp_lock_flags_t flags) { \
2964fce39725SJonathan Peyton __kmp_set_##lk##_lock_flags(&lock->lk, flags); \
2965fce39725SJonathan Peyton }
2966fce39725SJonathan Peyton #define expand4(lk, op) \
2967fce39725SJonathan Peyton static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2968fce39725SJonathan Peyton const ident_t *loc) { \
2969fce39725SJonathan Peyton __kmp_set_##lk##_lock_location(&lock->lk, loc); \
2970fce39725SJonathan Peyton }
2971fce39725SJonathan Peyton
2972fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand1, init)
2973fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2974fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand1, destroy)
2975fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2976fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand2, acquire)
2977fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2978fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand2, release)
2979fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2980fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand2, test)
2981fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2982fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand3, )
2983fce39725SJonathan Peyton KMP_FOREACH_LOCK_KIND(expand4, )
2984fce39725SJonathan Peyton
2985fce39725SJonathan Peyton #undef expand1
2986fce39725SJonathan Peyton #undef expand2
2987fce39725SJonathan Peyton #undef expand3
2988fce39725SJonathan Peyton #undef expand4
2989fce39725SJonathan Peyton
29903041982dSJonathan Peyton // Jump tables for the direct lock functions
29913041982dSJonathan Peyton // Only the odd entries are filled in, which avoids the need to shift out the low bit
29925c56fb55SAndrey Churbanov
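/* A minimal sketch of the layout, with hypothetical names (illustration
   only, not runtime code): slot 0 holds the indirect-lock entry function,
   and each direct lock's function sits at its odd tag value, so the tag
   taken straight from the lock word indexes the table without first
   shifting off its low bit:

     typedef void (*fn_t)(void *);
     fn_t jump[] = {op_indirect, 0, 0, op_tas, 0, op_futex};
     // jump[tag](lock): tag is 0 for indirect locks, odd for direct ones;
     // the remaining even slots stay 0 and are never indexed.
*/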
2993a03533d3SJonathan Peyton // init functions
2994a03533d3SJonathan Peyton #define expand(l, op) 0, __kmp_init_direct_lock,
29953041982dSJonathan Peyton void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
29963041982dSJonathan Peyton __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2997a03533d3SJonathan Peyton #undef expand
29985c56fb55SAndrey Churbanov
2999a03533d3SJonathan Peyton // destroy functions
3000a03533d3SJonathan Peyton #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3001baad3f60SJonathan Peyton static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
3002baad3f60SJonathan Peyton __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3003baad3f60SJonathan Peyton #undef expand
3004baad3f60SJonathan Peyton #define expand(l, op) \
3005baad3f60SJonathan Peyton 0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
3006baad3f60SJonathan Peyton static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
30073041982dSJonathan Peyton __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3008a03533d3SJonathan Peyton #undef expand
30095c56fb55SAndrey Churbanov
3010a03533d3SJonathan Peyton // set/acquire functions
30113041982dSJonathan Peyton #define expand(l, op) \
301282e94a59SJoachim Protze 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
301382e94a59SJoachim Protze static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
30143041982dSJonathan Peyton __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
3015a03533d3SJonathan Peyton #undef expand
30163041982dSJonathan Peyton #define expand(l, op) \
301782e94a59SJoachim Protze 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
301882e94a59SJoachim Protze static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
30193041982dSJonathan Peyton __kmp_set_indirect_lock_with_checks, 0,
30203041982dSJonathan Peyton KMP_FOREACH_D_LOCK(expand, acquire)};
3021a03533d3SJonathan Peyton #undef expand
3022a03533d3SJonathan Peyton
3023a03533d3SJonathan Peyton // unset/release and test functions
30243041982dSJonathan Peyton #define expand(l, op) \
30253041982dSJonathan Peyton 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
30263041982dSJonathan Peyton static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
30273041982dSJonathan Peyton __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
30283041982dSJonathan Peyton static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
30293041982dSJonathan Peyton __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3030a03533d3SJonathan Peyton #undef expand
30313041982dSJonathan Peyton #define expand(l, op) \
30323041982dSJonathan Peyton 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
30333041982dSJonathan Peyton static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
30343041982dSJonathan Peyton __kmp_unset_indirect_lock_with_checks, 0,
30353041982dSJonathan Peyton KMP_FOREACH_D_LOCK(expand, release)};
30363041982dSJonathan Peyton static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
30373041982dSJonathan Peyton __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3038a03533d3SJonathan Peyton #undef expand
30395c56fb55SAndrey Churbanov
30405c56fb55SAndrey Churbanov // Exposes only one set of jump tables (*lock or *lock_with_checks).
30414d77e50eSJonas Hahnfeld void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
30424d77e50eSJonas Hahnfeld int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
30434d77e50eSJonas Hahnfeld int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
30444d77e50eSJonas Hahnfeld int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
30455c56fb55SAndrey Churbanov
30463041982dSJonathan Peyton // Jump tables for the indirect lock functions
3047a03533d3SJonathan Peyton #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
30483041982dSJonathan Peyton void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
30493041982dSJonathan Peyton KMP_FOREACH_I_LOCK(expand, init)};
3050baad3f60SJonathan Peyton #undef expand
3051baad3f60SJonathan Peyton
3052baad3f60SJonathan Peyton #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3053baad3f60SJonathan Peyton static void (*indirect_destroy[])(kmp_user_lock_p) = {
3054baad3f60SJonathan Peyton KMP_FOREACH_I_LOCK(expand, destroy)};
3055baad3f60SJonathan Peyton #undef expand
3056baad3f60SJonathan Peyton #define expand(l, op) \
3057baad3f60SJonathan Peyton (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3058baad3f60SJonathan Peyton static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
30593041982dSJonathan Peyton KMP_FOREACH_I_LOCK(expand, destroy)};
3060a03533d3SJonathan Peyton #undef expand
30615c56fb55SAndrey Churbanov
3062a03533d3SJonathan Peyton // set/acquire functions
30633041982dSJonathan Peyton #define expand(l, op) \
306482e94a59SJoachim Protze (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
306582e94a59SJoachim Protze static int (*indirect_set[])(kmp_user_lock_p,
306682e94a59SJoachim Protze kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3067a03533d3SJonathan Peyton #undef expand
30683041982dSJonathan Peyton #define expand(l, op) \
306982e94a59SJoachim Protze (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
307082e94a59SJoachim Protze static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
30713041982dSJonathan Peyton KMP_FOREACH_I_LOCK(expand, acquire)};
3072a03533d3SJonathan Peyton #undef expand
30735c56fb55SAndrey Churbanov
3074a03533d3SJonathan Peyton // unset/release and test functions
30753041982dSJonathan Peyton #define expand(l, op) \
30763041982dSJonathan Peyton (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
30773041982dSJonathan Peyton static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
30783041982dSJonathan Peyton KMP_FOREACH_I_LOCK(expand, release)};
30793041982dSJonathan Peyton static int (*indirect_test[])(kmp_user_lock_p,
30803041982dSJonathan Peyton kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3081a03533d3SJonathan Peyton #undef expand
30823041982dSJonathan Peyton #define expand(l, op) \
30833041982dSJonathan Peyton (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
30843041982dSJonathan Peyton static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
30853041982dSJonathan Peyton KMP_FOREACH_I_LOCK(expand, release)};
30863041982dSJonathan Peyton static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
30873041982dSJonathan Peyton KMP_FOREACH_I_LOCK(expand, test)};
3088a03533d3SJonathan Peyton #undef expand
30895c56fb55SAndrey Churbanov
3090a03533d3SJonathan Peyton // Exposes only one set of jump tables (*lock or *lock_with_checks).
30914d77e50eSJonas Hahnfeld void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
30924d77e50eSJonas Hahnfeld int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
30934d77e50eSJonas Hahnfeld int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
30944d77e50eSJonas Hahnfeld int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
30955c56fb55SAndrey Churbanov
30965c56fb55SAndrey Churbanov // Lock index table.
3097dae13d81SJonathan Peyton kmp_indirect_lock_table_t __kmp_i_lock_table;
30985c56fb55SAndrey Churbanov
30995c56fb55SAndrey Churbanov // Size of indirect locks.
3100dae13d81SJonathan Peyton static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
31015c56fb55SAndrey Churbanov
31025c56fb55SAndrey Churbanov // Jump tables for lock accessor/modifier.
31033041982dSJonathan Peyton void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
31043041982dSJonathan Peyton const ident_t *) = {0};
31053041982dSJonathan Peyton void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
31063041982dSJonathan Peyton kmp_lock_flags_t) = {0};
31073041982dSJonathan Peyton const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
31083041982dSJonathan Peyton kmp_user_lock_p) = {0};
31093041982dSJonathan Peyton kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
31103041982dSJonathan Peyton kmp_user_lock_p) = {0};
31115c56fb55SAndrey Churbanov
31125c56fb55SAndrey Churbanov // Use different lock pools for different lock types.
3113f2d119ffSJonathan Peyton static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
31145c56fb55SAndrey Churbanov
31153041982dSJonathan Peyton // User lock allocator for dynamically dispatched indirect locks. Every entry of
311642016791SKazuaki Ishizaki // the indirect lock table holds the address and type of the allocated indirect
31173041982dSJonathan Peyton // lock (kmp_indirect_lock_t), and the size of the table doubles when it is
31183041982dSJonathan Peyton // full. A destroyed indirect lock object is returned to the reusable pool of
31193041982dSJonathan Peyton // locks, unique to each lock type.
31203041982dSJonathan Peyton kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
31213041982dSJonathan Peyton kmp_int32 gtid,
31223041982dSJonathan Peyton kmp_indirect_locktag_t tag) {
31235c56fb55SAndrey Churbanov kmp_indirect_lock_t *lck;
31241e45cd75SPeyton, Jonathan L kmp_lock_index_t idx, table_idx;
31255c56fb55SAndrey Churbanov
31265c56fb55SAndrey Churbanov __kmp_acquire_lock(&__kmp_global_lock, gtid);
31275c56fb55SAndrey Churbanov
31285c56fb55SAndrey Churbanov if (__kmp_indirect_lock_pool[tag] != NULL) {
3129dae13d81SJonathan Peyton // Reuse the allocated and destroyed lock object
31305c56fb55SAndrey Churbanov lck = __kmp_indirect_lock_pool[tag];
31315c56fb55SAndrey Churbanov if (OMP_LOCK_T_SIZE < sizeof(void *))
31325c56fb55SAndrey Churbanov idx = lck->lock->pool.index;
31335c56fb55SAndrey Churbanov __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
31343041982dSJonathan Peyton KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
31353041982dSJonathan Peyton lck));
31365c56fb55SAndrey Churbanov } else {
31371e45cd75SPeyton, Jonathan L kmp_uint32 row, col;
31381e45cd75SPeyton, Jonathan L kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
31391e45cd75SPeyton, Jonathan L idx = 0;
31401e45cd75SPeyton, Jonathan L // Find location in list of lock tables to put new lock
31411e45cd75SPeyton, Jonathan L while (1) {
31421e45cd75SPeyton, Jonathan L table_idx = lock_table->next; // index within this table
31431e45cd75SPeyton, Jonathan L idx += lock_table->next; // global index within list of tables
31441e45cd75SPeyton, Jonathan L if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
31451e45cd75SPeyton, Jonathan L row = table_idx / KMP_I_LOCK_CHUNK;
31461e45cd75SPeyton, Jonathan L col = table_idx % KMP_I_LOCK_CHUNK;
31471e45cd75SPeyton, Jonathan L // Allocate a new row of locks if necessary
31481e45cd75SPeyton, Jonathan L if (!lock_table->table[row]) {
31491e45cd75SPeyton, Jonathan L lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
31501e45cd75SPeyton, Jonathan L sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
3151dae13d81SJonathan Peyton }
31521e45cd75SPeyton, Jonathan L break;
31531e45cd75SPeyton, Jonathan L }
31541e45cd75SPeyton, Jonathan L // Allocate a new lock table if necessary with double the capacity
31551e45cd75SPeyton, Jonathan L if (!lock_table->next_table) {
31561e45cd75SPeyton, Jonathan L kmp_indirect_lock_table_t *next_table =
31571e45cd75SPeyton, Jonathan L (kmp_indirect_lock_table_t *)__kmp_allocate(
31581e45cd75SPeyton, Jonathan L sizeof(kmp_indirect_lock_table_t));
31591e45cd75SPeyton, Jonathan L next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
31601e45cd75SPeyton, Jonathan L sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
31611e45cd75SPeyton, Jonathan L next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
31621e45cd75SPeyton, Jonathan L next_table->next = 0;
31631e45cd75SPeyton, Jonathan L next_table->next_table = nullptr;
31641e45cd75SPeyton, Jonathan L lock_table->next_table = next_table;
31651e45cd75SPeyton, Jonathan L }
31661e45cd75SPeyton, Jonathan L lock_table = lock_table->next_table;
31671e45cd75SPeyton, Jonathan L KMP_ASSERT(lock_table);
31681e45cd75SPeyton, Jonathan L }
31691e45cd75SPeyton, Jonathan L lock_table->next++;
31701e45cd75SPeyton, Jonathan L
31711e45cd75SPeyton, Jonathan L lck = &lock_table->table[row][col];
3172dae13d81SJonathan Peyton // Allocate a new base lock object
31735c56fb55SAndrey Churbanov lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
31743041982dSJonathan Peyton KA_TRACE(20,
31753041982dSJonathan Peyton ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
31765c56fb55SAndrey Churbanov }
31775c56fb55SAndrey Churbanov
31785c56fb55SAndrey Churbanov __kmp_release_lock(&__kmp_global_lock, gtid);
31795c56fb55SAndrey Churbanov
31805c56fb55SAndrey Churbanov lck->type = tag;
31815c56fb55SAndrey Churbanov
31825c56fb55SAndrey Churbanov if (OMP_LOCK_T_SIZE < sizeof(void *)) {
31833041982dSJonathan Peyton *((kmp_lock_index_t *)user_lock) = idx
31843041982dSJonathan Peyton << 1; // indirect lock word must be even
31855c56fb55SAndrey Churbanov } else {
31865c56fb55SAndrey Churbanov *((kmp_indirect_lock_t **)user_lock) = lck;
31875c56fb55SAndrey Churbanov }
31885c56fb55SAndrey Churbanov
31895c56fb55SAndrey Churbanov return lck;
31905c56fb55SAndrey Churbanov }
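/* Sketch of the index encoding used above (assuming KMP_EXTRACT_I_INDEX
   performs the inverse shift): when omp_lock_t is too small to hold a
   pointer, the global table index is stored shifted left by one so the lock
   word stays even, which is what lets the dispatch layer distinguish it
   from an odd direct-lock tag:

     *(kmp_lock_index_t *)user_lock = idx << 1;  // store: always even
     idx = *(kmp_lock_index_t *)user_lock >> 1;  // recover the index
*/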
31915c56fb55SAndrey Churbanov
31925c56fb55SAndrey Churbanov // User lock lookup for dynamically dispatched locks.
31933041982dSJonathan Peyton static __forceinline kmp_indirect_lock_t *
31943041982dSJonathan Peyton __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
31955c56fb55SAndrey Churbanov if (__kmp_env_consistency_check) {
31965c56fb55SAndrey Churbanov kmp_indirect_lock_t *lck = NULL;
31975c56fb55SAndrey Churbanov if (user_lock == NULL) {
31985c56fb55SAndrey Churbanov KMP_FATAL(LockIsUninitialized, func);
31995c56fb55SAndrey Churbanov }
32005c56fb55SAndrey Churbanov if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3201f2d119ffSJonathan Peyton kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
32021e45cd75SPeyton, Jonathan L lck = __kmp_get_i_lock(idx);
32035c56fb55SAndrey Churbanov } else {
32045c56fb55SAndrey Churbanov lck = *((kmp_indirect_lock_t **)user_lock);
32055c56fb55SAndrey Churbanov }
32065c56fb55SAndrey Churbanov if (lck == NULL) {
32075c56fb55SAndrey Churbanov KMP_FATAL(LockIsUninitialized, func);
32085c56fb55SAndrey Churbanov }
32095c56fb55SAndrey Churbanov return lck;
32105c56fb55SAndrey Churbanov } else {
32115c56fb55SAndrey Churbanov if (OMP_LOCK_T_SIZE < sizeof(void *)) {
32121e45cd75SPeyton, Jonathan L return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
32135c56fb55SAndrey Churbanov } else {
32145c56fb55SAndrey Churbanov return *((kmp_indirect_lock_t **)user_lock);
32155c56fb55SAndrey Churbanov }
32165c56fb55SAndrey Churbanov }
32175c56fb55SAndrey Churbanov }
32185c56fb55SAndrey Churbanov
32193041982dSJonathan Peyton static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
32203041982dSJonathan Peyton kmp_dyna_lockseq_t seq) {
32215c56fb55SAndrey Churbanov #if KMP_USE_ADAPTIVE_LOCKS
3222343b9e85SPeyton, Jonathan L if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {
32235c56fb55SAndrey Churbanov KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
32245c56fb55SAndrey Churbanov seq = lockseq_queuing;
32255c56fb55SAndrey Churbanov }
32265c56fb55SAndrey Churbanov #endif
3227dae13d81SJonathan Peyton #if KMP_USE_TSX
3228343b9e85SPeyton, Jonathan L if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {
3229dae13d81SJonathan Peyton seq = lockseq_queuing;
3230dae13d81SJonathan Peyton }
3231dae13d81SJonathan Peyton #endif
3232f2d119ffSJonathan Peyton kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
32333041982dSJonathan Peyton kmp_indirect_lock_t *l =
32343041982dSJonathan Peyton __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3235f2d119ffSJonathan Peyton KMP_I_LOCK_FUNC(l, init)(l->lock);
32363041982dSJonathan Peyton KA_TRACE(
32373041982dSJonathan Peyton 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
32383041982dSJonathan Peyton seq));
32395c56fb55SAndrey Churbanov }
32405c56fb55SAndrey Churbanov
32413041982dSJonathan Peyton static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
32425c56fb55SAndrey Churbanov kmp_uint32 gtid = __kmp_entry_gtid();
32433041982dSJonathan Peyton kmp_indirect_lock_t *l =
32443041982dSJonathan Peyton __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3245f2d119ffSJonathan Peyton KMP_I_LOCK_FUNC(l, destroy)(l->lock);
32465c56fb55SAndrey Churbanov kmp_indirect_locktag_t tag = l->type;
32475c56fb55SAndrey Churbanov
32485c56fb55SAndrey Churbanov __kmp_acquire_lock(&__kmp_global_lock, gtid);
32495c56fb55SAndrey Churbanov
32505c56fb55SAndrey Churbanov // Use the base lock's space to keep the pool chain.
32515c56fb55SAndrey Churbanov l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
32525c56fb55SAndrey Churbanov if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3253f2d119ffSJonathan Peyton l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
32545c56fb55SAndrey Churbanov }
32555c56fb55SAndrey Churbanov __kmp_indirect_lock_pool[tag] = l;
32565c56fb55SAndrey Churbanov
32575c56fb55SAndrey Churbanov __kmp_release_lock(&__kmp_global_lock, gtid);
32585c56fb55SAndrey Churbanov }
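/* Note on the recycling above: a destroyed lock's own storage doubles as a
   free-list node. pool.next chains it onto __kmp_indirect_lock_pool[tag],
   and pool.index (when locks are referenced by index rather than pointer)
   remembers the table slot, so the per-type pools need no extra
   allocations. */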
32595c56fb55SAndrey Churbanov
326082e94a59SJoachim Protze static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3261f2d119ffSJonathan Peyton kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
326282e94a59SJoachim Protze return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
32635c56fb55SAndrey Churbanov }
32645c56fb55SAndrey Churbanov
32653041982dSJonathan Peyton static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3266f2d119ffSJonathan Peyton kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3267a03533d3SJonathan Peyton return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
32685c56fb55SAndrey Churbanov }
32695c56fb55SAndrey Churbanov
32703041982dSJonathan Peyton static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3271f2d119ffSJonathan Peyton kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3272f2d119ffSJonathan Peyton return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
32735c56fb55SAndrey Churbanov }
32745c56fb55SAndrey Churbanov
327582e94a59SJoachim Protze static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
32763041982dSJonathan Peyton kmp_int32 gtid) {
32773041982dSJonathan Peyton kmp_indirect_lock_t *l =
32783041982dSJonathan Peyton __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
327982e94a59SJoachim Protze return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
32805c56fb55SAndrey Churbanov }
32815c56fb55SAndrey Churbanov
32823041982dSJonathan Peyton static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
32833041982dSJonathan Peyton kmp_int32 gtid) {
32843041982dSJonathan Peyton kmp_indirect_lock_t *l =
32853041982dSJonathan Peyton __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3286a03533d3SJonathan Peyton return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
32875c56fb55SAndrey Churbanov }
32885c56fb55SAndrey Churbanov
32893041982dSJonathan Peyton static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
32903041982dSJonathan Peyton kmp_int32 gtid) {
32913041982dSJonathan Peyton kmp_indirect_lock_t *l =
32923041982dSJonathan Peyton __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3293f2d119ffSJonathan Peyton return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
32945c56fb55SAndrey Churbanov }
32955c56fb55SAndrey Churbanov
32965c56fb55SAndrey Churbanov kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
32975c56fb55SAndrey Churbanov
3298de4749b7SJonathan Peyton // This is used only in kmp_error.cpp when consistency checking is on.
32993041982dSJonathan Peyton kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
33005c56fb55SAndrey Churbanov switch (seq) {
33015c56fb55SAndrey Churbanov case lockseq_tas:
33025c56fb55SAndrey Churbanov case lockseq_nested_tas:
33035c56fb55SAndrey Churbanov return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3304fb043fdfSPaul Osmialowski #if KMP_USE_FUTEX
33055c56fb55SAndrey Churbanov case lockseq_futex:
33065c56fb55SAndrey Churbanov case lockseq_nested_futex:
33075c56fb55SAndrey Churbanov return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
33085c56fb55SAndrey Churbanov #endif
33095c56fb55SAndrey Churbanov case lockseq_ticket:
33105c56fb55SAndrey Churbanov case lockseq_nested_ticket:
33115c56fb55SAndrey Churbanov return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
33125c56fb55SAndrey Churbanov case lockseq_queuing:
33135c56fb55SAndrey Churbanov case lockseq_nested_queuing:
33145c56fb55SAndrey Churbanov #if KMP_USE_ADAPTIVE_LOCKS
33155c56fb55SAndrey Churbanov case lockseq_adaptive:
33165c56fb55SAndrey Churbanov #endif
33171d46d979SJonathan Peyton return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
33185c56fb55SAndrey Churbanov case lockseq_drdpa:
33195c56fb55SAndrey Churbanov case lockseq_nested_drdpa:
33205c56fb55SAndrey Churbanov return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
33215c56fb55SAndrey Churbanov default:
33225c56fb55SAndrey Churbanov return 0;
33235c56fb55SAndrey Churbanov }
33245c56fb55SAndrey Churbanov }
33255c56fb55SAndrey Churbanov
33265c56fb55SAndrey Churbanov // Initializes data for dynamic user locks.
33273041982dSJonathan Peyton void __kmp_init_dynamic_user_locks() {
3328a03533d3SJonathan Peyton // Initialize jump table for the lock functions
3329a03533d3SJonathan Peyton if (__kmp_env_consistency_check) {
3330a03533d3SJonathan Peyton __kmp_direct_set = direct_set_check;
3331a03533d3SJonathan Peyton __kmp_direct_unset = direct_unset_check;
3332a03533d3SJonathan Peyton __kmp_direct_test = direct_test_check;
3333baad3f60SJonathan Peyton __kmp_direct_destroy = direct_destroy_check;
3334a03533d3SJonathan Peyton __kmp_indirect_set = indirect_set_check;
3335a03533d3SJonathan Peyton __kmp_indirect_unset = indirect_unset_check;
3336a03533d3SJonathan Peyton __kmp_indirect_test = indirect_test_check;
3337baad3f60SJonathan Peyton __kmp_indirect_destroy = indirect_destroy_check;
33383041982dSJonathan Peyton } else {
3339a03533d3SJonathan Peyton __kmp_direct_set = direct_set;
3340a03533d3SJonathan Peyton __kmp_direct_unset = direct_unset;
3341a03533d3SJonathan Peyton __kmp_direct_test = direct_test;
3342baad3f60SJonathan Peyton __kmp_direct_destroy = direct_destroy;
3343a03533d3SJonathan Peyton __kmp_indirect_set = indirect_set;
3344a03533d3SJonathan Peyton __kmp_indirect_unset = indirect_unset;
3345a03533d3SJonathan Peyton __kmp_indirect_test = indirect_test;
3346baad3f60SJonathan Peyton __kmp_indirect_destroy = indirect_destroy;
3347a03533d3SJonathan Peyton }
33483041982dSJonathan Peyton // If the user locks have already been initialized, then return. Allow the
33493041982dSJonathan Peyton // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
33503041982dSJonathan Peyton // new lock tables if they have already been allocated.
33517f976d55SJonathan Peyton if (__kmp_init_user_locks)
33527f976d55SJonathan Peyton return;
3353a03533d3SJonathan Peyton
3354a03533d3SJonathan Peyton // Initialize lock index table
33551e45cd75SPeyton, Jonathan L __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
33561e45cd75SPeyton, Jonathan L __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
33571e45cd75SPeyton, Jonathan L sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
33583041982dSJonathan Peyton *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
33593041982dSJonathan Peyton KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3360dae13d81SJonathan Peyton __kmp_i_lock_table.next = 0;
33611e45cd75SPeyton, Jonathan L __kmp_i_lock_table.next_table = nullptr;
3362dae13d81SJonathan Peyton
3363dae13d81SJonathan Peyton // Indirect lock size
3364dae13d81SJonathan Peyton __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3365dae13d81SJonathan Peyton __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3366dae13d81SJonathan Peyton #if KMP_USE_ADAPTIVE_LOCKS
3367dae13d81SJonathan Peyton __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3368dae13d81SJonathan Peyton #endif
3369dae13d81SJonathan Peyton __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3370dae13d81SJonathan Peyton #if KMP_USE_TSX
3371c3b5009aSHansang Bae __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
3372dae13d81SJonathan Peyton #endif
3373dae13d81SJonathan Peyton __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3374dae13d81SJonathan Peyton #if KMP_USE_FUTEX
3375dae13d81SJonathan Peyton __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3376dae13d81SJonathan Peyton #endif
3377dae13d81SJonathan Peyton __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3378dae13d81SJonathan Peyton __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3379dae13d81SJonathan Peyton __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
33805c56fb55SAndrey Churbanov
33815c56fb55SAndrey Churbanov // Initialize lock accessor/modifier
33823041982dSJonathan Peyton #define fill_jumps(table, expand, sep) \
33833041982dSJonathan Peyton { \
3384dae13d81SJonathan Peyton table[locktag##sep##ticket] = expand(ticket); \
3385dae13d81SJonathan Peyton table[locktag##sep##queuing] = expand(queuing); \
3386dae13d81SJonathan Peyton table[locktag##sep##drdpa] = expand(drdpa); \
3387dae13d81SJonathan Peyton }
3388dae13d81SJonathan Peyton
3389dae13d81SJonathan Peyton #if KMP_USE_ADAPTIVE_LOCKS
33903041982dSJonathan Peyton #define fill_table(table, expand) \
33913041982dSJonathan Peyton { \
3392dae13d81SJonathan Peyton fill_jumps(table, expand, _); \
3393dae13d81SJonathan Peyton table[locktag_adaptive] = expand(queuing); \
3394dae13d81SJonathan Peyton fill_jumps(table, expand, _nested_); \
3395dae13d81SJonathan Peyton }
3396dae13d81SJonathan Peyton #else
33973041982dSJonathan Peyton #define fill_table(table, expand) \
33983041982dSJonathan Peyton { \
3399dae13d81SJonathan Peyton fill_jumps(table, expand, _); \
3400dae13d81SJonathan Peyton fill_jumps(table, expand, _nested_); \
3401dae13d81SJonathan Peyton }
3402dae13d81SJonathan Peyton #endif // KMP_USE_ADAPTIVE_LOCKS
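/* For reference, with KMP_USE_ADAPTIVE_LOCKS enabled an invocation such as
   fill_table(__kmp_indirect_set_location, expand) expands to:

     __kmp_indirect_set_location[locktag_ticket] = expand(ticket);
     __kmp_indirect_set_location[locktag_queuing] = expand(queuing);
     __kmp_indirect_set_location[locktag_drdpa] = expand(drdpa);
     __kmp_indirect_set_location[locktag_adaptive] = expand(queuing);
     __kmp_indirect_set_location[locktag_nested_ticket] = expand(ticket);
     __kmp_indirect_set_location[locktag_nested_queuing] = expand(queuing);
     __kmp_indirect_set_location[locktag_nested_drdpa] = expand(drdpa);

   where each expand(l), defined just below, casts the kind-specific function
   to the table's common pointer type. Adaptive locks map to the queuing
   accessors, mirroring the lk_adaptive case later in this file. */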
3403dae13d81SJonathan Peyton
34043041982dSJonathan Peyton #define expand(l) \
34053041982dSJonathan Peyton (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3406dae13d81SJonathan Peyton fill_table(__kmp_indirect_set_location, expand);
3407dae13d81SJonathan Peyton #undef expand
34083041982dSJonathan Peyton #define expand(l) \
34093041982dSJonathan Peyton (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3410dae13d81SJonathan Peyton fill_table(__kmp_indirect_set_flags, expand);
3411dae13d81SJonathan Peyton #undef expand
34123041982dSJonathan Peyton #define expand(l) \
34133041982dSJonathan Peyton (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3414dae13d81SJonathan Peyton fill_table(__kmp_indirect_get_location, expand);
3415dae13d81SJonathan Peyton #undef expand
34163041982dSJonathan Peyton #define expand(l) \
34173041982dSJonathan Peyton (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3418dae13d81SJonathan Peyton fill_table(__kmp_indirect_get_flags, expand);
3419dae13d81SJonathan Peyton #undef expand
34205c56fb55SAndrey Churbanov
34215c56fb55SAndrey Churbanov __kmp_init_user_locks = TRUE;
34225c56fb55SAndrey Churbanov }
34235c56fb55SAndrey Churbanov
34245c56fb55SAndrey Churbanov // Clean up the lock table.
34253041982dSJonathan Peyton void __kmp_cleanup_indirect_user_locks() {
34265c56fb55SAndrey Churbanov int k;
34275c56fb55SAndrey Churbanov
34283041982dSJonathan Peyton // Clean up locks in the pools first (they were already destroyed before going
34293041982dSJonathan Peyton // into the pools).
3430f2d119ffSJonathan Peyton for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
34315c56fb55SAndrey Churbanov kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
34325c56fb55SAndrey Churbanov while (l != NULL) {
34335c56fb55SAndrey Churbanov kmp_indirect_lock_t *ll = l;
34345c56fb55SAndrey Churbanov l = (kmp_indirect_lock_t *)l->lock->pool.next;
34353041982dSJonathan Peyton KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
34363041982dSJonathan Peyton ll));
34375c56fb55SAndrey Churbanov __kmp_free(ll->lock);
3438dae13d81SJonathan Peyton ll->lock = NULL;
34395c56fb55SAndrey Churbanov }
34403bd88d4cSJonathan Peyton __kmp_indirect_lock_pool[k] = NULL;
34415c56fb55SAndrey Churbanov }
34425c56fb55SAndrey Churbanov // Clean up the remaining undestroyed locks.
34431e45cd75SPeyton, Jonathan L kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
34441e45cd75SPeyton, Jonathan L while (ptr) {
34451e45cd75SPeyton, Jonathan L for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
34461e45cd75SPeyton, Jonathan L if (!ptr->table[row])
34471e45cd75SPeyton, Jonathan L continue;
34481e45cd75SPeyton, Jonathan L for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
34491e45cd75SPeyton, Jonathan L kmp_indirect_lock_t *l = &ptr->table[row][col];
34501e45cd75SPeyton, Jonathan L if (l->lock) {
34515c56fb55SAndrey Churbanov // Locks not destroyed explicitly need to be destroyed here.
3452f2d119ffSJonathan Peyton KMP_I_LOCK_FUNC(l, destroy)(l->lock);
34531e45cd75SPeyton, Jonathan L KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
34541e45cd75SPeyton, Jonathan L "from table\n",
34553041982dSJonathan Peyton l));
34565c56fb55SAndrey Churbanov __kmp_free(l->lock);
34575c56fb55SAndrey Churbanov }
34585c56fb55SAndrey Churbanov }
34591e45cd75SPeyton, Jonathan L __kmp_free(ptr->table[row]);
34601e45cd75SPeyton, Jonathan L }
34611e45cd75SPeyton, Jonathan L kmp_indirect_lock_table_t *next_table = ptr->next_table;
34621e45cd75SPeyton, Jonathan L if (ptr != &__kmp_i_lock_table)
34631e45cd75SPeyton, Jonathan L __kmp_free(ptr);
34641e45cd75SPeyton, Jonathan L ptr = next_table;
34651e45cd75SPeyton, Jonathan L }
34665c56fb55SAndrey Churbanov
34675c56fb55SAndrey Churbanov __kmp_init_user_locks = FALSE;
34685c56fb55SAndrey Churbanov }
34695c56fb55SAndrey Churbanov
34705c56fb55SAndrey Churbanov enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
34715c56fb55SAndrey Churbanov int __kmp_num_locks_in_block = 1; // FIXME - tune this value
34725c56fb55SAndrey Churbanov
34735c56fb55SAndrey Churbanov #else // KMP_USE_DYNAMIC_LOCK
34745c56fb55SAndrey Churbanov
3475baad3f60SJonathan Peyton static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3476baad3f60SJonathan Peyton __kmp_init_tas_lock(lck);
3477baad3f60SJonathan Peyton }
3478baad3f60SJonathan Peyton
3479baad3f60SJonathan Peyton static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3480baad3f60SJonathan Peyton __kmp_init_nested_tas_lock(lck);
3481baad3f60SJonathan Peyton }
3482baad3f60SJonathan Peyton
3483baad3f60SJonathan Peyton #if KMP_USE_FUTEX
3484baad3f60SJonathan Peyton static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3485baad3f60SJonathan Peyton __kmp_init_futex_lock(lck);
3486baad3f60SJonathan Peyton }
3487baad3f60SJonathan Peyton
3488baad3f60SJonathan Peyton static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3489baad3f60SJonathan Peyton __kmp_init_nested_futex_lock(lck);
3490baad3f60SJonathan Peyton }
3491baad3f60SJonathan Peyton #endif
3492baad3f60SJonathan Peyton
3493baad3f60SJonathan Peyton static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3494781a0896SRoman Lebedev return lck == lck->lk.self;
3495baad3f60SJonathan Peyton }
3496baad3f60SJonathan Peyton
3497baad3f60SJonathan Peyton static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3498baad3f60SJonathan Peyton __kmp_init_ticket_lock(lck);
3499baad3f60SJonathan Peyton }
3500baad3f60SJonathan Peyton
3501baad3f60SJonathan Peyton static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3502baad3f60SJonathan Peyton __kmp_init_nested_ticket_lock(lck);
3503baad3f60SJonathan Peyton }
3504baad3f60SJonathan Peyton
3505baad3f60SJonathan Peyton static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3506baad3f60SJonathan Peyton return lck == lck->lk.initialized;
3507baad3f60SJonathan Peyton }
3508baad3f60SJonathan Peyton
3509baad3f60SJonathan Peyton static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3510baad3f60SJonathan Peyton __kmp_init_queuing_lock(lck);
3511baad3f60SJonathan Peyton }
3512baad3f60SJonathan Peyton
3513baad3f60SJonathan Peyton static void
3514baad3f60SJonathan Peyton __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3515baad3f60SJonathan Peyton __kmp_init_nested_queuing_lock(lck);
3516baad3f60SJonathan Peyton }
3517baad3f60SJonathan Peyton
3518baad3f60SJonathan Peyton #if KMP_USE_ADAPTIVE_LOCKS
3519baad3f60SJonathan Peyton static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3520baad3f60SJonathan Peyton __kmp_init_adaptive_lock(lck);
3521baad3f60SJonathan Peyton }
3522baad3f60SJonathan Peyton #endif
3523baad3f60SJonathan Peyton
3524baad3f60SJonathan Peyton static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3525baad3f60SJonathan Peyton return lck == lck->lk.initialized;
3526baad3f60SJonathan Peyton }
3527baad3f60SJonathan Peyton
3528baad3f60SJonathan Peyton static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3529baad3f60SJonathan Peyton __kmp_init_drdpa_lock(lck);
3530baad3f60SJonathan Peyton }
3531baad3f60SJonathan Peyton
3532baad3f60SJonathan Peyton static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3533baad3f60SJonathan Peyton __kmp_init_nested_drdpa_lock(lck);
3534baad3f60SJonathan Peyton }
3535baad3f60SJonathan Peyton
35365e8470afSJim Cownie /* user locks
35375e8470afSJim Cownie * They are implemented as a table of function pointers which are set to the
35383041982dSJonathan Peyton * lock functions of the appropriate kind, once that has been determined. */
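/* A hedged usage sketch (illustration only): once __kmp_set_user_lock_vptrs()
   below has run for, say, lk_ticket, a generic call site such as

     __kmp_acquire_user_lock_with_checks_(lck, gtid);

   dispatches to the ticket implementation that the KMP_BIND_* macros
   installed, with no knowledge of the concrete lock kind at the call site. */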
35395e8470afSJim Cownie
35405e8470afSJim Cownie enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
35415e8470afSJim Cownie
35425e8470afSJim Cownie size_t __kmp_base_user_lock_size = 0;
35435e8470afSJim Cownie size_t __kmp_user_lock_size = 0;
35445e8470afSJim Cownie
35455e8470afSJim Cownie kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
35463041982dSJonathan Peyton int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
35473041982dSJonathan Peyton kmp_int32 gtid) = NULL;
35485e8470afSJim Cownie
35493041982dSJonathan Peyton int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
35503041982dSJonathan Peyton kmp_int32 gtid) = NULL;
35513041982dSJonathan Peyton int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
35523041982dSJonathan Peyton kmp_int32 gtid) = NULL;
35535e8470afSJim Cownie void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
35545e8470afSJim Cownie void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
35555e8470afSJim Cownie void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
35563041982dSJonathan Peyton int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
35573041982dSJonathan Peyton kmp_int32 gtid) = NULL;
35585e8470afSJim Cownie
35593041982dSJonathan Peyton int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
35603041982dSJonathan Peyton kmp_int32 gtid) = NULL;
35613041982dSJonathan Peyton int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
35623041982dSJonathan Peyton kmp_int32 gtid) = NULL;
35635e8470afSJim Cownie void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
35645e8470afSJim Cownie void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
35655e8470afSJim Cownie
35665e8470afSJim Cownie int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
35675e8470afSJim Cownie const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
35683041982dSJonathan Peyton void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
35693041982dSJonathan Peyton const ident_t *loc) = NULL;
35705e8470afSJim Cownie kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
35713041982dSJonathan Peyton void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
35723041982dSJonathan Peyton kmp_lock_flags_t flags) = NULL;
35735e8470afSJim Cownie
35743041982dSJonathan Peyton void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
35755e8470afSJim Cownie switch (user_lock_kind) {
35765e8470afSJim Cownie case lk_default:
35775e8470afSJim Cownie default:
35785e8470afSJim Cownie KMP_ASSERT(0);
35795e8470afSJim Cownie
35805e8470afSJim Cownie case lk_tas: {
35815e8470afSJim Cownie __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
35825e8470afSJim Cownie __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
35835e8470afSJim Cownie
35845e8470afSJim Cownie __kmp_get_user_lock_owner_ =
35853041982dSJonathan Peyton (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
35865e8470afSJim Cownie
35874cc4bb4cSJim Cownie if (__kmp_env_consistency_check) {
35884cc4bb4cSJim Cownie KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
35894cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
35903041982dSJonathan Peyton } else {
35914cc4bb4cSJim Cownie KMP_BIND_USER_LOCK(tas);
35924cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK(tas);
35934cc4bb4cSJim Cownie }
35945e8470afSJim Cownie
35955e8470afSJim Cownie __kmp_destroy_user_lock_ =
35963041982dSJonathan Peyton (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
35975e8470afSJim Cownie
35983041982dSJonathan Peyton __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
35995e8470afSJim Cownie
36003041982dSJonathan Peyton __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
36015e8470afSJim Cownie
36025e8470afSJim Cownie __kmp_set_user_lock_location_ =
36035e8470afSJim Cownie (void (*)(kmp_user_lock_p, const ident_t *))NULL;
36045e8470afSJim Cownie
36053041982dSJonathan Peyton __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
36065e8470afSJim Cownie
36075e8470afSJim Cownie __kmp_set_user_lock_flags_ =
36085e8470afSJim Cownie (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
36093041982dSJonathan Peyton } break;
36105e8470afSJim Cownie
3611fb043fdfSPaul Osmialowski #if KMP_USE_FUTEX
36125e8470afSJim Cownie
36135e8470afSJim Cownie case lk_futex: {
36145e8470afSJim Cownie __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
36155e8470afSJim Cownie __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
36165e8470afSJim Cownie
36175e8470afSJim Cownie __kmp_get_user_lock_owner_ =
36183041982dSJonathan Peyton (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
36195e8470afSJim Cownie
36204cc4bb4cSJim Cownie if (__kmp_env_consistency_check) {
36214cc4bb4cSJim Cownie KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
36224cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
36233041982dSJonathan Peyton } else {
36244cc4bb4cSJim Cownie KMP_BIND_USER_LOCK(futex);
36254cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK(futex);
36264cc4bb4cSJim Cownie }
36275e8470afSJim Cownie
36285e8470afSJim Cownie __kmp_destroy_user_lock_ =
36293041982dSJonathan Peyton (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
36305e8470afSJim Cownie
36313041982dSJonathan Peyton __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
36325e8470afSJim Cownie
36333041982dSJonathan Peyton __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
36345e8470afSJim Cownie
36355e8470afSJim Cownie __kmp_set_user_lock_location_ =
36365e8470afSJim Cownie (void (*)(kmp_user_lock_p, const ident_t *))NULL;
36375e8470afSJim Cownie
36383041982dSJonathan Peyton __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
36395e8470afSJim Cownie
36405e8470afSJim Cownie __kmp_set_user_lock_flags_ =
36415e8470afSJim Cownie (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
36423041982dSJonathan Peyton } break;
36435e8470afSJim Cownie
3644fb043fdfSPaul Osmialowski #endif // KMP_USE_FUTEX
36455e8470afSJim Cownie
36465e8470afSJim Cownie case lk_ticket: {
36475e8470afSJim Cownie __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
36485e8470afSJim Cownie __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
36495e8470afSJim Cownie
36505e8470afSJim Cownie __kmp_get_user_lock_owner_ =
36513041982dSJonathan Peyton (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
36525e8470afSJim Cownie
36534cc4bb4cSJim Cownie if (__kmp_env_consistency_check) {
36544cc4bb4cSJim Cownie KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
36554cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
36563041982dSJonathan Peyton } else {
36574cc4bb4cSJim Cownie KMP_BIND_USER_LOCK(ticket);
36584cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK(ticket);
36594cc4bb4cSJim Cownie }
36605e8470afSJim Cownie
36615e8470afSJim Cownie __kmp_destroy_user_lock_ =
36623041982dSJonathan Peyton (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
36635e8470afSJim Cownie
36645e8470afSJim Cownie __kmp_is_user_lock_initialized_ =
36653041982dSJonathan Peyton (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
36665e8470afSJim Cownie
36675e8470afSJim Cownie __kmp_get_user_lock_location_ =
36683041982dSJonathan Peyton (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
36695e8470afSJim Cownie
36703041982dSJonathan Peyton __kmp_set_user_lock_location_ = (void (*)(
36713041982dSJonathan Peyton kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
36725e8470afSJim Cownie
36735e8470afSJim Cownie __kmp_get_user_lock_flags_ =
36743041982dSJonathan Peyton (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
36755e8470afSJim Cownie
36763041982dSJonathan Peyton __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
36773041982dSJonathan Peyton &__kmp_set_ticket_lock_flags);
36783041982dSJonathan Peyton } break;
36795e8470afSJim Cownie
36805e8470afSJim Cownie case lk_queuing: {
36815e8470afSJim Cownie __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
36825e8470afSJim Cownie __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
36835e8470afSJim Cownie
36845e8470afSJim Cownie __kmp_get_user_lock_owner_ =
36853041982dSJonathan Peyton (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
36865e8470afSJim Cownie
36874cc4bb4cSJim Cownie if (__kmp_env_consistency_check) {
36884cc4bb4cSJim Cownie KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
36894cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
36903041982dSJonathan Peyton } else {
36914cc4bb4cSJim Cownie KMP_BIND_USER_LOCK(queuing);
36924cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK(queuing);
36934cc4bb4cSJim Cownie }
36945e8470afSJim Cownie
36955e8470afSJim Cownie __kmp_destroy_user_lock_ =
36963041982dSJonathan Peyton (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
36975e8470afSJim Cownie
36985e8470afSJim Cownie __kmp_is_user_lock_initialized_ =
36993041982dSJonathan Peyton (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
37005e8470afSJim Cownie
37015e8470afSJim Cownie __kmp_get_user_lock_location_ =
37023041982dSJonathan Peyton (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
37035e8470afSJim Cownie
37043041982dSJonathan Peyton __kmp_set_user_lock_location_ = (void (*)(
37053041982dSJonathan Peyton kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
37065e8470afSJim Cownie
37075e8470afSJim Cownie __kmp_get_user_lock_flags_ =
37083041982dSJonathan Peyton (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
37095e8470afSJim Cownie
37103041982dSJonathan Peyton __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
37113041982dSJonathan Peyton &__kmp_set_queuing_lock_flags);
37123041982dSJonathan Peyton } break;
37135e8470afSJim Cownie
37145e8470afSJim Cownie #if KMP_USE_ADAPTIVE_LOCKS
37155e8470afSJim Cownie case lk_adaptive: {
37164cc4bb4cSJim Cownie __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
37174cc4bb4cSJim Cownie __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
37185e8470afSJim Cownie
37195e8470afSJim Cownie __kmp_get_user_lock_owner_ =
37203041982dSJonathan Peyton (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
37215e8470afSJim Cownie
37224cc4bb4cSJim Cownie if (__kmp_env_consistency_check) {
37234cc4bb4cSJim Cownie KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
37243041982dSJonathan Peyton } else {
37254cc4bb4cSJim Cownie KMP_BIND_USER_LOCK(adaptive);
37264cc4bb4cSJim Cownie }
37275e8470afSJim Cownie
37285e8470afSJim Cownie __kmp_destroy_user_lock_ =
37293041982dSJonathan Peyton (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
37305e8470afSJim Cownie
37315e8470afSJim Cownie __kmp_is_user_lock_initialized_ =
37323041982dSJonathan Peyton (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
37335e8470afSJim Cownie
37345e8470afSJim Cownie __kmp_get_user_lock_location_ =
37353041982dSJonathan Peyton (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
37365e8470afSJim Cownie
37373041982dSJonathan Peyton __kmp_set_user_lock_location_ = (void (*)(
37383041982dSJonathan Peyton kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
37395e8470afSJim Cownie
37405e8470afSJim Cownie __kmp_get_user_lock_flags_ =
37413041982dSJonathan Peyton (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
37425e8470afSJim Cownie
37433041982dSJonathan Peyton __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
37443041982dSJonathan Peyton &__kmp_set_queuing_lock_flags);
37455e8470afSJim Cownie
37463041982dSJonathan Peyton } break;
37475e8470afSJim Cownie #endif // KMP_USE_ADAPTIVE_LOCKS
37485e8470afSJim Cownie
37495e8470afSJim Cownie case lk_drdpa: {
37505e8470afSJim Cownie __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
37515e8470afSJim Cownie __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
37525e8470afSJim Cownie
37535e8470afSJim Cownie __kmp_get_user_lock_owner_ =
37543041982dSJonathan Peyton (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
37555e8470afSJim Cownie
37564cc4bb4cSJim Cownie if (__kmp_env_consistency_check) {
37574cc4bb4cSJim Cownie KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
37584cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
37593041982dSJonathan Peyton } else {
37604cc4bb4cSJim Cownie KMP_BIND_USER_LOCK(drdpa);
37614cc4bb4cSJim Cownie KMP_BIND_NESTED_USER_LOCK(drdpa);
37624cc4bb4cSJim Cownie }
37635e8470afSJim Cownie
37645e8470afSJim Cownie __kmp_destroy_user_lock_ =
37653041982dSJonathan Peyton (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
37665e8470afSJim Cownie
37675e8470afSJim Cownie __kmp_is_user_lock_initialized_ =
37683041982dSJonathan Peyton (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
37695e8470afSJim Cownie
37705e8470afSJim Cownie __kmp_get_user_lock_location_ =
37713041982dSJonathan Peyton (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
37725e8470afSJim Cownie
37733041982dSJonathan Peyton __kmp_set_user_lock_location_ = (void (*)(
37743041982dSJonathan Peyton kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
37755e8470afSJim Cownie
37765e8470afSJim Cownie __kmp_get_user_lock_flags_ =
37773041982dSJonathan Peyton (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
37785e8470afSJim Cownie
37793041982dSJonathan Peyton __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
37803041982dSJonathan Peyton &__kmp_set_drdpa_lock_flags);
37813041982dSJonathan Peyton } break;
37825e8470afSJim Cownie }
37835e8470afSJim Cownie }
37845e8470afSJim Cownie
37855e8470afSJim Cownie // ----------------------------------------------------------------------------
37865e8470afSJim Cownie // User lock table & lock allocation
37875e8470afSJim Cownie
37885e8470afSJim Cownie kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
37895e8470afSJim Cownie kmp_user_lock_p __kmp_lock_pool = NULL;
37905e8470afSJim Cownie
37915e8470afSJim Cownie // Lock block-allocation support.
37925e8470afSJim Cownie kmp_block_of_locks *__kmp_lock_blocks = NULL;
37935e8470afSJim Cownie int __kmp_num_locks_in_block = 1; // FIXME - tune this value
37945e8470afSJim Cownie
37953041982dSJonathan Peyton static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
37965e8470afSJim Cownie // Assume that kmp_global_lock is held upon entry/exit.
37975e8470afSJim Cownie kmp_lock_index_t index;
37985e8470afSJim Cownie if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
37995e8470afSJim Cownie kmp_lock_index_t size;
38005e8470afSJim Cownie kmp_user_lock_p *table;
38015e8470afSJim Cownie // Reallocate lock table.
38025e8470afSJim Cownie if (__kmp_user_lock_table.allocated == 0) {
38035e8470afSJim Cownie size = 1024;
38043041982dSJonathan Peyton } else {
38055e8470afSJim Cownie size = __kmp_user_lock_table.allocated * 2;
38065e8470afSJim Cownie }
38075e8470afSJim Cownie table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
38083041982dSJonathan Peyton KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
38093041982dSJonathan Peyton sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
38105e8470afSJim Cownie table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
38111406f014SJonathan Peyton // We cannot free the previous table now, since it may be in use by other
38123041982dSJonathan Peyton // threads. So save the pointer to the previous table in the first element
38133041982dSJonathan Peyton // of the new table. All the tables will be organized into a list, and can
38143041982dSJonathan Peyton // be freed when the library is shutting down.
38155e8470afSJim Cownie __kmp_user_lock_table.table = table;
38165e8470afSJim Cownie __kmp_user_lock_table.allocated = size;
38175e8470afSJim Cownie }
38183041982dSJonathan Peyton KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
38193041982dSJonathan Peyton __kmp_user_lock_table.allocated);
38205e8470afSJim Cownie index = __kmp_user_lock_table.used;
38215e8470afSJim Cownie __kmp_user_lock_table.table[index] = lck;
38225e8470afSJim Cownie ++__kmp_user_lock_table.used;
38235e8470afSJim Cownie return index;
38245e8470afSJim Cownie }
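/* Resulting layout (commentary): every reallocation leaves the previous
   array reachable through element 0 of its successor, so a shutdown pass
   could walk and free the whole chain -- a hypothetical walk, not runtime
   code:

     kmp_user_lock_p *tbl = __kmp_user_lock_table.table;
     while (tbl != NULL) {
       kmp_user_lock_p *prev = (kmp_user_lock_p *)tbl[0];
       __kmp_free(tbl);
       tbl = prev;
     }
*/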
38255e8470afSJim Cownie
38263041982dSJonathan Peyton static kmp_user_lock_p __kmp_lock_block_allocate() {
38275e8470afSJim Cownie // Assume that kmp_global_lock is held upon entry/exit.
38285e8470afSJim Cownie static int last_index = 0;
38293041982dSJonathan Peyton if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
38305e8470afSJim Cownie // Restart the index.
38315e8470afSJim Cownie last_index = 0;
38325e8470afSJim Cownie // Need to allocate a new block.
38335e8470afSJim Cownie KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
38345e8470afSJim Cownie size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
38353041982dSJonathan Peyton char *buffer =
38363041982dSJonathan Peyton (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
38375e8470afSJim Cownie // Set up the new block.
38383041982dSJonathan Peyton kmp_block_of_locks *new_block =
38393041982dSJonathan Peyton (kmp_block_of_locks *)(&buffer[space_for_locks]);
38405e8470afSJim Cownie new_block->next_block = __kmp_lock_blocks;
38415e8470afSJim Cownie new_block->locks = (void *)buffer;
38425e8470afSJim Cownie // Publish the new block.
38435e8470afSJim Cownie KMP_MB();
38445e8470afSJim Cownie __kmp_lock_blocks = new_block;
38455e8470afSJim Cownie }
38463041982dSJonathan Peyton kmp_user_lock_p ret = (kmp_user_lock_p)(&(
38473041982dSJonathan Peyton ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
38485e8470afSJim Cownie last_index++;
38495e8470afSJim Cownie return ret;
38505e8470afSJim Cownie }
38515e8470afSJim Cownie
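// Memory layout of one block (explanatory sketch, not in the original
// sources): the kmp_block_of_locks header lives at the *end* of the single
// allocation, after the lock slots it describes.
//
//   buffer:  [lock 0][lock 1] ... [lock N-1][kmp_block_of_locks]
//            ^                              ^
//            new_block->locks               new_block
//
// where N == __kmp_num_locks_in_block and each slot is __kmp_user_lock_size
// bytes. This is why __kmp_cleanup_user_locks() frees block_ptr->locks but
// not block_ptr itself.
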
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }

    // Insert lock in the table so that it can be freed in __kmp_cleanup,
    // and the debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up

  return lck;
}

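// Call-path sketch (illustrative; the intermediate entry point is an
// assumption -- it lives in another translation unit): a user's
// omp_init_lock() reaches this allocator roughly as
//
//   omp_init_lock(&l);                            // user code
//     -> __kmpc_init_lock(loc, gtid, &l)          // compiler/runtime glue
//       -> __kmp_user_lock_allocate(&l, gtid, 0); // this function
//
// When OMP_LOCK_T_SIZE < sizeof(void *), the user's lock variable receives
// only the table index, so every later operation must translate it back to a
// lock pointer via __kmp_lookup_user_lock() below.
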
// Return the lock's memory to the pool so it can be reused.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

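// Note (explanatory, not in the original sources): the pool is a simple LIFO
// list threaded through the locks' own storage (lck->pool.next), so freeing
// costs O(1) and no extra memory. The table index is stashed in
// lck->pool.index only on targets where the user's lock variable stores an
// index rather than a pointer; __kmp_user_lock_allocate() hands that same
// index back out when the slot is reused, keeping the lock table consistent.
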
kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

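// Illustrative example (not in the original sources) of the two encodings
// decoded above. When OMP_LOCK_T_SIZE < sizeof(void *), the user's lock
// variable is too small for a pointer and holds a table index:
//
//   *(kmp_lock_index_t *)user_lock == 7
//       => lck == __kmp_user_lock_table.table[7]
//
// Otherwise the variable holds the lock pointer directly:
//
//   *(kmp_user_lock_p *)user_lock == lck
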
void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about the locks in the pool--we will free
  // them when iterating through the lock table (it includes all the locks,
  // dead or alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through lock table, free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type
  // is currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will
  // fail for "pool" objects on the free list. This must happen as the
  // "location" field of real user locks overlaps the "index" field of "pool"
  // objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of the locks stays consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Clean up the lock's internal dynamic resources (particularly for
      // drdpa locks).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK