1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <stddef.h>
14 #include <atomic>
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_io.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23 
24 #include "tsan_annotations.h"
25 
26 #if KMP_USE_FUTEX
27 #include <sys/syscall.h>
28 #include <unistd.h>
29 // We should really include <futex.h>, but that causes compatibility problems
30 // on different Linux* OS distributions, which either require that you include
31 // (or break when you try to include) <pci/types.h>. Since all we need is the
32 // two macros below (which are part of the kernel ABI, so they can't change),
33 // we just define the constants here and don't include <futex.h>.
34 #ifndef FUTEX_WAIT
35 #define FUTEX_WAIT 0
36 #endif
37 #ifndef FUTEX_WAKE
38 #define FUTEX_WAKE 1
39 #endif
40 #endif
41 
42 /* Implement spin locks for internal library use.             */
43 /* The algorithm implemented is Lamport's bakery lock [1974]. */
44 
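/* __kmp_validate_locks() sanity-checks two assumptions the lock code relies
   on: that kmp_uint32 arithmetic wraps modulo 2^32 (the ticket lock's
   free-running next_ticket / now_serving counters depend on this), and that
   tail_id in kmp_base_queuing_lock is 8-byte aligned, so the queuing lock's
   paired head/tail updates can use a single aligned 64-bit compare-and-store. */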
45 void __kmp_validate_locks(void) {
46   int i;
47   kmp_uint32 x, y;
48 
49   /* Check to make sure unsigned arithmetic wraps properly */
50   x = ~((kmp_uint32)0) - 2;
51   y = x - 2;
52 
53   for (i = 0; i < 8; ++i, ++x, ++y) {
54     kmp_uint32 z = (x - y);
55     KMP_ASSERT(z == 2);
56   }
57 
58   KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
59 }
60 
61 /* ------------------------------------------------------------------------ */
62 /* test and set locks */
63 
64 // For the non-nested locks, we can only assume that the first 4 bytes were
65 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
66 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
67 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
68 //
69 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
70 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
71 
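// A test-and-set lock stores KMP_LOCK_FREE(tas) in lk.poll when unlocked and
// KMP_LOCK_BUSY(gtid + 1, tas) when held, so the owner's gtid can be recovered
// by stripping the tag and subtracting 1 (see __kmp_get_tas_lock_owner below).
// Illustrative use of the routines defined in this section (a minimal sketch,
// not code from the runtime; assumes a valid global thread id gtid):
//
//   kmp_tas_lock_t l;
//   __kmp_init_tas_lock(&l);
//   __kmp_acquire_tas_lock(&l, gtid); // spins (with backoff/yield) until held
//   /* ... critical section ... */
//   __kmp_release_tas_lock(&l, gtid);
//   __kmp_destroy_tas_lock(&l);
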
72 static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
73   return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
74 }
75 
76 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
77   return lck->lk.depth_locked != -1;
78 }
79 
80 __forceinline static int
81 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
82   KMP_MB();
83 
84 #ifdef USE_LOCK_PROFILE
85   kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
86   if ((curr != 0) && (curr != gtid + 1))
87     __kmp_printf("LOCK CONTENTION: %p\n", lck);
88 /* else __kmp_printf( "." );*/
89 #endif /* USE_LOCK_PROFILE */
90 
91   kmp_int32 tas_free = KMP_LOCK_FREE(tas);
92   kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
93 
94   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
95       __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
96     KMP_FSYNC_ACQUIRED(lck);
97     return KMP_LOCK_ACQUIRED_FIRST;
98   }
99 
100   kmp_uint32 spins;
101   KMP_FSYNC_PREPARE(lck);
102   KMP_INIT_YIELD(spins);
103   if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
104     KMP_YIELD(TRUE);
105   } else {
106     KMP_YIELD_SPIN(spins);
107   }
108 
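  // Spin until the lock is observed free and the CAS claims it, pausing with a
  // backoff between attempts and yielding the processor when the system is
  // oversubscribed (more active threads than available processors).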
109   kmp_backoff_t backoff = __kmp_spin_backoff_params;
110   while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
111          !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
112     __kmp_spin_backoff(&backoff);
113     if (TCR_4(__kmp_nth) >
114         (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
115       KMP_YIELD(TRUE);
116     } else {
117       KMP_YIELD_SPIN(spins);
118     }
119   }
120   KMP_FSYNC_ACQUIRED(lck);
121   return KMP_LOCK_ACQUIRED_FIRST;
122 }
123 
124 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
125   int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
126   ANNOTATE_TAS_ACQUIRED(lck);
127   return retval;
128 }
129 
130 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
131                                               kmp_int32 gtid) {
132   char const *const func = "omp_set_lock";
133   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
134       __kmp_is_tas_lock_nestable(lck)) {
135     KMP_FATAL(LockNestableUsedAsSimple, func);
136   }
137   if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
138     KMP_FATAL(LockIsAlreadyOwned, func);
139   }
140   return __kmp_acquire_tas_lock(lck, gtid);
141 }
142 
143 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
144   kmp_int32 tas_free = KMP_LOCK_FREE(tas);
145   kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
146   if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
147       __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
148     KMP_FSYNC_ACQUIRED(lck);
149     return TRUE;
150   }
151   return FALSE;
152 }
153 
154 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
155                                            kmp_int32 gtid) {
156   char const *const func = "omp_test_lock";
157   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
158       __kmp_is_tas_lock_nestable(lck)) {
159     KMP_FATAL(LockNestableUsedAsSimple, func);
160   }
161   return __kmp_test_tas_lock(lck, gtid);
162 }
163 
164 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
165   KMP_MB(); /* Flush all pending memory write invalidates.  */
166 
167   KMP_FSYNC_RELEASING(lck);
168   ANNOTATE_TAS_RELEASED(lck);
169   KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
170   KMP_MB(); /* Flush all pending memory write invalidates.  */
171 
172   KMP_YIELD(TCR_4(__kmp_nth) >
173             (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
174   return KMP_LOCK_RELEASED;
175 }
176 
177 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
178                                               kmp_int32 gtid) {
179   char const *const func = "omp_unset_lock";
180   KMP_MB(); /* in case another processor initialized lock */
181   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
182       __kmp_is_tas_lock_nestable(lck)) {
183     KMP_FATAL(LockNestableUsedAsSimple, func);
184   }
185   if (__kmp_get_tas_lock_owner(lck) == -1) {
186     KMP_FATAL(LockUnsettingFree, func);
187   }
188   if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
189       (__kmp_get_tas_lock_owner(lck) != gtid)) {
190     KMP_FATAL(LockUnsettingSetByAnother, func);
191   }
192   return __kmp_release_tas_lock(lck, gtid);
193 }
194 
195 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
196   lck->lk.poll = KMP_LOCK_FREE(tas);
197 }
198 
199 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
200 
201 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
202   char const *const func = "omp_destroy_lock";
203   if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
204       __kmp_is_tas_lock_nestable(lck)) {
205     KMP_FATAL(LockNestableUsedAsSimple, func);
206   }
207   if (__kmp_get_tas_lock_owner(lck) != -1) {
208     KMP_FATAL(LockStillOwned, func);
209   }
210   __kmp_destroy_tas_lock(lck);
211 }
212 
213 // nested test and set locks
214 
215 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
216   KMP_DEBUG_ASSERT(gtid >= 0);
217 
218   if (__kmp_get_tas_lock_owner(lck) == gtid) {
219     lck->lk.depth_locked += 1;
220     return KMP_LOCK_ACQUIRED_NEXT;
221   } else {
222     __kmp_acquire_tas_lock_timed_template(lck, gtid);
223     ANNOTATE_TAS_ACQUIRED(lck);
224     lck->lk.depth_locked = 1;
225     return KMP_LOCK_ACQUIRED_FIRST;
226   }
227 }
228 
229 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
230                                                      kmp_int32 gtid) {
231   char const *const func = "omp_set_nest_lock";
232   if (!__kmp_is_tas_lock_nestable(lck)) {
233     KMP_FATAL(LockSimpleUsedAsNestable, func);
234   }
235   return __kmp_acquire_nested_tas_lock(lck, gtid);
236 }
237 
238 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
239   int retval;
240 
241   KMP_DEBUG_ASSERT(gtid >= 0);
242 
243   if (__kmp_get_tas_lock_owner(lck) == gtid) {
244     retval = ++lck->lk.depth_locked;
245   } else if (!__kmp_test_tas_lock(lck, gtid)) {
246     retval = 0;
247   } else {
248     KMP_MB();
249     retval = lck->lk.depth_locked = 1;
250   }
251   return retval;
252 }
253 
254 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
255                                                   kmp_int32 gtid) {
256   char const *const func = "omp_test_nest_lock";
257   if (!__kmp_is_tas_lock_nestable(lck)) {
258     KMP_FATAL(LockSimpleUsedAsNestable, func);
259   }
260   return __kmp_test_nested_tas_lock(lck, gtid);
261 }
262 
263 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
264   KMP_DEBUG_ASSERT(gtid >= 0);
265 
266   KMP_MB();
267   if (--(lck->lk.depth_locked) == 0) {
268     __kmp_release_tas_lock(lck, gtid);
269     return KMP_LOCK_RELEASED;
270   }
271   return KMP_LOCK_STILL_HELD;
272 }
273 
274 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
275                                                      kmp_int32 gtid) {
276   char const *const func = "omp_unset_nest_lock";
277   KMP_MB(); /* in case another processor initialized lock */
278   if (!__kmp_is_tas_lock_nestable(lck)) {
279     KMP_FATAL(LockSimpleUsedAsNestable, func);
280   }
281   if (__kmp_get_tas_lock_owner(lck) == -1) {
282     KMP_FATAL(LockUnsettingFree, func);
283   }
284   if (__kmp_get_tas_lock_owner(lck) != gtid) {
285     KMP_FATAL(LockUnsettingSetByAnother, func);
286   }
287   return __kmp_release_nested_tas_lock(lck, gtid);
288 }
289 
290 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
291   __kmp_init_tas_lock(lck);
292   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
293 }
294 
295 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
296   __kmp_destroy_tas_lock(lck);
297   lck->lk.depth_locked = 0;
298 }
299 
300 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
301   char const *const func = "omp_destroy_nest_lock";
302   if (!__kmp_is_tas_lock_nestable(lck)) {
303     KMP_FATAL(LockSimpleUsedAsNestable, func);
304   }
305   if (__kmp_get_tas_lock_owner(lck) != -1) {
306     KMP_FATAL(LockStillOwned, func);
307   }
308   __kmp_destroy_nested_tas_lock(lck);
309 }
310 
311 #if KMP_USE_FUTEX
312 
313 /* ------------------------------------------------------------------------ */
314 /* futex locks */
315 
316 // futex locks are really just test and set locks, with a different method
317 // of handling contention.  They take the same amount of space as test and
318 // set locks, and are allocated the same way (i.e. use the area allocated by
319 // the compiler for non-nested locks / allocate nested locks on the heap).
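//
// The futex lock keeps the owner in lk.poll as (gtid + 1) << 1 (or
// KMP_LOCK_FREE(futex) when unlocked); bit 0 is set by a waiter to tell the
// releasing thread that a futex_wake call is needed (see
// __kmp_release_futex_lock below).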
320 
321 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
322   return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
323 }
324 
325 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
326   return lck->lk.depth_locked != -1;
327 }
328 
329 __forceinline static int
330 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
331   kmp_int32 gtid_code = (gtid + 1) << 1;
332 
333   KMP_MB();
334 
335 #ifdef USE_LOCK_PROFILE
336   kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
337   if ((curr != 0) && (curr != gtid_code))
338     __kmp_printf("LOCK CONTENTION: %p\n", lck);
339 /* else __kmp_printf( "." );*/
340 #endif /* USE_LOCK_PROFILE */
341 
342   KMP_FSYNC_PREPARE(lck);
343   KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
344                   lck, lck->lk.poll, gtid));
345 
346   kmp_int32 poll_val;
347 
348   while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
349               &(lck->lk.poll), KMP_LOCK_FREE(futex),
350               KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
351 
352     kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
353     KA_TRACE(
354         1000,
355         ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
356          lck, gtid, poll_val, cond));
357 
358     // NOTE: do not write this branch condition as
359     //
360     //   if ( poll_val & 1 == 0 )
361     //
362     // Since == binds more tightly than &, it parses as poll_val & (1 == 0),
363     // so the following block would always be skipped regardless of the LSB.
364     if (!cond) {
365       // Try to set the lsb in the poll to indicate to the owner
366       // thread that they need to wake this thread up.
367       if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
368                                        poll_val | KMP_LOCK_BUSY(1, futex))) {
369         KA_TRACE(
370             1000,
371             ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
372              lck, lck->lk.poll, gtid));
373         continue;
374       }
375       poll_val |= KMP_LOCK_BUSY(1, futex);
376 
377       KA_TRACE(1000,
378                ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
379                 lck->lk.poll, gtid));
380     }
381 
382     KA_TRACE(
383         1000,
384         ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
385          lck, gtid, poll_val));
386 
387     kmp_int32 rc;
388     if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
389                       NULL, 0)) != 0) {
390       KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
391                       "failed (rc=%d errno=%d)\n",
392                       lck, gtid, poll_val, rc, errno));
393       continue;
394     }
395 
396     KA_TRACE(1000,
397              ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
398               lck, gtid, poll_val));
399     // This thread has now done a successful futex_wait call and has been
400     // placed on the OS futex queue.  We must now perform a futex_wake call when
401     // releasing the lock, as we have no idea how many other threads are queued.
402     gtid_code |= 1;
403   }
404 
405   KMP_FSYNC_ACQUIRED(lck);
406   KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
407                   lck->lk.poll, gtid));
408   return KMP_LOCK_ACQUIRED_FIRST;
409 }
410 
411 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
412   int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
413   ANNOTATE_FUTEX_ACQUIRED(lck);
414   return retval;
415 }
416 
417 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
418                                                 kmp_int32 gtid) {
419   char const *const func = "omp_set_lock";
420   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
421       __kmp_is_futex_lock_nestable(lck)) {
422     KMP_FATAL(LockNestableUsedAsSimple, func);
423   }
424   if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
425     KMP_FATAL(LockIsAlreadyOwned, func);
426   }
427   return __kmp_acquire_futex_lock(lck, gtid);
428 }
429 
430 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
431   if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
432                                   KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
433     KMP_FSYNC_ACQUIRED(lck);
434     return TRUE;
435   }
436   return FALSE;
437 }
438 
439 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
440                                              kmp_int32 gtid) {
441   char const *const func = "omp_test_lock";
442   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
443       __kmp_is_futex_lock_nestable(lck)) {
444     KMP_FATAL(LockNestableUsedAsSimple, func);
445   }
446   return __kmp_test_futex_lock(lck, gtid);
447 }
448 
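// Release the futex lock by atomically exchanging lk.poll back to the free
// value.  If bit 0 of the old value was set, at least one thread is blocked
// (or about to block) in futex_wait, so wake one waiter with FUTEX_WAKE.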
449 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
450   KMP_MB(); /* Flush all pending memory write invalidates.  */
451 
452   KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
453                   lck, lck->lk.poll, gtid));
454 
455   KMP_FSYNC_RELEASING(lck);
456   ANNOTATE_FUTEX_RELEASED(lck);
457 
458   kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
459 
460   KA_TRACE(1000,
461            ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
462             lck, gtid, poll_val));
463 
464   if (KMP_LOCK_STRIP(poll_val) & 1) {
465     KA_TRACE(1000,
466              ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
467               lck, gtid));
468     syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
469             NULL, NULL, 0);
470   }
471 
472   KMP_MB(); /* Flush all pending memory write invalidates.  */
473 
474   KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
475                   lck->lk.poll, gtid));
476 
477   KMP_YIELD(TCR_4(__kmp_nth) >
478             (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
479   return KMP_LOCK_RELEASED;
480 }
481 
482 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
483                                                 kmp_int32 gtid) {
484   char const *const func = "omp_unset_lock";
485   KMP_MB(); /* in case another processor initialized lock */
486   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
487       __kmp_is_futex_lock_nestable(lck)) {
488     KMP_FATAL(LockNestableUsedAsSimple, func);
489   }
490   if (__kmp_get_futex_lock_owner(lck) == -1) {
491     KMP_FATAL(LockUnsettingFree, func);
492   }
493   if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
494       (__kmp_get_futex_lock_owner(lck) != gtid)) {
495     KMP_FATAL(LockUnsettingSetByAnother, func);
496   }
497   return __kmp_release_futex_lock(lck, gtid);
498 }
499 
500 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
501   TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
502 }
503 
504 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
505 
506 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
507   char const *const func = "omp_destroy_lock";
508   if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
509       __kmp_is_futex_lock_nestable(lck)) {
510     KMP_FATAL(LockNestableUsedAsSimple, func);
511   }
512   if (__kmp_get_futex_lock_owner(lck) != -1) {
513     KMP_FATAL(LockStillOwned, func);
514   }
515   __kmp_destroy_futex_lock(lck);
516 }
517 
518 // nested futex locks
519 
520 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
521   KMP_DEBUG_ASSERT(gtid >= 0);
522 
523   if (__kmp_get_futex_lock_owner(lck) == gtid) {
524     lck->lk.depth_locked += 1;
525     return KMP_LOCK_ACQUIRED_NEXT;
526   } else {
527     __kmp_acquire_futex_lock_timed_template(lck, gtid);
528     ANNOTATE_FUTEX_ACQUIRED(lck);
529     lck->lk.depth_locked = 1;
530     return KMP_LOCK_ACQUIRED_FIRST;
531   }
532 }
533 
534 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
535                                                        kmp_int32 gtid) {
536   char const *const func = "omp_set_nest_lock";
537   if (!__kmp_is_futex_lock_nestable(lck)) {
538     KMP_FATAL(LockSimpleUsedAsNestable, func);
539   }
540   return __kmp_acquire_nested_futex_lock(lck, gtid);
541 }
542 
543 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
544   int retval;
545 
546   KMP_DEBUG_ASSERT(gtid >= 0);
547 
548   if (__kmp_get_futex_lock_owner(lck) == gtid) {
549     retval = ++lck->lk.depth_locked;
550   } else if (!__kmp_test_futex_lock(lck, gtid)) {
551     retval = 0;
552   } else {
553     KMP_MB();
554     retval = lck->lk.depth_locked = 1;
555   }
556   return retval;
557 }
558 
559 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
560                                                     kmp_int32 gtid) {
561   char const *const func = "omp_test_nest_lock";
562   if (!__kmp_is_futex_lock_nestable(lck)) {
563     KMP_FATAL(LockSimpleUsedAsNestable, func);
564   }
565   return __kmp_test_nested_futex_lock(lck, gtid);
566 }
567 
568 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
569   KMP_DEBUG_ASSERT(gtid >= 0);
570 
571   KMP_MB();
572   if (--(lck->lk.depth_locked) == 0) {
573     __kmp_release_futex_lock(lck, gtid);
574     return KMP_LOCK_RELEASED;
575   }
576   return KMP_LOCK_STILL_HELD;
577 }
578 
579 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
580                                                        kmp_int32 gtid) {
581   char const *const func = "omp_unset_nest_lock";
582   KMP_MB(); /* in case another processor initialized lock */
583   if (!__kmp_is_futex_lock_nestable(lck)) {
584     KMP_FATAL(LockSimpleUsedAsNestable, func);
585   }
586   if (__kmp_get_futex_lock_owner(lck) == -1) {
587     KMP_FATAL(LockUnsettingFree, func);
588   }
589   if (__kmp_get_futex_lock_owner(lck) != gtid) {
590     KMP_FATAL(LockUnsettingSetByAnother, func);
591   }
592   return __kmp_release_nested_futex_lock(lck, gtid);
593 }
594 
595 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
596   __kmp_init_futex_lock(lck);
597   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
598 }
599 
600 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
601   __kmp_destroy_futex_lock(lck);
602   lck->lk.depth_locked = 0;
603 }
604 
605 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
606   char const *const func = "omp_destroy_nest_lock";
607   if (!__kmp_is_futex_lock_nestable(lck)) {
608     KMP_FATAL(LockSimpleUsedAsNestable, func);
609   }
610   if (__kmp_get_futex_lock_owner(lck) != -1) {
611     KMP_FATAL(LockStillOwned, func);
612   }
613   __kmp_destroy_nested_futex_lock(lck);
614 }
615 
616 #endif // KMP_USE_FUTEX
617 
618 /* ------------------------------------------------------------------------ */
619 /* ticket (bakery) locks */
620 
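// A ticket (bakery) lock hands out monotonically increasing tickets via an
// atomic fetch_add on lk.next_ticket; each thread then waits until
// lk.now_serving equals its ticket.  Release simply increments now_serving, so
// waiters are granted the lock in FIFO order, and the unsigned counters may
// wrap without harm (see __kmp_validate_locks above).
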
621 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
622   return std::atomic_load_explicit(&lck->lk.owner_id,
623                                    std::memory_order_relaxed) -
624          1;
625 }
626 
627 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
628   return std::atomic_load_explicit(&lck->lk.depth_locked,
629                                    std::memory_order_relaxed) != -1;
630 }
631 
632 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
633   return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
634                                    std::memory_order_acquire) == my_ticket;
635 }
636 
637 __forceinline static int
638 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
639                                          kmp_int32 gtid) {
640   kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
641       &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
642 
643 #ifdef USE_LOCK_PROFILE
644   if (std::atomic_load_explicit(&lck->lk.now_serving,
645                                 std::memory_order_relaxed) != my_ticket)
646     __kmp_printf("LOCK CONTENTION: %p\n", lck);
647 /* else __kmp_printf( "." );*/
648 #endif /* USE_LOCK_PROFILE */
649 
650   if (std::atomic_load_explicit(&lck->lk.now_serving,
651                                 std::memory_order_acquire) == my_ticket) {
652     return KMP_LOCK_ACQUIRED_FIRST;
653   }
654   KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
655   return KMP_LOCK_ACQUIRED_FIRST;
656 }
657 
658 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
659   int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
660   ANNOTATE_TICKET_ACQUIRED(lck);
661   return retval;
662 }
663 
664 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
665                                                  kmp_int32 gtid) {
666   char const *const func = "omp_set_lock";
667 
668   if (!std::atomic_load_explicit(&lck->lk.initialized,
669                                  std::memory_order_relaxed)) {
670     KMP_FATAL(LockIsUninitialized, func);
671   }
672   if (lck->lk.self != lck) {
673     KMP_FATAL(LockIsUninitialized, func);
674   }
675   if (__kmp_is_ticket_lock_nestable(lck)) {
676     KMP_FATAL(LockNestableUsedAsSimple, func);
677   }
678   if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
679     KMP_FATAL(LockIsAlreadyOwned, func);
680   }
681 
682   __kmp_acquire_ticket_lock(lck, gtid);
683 
684   std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
685                              std::memory_order_relaxed);
686   return KMP_LOCK_ACQUIRED_FIRST;
687 }
688 
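// Try-acquire: succeed only if the lock currently looks free (now_serving ==
// next_ticket) and we can atomically claim that ticket by advancing
// next_ticket; otherwise return FALSE without waiting.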
689 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
690   kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
691                                                    std::memory_order_relaxed);
692 
693   if (std::atomic_load_explicit(&lck->lk.now_serving,
694                                 std::memory_order_relaxed) == my_ticket) {
695     kmp_uint32 next_ticket = my_ticket + 1;
696     if (std::atomic_compare_exchange_strong_explicit(
697             &lck->lk.next_ticket, &my_ticket, next_ticket,
698             std::memory_order_acquire, std::memory_order_acquire)) {
699       return TRUE;
700     }
701   }
702   return FALSE;
703 }
704 
705 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
706                                               kmp_int32 gtid) {
707   char const *const func = "omp_test_lock";
708 
709   if (!std::atomic_load_explicit(&lck->lk.initialized,
710                                  std::memory_order_relaxed)) {
711     KMP_FATAL(LockIsUninitialized, func);
712   }
713   if (lck->lk.self != lck) {
714     KMP_FATAL(LockIsUninitialized, func);
715   }
716   if (__kmp_is_ticket_lock_nestable(lck)) {
717     KMP_FATAL(LockNestableUsedAsSimple, func);
718   }
719 
720   int retval = __kmp_test_ticket_lock(lck, gtid);
721 
722   if (retval) {
723     std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
724                                std::memory_order_relaxed);
725   }
726   return retval;
727 }
728 
729 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
730   kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
731                                                   std::memory_order_relaxed) -
732                         std::atomic_load_explicit(&lck->lk.now_serving,
733                                                   std::memory_order_relaxed);
734 
735   ANNOTATE_TICKET_RELEASED(lck);
736   std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
737                                  std::memory_order_release);
738 
739   KMP_YIELD(distance >
740             (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
741   return KMP_LOCK_RELEASED;
742 }
743 
744 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
745                                                  kmp_int32 gtid) {
746   char const *const func = "omp_unset_lock";
747 
748   if (!std::atomic_load_explicit(&lck->lk.initialized,
749                                  std::memory_order_relaxed)) {
750     KMP_FATAL(LockIsUninitialized, func);
751   }
752   if (lck->lk.self != lck) {
753     KMP_FATAL(LockIsUninitialized, func);
754   }
755   if (__kmp_is_ticket_lock_nestable(lck)) {
756     KMP_FATAL(LockNestableUsedAsSimple, func);
757   }
758   if (__kmp_get_ticket_lock_owner(lck) == -1) {
759     KMP_FATAL(LockUnsettingFree, func);
760   }
761   if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
762       (__kmp_get_ticket_lock_owner(lck) != gtid)) {
763     KMP_FATAL(LockUnsettingSetByAnother, func);
764   }
765   std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
766   return __kmp_release_ticket_lock(lck, gtid);
767 }
768 
769 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
770   lck->lk.location = NULL;
771   lck->lk.self = lck;
772   std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
773                              std::memory_order_relaxed);
774   std::atomic_store_explicit(&lck->lk.now_serving, 0U,
775                              std::memory_order_relaxed);
776   std::atomic_store_explicit(
777       &lck->lk.owner_id, 0,
778       std::memory_order_relaxed); // no thread owns the lock.
779   std::atomic_store_explicit(
780       &lck->lk.depth_locked, -1,
781       std::memory_order_relaxed); // -1 => not a nested lock.
782   std::atomic_store_explicit(&lck->lk.initialized, true,
783                              std::memory_order_release);
784 }
785 
786 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
787   std::atomic_store_explicit(&lck->lk.initialized, false,
788                              std::memory_order_release);
789   lck->lk.self = NULL;
790   lck->lk.location = NULL;
791   std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
792                              std::memory_order_relaxed);
793   std::atomic_store_explicit(&lck->lk.now_serving, 0U,
794                              std::memory_order_relaxed);
795   std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
796   std::atomic_store_explicit(&lck->lk.depth_locked, -1,
797                              std::memory_order_relaxed);
798 }
799 
800 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
801   char const *const func = "omp_destroy_lock";
802 
803   if (!std::atomic_load_explicit(&lck->lk.initialized,
804                                  std::memory_order_relaxed)) {
805     KMP_FATAL(LockIsUninitialized, func);
806   }
807   if (lck->lk.self != lck) {
808     KMP_FATAL(LockIsUninitialized, func);
809   }
810   if (__kmp_is_ticket_lock_nestable(lck)) {
811     KMP_FATAL(LockNestableUsedAsSimple, func);
812   }
813   if (__kmp_get_ticket_lock_owner(lck) != -1) {
814     KMP_FATAL(LockStillOwned, func);
815   }
816   __kmp_destroy_ticket_lock(lck);
817 }
818 
819 // nested ticket locks
820 
821 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
822   KMP_DEBUG_ASSERT(gtid >= 0);
823 
824   if (__kmp_get_ticket_lock_owner(lck) == gtid) {
825     std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
826                                    std::memory_order_relaxed);
827     return KMP_LOCK_ACQUIRED_NEXT;
828   } else {
829     __kmp_acquire_ticket_lock_timed_template(lck, gtid);
830     ANNOTATE_TICKET_ACQUIRED(lck);
831     std::atomic_store_explicit(&lck->lk.depth_locked, 1,
832                                std::memory_order_relaxed);
833     std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
834                                std::memory_order_relaxed);
835     return KMP_LOCK_ACQUIRED_FIRST;
836   }
837 }
838 
839 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
840                                                         kmp_int32 gtid) {
841   char const *const func = "omp_set_nest_lock";
842 
843   if (!std::atomic_load_explicit(&lck->lk.initialized,
844                                  std::memory_order_relaxed)) {
845     KMP_FATAL(LockIsUninitialized, func);
846   }
847   if (lck->lk.self != lck) {
848     KMP_FATAL(LockIsUninitialized, func);
849   }
850   if (!__kmp_is_ticket_lock_nestable(lck)) {
851     KMP_FATAL(LockSimpleUsedAsNestable, func);
852   }
853   return __kmp_acquire_nested_ticket_lock(lck, gtid);
854 }
855 
856 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
857   int retval;
858 
859   KMP_DEBUG_ASSERT(gtid >= 0);
860 
861   if (__kmp_get_ticket_lock_owner(lck) == gtid) {
862     retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
863                                             std::memory_order_relaxed) +
864              1;
865   } else if (!__kmp_test_ticket_lock(lck, gtid)) {
866     retval = 0;
867   } else {
868     std::atomic_store_explicit(&lck->lk.depth_locked, 1,
869                                std::memory_order_relaxed);
870     std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
871                                std::memory_order_relaxed);
872     retval = 1;
873   }
874   return retval;
875 }
876 
877 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
878                                                      kmp_int32 gtid) {
879   char const *const func = "omp_test_nest_lock";
880 
881   if (!std::atomic_load_explicit(&lck->lk.initialized,
882                                  std::memory_order_relaxed)) {
883     KMP_FATAL(LockIsUninitialized, func);
884   }
885   if (lck->lk.self != lck) {
886     KMP_FATAL(LockIsUninitialized, func);
887   }
888   if (!__kmp_is_ticket_lock_nestable(lck)) {
889     KMP_FATAL(LockSimpleUsedAsNestable, func);
890   }
891   return __kmp_test_nested_ticket_lock(lck, gtid);
892 }
893 
894 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
895   KMP_DEBUG_ASSERT(gtid >= 0);
896 
897   if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
898                                       std::memory_order_relaxed) -
899        1) == 0) {
900     std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
901     __kmp_release_ticket_lock(lck, gtid);
902     return KMP_LOCK_RELEASED;
903   }
904   return KMP_LOCK_STILL_HELD;
905 }
906 
907 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
908                                                         kmp_int32 gtid) {
909   char const *const func = "omp_unset_nest_lock";
910 
911   if (!std::atomic_load_explicit(&lck->lk.initialized,
912                                  std::memory_order_relaxed)) {
913     KMP_FATAL(LockIsUninitialized, func);
914   }
915   if (lck->lk.self != lck) {
916     KMP_FATAL(LockIsUninitialized, func);
917   }
918   if (!__kmp_is_ticket_lock_nestable(lck)) {
919     KMP_FATAL(LockSimpleUsedAsNestable, func);
920   }
921   if (__kmp_get_ticket_lock_owner(lck) == -1) {
922     KMP_FATAL(LockUnsettingFree, func);
923   }
924   if (__kmp_get_ticket_lock_owner(lck) != gtid) {
925     KMP_FATAL(LockUnsettingSetByAnother, func);
926   }
927   return __kmp_release_nested_ticket_lock(lck, gtid);
928 }
929 
930 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
931   __kmp_init_ticket_lock(lck);
932   std::atomic_store_explicit(&lck->lk.depth_locked, 0,
933                              std::memory_order_relaxed);
934   // >= 0 for nestable locks, -1 for simple locks
935 }
936 
937 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
938   __kmp_destroy_ticket_lock(lck);
939   std::atomic_store_explicit(&lck->lk.depth_locked, 0,
940                              std::memory_order_relaxed);
941 }
942 
943 static void
944 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
945   char const *const func = "omp_destroy_nest_lock";
946 
947   if (!std::atomic_load_explicit(&lck->lk.initialized,
948                                  std::memory_order_relaxed)) {
949     KMP_FATAL(LockIsUninitialized, func);
950   }
951   if (lck->lk.self != lck) {
952     KMP_FATAL(LockIsUninitialized, func);
953   }
954   if (!__kmp_is_ticket_lock_nestable(lck)) {
955     KMP_FATAL(LockSimpleUsedAsNestable, func);
956   }
957   if (__kmp_get_ticket_lock_owner(lck) != -1) {
958     KMP_FATAL(LockStillOwned, func);
959   }
960   __kmp_destroy_nested_ticket_lock(lck);
961 }
962 
963 // access functions to fields which don't exist for all lock kinds.
964 
965 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
966   return lck->lk.location;
967 }
968 
969 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
970                                            const ident_t *loc) {
971   lck->lk.location = loc;
972 }
973 
974 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
975   return lck->lk.flags;
976 }
977 
978 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
979                                         kmp_lock_flags_t flags) {
980   lck->lk.flags = flags;
981 }
982 
983 /* ------------------------------------------------------------------------ */
984 /* queuing locks */
985 
986 /* First the states
987    (head,tail) =              0, 0  means lock is unheld, nobody on queue
988                  UINT_MAX or -1, 0  means lock is held, nobody on queue
989                               h, h  means lock held or about to transition,
990                                     1 element on queue
991                               h, t  h <> t, means lock is held or about to
992                                     transition, >1 elements on queue
993 
994    Now the transitions
995       Acquire(0,0)  = -1 ,0
996       Release(0,0)  = Error
997       Acquire(-1,0) =  h ,h    h > 0
998       Release(-1,0) =  0 ,0
999       Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1000       Release(h,h)  = -1 ,0    h > 0
1001       Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1002       Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1003 
1004    And pictorially
1005 
1006            +-----+
1007            | 0, 0|------- release -------> Error
1008            +-----+
1009              |  ^
1010       acquire|  |release
1011              |  |
1012              |  |
1013              v  |
1014            +-----+
1015            |-1, 0|
1016            +-----+
1017              |  ^
1018       acquire|  |release
1019              |  |
1020              |  |
1021              v  |
1022            +-----+
1023            | h, h|
1024            +-----+
1025              |  ^
1026       acquire|  |release
1027              |  |
1028              |  |
1029              v  |
1030            +-----+
1031            | h, t|----- acquire, release loopback ---+
1032            +-----+                                   |
1033                 ^                                    |
1034                 |                                    |
1035                 +------------------------------------+
1036  */
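
/* Worked example derived from the transitions above (a, b, c denote gtid+1
   for threads A, B, C; the lock holder is never on the queue):

      A: Acquire(0,0)   -> (-1, 0)   A holds the lock, queue empty
      B: Acquire(-1,0)  -> ( b, b)   A holds, B queued
      C: Acquire(b,b)   -> ( b, c)   A holds, B and C queued
      A: Release(b,c)   -> ( c, c)   B holds, C queued
      B: Release(c,c)   -> (-1, 0)   C holds, queue empty
      C: Release(-1,0)  -> ( 0, 0)   unlocked, queue empty */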
1037 
1038 #ifdef DEBUG_QUEUING_LOCKS
1039 
1040 /* Stuff for circular trace buffer */
1041 #define TRACE_BUF_ELE 1024
1042 static char traces[TRACE_BUF_ELE][128] = {0};
1043 static int tc = 0;
1044 #define TRACE_LOCK(X, Y)                                                       \
1045   KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1046 #define TRACE_LOCK_T(X, Y, Z)                                                  \
1047   KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1048 #define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
1049   KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
1050                Z, Q);
1051 
1052 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1053                                     kmp_queuing_lock_t *lck, kmp_int32 head_id,
1054                                     kmp_int32 tail_id) {
1055   kmp_int32 t, i;
1056 
1057   __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1058 
1059   i = tc % TRACE_BUF_ELE;
1060   __kmp_printf_no_lock("%s\n", traces[i]);
1061   i = (i + 1) % TRACE_BUF_ELE;
1062   while (i != (tc % TRACE_BUF_ELE)) {
1063     __kmp_printf_no_lock("%s", traces[i]);
1064     i = (i + 1) % TRACE_BUF_ELE;
1065   }
1066   __kmp_printf_no_lock("\n");
1067 
1068   __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1069                        "next_wait:%d, head_id:%d, tail_id:%d\n",
1070                        gtid + 1, this_thr->th.th_spin_here,
1071                        this_thr->th.th_next_waiting, head_id, tail_id);
1072 
1073   __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1074 
1075   if (lck->lk.head_id >= 1) {
1076     t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1077     while (t > 0) {
1078       __kmp_printf_no_lock("-> %d ", t);
1079       t = __kmp_threads[t - 1]->th.th_next_waiting;
1080     }
1081   }
1082   __kmp_printf_no_lock(";  tail: %d ", lck->lk.tail_id);
1083   __kmp_printf_no_lock("\n\n");
1084 }
1085 
1086 #endif /* DEBUG_QUEUING_LOCKS */
1087 
1088 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1089   return TCR_4(lck->lk.owner_id) - 1;
1090 }
1091 
1092 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1093   return lck->lk.depth_locked != -1;
1094 }
1095 
1096 /* Acquire a lock using the queuing lock implementation */
1097 template <bool takeTime>
1098 /* [TLW] The unused template above is left behind because of what BEB believes
1099    is a potential compiler problem with __forceinline. */
1100 __forceinline static int
1101 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1102                                           kmp_int32 gtid) {
1103   kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1104   volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1105   volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1106   volatile kmp_uint32 *spin_here_p;
1107   kmp_int32 need_mf = 1;
1108 
1109 #if OMPT_SUPPORT
1110   ompt_state_t prev_state = ompt_state_undefined;
1111 #endif
1112 
1113   KA_TRACE(1000,
1114            ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1115 
1116   KMP_FSYNC_PREPARE(lck);
1117   KMP_DEBUG_ASSERT(this_thr != NULL);
1118   spin_here_p = &this_thr->th.th_spin_here;
1119 
1120 #ifdef DEBUG_QUEUING_LOCKS
1121   TRACE_LOCK(gtid + 1, "acq ent");
1122   if (*spin_here_p)
1123     __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1124   if (this_thr->th.th_next_waiting != 0)
1125     __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1126 #endif
1127   KMP_DEBUG_ASSERT(!*spin_here_p);
1128   KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1129 
1130   /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1131      head_id_p that may follow, not just in execution order, but also in
1132      visibility order. This way, when a releasing thread observes the changes to
1133      the queue by this thread, it can rightly assume that spin_here_p has
1134      already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1135      not premature.  If the releasing thread sets spin_here_p to FALSE before
1136      this thread sets it to TRUE, this thread will hang. */
1137   *spin_here_p = TRUE; /* before enqueuing to prevent race */
1138 
1139   while (1) {
1140     kmp_int32 enqueued;
1141     kmp_int32 head;
1142     kmp_int32 tail;
1143 
1144     head = *head_id_p;
1145 
1146     switch (head) {
1147 
1148     case -1: {
1149 #ifdef DEBUG_QUEUING_LOCKS
1150       tail = *tail_id_p;
1151       TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1152 #endif
1153       tail = 0; /* to make sure next link asynchronously read is not set
1154                 accidentally; this assignment prevents us from entering the
1155                 if ( tail > 0 ) condition in the enqueued case below, which is not
1156                 necessary for this state transition */
1157 
1158       need_mf = 0;
1159       /* try (-1,0)->(tid,tid) */
1160       enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1161                                              KMP_PACK_64(-1, 0),
1162                                              KMP_PACK_64(gtid + 1, gtid + 1));
1163 #ifdef DEBUG_QUEUING_LOCKS
1164       if (enqueued)
1165         TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1166 #endif
1167     } break;
1168 
1169     default: {
1170       tail = *tail_id_p;
1171       KMP_DEBUG_ASSERT(tail != gtid + 1);
1172 
1173 #ifdef DEBUG_QUEUING_LOCKS
1174       TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1175 #endif
1176 
1177       if (tail == 0) {
1178         enqueued = FALSE;
1179       } else {
1180         need_mf = 0;
1181         /* try (h,t) or (h,h)->(h,tid) */
1182         enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1183 
1184 #ifdef DEBUG_QUEUING_LOCKS
1185         if (enqueued)
1186           TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1187 #endif
1188       }
1189     } break;
1190 
1191     case 0: /* empty queue */
1192     {
1193       kmp_int32 grabbed_lock;
1194 
1195 #ifdef DEBUG_QUEUING_LOCKS
1196       tail = *tail_id_p;
1197       TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1198 #endif
1199       /* try (0,0)->(-1,0) */
1200 
1201       /* only legal transition out of head = 0 is head = -1 with no change to
1202        * tail */
1203       grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1204 
1205       if (grabbed_lock) {
1206 
1207         *spin_here_p = FALSE;
1208 
1209         KA_TRACE(
1210             1000,
1211             ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1212              lck, gtid));
1213 #ifdef DEBUG_QUEUING_LOCKS
1214         TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1215 #endif
1216 
1217 #if OMPT_SUPPORT
1218         if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1219           /* change the state before clearing wait_id */
1220           this_thr->th.ompt_thread_info.state = prev_state;
1221           this_thr->th.ompt_thread_info.wait_id = 0;
1222         }
1223 #endif
1224 
1225         KMP_FSYNC_ACQUIRED(lck);
1226         return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1227       }
1228       enqueued = FALSE;
1229     } break;
1230     }
1231 
1232 #if OMPT_SUPPORT
1233     if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1234       /* this thread will spin; set wait_id before entering wait state */
1235       prev_state = this_thr->th.ompt_thread_info.state;
1236       this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1237       this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1238     }
1239 #endif
1240 
1241     if (enqueued) {
1242       if (tail > 0) {
1243         kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1244         KMP_ASSERT(tail_thr != NULL);
1245         tail_thr->th.th_next_waiting = gtid + 1;
1246         /* corresponding wait for this write in release code */
1247       }
1248       KA_TRACE(1000,
1249                ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1250                 lck, gtid));
1251 
1252       /* ToDo: May want to consider using __kmp_wait_sleep  or something that
1253          sleeps for throughput only here. */
1254       KMP_MB();
1255       KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1256 
1257 #ifdef DEBUG_QUEUING_LOCKS
1258       TRACE_LOCK(gtid + 1, "acq spin");
1259 
1260       if (this_thr->th.th_next_waiting != 0)
1261         __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1262 #endif
1263       KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1264       KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1265                       "waiting on queue\n",
1266                       lck, gtid));
1267 
1268 #ifdef DEBUG_QUEUING_LOCKS
1269       TRACE_LOCK(gtid + 1, "acq exit 2");
1270 #endif
1271 
1272 #if OMPT_SUPPORT
1273       /* change the state before clearing wait_id */
1274       this_thr->th.ompt_thread_info.state = prev_state;
1275       this_thr->th.ompt_thread_info.wait_id = 0;
1276 #endif
1277 
1278       /* got lock, we were dequeued by the thread that released lock */
1279       return KMP_LOCK_ACQUIRED_FIRST;
1280     }
1281 
1282     /* Yield if number of threads > number of logical processors */
1283     /* ToDo: Not sure why this should only be in oversubscription case,
1284        maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1285     KMP_YIELD(TCR_4(__kmp_nth) >
1286               (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
1287 #ifdef DEBUG_QUEUING_LOCKS
1288     TRACE_LOCK(gtid + 1, "acq retry");
1289 #endif
1290   }
1291   KMP_ASSERT2(0, "should not get here");
1292   return KMP_LOCK_ACQUIRED_FIRST;
1293 }
1294 
1295 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1296   KMP_DEBUG_ASSERT(gtid >= 0);
1297 
1298   int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1299   ANNOTATE_QUEUING_ACQUIRED(lck);
1300   return retval;
1301 }
1302 
1303 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1304                                                   kmp_int32 gtid) {
1305   char const *const func = "omp_set_lock";
1306   if (lck->lk.initialized != lck) {
1307     KMP_FATAL(LockIsUninitialized, func);
1308   }
1309   if (__kmp_is_queuing_lock_nestable(lck)) {
1310     KMP_FATAL(LockNestableUsedAsSimple, func);
1311   }
1312   if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1313     KMP_FATAL(LockIsAlreadyOwned, func);
1314   }
1315 
1316   __kmp_acquire_queuing_lock(lck, gtid);
1317 
1318   lck->lk.owner_id = gtid + 1;
1319   return KMP_LOCK_ACQUIRED_FIRST;
1320 }
1321 
1322 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1323   volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1324   kmp_int32 head;
1325 #ifdef KMP_DEBUG
1326   kmp_info_t *this_thr;
1327 #endif
1328 
1329   KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1330   KMP_DEBUG_ASSERT(gtid >= 0);
1331 #ifdef KMP_DEBUG
1332   this_thr = __kmp_thread_from_gtid(gtid);
1333   KMP_DEBUG_ASSERT(this_thr != NULL);
1334   KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1335 #endif
1336 
1337   head = *head_id_p;
1338 
1339   if (head == 0) { /* nobody on queue, nobody holding */
1340     /* try (0,0)->(-1,0) */
1341     if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1342       KA_TRACE(1000,
1343                ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1344       KMP_FSYNC_ACQUIRED(lck);
1345       ANNOTATE_QUEUING_ACQUIRED(lck);
1346       return TRUE;
1347     }
1348   }
1349 
1350   KA_TRACE(1000,
1351            ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1352   return FALSE;
1353 }
1354 
1355 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1356                                                kmp_int32 gtid) {
1357   char const *const func = "omp_test_lock";
1358   if (lck->lk.initialized != lck) {
1359     KMP_FATAL(LockIsUninitialized, func);
1360   }
1361   if (__kmp_is_queuing_lock_nestable(lck)) {
1362     KMP_FATAL(LockNestableUsedAsSimple, func);
1363   }
1364 
1365   int retval = __kmp_test_queuing_lock(lck, gtid);
1366 
1367   if (retval) {
1368     lck->lk.owner_id = gtid + 1;
1369   }
1370   return retval;
1371 }
1372 
1373 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1374   kmp_info_t *this_thr;
1375   volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1376   volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1377 
1378   KA_TRACE(1000,
1379            ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1380   KMP_DEBUG_ASSERT(gtid >= 0);
1381   this_thr = __kmp_thread_from_gtid(gtid);
1382   KMP_DEBUG_ASSERT(this_thr != NULL);
1383 #ifdef DEBUG_QUEUING_LOCKS
1384   TRACE_LOCK(gtid + 1, "rel ent");
1385 
1386   if (this_thr->th.th_spin_here)
1387     __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1388   if (this_thr->th.th_next_waiting != 0)
1389     __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1390 #endif
1391   KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1392   KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1393 
1394   KMP_FSYNC_RELEASING(lck);
1395   ANNOTATE_QUEUING_RELEASED(lck);
1396 
1397   while (1) {
1398     kmp_int32 dequeued;
1399     kmp_int32 head;
1400     kmp_int32 tail;
1401 
1402     head = *head_id_p;
1403 
1404 #ifdef DEBUG_QUEUING_LOCKS
1405     tail = *tail_id_p;
1406     TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1407     if (head == 0)
1408       __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1409 #endif
1410     KMP_DEBUG_ASSERT(head !=
1411                      0); /* holding the lock, head must be -1 or queue head */
1412 
1413     if (head == -1) { /* nobody on queue */
1414       /* try (-1,0)->(0,0) */
1415       if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1416         KA_TRACE(
1417             1000,
1418             ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1419              lck, gtid));
1420 #ifdef DEBUG_QUEUING_LOCKS
1421         TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1422 #endif
1423 
1424 #if OMPT_SUPPORT
1425 /* nothing to do - no other thread is trying to shift blame */
1426 #endif
1427         return KMP_LOCK_RELEASED;
1428       }
1429       dequeued = FALSE;
1430     } else {
1431       KMP_MB();
1432       tail = *tail_id_p;
1433       if (head == tail) { /* only one thread on the queue */
1434 #ifdef DEBUG_QUEUING_LOCKS
1435         if (head <= 0)
1436           __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1437 #endif
1438         KMP_DEBUG_ASSERT(head > 0);
1439 
1440         /* try (h,h)->(-1,0) */
1441         dequeued = KMP_COMPARE_AND_STORE_REL64(
1442             RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1443             KMP_PACK_64(-1, 0));
1444 #ifdef DEBUG_QUEUING_LOCKS
1445         TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1446 #endif
1447 
1448       } else {
1449         volatile kmp_int32 *waiting_id_p;
1450         kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1451         KMP_DEBUG_ASSERT(head_thr != NULL);
1452         waiting_id_p = &head_thr->th.th_next_waiting;
1453 
1454 /* Does this require synchronous reads? */
1455 #ifdef DEBUG_QUEUING_LOCKS
1456         if (head <= 0 || tail <= 0)
1457           __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1458 #endif
1459         KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1460 
1461         /* try (h,t)->(h',t) or (t,t) */
1462         KMP_MB();
1463         /* make sure enqueuing thread has time to update next waiting thread
1464          * field */
1465         *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
1466                                     KMP_NEQ, NULL);
1467 #ifdef DEBUG_QUEUING_LOCKS
1468         TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1469 #endif
1470         dequeued = TRUE;
1471       }
1472     }
1473 
1474     if (dequeued) {
1475       kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1476       KMP_DEBUG_ASSERT(head_thr != NULL);
1477 
1478 /* Does this require synchronous reads? */
1479 #ifdef DEBUG_QUEUING_LOCKS
1480       if (head <= 0 || tail <= 0)
1481         __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1482 #endif
1483       KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1484 
1485       /* For clean code only. The thread is not released until the next statement
1486          clears th_spin_here, which prevents a race with the acquire code. */
1487       head_thr->th.th_next_waiting = 0;
1488 #ifdef DEBUG_QUEUING_LOCKS
1489       TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1490 #endif
1491 
1492       KMP_MB();
1493       /* reset spin value */
1494       head_thr->th.th_spin_here = FALSE;
1495 
1496       KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1497                       "dequeuing\n",
1498                       lck, gtid));
1499 #ifdef DEBUG_QUEUING_LOCKS
1500       TRACE_LOCK(gtid + 1, "rel exit 2");
1501 #endif
1502       return KMP_LOCK_RELEASED;
1503     }
1504 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1505    threads */
1506 
1507 #ifdef DEBUG_QUEUING_LOCKS
1508     TRACE_LOCK(gtid + 1, "rel retry");
1509 #endif
1510 
1511   } /* while */
1512   KMP_ASSERT2(0, "should not get here");
1513   return KMP_LOCK_RELEASED;
1514 }
1515 
1516 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1517                                                   kmp_int32 gtid) {
1518   char const *const func = "omp_unset_lock";
1519   KMP_MB(); /* in case another processor initialized lock */
1520   if (lck->lk.initialized != lck) {
1521     KMP_FATAL(LockIsUninitialized, func);
1522   }
1523   if (__kmp_is_queuing_lock_nestable(lck)) {
1524     KMP_FATAL(LockNestableUsedAsSimple, func);
1525   }
1526   if (__kmp_get_queuing_lock_owner(lck) == -1) {
1527     KMP_FATAL(LockUnsettingFree, func);
1528   }
1529   if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1530     KMP_FATAL(LockUnsettingSetByAnother, func);
1531   }
1532   lck->lk.owner_id = 0;
1533   return __kmp_release_queuing_lock(lck, gtid);
1534 }
1535 
1536 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1537   lck->lk.location = NULL;
1538   lck->lk.head_id = 0;
1539   lck->lk.tail_id = 0;
1540   lck->lk.next_ticket = 0;
1541   lck->lk.now_serving = 0;
1542   lck->lk.owner_id = 0; // no thread owns the lock.
1543   lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1544   lck->lk.initialized = lck;
1545 
1546   KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1547 }
1548 
1549 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1550   lck->lk.initialized = NULL;
1551   lck->lk.location = NULL;
1552   lck->lk.head_id = 0;
1553   lck->lk.tail_id = 0;
1554   lck->lk.next_ticket = 0;
1555   lck->lk.now_serving = 0;
1556   lck->lk.owner_id = 0;
1557   lck->lk.depth_locked = -1;
1558 }
1559 
1560 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1561   char const *const func = "omp_destroy_lock";
1562   if (lck->lk.initialized != lck) {
1563     KMP_FATAL(LockIsUninitialized, func);
1564   }
1565   if (__kmp_is_queuing_lock_nestable(lck)) {
1566     KMP_FATAL(LockNestableUsedAsSimple, func);
1567   }
1568   if (__kmp_get_queuing_lock_owner(lck) != -1) {
1569     KMP_FATAL(LockStillOwned, func);
1570   }
1571   __kmp_destroy_queuing_lock(lck);
1572 }
1573 
1574 // nested queuing locks
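// A nested lock may be re-acquired by the thread that already owns it;
// lk.depth_locked counts the recursion depth, and the underlying queuing lock
// is only released when that count drops back to zero.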
1575 
1576 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1577   KMP_DEBUG_ASSERT(gtid >= 0);
1578 
1579   if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1580     lck->lk.depth_locked += 1;
1581     return KMP_LOCK_ACQUIRED_NEXT;
1582   } else {
1583     __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1584     ANNOTATE_QUEUING_ACQUIRED(lck);
1585     KMP_MB();
1586     lck->lk.depth_locked = 1;
1587     KMP_MB();
1588     lck->lk.owner_id = gtid + 1;
1589     return KMP_LOCK_ACQUIRED_FIRST;
1590   }
1591 }
1592 
1593 static int
1594 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1595                                               kmp_int32 gtid) {
1596   char const *const func = "omp_set_nest_lock";
1597   if (lck->lk.initialized != lck) {
1598     KMP_FATAL(LockIsUninitialized, func);
1599   }
1600   if (!__kmp_is_queuing_lock_nestable(lck)) {
1601     KMP_FATAL(LockSimpleUsedAsNestable, func);
1602   }
1603   return __kmp_acquire_nested_queuing_lock(lck, gtid);
1604 }
1605 
1606 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1607   int retval;
1608 
1609   KMP_DEBUG_ASSERT(gtid >= 0);
1610 
1611   if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1612     retval = ++lck->lk.depth_locked;
1613   } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1614     retval = 0;
1615   } else {
1616     KMP_MB();
1617     retval = lck->lk.depth_locked = 1;
1618     KMP_MB();
1619     lck->lk.owner_id = gtid + 1;
1620   }
1621   return retval;
1622 }
1623 
1624 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1625                                                       kmp_int32 gtid) {
1626   char const *const func = "omp_test_nest_lock";
1627   if (lck->lk.initialized != lck) {
1628     KMP_FATAL(LockIsUninitialized, func);
1629   }
1630   if (!__kmp_is_queuing_lock_nestable(lck)) {
1631     KMP_FATAL(LockSimpleUsedAsNestable, func);
1632   }
1633   return __kmp_test_nested_queuing_lock(lck, gtid);
1634 }
1635 
1636 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1637   KMP_DEBUG_ASSERT(gtid >= 0);
1638 
1639   KMP_MB();
1640   if (--(lck->lk.depth_locked) == 0) {
1641     KMP_MB();
1642     lck->lk.owner_id = 0;
1643     __kmp_release_queuing_lock(lck, gtid);
1644     return KMP_LOCK_RELEASED;
1645   }
1646   return KMP_LOCK_STILL_HELD;
1647 }
1648 
1649 static int
1650 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1651                                               kmp_int32 gtid) {
1652   char const *const func = "omp_unset_nest_lock";
1653   KMP_MB(); /* in case another processor initialized lock */
1654   if (lck->lk.initialized != lck) {
1655     KMP_FATAL(LockIsUninitialized, func);
1656   }
1657   if (!__kmp_is_queuing_lock_nestable(lck)) {
1658     KMP_FATAL(LockSimpleUsedAsNestable, func);
1659   }
1660   if (__kmp_get_queuing_lock_owner(lck) == -1) {
1661     KMP_FATAL(LockUnsettingFree, func);
1662   }
1663   if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1664     KMP_FATAL(LockUnsettingSetByAnother, func);
1665   }
1666   return __kmp_release_nested_queuing_lock(lck, gtid);
1667 }
1668 
1669 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1670   __kmp_init_queuing_lock(lck);
1671   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1672 }
1673 
1674 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1675   __kmp_destroy_queuing_lock(lck);
1676   lck->lk.depth_locked = 0;
1677 }
1678 
1679 static void
1680 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1681   char const *const func = "omp_destroy_nest_lock";
1682   if (lck->lk.initialized != lck) {
1683     KMP_FATAL(LockIsUninitialized, func);
1684   }
1685   if (!__kmp_is_queuing_lock_nestable(lck)) {
1686     KMP_FATAL(LockSimpleUsedAsNestable, func);
1687   }
1688   if (__kmp_get_queuing_lock_owner(lck) != -1) {
1689     KMP_FATAL(LockStillOwned, func);
1690   }
1691   __kmp_destroy_nested_queuing_lock(lck);
1692 }
1693 
1694 // Access functions for fields which don't exist for all lock kinds.
1695 
1696 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1697   return lck->lk.location;
1698 }
1699 
1700 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1701                                             const ident_t *loc) {
1702   lck->lk.location = loc;
1703 }
1704 
1705 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1706   return lck->lk.flags;
1707 }
1708 
1709 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1710                                          kmp_lock_flags_t flags) {
1711   lck->lk.flags = flags;
1712 }
1713 
1714 #if KMP_USE_ADAPTIVE_LOCKS
1715 
1716 /* RTM Adaptive locks */
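// An adaptive lock first tries to execute the critical section as a hardware
// (RTM) transaction and falls back to the underlying queuing lock when
// speculation keeps failing; the "badness" mask below throttles how often
// speculation is retried.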
1717 
1718 #if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) ||                          \
1719     (KMP_COMPILER_MSVC && _MSC_VER >= 1700) ||                                 \
1720     (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT)
1721 
1722 #include <immintrin.h>
1723 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1724 
1725 #else
1726 
1727 // Values from the status register after failed speculation.
1728 #define _XBEGIN_STARTED (~0u)
1729 #define _XABORT_EXPLICIT (1 << 0)
1730 #define _XABORT_RETRY (1 << 1)
1731 #define _XABORT_CONFLICT (1 << 2)
1732 #define _XABORT_CAPACITY (1 << 3)
1733 #define _XABORT_DEBUG (1 << 4)
1734 #define _XABORT_NESTED (1 << 5)
1735 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1736 
1737 // Aborts for which it's worth trying again immediately
1738 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1739 
1740 #define STRINGIZE_INTERNAL(arg) #arg
1741 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1742 
1743 // Access to RTM instructions
1744 /* A version of XBegin which returns -1 if speculation starts, and the value
1745    of EAX on an abort. This is the same definition as the compiler intrinsic
1746    that will be supported at some point. */
1747 static __inline int _xbegin() {
1748   int res = -1;
1749 
1750 #if KMP_OS_WINDOWS
1751 #if KMP_ARCH_X86_64
1752   _asm {
1753         _emit 0xC7
1754         _emit 0xF8
1755         _emit 2
1756         _emit 0
1757         _emit 0
1758         _emit 0
1759         jmp   L2
1760         mov   res, eax
1761     L2:
1762   }
1763 #else /* IA32 */
1764   _asm {
1765         _emit 0xC7
1766         _emit 0xF8
1767         _emit 2
1768         _emit 0
1769         _emit 0
1770         _emit 0
1771         jmp   L2
1772         mov   res, eax
1773     L2:
1774   }
1775 #endif // KMP_ARCH_X86_64
1776 #else
1777   /* Note that %eax must be noted as killed (clobbered), because the XSR is
1778      returned in %eax(%rax) on abort.  Other register values are restored, so
1779      don't need to be killed.
1780 
1781      We must also mark 'res' as an input and an output, since otherwise
1782      'res=-1' may be dropped as being dead, whereas we do need the assignment on
1783      the successful (i.e., non-abort) path. */
1784   __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
1785                    "   .long  1f-1b-6\n"
1786                    "    jmp   2f\n"
1787                    "1:  movl  %%eax,%0\n"
1788                    "2:"
1789                    : "+r"(res)::"memory", "%eax");
1790 #endif // KMP_OS_WINDOWS
1791   return res;
1792 }
1793 
1794 /* Transaction end */
1795 static __inline void _xend() {
1796 #if KMP_OS_WINDOWS
1797   __asm {
1798         _emit 0x0f
1799         _emit 0x01
1800         _emit 0xd5
1801   }
1802 #else
1803   __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1804 #endif
1805 }
1806 
1807 /* This is a macro; the argument must be a single byte constant which can be
1808    evaluated by the inline assembler, since it is emitted as a byte into the
1809    assembly code. */
1810 // clang-format off
1811 #if KMP_OS_WINDOWS
1812 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1813 #else
1814 #define _xabort(ARG)                                                           \
1815   __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1816 #endif
1817 // clang-format on
1818 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1819 
1820 // Statistics are collected for testing purposes.
1821 #if KMP_DEBUG_ADAPTIVE_LOCKS
1822 
1823 // We accumulate speculative lock statistics when the lock is destroyed. We
1824 // keep locks that haven't been destroyed in the liveLocks list so that we can
1825 // grab their statistics too.
1826 static kmp_adaptive_lock_statistics_t destroyedStats;
1827 
1828 // To hold the list of live locks.
1829 static kmp_adaptive_lock_info_t liveLocks;
1830 
1831 // A lock so we can safely update the list of locks.
1832 static kmp_bootstrap_lock_t chain_lock =
1833     KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1834 
1835 // Initialize the list of stats.
1836 void __kmp_init_speculative_stats() {
1837   kmp_adaptive_lock_info_t *lck = &liveLocks;
1838 
1839   memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1840          sizeof(lck->stats));
1841   lck->stats.next = lck;
1842   lck->stats.prev = lck;
1843 
1844   KMP_ASSERT(lck->stats.next->stats.prev == lck);
1845   KMP_ASSERT(lck->stats.prev->stats.next == lck);
1846 
1847   __kmp_init_bootstrap_lock(&chain_lock);
1848 }
1849 
1850 // Insert the lock into the circular list
1851 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1852   __kmp_acquire_bootstrap_lock(&chain_lock);
1853 
1854   lck->stats.next = liveLocks.stats.next;
1855   lck->stats.prev = &liveLocks;
1856 
1857   liveLocks.stats.next = lck;
1858   lck->stats.next->stats.prev = lck;
1859 
1860   KMP_ASSERT(lck->stats.next->stats.prev == lck);
1861   KMP_ASSERT(lck->stats.prev->stats.next == lck);
1862 
1863   __kmp_release_bootstrap_lock(&chain_lock);
1864 }
1865 
1866 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1867   KMP_ASSERT(lck->stats.next->stats.prev == lck);
1868   KMP_ASSERT(lck->stats.prev->stats.next == lck);
1869 
1870   kmp_adaptive_lock_info_t *n = lck->stats.next;
1871   kmp_adaptive_lock_info_t *p = lck->stats.prev;
1872 
1873   n->stats.prev = p;
1874   p->stats.next = n;
1875 }
1876 
1877 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1878   memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1879          sizeof(lck->stats));
1880   __kmp_remember_lock(lck);
1881 }
1882 
1883 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1884                             kmp_adaptive_lock_info_t *lck) {
1885   kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1886 
1887   t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1888   t->successfulSpeculations += s->successfulSpeculations;
1889   t->hardFailedSpeculations += s->hardFailedSpeculations;
1890   t->softFailedSpeculations += s->softFailedSpeculations;
1891   t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1892   t->lemmingYields += s->lemmingYields;
1893 }
1894 
1895 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1896   __kmp_acquire_bootstrap_lock(&chain_lock);
1897 
1898   __kmp_add_stats(&destroyedStats, lck);
1899   __kmp_forget_lock(lck);
1900 
1901   __kmp_release_bootstrap_lock(&chain_lock);
1902 }
1903 
1904 static float percent(kmp_uint32 count, kmp_uint32 total) {
1905   return (total == 0) ? 0.0 : (100.0 * count) / total;
1906 }
1907 
1908 static FILE *__kmp_open_stats_file() {
1909   if (strcmp(__kmp_speculative_statsfile, "-") == 0)
1910     return stdout;
1911 
1912   size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1913   char buffer[buffLen];
1914   KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1915                (kmp_int32)getpid());
1916   FILE *result = fopen(&buffer[0], "w");
1917 
1918   // Maybe we should issue a warning here...
1919   return result ? result : stdout;
1920 }
1921 
1922 void __kmp_print_speculative_stats() {
1923   kmp_adaptive_lock_statistics_t total = destroyedStats;
1924   kmp_adaptive_lock_info_t *lck;
1925 
1926   for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1927     __kmp_add_stats(&total, lck);
1928   }
1929   kmp_adaptive_lock_statistics_t *t = &total;
1930   kmp_uint32 totalSections =
1931       t->nonSpeculativeAcquires + t->successfulSpeculations;
1932   kmp_uint32 totalSpeculations = t->successfulSpeculations +
1933                                  t->hardFailedSpeculations +
1934                                  t->softFailedSpeculations;
1935   if (totalSections <= 0)
1936     return;
1937 
1938   FILE *statsFile = __kmp_open_stats_file();
1939 
1940   fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1941   fprintf(statsFile, " Lock parameters: \n"
1942                      "   max_soft_retries               : %10d\n"
1943                      "   max_badness                    : %10d\n",
1944           __kmp_adaptive_backoff_params.max_soft_retries,
1945           __kmp_adaptive_backoff_params.max_badness);
1946   fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1947           t->nonSpeculativeAcquireAttempts);
1948   fprintf(statsFile, " Total critical sections          : %10d\n",
1949           totalSections);
1950   fprintf(statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
1951           t->successfulSpeculations,
1952           percent(t->successfulSpeculations, totalSections));
1953   fprintf(statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
1954           t->nonSpeculativeAcquires,
1955           percent(t->nonSpeculativeAcquires, totalSections));
1956   fprintf(statsFile, " Lemming yields                   : %10d\n\n",
1957           t->lemmingYields);
1958 
1959   fprintf(statsFile, " Speculative acquire attempts     : %10d\n",
1960           totalSpeculations);
1961   fprintf(statsFile, " Successes                        : %10d (%5.1f%%)\n",
1962           t->successfulSpeculations,
1963           percent(t->successfulSpeculations, totalSpeculations));
1964   fprintf(statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
1965           t->softFailedSpeculations,
1966           percent(t->softFailedSpeculations, totalSpeculations));
1967   fprintf(statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
1968           t->hardFailedSpeculations,
1969           percent(t->hardFailedSpeculations, totalSpeculations));
1970 
1971   if (statsFile != stdout)
1972     fclose(statsFile);
1973 }
1974 
1975 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1976 #else
1977 #define KMP_INC_STAT(lck, stat)
1978 
1979 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
1980 
1981 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1982   // It is enough to check that head_id is zero;
1983   // we don't need to check the tail as well.
1984   bool res = lck->lk.head_id == 0;
1985 
1986 // We need a fence here, since we must ensure that no memory operations
1987 // from later in this thread float above that read.
1988 #if KMP_COMPILER_ICC
1989   _mm_mfence();
1990 #else
1991   __sync_synchronize();
1992 #endif
1993 
1994   return res;
1995 }
1996 
1997 // Functions for manipulating the badness
1998 static __inline void
1999 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
2000   // Reset the badness to zero so we eagerly try to speculate again
2001   lck->lk.adaptive.badness = 0;
2002   KMP_INC_STAT(lck, successfulSpeculations);
2003 }
2004 
2005 // Create a bit mask with one more set bit.
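// For example, badness steps 0 -> 1 -> 3 -> 7 -> ... up to max_badness. Since
// __kmp_should_speculate() tests (acquire_attempts & badness) == 0, a wider
// mask makes speculation attempts correspondingly rarer.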
2006 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
2007   kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
2008   if (newBadness > lck->lk.adaptive.max_badness) {
2009     return;
2010   } else {
2011     lck->lk.adaptive.badness = newBadness;
2012   }
2013 }
2014 
2015 // Check whether speculation should be attempted.
2016 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
2017                                            kmp_int32 gtid) {
2018   kmp_uint32 badness = lck->lk.adaptive.badness;
2019   kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2020   int res = (attempts & badness) == 0;
2021   return res;
2022 }
2023 
2024 // Attempt to acquire only the speculative lock.
2025 // Does not back off to the non-speculative lock.
2026 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
2027                                          kmp_int32 gtid) {
2028   int retries = lck->lk.adaptive.max_soft_retries;
2029 
2030   // We don't explicitly count the start of speculation, rather we record the
2031   // results (success, hard fail, soft fail). The sum of all of those is the
2032   // total number of times we started speculation since all speculations must
2033   // end one of those ways.
2034   do {
2035     kmp_uint32 status = _xbegin();
2036     // Switch this in to disable actual speculation but exercise at least some
2037     // of the rest of the code. Useful for debugging...
2038     // kmp_uint32 status = _XABORT_NESTED;
2039 
2040     if (status == _XBEGIN_STARTED) {
2041       /* We have successfully started speculation. Check that no-one acquired
2042          the lock for real between when we last looked and now. This also gets
2043          the lock cache line into our read-set, which we need so that we'll
2044          abort if anyone later claims it for real. */
2045       if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2046         // Lock is now visibly acquired, so someone beat us to it. Abort the
2047         // transaction so we'll restart from _xbegin with the failure status.
2048         _xabort(0x01);
2049         KMP_ASSERT2(0, "should not get here");
2050       }
2051       return 1; // Lock has been acquired (speculatively)
2052     } else {
2053       // We have aborted, update the statistics
2054       if (status & SOFT_ABORT_MASK) {
2055         KMP_INC_STAT(lck, softFailedSpeculations);
2056         // and loop round to retry.
2057       } else {
2058         KMP_INC_STAT(lck, hardFailedSpeculations);
2059         // Give up if we had a hard failure.
2060         break;
2061       }
2062     }
2063   } while (retries--); // Loop while we have retries, and didn't fail hard.
2064 
2065   // Either we had a hard failure or we didn't succeed softly after
2066   // the full set of attempts, so back off the badness.
2067   __kmp_step_badness(lck);
2068   return 0;
2069 }
2070 
2071 // Attempt to acquire the speculative lock, or back off to the non-speculative
2072 // one if the speculative lock cannot be acquired.
2073 // We can succeed speculatively, non-speculatively, or fail.
2074 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2075   // First try to acquire the lock speculatively
2076   if (__kmp_should_speculate(lck, gtid) &&
2077       __kmp_test_adaptive_lock_only(lck, gtid))
2078     return 1;
2079 
2080   // Speculative acquisition failed, so try to acquire it non-speculatively.
2081   // Count the non-speculative acquire attempt
2082   lck->lk.adaptive.acquire_attempts++;
2083 
2084   // Use base, non-speculative lock.
2085   if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2086     KMP_INC_STAT(lck, nonSpeculativeAcquires);
2087     return 1; // Lock is acquired (non-speculatively)
2088   } else {
2089     return 0; // Failed to acquire the lock, it's already visibly locked.
2090   }
2091 }
2092 
2093 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2094                                                 kmp_int32 gtid) {
2095   char const *const func = "omp_test_lock";
2096   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2097     KMP_FATAL(LockIsUninitialized, func);
2098   }
2099 
2100   int retval = __kmp_test_adaptive_lock(lck, gtid);
2101 
2102   if (retval) {
2103     lck->lk.qlk.owner_id = gtid + 1;
2104   }
2105   return retval;
2106 }
2107 
2108 // Block until we can acquire a speculative, adaptive lock. We check whether we
2109 // should be trying to speculate. If we should be, we check the real lock to see
2110 // if it is free, and, if not, pause without attempting to acquire it until it
2111 // is. Then we try the speculative acquire. This means that although we suffer
2112 // from lemmings a little (because we can't acquire the lock speculatively
2113 // until the queue of waiting threads has cleared), we don't get into a state
2114 // where we can never acquire the lock speculatively (because we force the queue
2115 // to clear by preventing new arrivals from entering the queue). This does mean
2116 // that when we're trying to break lemmings, the lock is no longer fair. However
2117 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
2118 // problem.
2119 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2120                                         kmp_int32 gtid) {
2121   if (__kmp_should_speculate(lck, gtid)) {
2122     if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2123       if (__kmp_test_adaptive_lock_only(lck, gtid))
2124         return;
2125       // We tried speculation and failed, so give up.
2126     } else {
2127       // We can't try speculation until the lock is free, so we pause here
2128       // (without suspending on the queuing lock) to allow it to drain, then
2129       // try again. All other threads will also see the same result from
2130       // __kmp_should_speculate(), so they will be doing the same if they try
2131       // to claim the lock from now on.
2132       while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2133         KMP_INC_STAT(lck, lemmingYields);
2134         __kmp_yield(TRUE);
2135       }
2136 
2137       if (__kmp_test_adaptive_lock_only(lck, gtid))
2138         return;
2139     }
2140   }
2141 
2142   // Speculative acquisition failed, so acquire it non-speculatively.
2143   // Count the non-speculative acquire attempt
2144   lck->lk.adaptive.acquire_attempts++;
2145 
2146   __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2147   // We have acquired the base lock, so count that.
2148   KMP_INC_STAT(lck, nonSpeculativeAcquires);
2149   ANNOTATE_QUEUING_ACQUIRED(lck);
2150 }
2151 
2152 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2153                                                     kmp_int32 gtid) {
2154   char const *const func = "omp_set_lock";
2155   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2156     KMP_FATAL(LockIsUninitialized, func);
2157   }
2158   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2159     KMP_FATAL(LockIsAlreadyOwned, func);
2160   }
2161 
2162   __kmp_acquire_adaptive_lock(lck, gtid);
2163 
2164   lck->lk.qlk.owner_id = gtid + 1;
2165 }
2166 
2167 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2168                                        kmp_int32 gtid) {
2169   if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2170           lck))) { // If the lock doesn't look claimed we must be speculating.
2171     // (Or the user's code is buggy and they're releasing without locking;
2172     // if we had XTEST we'd be able to check that case...)
2173     _xend(); // Exit speculation
2174     __kmp_update_badness_after_success(lck);
2175   } else { // Since the lock *is* visibly locked we're not speculating,
2176     // so should use the underlying lock's release scheme.
2177     __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2178   }
2179   return KMP_LOCK_RELEASED;
2180 }
2181 
2182 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2183                                                    kmp_int32 gtid) {
2184   char const *const func = "omp_unset_lock";
2185   KMP_MB(); /* in case another processor initialized lock */
2186   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2187     KMP_FATAL(LockIsUninitialized, func);
2188   }
2189   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2190     KMP_FATAL(LockUnsettingFree, func);
2191   }
2192   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2193     KMP_FATAL(LockUnsettingSetByAnother, func);
2194   }
2195   lck->lk.qlk.owner_id = 0;
2196   __kmp_release_adaptive_lock(lck, gtid);
2197   return KMP_LOCK_RELEASED;
2198 }
2199 
2200 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2201   __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2202   lck->lk.adaptive.badness = 0;
2203   lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2204   lck->lk.adaptive.max_soft_retries =
2205       __kmp_adaptive_backoff_params.max_soft_retries;
2206   lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2207 #if KMP_DEBUG_ADAPTIVE_LOCKS
2208   __kmp_zero_speculative_stats(&lck->lk.adaptive);
2209 #endif
2210   KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2211 }
2212 
2213 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2214 #if KMP_DEBUG_ADAPTIVE_LOCKS
2215   __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2216 #endif
2217   __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2218   // Nothing needed for the speculative part.
2219 }
2220 
2221 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2222   char const *const func = "omp_destroy_lock";
2223   if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2224     KMP_FATAL(LockIsUninitialized, func);
2225   }
2226   if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2227     KMP_FATAL(LockStillOwned, func);
2228   }
2229   __kmp_destroy_adaptive_lock(lck);
2230 }
2231 
2232 #endif // KMP_USE_ADAPTIVE_LOCKS
2233 
2234 /* ------------------------------------------------------------------------ */
2235 /* DRDPA ticket locks                                                */
2236 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
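// Each waiter spins on its own slot, polls[ticket & mask]; the releasing
// thread stores the next ticket into that slot, and the polling area is grown
// (or contracted under oversubscription) as the number of waiters changes.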
2237 
2238 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2239   return lck->lk.owner_id - 1;
2240 }
2241 
2242 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2243   return lck->lk.depth_locked != -1;
2244 }
2245 
2246 __forceinline static int
2247 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2248   kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2249   kmp_uint64 mask = lck->lk.mask; // atomic load
2250   std::atomic<kmp_uint64> *polls = lck->lk.polls;
2251 
2252 #ifdef USE_LOCK_PROFILE
2253   if (polls[ticket & mask] != ticket)
2254     __kmp_printf("LOCK CONTENTION: %p\n", lck);
2255 /* else __kmp_printf( "." );*/
2256 #endif /* USE_LOCK_PROFILE */
2257 
2258   // Now spin-wait, but reload the polls pointer and mask, in case the
2259   // polling area has been reconfigured.  Unless it is reconfigured, the
2260   // reloads stay in L1 cache and are cheap.
2261   //
2262   // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
2263   //
2264   // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2265   // and poll to be re-read every spin iteration.
2266   kmp_uint32 spins;
2267 
2268   KMP_FSYNC_PREPARE(lck);
2269   KMP_INIT_YIELD(spins);
2270   while (polls[ticket & mask] < ticket) { // atomic load
2271     // If we are oversubscribed,
2272     // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2273     // CPU Pause is in the macros for yield.
2274     //
2275     KMP_YIELD(TCR_4(__kmp_nth) >
2276               (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2277     KMP_YIELD_SPIN(spins);
2278 
2279     // Re-read the mask and the poll pointer from the lock structure.
2280     //
2281     // Make certain that "mask" is read before "polls" !!!
2282     //
2283     // If another thread reconfigures the polling area and updates its
2284     // values, and we get the new value of mask and the old polls pointer, we
2285     // could access memory beyond the end of the old polling area.
2286     mask = lck->lk.mask; // atomic load
2287     polls = lck->lk.polls; // atomic load
2288   }
2289 
2290   // Critical section starts here
2291   KMP_FSYNC_ACQUIRED(lck);
2292   KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2293                   ticket, lck));
2294   lck->lk.now_serving = ticket; // non-volatile store
2295 
2296   // Deallocate a garbage polling area if we know that we are the last
2297   // thread that could possibly access it.
2298   //
2299   // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2300   // ticket.
2301   if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2302     __kmp_free(lck->lk.old_polls);
2303     lck->lk.old_polls = NULL;
2304     lck->lk.cleanup_ticket = 0;
2305   }
2306 
2307   // Check to see if we should reconfigure the polling area.
2308   // If there is still a garbage polling area to be deallocated from a
2309   // previous reconfiguration, let a later thread reconfigure it.
2310   if (lck->lk.old_polls == NULL) {
2311     bool reconfigure = false;
2312     std::atomic<kmp_uint64> *old_polls = polls;
2313     kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2314 
2315     if (TCR_4(__kmp_nth) >
2316         (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2317       // We are in oversubscription mode.  Contract the polling area
2318       // down to a single location, if that hasn't been done already.
2319       if (num_polls > 1) {
2320         reconfigure = true;
2321         num_polls = TCR_4(lck->lk.num_polls);
2322         mask = 0;
2323         num_polls = 1;
2324         polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2325                                                           sizeof(*polls));
2326         polls[0] = ticket;
2327       }
2328     } else {
2329       // We are in under/fully subscribed mode.  Check the number of
2330       // threads waiting on the lock.  The size of the polling area
2331       // should be at least the number of threads waiting.
2332       kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2333       if (num_waiting > num_polls) {
2334         kmp_uint32 old_num_polls = num_polls;
2335         reconfigure = true;
2336         do {
2337           mask = (mask << 1) | 1;
2338           num_polls *= 2;
2339         } while (num_polls <= num_waiting);
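        // For example, with num_polls == 4 (mask == 3) and five waiters, the
        // loop above yields num_polls == 8 and mask == 7.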
2340 
2341         // Allocate the new polling area, and copy the relevant portion
2342         // of the old polling area to the new area.  __kmp_allocate()
2343         // zeroes the memory it allocates, and most of the old area is
2344         // just zero padding, so we only copy the release counters.
2345         polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2346                                                           sizeof(*polls));
2347         kmp_uint32 i;
2348         for (i = 0; i < old_num_polls; i++) {
2349           polls[i].store(old_polls[i]);
2350         }
2351       }
2352     }
2353 
2354     if (reconfigure) {
2355       // Now write the updated fields back to the lock structure.
2356       //
2357       // Make certain that "polls" is written before "mask" !!!
2358       //
2359       // If another thread picks up the new value of mask and the old polls
2360       // pointer, it could access memory beyond the end of the old polling
2361       // area.
2362       //
2363       // On x86, we need memory fences.
2364       KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2365                       "lock %p to %d polls\n",
2366                       ticket, lck, num_polls));
2367 
2368       lck->lk.old_polls = old_polls;
2369       lck->lk.polls = polls; // atomic store
2370 
2371       KMP_MB();
2372 
2373       lck->lk.num_polls = num_polls;
2374       lck->lk.mask = mask; // atomic store
2375 
2376       KMP_MB();
2377 
2378       // Only after the new polling area and mask have been flushed
2379       // to main memory can we update the cleanup ticket field.
2380       //
2381       // volatile load / non-volatile store
2382       lck->lk.cleanup_ticket = lck->lk.next_ticket;
2383     }
2384   }
2385   return KMP_LOCK_ACQUIRED_FIRST;
2386 }
2387 
2388 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2389   int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2390   ANNOTATE_DRDPA_ACQUIRED(lck);
2391   return retval;
2392 }
2393 
2394 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2395                                                 kmp_int32 gtid) {
2396   char const *const func = "omp_set_lock";
2397   if (lck->lk.initialized != lck) {
2398     KMP_FATAL(LockIsUninitialized, func);
2399   }
2400   if (__kmp_is_drdpa_lock_nestable(lck)) {
2401     KMP_FATAL(LockNestableUsedAsSimple, func);
2402   }
2403   if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2404     KMP_FATAL(LockIsAlreadyOwned, func);
2405   }
2406 
2407   __kmp_acquire_drdpa_lock(lck, gtid);
2408 
2409   lck->lk.owner_id = gtid + 1;
2410   return KMP_LOCK_ACQUIRED_FIRST;
2411 }
2412 
2413 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2414   // First get a ticket, then read the polls pointer and the mask.
2415   // The polls pointer must be read before the mask!!! (See above)
2416   kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2417   std::atomic<kmp_uint64> *polls = lck->lk.polls;
2418   kmp_uint64 mask = lck->lk.mask; // atomic load
2419   if (polls[ticket & mask] == ticket) {
2420     kmp_uint64 next_ticket = ticket + 1;
2421     if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2422                                        next_ticket)) {
2423       KMP_FSYNC_ACQUIRED(lck);
2424       KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2425                       ticket, lck));
2426       lck->lk.now_serving = ticket; // non-volatile store
2427 
2428       // Since no threads are waiting, there is no possibility that we would
2429       // want to reconfigure the polling area.  We might have the cleanup ticket
2430       // value (which says that it is now safe to deallocate old_polls), but
2431       // we'll let a later thread which calls __kmp_acquire_lock do that - this
2432       // routine isn't supposed to block, and we would risk blocking if we
2433       // called __kmp_free() to do the deallocation.
2434       return TRUE;
2435     }
2436   }
2437   return FALSE;
2438 }
2439 
2440 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2441                                              kmp_int32 gtid) {
2442   char const *const func = "omp_test_lock";
2443   if (lck->lk.initialized != lck) {
2444     KMP_FATAL(LockIsUninitialized, func);
2445   }
2446   if (__kmp_is_drdpa_lock_nestable(lck)) {
2447     KMP_FATAL(LockNestableUsedAsSimple, func);
2448   }
2449 
2450   int retval = __kmp_test_drdpa_lock(lck, gtid);
2451 
2452   if (retval) {
2453     lck->lk.owner_id = gtid + 1;
2454   }
2455   return retval;
2456 }
2457 
2458 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2459   // Read the ticket value from the lock data struct, then the polls pointer and
2460   // the mask.  The polls pointer must be read before the mask!!! (See above)
2461   kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2462   std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2463   kmp_uint64 mask = lck->lk.mask; // atomic load
2464   KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2465                   ticket - 1, lck));
2466   KMP_FSYNC_RELEASING(lck);
2467   ANNOTATE_DRDPA_RELEASED(lck);
2468   polls[ticket & mask] = ticket; // atomic store
2469   return KMP_LOCK_RELEASED;
2470 }
2471 
2472 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2473                                                 kmp_int32 gtid) {
2474   char const *const func = "omp_unset_lock";
2475   KMP_MB(); /* in case another processor initialized lock */
2476   if (lck->lk.initialized != lck) {
2477     KMP_FATAL(LockIsUninitialized, func);
2478   }
2479   if (__kmp_is_drdpa_lock_nestable(lck)) {
2480     KMP_FATAL(LockNestableUsedAsSimple, func);
2481   }
2482   if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2483     KMP_FATAL(LockUnsettingFree, func);
2484   }
2485   if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2486       (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2487     KMP_FATAL(LockUnsettingSetByAnother, func);
2488   }
2489   lck->lk.owner_id = 0;
2490   return __kmp_release_drdpa_lock(lck, gtid);
2491 }
2492 
2493 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2494   lck->lk.location = NULL;
2495   lck->lk.mask = 0;
2496   lck->lk.num_polls = 1;
2497   lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2498       lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2499   lck->lk.cleanup_ticket = 0;
2500   lck->lk.old_polls = NULL;
2501   lck->lk.next_ticket = 0;
2502   lck->lk.now_serving = 0;
2503   lck->lk.owner_id = 0; // no thread owns the lock.
2504   lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2505   lck->lk.initialized = lck;
2506 
2507   KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2508 }
2509 
2510 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2511   lck->lk.initialized = NULL;
2512   lck->lk.location = NULL;
2513   if (lck->lk.polls.load() != NULL) {
2514     __kmp_free(lck->lk.polls.load());
2515     lck->lk.polls = NULL;
2516   }
2517   if (lck->lk.old_polls != NULL) {
2518     __kmp_free(lck->lk.old_polls);
2519     lck->lk.old_polls = NULL;
2520   }
2521   lck->lk.mask = 0;
2522   lck->lk.num_polls = 0;
2523   lck->lk.cleanup_ticket = 0;
2524   lck->lk.next_ticket = 0;
2525   lck->lk.now_serving = 0;
2526   lck->lk.owner_id = 0;
2527   lck->lk.depth_locked = -1;
2528 }
2529 
2530 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2531   char const *const func = "omp_destroy_lock";
2532   if (lck->lk.initialized != lck) {
2533     KMP_FATAL(LockIsUninitialized, func);
2534   }
2535   if (__kmp_is_drdpa_lock_nestable(lck)) {
2536     KMP_FATAL(LockNestableUsedAsSimple, func);
2537   }
2538   if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2539     KMP_FATAL(LockStillOwned, func);
2540   }
2541   __kmp_destroy_drdpa_lock(lck);
2542 }
2543 
2544 // nested drdpa ticket locks
2545 
2546 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2547   KMP_DEBUG_ASSERT(gtid >= 0);
2548 
2549   if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2550     lck->lk.depth_locked += 1;
2551     return KMP_LOCK_ACQUIRED_NEXT;
2552   } else {
2553     __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2554     ANNOTATE_DRDPA_ACQUIRED(lck);
2555     KMP_MB();
2556     lck->lk.depth_locked = 1;
2557     KMP_MB();
2558     lck->lk.owner_id = gtid + 1;
2559     return KMP_LOCK_ACQUIRED_FIRST;
2560   }
2561 }
2562 
2563 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2564                                                         kmp_int32 gtid) {
2565   char const *const func = "omp_set_nest_lock";
2566   if (lck->lk.initialized != lck) {
2567     KMP_FATAL(LockIsUninitialized, func);
2568   }
2569   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2570     KMP_FATAL(LockSimpleUsedAsNestable, func);
2571   }
2572   __kmp_acquire_nested_drdpa_lock(lck, gtid);
2573 }
2574 
2575 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2576   int retval;
2577 
2578   KMP_DEBUG_ASSERT(gtid >= 0);
2579 
2580   if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2581     retval = ++lck->lk.depth_locked;
2582   } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2583     retval = 0;
2584   } else {
2585     KMP_MB();
2586     retval = lck->lk.depth_locked = 1;
2587     KMP_MB();
2588     lck->lk.owner_id = gtid + 1;
2589   }
2590   return retval;
2591 }
2592 
2593 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2594                                                     kmp_int32 gtid) {
2595   char const *const func = "omp_test_nest_lock";
2596   if (lck->lk.initialized != lck) {
2597     KMP_FATAL(LockIsUninitialized, func);
2598   }
2599   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2600     KMP_FATAL(LockSimpleUsedAsNestable, func);
2601   }
2602   return __kmp_test_nested_drdpa_lock(lck, gtid);
2603 }
2604 
2605 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2606   KMP_DEBUG_ASSERT(gtid >= 0);
2607 
2608   KMP_MB();
2609   if (--(lck->lk.depth_locked) == 0) {
2610     KMP_MB();
2611     lck->lk.owner_id = 0;
2612     __kmp_release_drdpa_lock(lck, gtid);
2613     return KMP_LOCK_RELEASED;
2614   }
2615   return KMP_LOCK_STILL_HELD;
2616 }
2617 
2618 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2619                                                        kmp_int32 gtid) {
2620   char const *const func = "omp_unset_nest_lock";
2621   KMP_MB(); /* in case another processor initialized lock */
2622   if (lck->lk.initialized != lck) {
2623     KMP_FATAL(LockIsUninitialized, func);
2624   }
2625   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2626     KMP_FATAL(LockSimpleUsedAsNestable, func);
2627   }
2628   if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2629     KMP_FATAL(LockUnsettingFree, func);
2630   }
2631   if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2632     KMP_FATAL(LockUnsettingSetByAnother, func);
2633   }
2634   return __kmp_release_nested_drdpa_lock(lck, gtid);
2635 }
2636 
2637 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2638   __kmp_init_drdpa_lock(lck);
2639   lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2640 }
2641 
2642 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2643   __kmp_destroy_drdpa_lock(lck);
2644   lck->lk.depth_locked = 0;
2645 }
2646 
2647 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2648   char const *const func = "omp_destroy_nest_lock";
2649   if (lck->lk.initialized != lck) {
2650     KMP_FATAL(LockIsUninitialized, func);
2651   }
2652   if (!__kmp_is_drdpa_lock_nestable(lck)) {
2653     KMP_FATAL(LockSimpleUsedAsNestable, func);
2654   }
2655   if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2656     KMP_FATAL(LockStillOwned, func);
2657   }
2658   __kmp_destroy_nested_drdpa_lock(lck);
2659 }
2660 
2661 // Access functions for fields which don't exist for all lock kinds.
2662 
2663 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2664   return lck->lk.location;
2665 }
2666 
2667 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2668                                           const ident_t *loc) {
2669   lck->lk.location = loc;
2670 }
2671 
2672 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2673   return lck->lk.flags;
2674 }
2675 
2676 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2677                                        kmp_lock_flags_t flags) {
2678   lck->lk.flags = flags;
2679 }
2680 
2681 // Time stamp counter
2682 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2683 #define __kmp_tsc() __kmp_hardware_timestamp()
2684 // Runtime's default backoff parameters
2685 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2686 #else
2687 // Use nanoseconds for other platforms
2688 extern kmp_uint64 __kmp_now_nsec();
2689 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2690 #define __kmp_tsc() __kmp_now_nsec()
2691 #endif
2692 
2693 // A useful predicate for dealing with timestamps that may wrap.
2694 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2695 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2696 // Times where going clockwise is less distance than going anti-clockwise
2697 // are in the future, others are in the past. E.g., with a = MAX-1 and
2698 // b = MAX+1 (= 0), a > b is true but does not mean a has reached b, whereas
2699 // signed(a) = -2 and signed(b) = 0 capture the actual difference.
2700 static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2701   return ((kmp_int64)b - (kmp_int64)a) > 0;
2702 }
2703 
2704 // Truncated binary exponential backoff function
2705 void __kmp_spin_backoff(kmp_backoff_t *boff) {
2706   // We could flatten this loop, but making it a nested loop gives better results.
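  // Each call busy-waits for roughly step * min_tick time units, then grows
  // the step (1, 3, 7, ...) for the next call, truncating it with
  // max_backoff - 1; this assumes max_backoff is a power of two (the defaults
  // above are).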
2707   kmp_uint32 i;
2708   for (i = boff->step; i > 0; i--) {
2709     kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2710     do {
2711       KMP_CPU_PAUSE();
2712     } while (before(__kmp_tsc(), goal));
2713   }
2714   boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2715 }
2716 
2717 #if KMP_USE_DYNAMIC_LOCK
2718 
2719 // Direct lock initializer. It simply writes a tag to the low 8 bits of the
2720 // lock word.
2721 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2722                                    kmp_dyna_lockseq_t seq) {
2723   TCW_4(*lck, KMP_GET_D_TAG(seq));
2724   KA_TRACE(
2725       20,
2726       ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2727 }
2728 
2729 #if KMP_USE_TSX
2730 
2731 // HLE lock functions - imported from the testbed runtime.
2732 #define HLE_ACQUIRE ".byte 0xf2;"
2733 #define HLE_RELEASE ".byte 0xf3;"
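// These are the XACQUIRE/XRELEASE prefix bytes; on hardware without HLE they
// are ignored, so the code below degrades to a plain test-and-set lock.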
2734 
2735 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2736   __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2737   return v;
2738 }
2739 
2740 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2741 
2742 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2743   TCW_4(*lck, 0);
2744 }
2745 
2746 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2747   // Use gtid for KMP_LOCK_BUSY if necessary
2748   if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2749     int delay = 1;
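    // Bounded exponential backoff: the pause count steps 1 -> 3 -> 7 and then
    // stays at 7 while the lock remains busy.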
2750     do {
2751       while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2752         for (int i = delay; i != 0; --i)
2753           KMP_CPU_PAUSE();
2754         delay = ((delay << 1) | 1) & 7;
2755       }
2756     } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2757   }
2758 }
2759 
2760 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2761                                                kmp_int32 gtid) {
2762   __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2763 }
2764 
2765 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2766   __asm__ volatile(HLE_RELEASE "movl %1,%0"
2767                    : "=m"(*lck)
2768                    : "r"(KMP_LOCK_FREE(hle))
2769                    : "memory");
2770   return KMP_LOCK_RELEASED;
2771 }
2772 
2773 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2774                                               kmp_int32 gtid) {
2775   return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2776 }
2777 
2778 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2779   return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2780 }
2781 
2782 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2783                                            kmp_int32 gtid) {
2784   return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2785 }
2786 
2787 static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) {
2788   __kmp_init_queuing_lock(lck);
2789 }
2790 
2791 static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) {
2792   __kmp_destroy_queuing_lock(lck);
2793 }
2794 
2795 static void __kmp_destroy_rtm_lock_with_checks(kmp_queuing_lock_t *lck) {
2796   __kmp_destroy_queuing_lock_with_checks(lck);
2797 }
2798 
2799 static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2800   unsigned retries = 3, status;
2801   do {
2802     status = _xbegin();
2803     if (status == _XBEGIN_STARTED) {
2804       if (__kmp_is_unlocked_queuing_lock(lck))
2805         return;
2806       _xabort(0xff);
2807     }
2808     if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2809       // Wait until lock becomes free
2810       while (!__kmp_is_unlocked_queuing_lock(lck))
2811         __kmp_yield(TRUE);
2812     } else if (!(status & _XABORT_RETRY))
2813       break;
2814   } while (retries--);
2815 
2816   // Fall-back non-speculative lock (xchg)
2817   __kmp_acquire_queuing_lock(lck, gtid);
2818 }
2819 
2820 static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2821                                                kmp_int32 gtid) {
2822   __kmp_acquire_rtm_lock(lck, gtid);
2823 }
2824 
2825 static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2826   if (__kmp_is_unlocked_queuing_lock(lck)) {
2827     // Releasing from speculation
2828     _xend();
2829   } else {
2830     // Releasing from a real lock
2831     __kmp_release_queuing_lock(lck, gtid);
2832   }
2833   return KMP_LOCK_RELEASED;
2834 }
2835 
2836 static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2837                                               kmp_int32 gtid) {
2838   return __kmp_release_rtm_lock(lck, gtid);
2839 }
2840 
2841 static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2842   unsigned retries = 3, status;
2843   do {
2844     status = _xbegin();
2845     if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2846       return 1;
2847     }
2848     if (!(status & _XABORT_RETRY))
2849       break;
2850   } while (retries--);
2851 
2852   return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0;
2853 }
2854 
2855 static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2856                                            kmp_int32 gtid) {
2857   return __kmp_test_rtm_lock(lck, gtid);
2858 }
2859 
2860 #endif // KMP_USE_TSX
2861 
2862 // Entry functions for indirect locks (first element of direct lock jump tables)
2863 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2864                                      kmp_dyna_lockseq_t tag);
2865 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2866 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2867 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2868 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2869 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2870                                                kmp_int32);
2871 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2872                                                  kmp_int32);
2873 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2874                                                 kmp_int32);
2875 
2876 // Lock function definitions for the union parameter type
2877 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2878 
2879 #define expand1(lk, op)                                                        \
2880   static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) {               \
2881     __kmp_##op##_##lk##_##lock(&lock->lk);                                     \
2882   }
2883 #define expand2(lk, op)                                                        \
2884   static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock,                  \
2885                                         kmp_int32 gtid) {                      \
2886     return __kmp_##op##_##lk##_##lock(&lock->lk, gtid);                        \
2887   }
2888 #define expand3(lk, op)                                                        \
2889   static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock,              \
2890                                             kmp_lock_flags_t flags) {          \
2891     __kmp_set_##lk##_lock_flags(&lock->lk, flags);                             \
2892   }
2893 #define expand4(lk, op)                                                        \
2894   static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock,           \
2895                                                const ident_t *loc) {           \
2896     __kmp_set_##lk##_lock_location(&lock->lk, loc);                            \
2897   }
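// Each expansion defines an overload that takes the kmp_user_lock_p union and
// forwards to the typed implementation; e.g., expand2(queuing, acquire)
// produces an __kmp_acquire_queuing_lock overload that calls the
// kmp_queuing_lock_t version.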
2898 
2899 KMP_FOREACH_LOCK_KIND(expand1, init)
2900 KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2901 KMP_FOREACH_LOCK_KIND(expand1, destroy)
2902 KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2903 KMP_FOREACH_LOCK_KIND(expand2, acquire)
2904 KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2905 KMP_FOREACH_LOCK_KIND(expand2, release)
2906 KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2907 KMP_FOREACH_LOCK_KIND(expand2, test)
2908 KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2909 KMP_FOREACH_LOCK_KIND(expand3, )
2910 KMP_FOREACH_LOCK_KIND(expand4, )
2911 
2912 #undef expand1
2913 #undef expand2
2914 #undef expand3
2915 #undef expand4
2916 
2917 // Jump tables for the indirect lock functions
2918 // Only fill in the odd entries; that avoids the need to shift out the low bit.
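// (Direct lock tags are odd values, so a tag can index these tables directly;
// slot 0 dispatches to the indirect-lock functions.)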
2919 
2920 // init functions
2921 #define expand(l, op) 0, __kmp_init_direct_lock,
2922 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2923     __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2924 #undef expand
2925 
2926 // destroy functions
2927 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2928 static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2929     __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2930 #undef expand
2931 #define expand(l, op)                                                          \
2932   0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2933 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
2934     __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2935 #undef expand
2936 
2937 // set/acquire functions
2938 #define expand(l, op)                                                          \
2939   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2940 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
2941     __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
2942 #undef expand
2943 #define expand(l, op)                                                          \
2944   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
2945 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2946     __kmp_set_indirect_lock_with_checks, 0,
2947     KMP_FOREACH_D_LOCK(expand, acquire)};
2948 #undef expand
2949 
2950 // unset/release and test functions
2951 #define expand(l, op)                                                          \
2952   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2953 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
2954     __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
2955 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
2956     __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
2957 #undef expand
2958 #define expand(l, op)                                                          \
2959   0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
2960 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2961     __kmp_unset_indirect_lock_with_checks, 0,
2962     KMP_FOREACH_D_LOCK(expand, release)};
2963 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2964     __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
2965 #undef expand
2966 
2967 // Exposes only one set of jump tables (*lock or *lock_with_checks).
2968 void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *) = 0;
2969 int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
2970 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
2971 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
2972 
2973 // Jump tables for the indirect lock functions
2974 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
2975 void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
2976     KMP_FOREACH_I_LOCK(expand, init)};
2977 #undef expand
2978 
2979 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
2980 static void (*indirect_destroy[])(kmp_user_lock_p) = {
2981     KMP_FOREACH_I_LOCK(expand, destroy)};
2982 #undef expand
2983 #define expand(l, op)                                                          \
2984   (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
2985 static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
2986     KMP_FOREACH_I_LOCK(expand, destroy)};
2987 #undef expand
2988 
2989 // set/acquire functions
2990 #define expand(l, op)                                                          \
2991   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
2992 static int (*indirect_set[])(kmp_user_lock_p,
2993                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
2994 #undef expand
2995 #define expand(l, op)                                                          \
2996   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
2997 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
2998     KMP_FOREACH_I_LOCK(expand, acquire)};
2999 #undef expand
3000 
3001 // unset/release and test functions
3002 #define expand(l, op)                                                          \
3003   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3004 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3005     KMP_FOREACH_I_LOCK(expand, release)};
3006 static int (*indirect_test[])(kmp_user_lock_p,
3007                               kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3008 #undef expand
3009 #define expand(l, op)                                                          \
3010   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3011 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3012     KMP_FOREACH_I_LOCK(expand, release)};
3013 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3014     KMP_FOREACH_I_LOCK(expand, test)};
3015 #undef expand
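
// For illustration only: every slot in the tables above is just the typed
// lock routine cast to the generic kmp_user_lock_p signature. For example,
// the ticket entry of indirect_set is
//   (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_acquire_ticket_lock,
// and the *_check tables hold the corresponding *_with_checks variants.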
3016 
// Exposes only one set of jump tables (*lock or *lock_with_checks).
3018 void (*(*__kmp_indirect_destroy))(kmp_user_lock_p) = 0;
3019 int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3020 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3021 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
3022 
3023 // Lock index table.
3024 kmp_indirect_lock_table_t __kmp_i_lock_table;
3025 
3026 // Size of indirect locks.
3027 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3028 
// Jump tables for lock accessors/modifiers.
3030 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3031                                                      const ident_t *) = {0};
3032 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3033                                                   kmp_lock_flags_t) = {0};
3034 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3035     kmp_user_lock_p) = {0};
3036 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3037     kmp_user_lock_p) = {0};
3038 
3039 // Use different lock pools for different lock types.
3040 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3041 
// User lock allocator for dynamically dispatched indirect locks. Every entry
// of the indirect lock table holds the address and type of the allocated
// indirect lock (kmp_indirect_lock_t), and the size of the table doubles when
// it is full. A destroyed indirect lock object is returned to the reusable
// pool of locks, unique to each lock type.
3047 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3048                                                   kmp_int32 gtid,
3049                                                   kmp_indirect_locktag_t tag) {
3050   kmp_indirect_lock_t *lck;
3051   kmp_lock_index_t idx;
3052 
3053   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3054 
3055   if (__kmp_indirect_lock_pool[tag] != NULL) {
3056     // Reuse the allocated and destroyed lock object
3057     lck = __kmp_indirect_lock_pool[tag];
3058     if (OMP_LOCK_T_SIZE < sizeof(void *))
3059       idx = lck->lock->pool.index;
3060     __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3061     KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3062                   lck));
3063   } else {
3064     idx = __kmp_i_lock_table.next;
3065     // Check capacity and double the size if it is full
3066     if (idx == __kmp_i_lock_table.size) {
3067       // Double up the space for block pointers
3068       int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
3069       kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
3070           2 * row * sizeof(kmp_indirect_lock_t *));
3071       KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
3072                  row * sizeof(kmp_indirect_lock_t *));
3073       kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3074       __kmp_i_lock_table.table = new_table;
3075       __kmp_free(old_table);
3076       // Allocate new objects in the new blocks
3077       for (int i = row; i < 2 * row; ++i)
3078         *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
3079             KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3080       __kmp_i_lock_table.size = 2 * idx;
3081     }
3082     __kmp_i_lock_table.next++;
3083     lck = KMP_GET_I_LOCK(idx);
3084     // Allocate a new base lock object
3085     lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3086     KA_TRACE(20,
3087              ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3088   }
3089 
3090   __kmp_release_lock(&__kmp_global_lock, gtid);
3091 
3092   lck->type = tag;
3093 
3094   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3095     *((kmp_lock_index_t *)user_lock) = idx
3096                                        << 1; // indirect lock word must be even
3097   } else {
3098     *((kmp_indirect_lock_t **)user_lock) = lck;
3099   }
3100 
3101   return lck;
3102 }
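
// For illustration only: when OMP_LOCK_T_SIZE is smaller than a pointer, the
// user's lock word ends up holding the table index shifted left by one (e.g.
// idx == 5 is stored as 10, always even), whereas a direct lock stores its tag
// with the low bit set (always odd). The low bit alone is therefore enough to
// tell the two kinds of dynamically dispatched locks apart.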
3103 
3104 // User lock lookup for dynamically dispatched locks.
3105 static __forceinline kmp_indirect_lock_t *
3106 __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3107   if (__kmp_env_consistency_check) {
3108     kmp_indirect_lock_t *lck = NULL;
3109     if (user_lock == NULL) {
3110       KMP_FATAL(LockIsUninitialized, func);
3111     }
3112     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3113       kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3114       if (idx >= __kmp_i_lock_table.size) {
3115         KMP_FATAL(LockIsUninitialized, func);
3116       }
3117       lck = KMP_GET_I_LOCK(idx);
3118     } else {
3119       lck = *((kmp_indirect_lock_t **)user_lock);
3120     }
3121     if (lck == NULL) {
3122       KMP_FATAL(LockIsUninitialized, func);
3123     }
3124     return lck;
3125   } else {
3126     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3127       return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3128     } else {
3129       return *((kmp_indirect_lock_t **)user_lock);
3130     }
3131   }
3132 }
3133 
3134 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3135                                      kmp_dyna_lockseq_t seq) {
3136 #if KMP_USE_ADAPTIVE_LOCKS
3137   if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3138     KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3139     seq = lockseq_queuing;
3140   }
3141 #endif
3142 #if KMP_USE_TSX
3143   if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3144     seq = lockseq_queuing;
3145   }
3146 #endif
3147   kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3148   kmp_indirect_lock_t *l =
3149       __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3150   KMP_I_LOCK_FUNC(l, init)(l->lock);
3151   KA_TRACE(
3152       20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3153            seq));
3154 }
3155 
3156 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3157   kmp_uint32 gtid = __kmp_entry_gtid();
3158   kmp_indirect_lock_t *l =
3159       __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3160   KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3161   kmp_indirect_locktag_t tag = l->type;
3162 
3163   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3164 
3165   // Use the base lock's space to keep the pool chain.
3166   l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3167   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3168     l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3169   }
3170   __kmp_indirect_lock_pool[tag] = l;
3171 
3172   __kmp_release_lock(&__kmp_global_lock, gtid);
3173 }
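
// For illustration only: if two locks of the same tag, say hypothetical lck1
// and then lck2, are destroyed, the per-tag pool becomes a singly linked list
// threaded through the base locks:
//   __kmp_indirect_lock_pool[tag] == lck2
//   lck2->lock->pool.next         == (kmp_user_lock_p)lck1
//   lck1->lock->pool.next         == NULL
// __kmp_allocate_indirect_lock() pops from the head of this list before it
// ever grows the lock table.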
3174 
3175 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3176   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3177   return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3178 }
3179 
3180 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3181   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3182   return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3183 }
3184 
3185 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3186   kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3187   return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3188 }
3189 
3190 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3191                                                kmp_int32 gtid) {
3192   kmp_indirect_lock_t *l =
3193       __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3194   return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3195 }
3196 
3197 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3198                                                  kmp_int32 gtid) {
3199   kmp_indirect_lock_t *l =
3200       __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3201   return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3202 }
3203 
3204 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3205                                                 kmp_int32 gtid) {
3206   kmp_indirect_lock_t *l =
3207       __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3208   return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3209 }
3210 
3211 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3212 
3213 // This is used only in kmp_error.cpp when consistency checking is on.
3214 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3215   switch (seq) {
3216   case lockseq_tas:
3217   case lockseq_nested_tas:
3218     return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3219 #if KMP_USE_FUTEX
3220   case lockseq_futex:
3221   case lockseq_nested_futex:
3222     return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3223 #endif
3224   case lockseq_ticket:
3225   case lockseq_nested_ticket:
3226     return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3227   case lockseq_queuing:
3228   case lockseq_nested_queuing:
3229 #if KMP_USE_ADAPTIVE_LOCKS
3230   case lockseq_adaptive:
3231 #endif
3232     return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3233   case lockseq_drdpa:
3234   case lockseq_nested_drdpa:
3235     return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3236   default:
3237     return 0;
3238   }
3239 }
3240 
3241 // Initializes data for dynamic user locks.
3242 void __kmp_init_dynamic_user_locks() {
3243   // Initialize jump table for the lock functions
3244   if (__kmp_env_consistency_check) {
3245     __kmp_direct_set = direct_set_check;
3246     __kmp_direct_unset = direct_unset_check;
3247     __kmp_direct_test = direct_test_check;
3248     __kmp_direct_destroy = direct_destroy_check;
3249     __kmp_indirect_set = indirect_set_check;
3250     __kmp_indirect_unset = indirect_unset_check;
3251     __kmp_indirect_test = indirect_test_check;
3252     __kmp_indirect_destroy = indirect_destroy_check;
3253   } else {
3254     __kmp_direct_set = direct_set;
3255     __kmp_direct_unset = direct_unset;
3256     __kmp_direct_test = direct_test;
3257     __kmp_direct_destroy = direct_destroy;
3258     __kmp_indirect_set = indirect_set;
3259     __kmp_indirect_unset = indirect_unset;
3260     __kmp_indirect_test = indirect_test;
3261     __kmp_indirect_destroy = indirect_destroy;
3262   }
3263   // If the user locks have already been initialized, then return. Allow the
3264   // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3265   // new lock tables if they have already been allocated.
3266   if (__kmp_init_user_locks)
3267     return;
3268 
3269   // Initialize lock index table
3270   __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3271   __kmp_i_lock_table.table =
3272       (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3273   *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3274       KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3275   __kmp_i_lock_table.next = 0;
3276 
3277   // Indirect lock size
3278   __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3279   __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3280 #if KMP_USE_ADAPTIVE_LOCKS
3281   __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3282 #endif
3283   __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3284 #if KMP_USE_TSX
3285   __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
3286 #endif
3287   __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3288 #if KMP_USE_FUTEX
3289   __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3290 #endif
3291   __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3292   __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3293   __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3294 
3295 // Initialize lock accessor/modifier
3296 #define fill_jumps(table, expand, sep)                                         \
3297   {                                                                            \
3298     table[locktag##sep##ticket] = expand(ticket);                              \
3299     table[locktag##sep##queuing] = expand(queuing);                            \
3300     table[locktag##sep##drdpa] = expand(drdpa);                                \
3301   }
3302 
3303 #if KMP_USE_ADAPTIVE_LOCKS
3304 #define fill_table(table, expand)                                              \
3305   {                                                                            \
3306     fill_jumps(table, expand, _);                                              \
3307     table[locktag_adaptive] = expand(queuing);                                 \
3308     fill_jumps(table, expand, _nested_);                                       \
3309   }
3310 #else
3311 #define fill_table(table, expand)                                              \
3312   {                                                                            \
3313     fill_jumps(table, expand, _);                                              \
3314     fill_jumps(table, expand, _nested_);                                       \
3315   }
3316 #endif // KMP_USE_ADAPTIVE_LOCKS
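
// For illustration only (not compiled): with the expand macro defined just
// below, fill_table(__kmp_indirect_set_location, expand) expands to
// assignments of the form
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))
//           __kmp_set_ticket_lock_location;
// for ticket, queuing and drdpa (plus an adaptive entry that reuses the
// queuing routine when KMP_USE_ADAPTIVE_LOCKS is on), and the "_nested_" pass
// fills the locktag_nested_* entries the same way.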
3317 
3318 #define expand(l)                                                              \
3319   (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3320   fill_table(__kmp_indirect_set_location, expand);
3321 #undef expand
3322 #define expand(l)                                                              \
3323   (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3324   fill_table(__kmp_indirect_set_flags, expand);
3325 #undef expand
3326 #define expand(l)                                                              \
3327   (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3328   fill_table(__kmp_indirect_get_location, expand);
3329 #undef expand
3330 #define expand(l)                                                              \
3331   (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3332   fill_table(__kmp_indirect_get_flags, expand);
3333 #undef expand
3334 
3335   __kmp_init_user_locks = TRUE;
3336 }
3337 
3338 // Clean up the lock table.
3339 void __kmp_cleanup_indirect_user_locks() {
3340   kmp_lock_index_t i;
3341   int k;
3342 
3343   // Clean up locks in the pools first (they were already destroyed before going
3344   // into the pools).
3345   for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3346     kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3347     while (l != NULL) {
3348       kmp_indirect_lock_t *ll = l;
3349       l = (kmp_indirect_lock_t *)l->lock->pool.next;
3350       KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3351                     ll));
3352       __kmp_free(ll->lock);
3353       ll->lock = NULL;
3354     }
3355     __kmp_indirect_lock_pool[k] = NULL;
3356   }
3357   // Clean up the remaining undestroyed locks.
3358   for (i = 0; i < __kmp_i_lock_table.next; i++) {
3359     kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3360     if (l->lock != NULL) {
3361       // Locks not destroyed explicitly need to be destroyed here.
3362       KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3363       KA_TRACE(
3364           20,
3365           ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
3366            l));
3367       __kmp_free(l->lock);
3368     }
3369   }
3370   // Free the table
3371   for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3372     __kmp_free(__kmp_i_lock_table.table[i]);
3373   __kmp_free(__kmp_i_lock_table.table);
3374 
3375   __kmp_init_user_locks = FALSE;
3376 }
3377 
3378 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3379 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3380 
3381 #else // KMP_USE_DYNAMIC_LOCK
3382 
3383 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3384   __kmp_init_tas_lock(lck);
3385 }
3386 
3387 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3388   __kmp_init_nested_tas_lock(lck);
3389 }
3390 
3391 #if KMP_USE_FUTEX
3392 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3393   __kmp_init_futex_lock(lck);
3394 }
3395 
3396 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3397   __kmp_init_nested_futex_lock(lck);
3398 }
3399 #endif
3400 
3401 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3402   return lck == lck->lk.self;
3403 }
3404 
3405 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3406   __kmp_init_ticket_lock(lck);
3407 }
3408 
3409 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3410   __kmp_init_nested_ticket_lock(lck);
3411 }
3412 
3413 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3414   return lck == lck->lk.initialized;
3415 }
3416 
3417 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3418   __kmp_init_queuing_lock(lck);
3419 }
3420 
3421 static void
3422 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3423   __kmp_init_nested_queuing_lock(lck);
3424 }
3425 
3426 #if KMP_USE_ADAPTIVE_LOCKS
3427 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3428   __kmp_init_adaptive_lock(lck);
3429 }
3430 #endif
3431 
3432 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3433   return lck == lck->lk.initialized;
3434 }
3435 
3436 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3437   __kmp_init_drdpa_lock(lck);
3438 }
3439 
3440 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3441   __kmp_init_nested_drdpa_lock(lck);
3442 }
3443 
3444 /* user locks
3445  * They are implemented as a table of function pointers which are set to the
3446  * lock functions of the appropriate kind, once that has been determined. */
3447 
3448 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3449 
3450 size_t __kmp_base_user_lock_size = 0;
3451 size_t __kmp_user_lock_size = 0;
3452 
3453 kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3454 int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3455                                             kmp_int32 gtid) = NULL;
3456 
3457 int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3458                                          kmp_int32 gtid) = NULL;
3459 int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3460                                             kmp_int32 gtid) = NULL;
3461 void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3462 void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3463 void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3464 int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3465                                                    kmp_int32 gtid) = NULL;
3466 
3467 int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3468                                                 kmp_int32 gtid) = NULL;
3469 int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3470                                                    kmp_int32 gtid) = NULL;
3471 void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3472 void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3473 
3474 int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3475 const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3476 void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3477                                       const ident_t *loc) = NULL;
3478 kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3479 void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3480                                    kmp_lock_flags_t flags) = NULL;
3481 
3482 void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
3483   switch (user_lock_kind) {
3484   case lk_default:
3485   default:
3486     KMP_ASSERT(0);
3487 
3488   case lk_tas: {
3489     __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
3490     __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
3491 
3492     __kmp_get_user_lock_owner_ =
3493         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
3494 
3495     if (__kmp_env_consistency_check) {
3496       KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3497       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3498     } else {
3499       KMP_BIND_USER_LOCK(tas);
3500       KMP_BIND_NESTED_USER_LOCK(tas);
3501     }
3502 
3503     __kmp_destroy_user_lock_ =
3504         (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
3505 
3506     __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3507 
3508     __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3509 
3510     __kmp_set_user_lock_location_ =
3511         (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3512 
3513     __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3514 
3515     __kmp_set_user_lock_flags_ =
3516         (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3517   } break;
3518 
3519 #if KMP_USE_FUTEX
3520 
3521   case lk_futex: {
3522     __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3523     __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3524 
3525     __kmp_get_user_lock_owner_ =
3526         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3527 
3528     if (__kmp_env_consistency_check) {
3529       KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3530       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3531     } else {
3532       KMP_BIND_USER_LOCK(futex);
3533       KMP_BIND_NESTED_USER_LOCK(futex);
3534     }
3535 
3536     __kmp_destroy_user_lock_ =
3537         (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3538 
3539     __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3540 
3541     __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3542 
3543     __kmp_set_user_lock_location_ =
3544         (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3545 
3546     __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3547 
3548     __kmp_set_user_lock_flags_ =
3549         (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3550   } break;
3551 
3552 #endif // KMP_USE_FUTEX
3553 
3554   case lk_ticket: {
3555     __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
3556     __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
3557 
3558     __kmp_get_user_lock_owner_ =
3559         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
3560 
3561     if (__kmp_env_consistency_check) {
3562       KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3563       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3564     } else {
3565       KMP_BIND_USER_LOCK(ticket);
3566       KMP_BIND_NESTED_USER_LOCK(ticket);
3567     }
3568 
3569     __kmp_destroy_user_lock_ =
3570         (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
3571 
3572     __kmp_is_user_lock_initialized_ =
3573         (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
3574 
3575     __kmp_get_user_lock_location_ =
3576         (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
3577 
3578     __kmp_set_user_lock_location_ = (void (*)(
3579         kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
3580 
3581     __kmp_get_user_lock_flags_ =
3582         (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
3583 
3584     __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3585         &__kmp_set_ticket_lock_flags);
3586   } break;
3587 
3588   case lk_queuing: {
3589     __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
3590     __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
3591 
3592     __kmp_get_user_lock_owner_ =
3593         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3594 
3595     if (__kmp_env_consistency_check) {
3596       KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3597       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3598     } else {
3599       KMP_BIND_USER_LOCK(queuing);
3600       KMP_BIND_NESTED_USER_LOCK(queuing);
3601     }
3602 
3603     __kmp_destroy_user_lock_ =
3604         (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
3605 
3606     __kmp_is_user_lock_initialized_ =
3607         (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3608 
3609     __kmp_get_user_lock_location_ =
3610         (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3611 
3612     __kmp_set_user_lock_location_ = (void (*)(
3613         kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3614 
3615     __kmp_get_user_lock_flags_ =
3616         (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3617 
3618     __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3619         &__kmp_set_queuing_lock_flags);
3620   } break;
3621 
3622 #if KMP_USE_ADAPTIVE_LOCKS
3623   case lk_adaptive: {
3624     __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3625     __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3626 
3627     __kmp_get_user_lock_owner_ =
3628         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3629 
3630     if (__kmp_env_consistency_check) {
3631       KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3632     } else {
3633       KMP_BIND_USER_LOCK(adaptive);
3634     }
3635 
3636     __kmp_destroy_user_lock_ =
3637         (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3638 
3639     __kmp_is_user_lock_initialized_ =
3640         (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3641 
3642     __kmp_get_user_lock_location_ =
3643         (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3644 
3645     __kmp_set_user_lock_location_ = (void (*)(
3646         kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3647 
3648     __kmp_get_user_lock_flags_ =
3649         (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3650 
3651     __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3652         &__kmp_set_queuing_lock_flags);
3653 
3654   } break;
3655 #endif // KMP_USE_ADAPTIVE_LOCKS
3656 
3657   case lk_drdpa: {
3658     __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
3659     __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
3660 
3661     __kmp_get_user_lock_owner_ =
3662         (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
3663 
3664     if (__kmp_env_consistency_check) {
3665       KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3666       KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3667     } else {
3668       KMP_BIND_USER_LOCK(drdpa);
3669       KMP_BIND_NESTED_USER_LOCK(drdpa);
3670     }
3671 
3672     __kmp_destroy_user_lock_ =
3673         (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
3674 
3675     __kmp_is_user_lock_initialized_ =
3676         (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
3677 
3678     __kmp_get_user_lock_location_ =
3679         (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
3680 
3681     __kmp_set_user_lock_location_ = (void (*)(
3682         kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
3683 
3684     __kmp_get_user_lock_flags_ =
3685         (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
3686 
3687     __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3688         &__kmp_set_drdpa_lock_flags);
3689   } break;
3690   }
3691 }
3692 
3693 // ----------------------------------------------------------------------------
3694 // User lock table & lock allocation
3695 
3696 kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
3697 kmp_user_lock_p __kmp_lock_pool = NULL;
3698 
3699 // Lock block-allocation support.
3700 kmp_block_of_locks *__kmp_lock_blocks = NULL;
3701 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3702 
3703 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3704   // Assume that kmp_global_lock is held upon entry/exit.
3705   kmp_lock_index_t index;
3706   if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3707     kmp_lock_index_t size;
3708     kmp_user_lock_p *table;
3709     // Reallocate lock table.
3710     if (__kmp_user_lock_table.allocated == 0) {
3711       size = 1024;
3712     } else {
3713       size = __kmp_user_lock_table.allocated * 2;
3714     }
3715     table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3716     KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3717                sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3718     table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first element
    // of the new table. All the tables will be organized into a list, and
    // could be freed when the library is shutting down.
3723     __kmp_user_lock_table.table = table;
3724     __kmp_user_lock_table.allocated = size;
3725   }
3726   KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3727                    __kmp_user_lock_table.allocated);
3728   index = __kmp_user_lock_table.used;
3729   __kmp_user_lock_table.table[index] = lck;
3730   ++__kmp_user_lock_table.used;
3731   return index;
3732 }
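
// For illustration only: after the table has grown once, the tables form a
// chain through element [0]:
//   __kmp_user_lock_table.table[0] points at the previous (smaller) table,
//   and that table's element [0] is NULL.
// Live lock pointers are stored from index 1 upward; the chain is exactly
// what __kmp_cleanup_user_locks() walks when it frees the tables at shutdown.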
3733 
3734 static kmp_user_lock_p __kmp_lock_block_allocate() {
3735   // Assume that kmp_global_lock is held upon entry/exit.
3736   static int last_index = 0;
3737   if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3738     // Restart the index.
3739     last_index = 0;
3740     // Need to allocate a new block.
3741     KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3742     size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3743     char *buffer =
3744         (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3745     // Set up the new block.
3746     kmp_block_of_locks *new_block =
3747         (kmp_block_of_locks *)(&buffer[space_for_locks]);
3748     new_block->next_block = __kmp_lock_blocks;
3749     new_block->locks = (void *)buffer;
3750     // Publish the new block.
3751     KMP_MB();
3752     __kmp_lock_blocks = new_block;
3753   }
3754   kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3755       ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3756   last_index++;
3757   return ret;
3758 }
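
// For illustration only: each block allocated above is a single buffer laid
// out as
//   [ __kmp_num_locks_in_block * __kmp_user_lock_size bytes of lock storage |
//     kmp_block_of_locks descriptor ]
// i.e. the descriptor sits at the end of the same allocation, which is why
// __kmp_cleanup_user_locks() frees block_ptr->locks but never block_ptr
// itself.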
3759 
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
3762 kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3763                                          kmp_lock_flags_t flags) {
3764   kmp_user_lock_p lck;
3765   kmp_lock_index_t index;
3766   KMP_DEBUG_ASSERT(user_lock);
3767 
3768   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3769 
3770   if (__kmp_lock_pool == NULL) {
3771     // Lock pool is empty. Allocate new memory.
3772 
    // ANNOTATION: Found no good way to express the synchronization
    // between allocation and usage, so ignore the allocation
3775     ANNOTATE_IGNORE_WRITES_BEGIN();
3776     if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3777       lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3778     } else {
3779       lck = __kmp_lock_block_allocate();
3780     }
3781     ANNOTATE_IGNORE_WRITES_END();
3782 
3783     // Insert lock in the table so that it can be freed in __kmp_cleanup,
3784     // and debugger has info on all allocated locks.
3785     index = __kmp_lock_table_insert(lck);
3786   } else {
3787     // Pick up lock from pool.
3788     lck = __kmp_lock_pool;
3789     index = __kmp_lock_pool->pool.index;
3790     __kmp_lock_pool = __kmp_lock_pool->pool.next;
3791   }
3792 
3793   // We could potentially differentiate between nested and regular locks
3794   // here, and do the lock table lookup for regular locks only.
3795   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3796     *((kmp_lock_index_t *)user_lock) = index;
3797   } else {
3798     *((kmp_user_lock_p *)user_lock) = lck;
3799   }
3800 
  // Mark the lock if it is a critical section lock.
3802   __kmp_set_user_lock_flags(lck, flags);
3803 
3804   __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper
3805 
3806   return lck;
3807 }
3808 
3809 // Put lock's memory to pool for reusing.
3810 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3811                           kmp_user_lock_p lck) {
3812   KMP_DEBUG_ASSERT(user_lock != NULL);
3813   KMP_DEBUG_ASSERT(lck != NULL);
3814 
3815   __kmp_acquire_lock(&__kmp_global_lock, gtid);
3816 
3817   lck->pool.next = __kmp_lock_pool;
3818   __kmp_lock_pool = lck;
3819   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3820     kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3821     KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3822     lck->pool.index = index;
3823   }
3824 
3825   __kmp_release_lock(&__kmp_global_lock, gtid);
3826 }
3827 
3828 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3829   kmp_user_lock_p lck = NULL;
3830 
3831   if (__kmp_env_consistency_check) {
3832     if (user_lock == NULL) {
3833       KMP_FATAL(LockIsUninitialized, func);
3834     }
3835   }
3836 
3837   if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3838     kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3839     if (__kmp_env_consistency_check) {
3840       if (!(0 < index && index < __kmp_user_lock_table.used)) {
3841         KMP_FATAL(LockIsUninitialized, func);
3842       }
3843     }
3844     KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3845     KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3846     lck = __kmp_user_lock_table.table[index];
3847   } else {
3848     lck = *((kmp_user_lock_p *)user_lock);
3849   }
3850 
3851   if (__kmp_env_consistency_check) {
3852     if (lck == NULL) {
3853       KMP_FATAL(LockIsUninitialized, func);
3854     }
3855   }
3856 
3857   return lck;
3858 }
3859 
3860 void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about locks in the pool--we will free them
  // when iterating through the lock table (it includes all the locks, dead or
  // alive).
3863   __kmp_lock_pool = NULL;
3864 
3865 #define IS_CRITICAL(lck)                                                       \
3866   ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
3867    ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3868 
  // Loop through the lock table and free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
3871   //
3872   // FIXME - we are iterating through a list of (pointers to) objects of type
3873   // union kmp_user_lock, but we have no way of knowing whether the base type is
3874   // currently "pool" or whatever the global user lock type is.
3875   //
3876   // We are relying on the fact that for all of the user lock types
3877   // (except "tas"), the first field in the lock struct is the "initialized"
3878   // field, which is set to the address of the lock object itself when
3879   // the lock is initialized.  When the union is of type "pool", the
3880   // first field is a pointer to the next object in the free list, which
3881   // will not be the same address as the object itself.
3882   //
3883   // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3884   // for "pool" objects on the free list.  This must happen as the "location"
3885   // field of real user locks overlaps the "index" field of "pool" objects.
3886   //
3887   // It would be better to run through the free list, and remove all "pool"
3888   // objects from the lock table before executing this loop.  However,
3889   // "pool" objects do not always have their index field set (only on
3890   // lin_32e), and I don't want to search the lock table for the address
3891   // of every "pool" object on the free list.
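  //
  // For illustration only: a live queuing lock satisfies
  //   lck->lk.initialized == lck
  // (see __kmp_is_queuing_lock_initialized above), while a pooled entry keeps
  // pool.next in that same word -- either NULL or the address of a different
  // lock -- so (*__kmp_is_user_lock_initialized_)(lck) evaluates false and the
  // pooled entry is not destroyed a second time below.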
3892   while (__kmp_user_lock_table.used > 1) {
    const ident_t *loc;
3894 
3895     // reduce __kmp_user_lock_table.used before freeing the lock,
3896     // so that state of locks is consistent
3897     kmp_user_lock_p lck =
3898         __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3899 
3900     if ((__kmp_is_user_lock_initialized_ != NULL) &&
3901         (*__kmp_is_user_lock_initialized_)(lck)) {
3902       // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3903       // it is NOT a critical section (user is not responsible for destroying
3904       // criticals) AND we know source location to report.
3905       if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3906           ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3907           (loc->psource != NULL)) {
3908         kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
3909         KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3910         __kmp_str_loc_free(&str_loc);
3911       }
3912 
3913 #ifdef KMP_DEBUG
3914       if (IS_CRITICAL(lck)) {
3915         KA_TRACE(
3916             20,
3917             ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
3918              lck, *(void **)lck));
3919       } else {
3920         KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
3921                       *(void **)lck));
3922       }
3923 #endif // KMP_DEBUG
3924 
3925       // Cleanup internal lock dynamic resources (for drdpa locks particularly).
3926       __kmp_destroy_user_lock(lck);
3927     }
3928 
3929     // Free the lock if block allocation of locks is not used.
3930     if (__kmp_lock_blocks == NULL) {
3931       __kmp_free(lck);
3932     }
3933   }
3934 
3935 #undef IS_CRITICAL
3936 
3937   // delete lock table(s).
3938   kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3939   __kmp_user_lock_table.table = NULL;
3940   __kmp_user_lock_table.allocated = 0;
3941 
3942   while (table_ptr != NULL) {
3943     // In the first element we saved the pointer to the previous
3944     // (smaller) lock table.
3945     kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
3946     __kmp_free(table_ptr);
3947     table_ptr = next;
3948   }
3949 
3950   // Free buffers allocated for blocks of locks.
3951   kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
3952   __kmp_lock_blocks = NULL;
3953 
3954   while (block_ptr != NULL) {
3955     kmp_block_of_locks_t *next = block_ptr->next_block;
3956     __kmp_free(block_ptr->locks);
3957     // *block_ptr itself was allocated at the end of the locks vector.
3958     block_ptr = next;
3959   }
3960 
3961   TCW_4(__kmp_init_user_locks, FALSE);
3962 }
3963 
3964 #endif // KMP_USE_DYNAMIC_LOCK
3965