1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 #include <atomic>
18 
19 #include "kmp.h"
20 #include "kmp_itt.h"
21 #include "kmp_i18n.h"
22 #include "kmp_lock.h"
23 #include "kmp_io.h"
24 
25 #if KMP_USE_FUTEX
26 # include <unistd.h>
27 # include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include <pci/types.h> or break when
// you try to include it.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change),
// we just define the constants here and don't include <futex.h>.
33 # ifndef FUTEX_WAIT
34 #  define FUTEX_WAIT    0
35 # endif
36 # ifndef FUTEX_WAKE
37 #  define FUTEX_WAKE    1
38 # endif
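// FUTEX_WAIT is used below in __kmp_acquire_futex_lock() to block a contending thread on
// lck->lk.poll, and FUTEX_WAKE is used in __kmp_release_futex_lock() to wake a waiting thread.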
39 #endif
40 
41 /* Implement spin locks for internal library use.             */
42 /* The algorithm implemented is Lamport's bakery lock [1974]. */
43 
44 void
45 __kmp_validate_locks( void )
46 {
47     int i;
48     kmp_uint32  x, y;
49 
    /* Check to make sure unsigned arithmetic wraps properly */
51     x = ~((kmp_uint32) 0) - 2;
52     y = x - 2;
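    /* For example, x starts at 0xFFFFFFFD and y at 0xFFFFFFFB; as the loop below
       increments both, x wraps through 0xFFFFFFFF to 0, 1, ... while the difference
       (x - y) must stay equal to 2 (modulo 2^32) at every step. */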
53 
54     for (i = 0; i < 8; ++i, ++x, ++y) {
55         kmp_uint32 z = (x - y);
56         KMP_ASSERT( z == 2 );
57     }
58 
59     KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
60 }
61 
62 
63 /* ------------------------------------------------------------------------ */
64 /* test and set locks */
65 
66 //
67 // For the non-nested locks, we can only assume that the first 4 bytes were
68 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
69 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
70 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
71 //
72 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
73 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
74 //
75 
76 static kmp_int32
77 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
78 {
79     return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
80 }
81 
82 static inline bool
83 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
84 {
85     return lck->lk.depth_locked != -1;
86 }
87 
88 __forceinline static int
89 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
90 {
91     KMP_MB();
92 
93 #ifdef USE_LOCK_PROFILE
94     kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
95     if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
96         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
97     /* else __kmp_printf( "." );*/
98 #endif /* USE_LOCK_PROFILE */
99 
100     if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
101       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
102         KMP_FSYNC_ACQUIRED(lck);
103         return KMP_LOCK_ACQUIRED_FIRST;
104     }
105 
106     kmp_uint32 spins;
107     KMP_FSYNC_PREPARE( lck );
108     KMP_INIT_YIELD( spins );
109     if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
110       __kmp_xproc ) ) {
111         KMP_YIELD( TRUE );
112     }
113     else {
114         KMP_YIELD_SPIN( spins );
115     }
116 
117     kmp_backoff_t backoff = __kmp_spin_backoff_params;
118     while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
119       ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
120 
121         __kmp_spin_backoff(&backoff);
122         if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
123           __kmp_xproc ) ) {
124             KMP_YIELD( TRUE );
125         }
126         else {
127             KMP_YIELD_SPIN( spins );
128         }
129     }
130     KMP_FSYNC_ACQUIRED( lck );
131     return KMP_LOCK_ACQUIRED_FIRST;
132 }
133 
134 int
135 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
136 {
137     return __kmp_acquire_tas_lock_timed_template( lck, gtid );
138 }
139 
140 static int
141 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
142 {
143     char const * const func = "omp_set_lock";
144     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
145       && __kmp_is_tas_lock_nestable( lck ) ) {
146         KMP_FATAL( LockNestableUsedAsSimple, func );
147     }
148     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
149         KMP_FATAL( LockIsAlreadyOwned, func );
150     }
151     return __kmp_acquire_tas_lock( lck, gtid );
152 }
153 
154 int
155 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
156 {
157     if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
158       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
159         KMP_FSYNC_ACQUIRED( lck );
160         return TRUE;
161     }
162     return FALSE;
163 }
164 
165 static int
166 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
167 {
168     char const * const func = "omp_test_lock";
169     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
170       && __kmp_is_tas_lock_nestable( lck ) ) {
171         KMP_FATAL( LockNestableUsedAsSimple, func );
172     }
173     return __kmp_test_tas_lock( lck, gtid );
174 }
175 
176 int
177 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
178 {
179     KMP_MB();       /* Flush all pending memory write invalidates.  */
180 
181     KMP_FSYNC_RELEASING(lck);
182     KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
183     KMP_MB();       /* Flush all pending memory write invalidates.  */
184 
185     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
186       __kmp_xproc ) );
187     return KMP_LOCK_RELEASED;
188 }
189 
190 static int
191 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
192 {
193     char const * const func = "omp_unset_lock";
194     KMP_MB();  /* in case another processor initialized lock */
195     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
196       && __kmp_is_tas_lock_nestable( lck ) ) {
197         KMP_FATAL( LockNestableUsedAsSimple, func );
198     }
199     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
200         KMP_FATAL( LockUnsettingFree, func );
201     }
202     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
203       && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
204         KMP_FATAL( LockUnsettingSetByAnother, func );
205     }
206     return __kmp_release_tas_lock( lck, gtid );
207 }
208 
209 void
210 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
211 {
212     TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
213 }
214 
215 static void
216 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
217 {
218     __kmp_init_tas_lock( lck );
219 }
220 
221 void
222 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
223 {
224     lck->lk.poll = 0;
225 }
226 
227 static void
228 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
229 {
230     char const * const func = "omp_destroy_lock";
231     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
232       && __kmp_is_tas_lock_nestable( lck ) ) {
233         KMP_FATAL( LockNestableUsedAsSimple, func );
234     }
235     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
236         KMP_FATAL( LockStillOwned, func );
237     }
238     __kmp_destroy_tas_lock( lck );
239 }
240 
241 
242 //
243 // nested test and set locks
244 //
245 
246 int
247 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
248 {
249     KMP_DEBUG_ASSERT( gtid >= 0 );
250 
251     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
252         lck->lk.depth_locked += 1;
253         return KMP_LOCK_ACQUIRED_NEXT;
254     }
255     else {
256         __kmp_acquire_tas_lock_timed_template( lck, gtid );
257         lck->lk.depth_locked = 1;
258         return KMP_LOCK_ACQUIRED_FIRST;
259     }
260 }
261 
262 static int
263 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
264 {
265     char const * const func = "omp_set_nest_lock";
266     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
267         KMP_FATAL( LockSimpleUsedAsNestable, func );
268     }
269     return __kmp_acquire_nested_tas_lock( lck, gtid );
270 }
271 
272 int
273 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
274 {
275     int retval;
276 
277     KMP_DEBUG_ASSERT( gtid >= 0 );
278 
279     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
280         retval = ++lck->lk.depth_locked;
281     }
282     else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
283         retval = 0;
284     }
285     else {
286         KMP_MB();
287         retval = lck->lk.depth_locked = 1;
288     }
289     return retval;
290 }
291 
292 static int
293 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
294 {
295     char const * const func = "omp_test_nest_lock";
296     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
297         KMP_FATAL( LockSimpleUsedAsNestable, func );
298     }
299     return __kmp_test_nested_tas_lock( lck, gtid );
300 }
301 
302 int
303 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
304 {
305     KMP_DEBUG_ASSERT( gtid >= 0 );
306 
307     KMP_MB();
308     if ( --(lck->lk.depth_locked) == 0 ) {
309         __kmp_release_tas_lock( lck, gtid );
310         return KMP_LOCK_RELEASED;
311     }
312     return KMP_LOCK_STILL_HELD;
313 }
314 
315 static int
316 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
317 {
318     char const * const func = "omp_unset_nest_lock";
319     KMP_MB();  /* in case another processor initialized lock */
320     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
321         KMP_FATAL( LockSimpleUsedAsNestable, func );
322     }
323     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
324         KMP_FATAL( LockUnsettingFree, func );
325     }
326     if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
327         KMP_FATAL( LockUnsettingSetByAnother, func );
328     }
329     return __kmp_release_nested_tas_lock( lck, gtid );
330 }
331 
332 void
333 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
334 {
335     __kmp_init_tas_lock( lck );
336     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
337 }
338 
339 static void
340 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
341 {
342     __kmp_init_nested_tas_lock( lck );
343 }
344 
345 void
346 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
347 {
348     __kmp_destroy_tas_lock( lck );
349     lck->lk.depth_locked = 0;
350 }
351 
352 static void
353 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
354 {
355     char const * const func = "omp_destroy_nest_lock";
356     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
357         KMP_FATAL( LockSimpleUsedAsNestable, func );
358     }
359     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
360         KMP_FATAL( LockStillOwned, func );
361     }
362     __kmp_destroy_nested_tas_lock( lck );
363 }
364 
365 
366 #if KMP_USE_FUTEX
367 
368 /* ------------------------------------------------------------------------ */
369 /* futex locks */
370 
371 // futex locks are really just test and set locks, with a different method
372 // of handling contention.  They take the same amount of space as test and
373 // set locks, and are allocated the same way (i.e. use the area allocated by
374 // the compiler for non-nested locks / allocate nested locks on the heap).
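//
// Poll-word encoding used below: lck->lk.poll holds KMP_LOCK_FREE(futex) while the lock is
// free, and KMP_LOCK_BUSY((gtid+1) << 1, futex) while it is held by global thread id gtid
// (hence the ">> 1" in __kmp_get_futex_lock_owner()).  The low bit of the stripped value is
// a "waiters present" flag: an acquiring thread sets it before calling futex_wait, and the
// releasing thread issues futex_wake only when it observes that flag.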
375 
376 static kmp_int32
377 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
378 {
379     return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
380 }
381 
382 static inline bool
383 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
384 {
385     return lck->lk.depth_locked != -1;
386 }
387 
388 __forceinline static int
389 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
390 {
391     kmp_int32 gtid_code = ( gtid + 1 ) << 1;
392 
393     KMP_MB();
394 
395 #ifdef USE_LOCK_PROFILE
396     kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
397     if ( ( curr != 0 ) && ( curr != gtid_code ) )
398         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
399     /* else __kmp_printf( "." );*/
400 #endif /* USE_LOCK_PROFILE */
401 
402     KMP_FSYNC_PREPARE( lck );
403     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
404       lck, lck->lk.poll, gtid ) );
405 
406     kmp_int32 poll_val;
407 
408     while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
409              KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
410 
411         kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
412         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
413            lck, gtid, poll_val, cond ) );
414 
415         //
416         // NOTE: if you try to use the following condition for this branch
417         //
418         // if ( poll_val & 1 == 0 )
419         //
420         // Then the 12.0 compiler has a bug where the following block will
421         // always be skipped, regardless of the value of the LSB of poll_val.
422         //
423         if ( ! cond ) {
424             //
425             // Try to set the lsb in the poll to indicate to the owner
426             // thread that they need to wake this thread up.
427             //
428             if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
429                 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
430                   lck, lck->lk.poll, gtid ) );
431                 continue;
432             }
433             poll_val |= KMP_LOCK_BUSY(1, futex);
434 
435             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
436               lck, lck->lk.poll, gtid ) );
437         }
438 
439         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
440            lck, gtid, poll_val ) );
441 
442         kmp_int32 rc;
443         if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
444           poll_val, NULL, NULL, 0 ) ) != 0 ) {
445             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
446                lck, gtid, poll_val, rc, errno ) );
447             continue;
448         }
449 
450         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
451            lck, gtid, poll_val ) );
452         //
453         // This thread has now done a successful futex wait call and was
454         // entered on the OS futex queue.  We must now perform a futex
455         // wake call when releasing the lock, as we have no idea how many
456         // other threads are in the queue.
457         //
458         gtid_code |= 1;
459     }
460 
461     KMP_FSYNC_ACQUIRED( lck );
462     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
463       lck, lck->lk.poll, gtid ) );
464     return KMP_LOCK_ACQUIRED_FIRST;
465 }
466 
467 int
468 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
469 {
470    return __kmp_acquire_futex_lock_timed_template( lck, gtid );
471 }
472 
473 static int
474 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
475 {
476     char const * const func = "omp_set_lock";
477     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
478       && __kmp_is_futex_lock_nestable( lck ) ) {
479         KMP_FATAL( LockNestableUsedAsSimple, func );
480     }
481     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
482         KMP_FATAL( LockIsAlreadyOwned, func );
483     }
484     return __kmp_acquire_futex_lock( lck, gtid );
485 }
486 
487 int
488 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
489 {
490     if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex) ) ) {
491         KMP_FSYNC_ACQUIRED( lck );
492         return TRUE;
493     }
494     return FALSE;
495 }
496 
497 static int
498 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
499 {
500     char const * const func = "omp_test_lock";
501     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
502       && __kmp_is_futex_lock_nestable( lck ) ) {
503         KMP_FATAL( LockNestableUsedAsSimple, func );
504     }
505     return __kmp_test_futex_lock( lck, gtid );
506 }
507 
508 int
509 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
510 {
511     KMP_MB();       /* Flush all pending memory write invalidates.  */
512 
513     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
514       lck, lck->lk.poll, gtid ) );
515 
516     KMP_FSYNC_RELEASING(lck);
517 
518     kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
519 
520     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
521        lck, gtid, poll_val ) );
522 
523     if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
524         KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
525            lck, gtid ) );
526         syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
527     }
528 
529     KMP_MB();       /* Flush all pending memory write invalidates.  */
530 
531     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
532       lck, lck->lk.poll, gtid ) );
533 
534     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
535       __kmp_xproc ) );
536     return KMP_LOCK_RELEASED;
537 }
538 
539 static int
540 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
541 {
542     char const * const func = "omp_unset_lock";
543     KMP_MB();  /* in case another processor initialized lock */
544     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
545       && __kmp_is_futex_lock_nestable( lck ) ) {
546         KMP_FATAL( LockNestableUsedAsSimple, func );
547     }
548     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
549         KMP_FATAL( LockUnsettingFree, func );
550     }
551     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
552       && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
553         KMP_FATAL( LockUnsettingSetByAnother, func );
554     }
555     return __kmp_release_futex_lock( lck, gtid );
556 }
557 
558 void
559 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
560 {
561     TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
562 }
563 
564 static void
565 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
566 {
567     __kmp_init_futex_lock( lck );
568 }
569 
570 void
571 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
572 {
573     lck->lk.poll = 0;
574 }
575 
576 static void
577 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
578 {
579     char const * const func = "omp_destroy_lock";
580     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
581       && __kmp_is_futex_lock_nestable( lck ) ) {
582         KMP_FATAL( LockNestableUsedAsSimple, func );
583     }
584     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
585         KMP_FATAL( LockStillOwned, func );
586     }
587     __kmp_destroy_futex_lock( lck );
588 }
589 
590 
591 //
592 // nested futex locks
593 //
594 
595 int
596 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
597 {
598     KMP_DEBUG_ASSERT( gtid >= 0 );
599 
600     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
601         lck->lk.depth_locked += 1;
602         return KMP_LOCK_ACQUIRED_NEXT;
603     }
604     else {
605         __kmp_acquire_futex_lock_timed_template( lck, gtid );
606         lck->lk.depth_locked = 1;
607         return KMP_LOCK_ACQUIRED_FIRST;
608     }
609 }
610 
611 static int
612 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
613 {
614     char const * const func = "omp_set_nest_lock";
615     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
616         KMP_FATAL( LockSimpleUsedAsNestable, func );
617     }
618     return __kmp_acquire_nested_futex_lock( lck, gtid );
619 }
620 
621 int
622 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
623 {
624     int retval;
625 
626     KMP_DEBUG_ASSERT( gtid >= 0 );
627 
628     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
629         retval = ++lck->lk.depth_locked;
630     }
631     else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
632         retval = 0;
633     }
634     else {
635         KMP_MB();
636         retval = lck->lk.depth_locked = 1;
637     }
638     return retval;
639 }
640 
641 static int
642 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
643 {
644     char const * const func = "omp_test_nest_lock";
645     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
646         KMP_FATAL( LockSimpleUsedAsNestable, func );
647     }
648     return __kmp_test_nested_futex_lock( lck, gtid );
649 }
650 
651 int
652 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
653 {
654     KMP_DEBUG_ASSERT( gtid >= 0 );
655 
656     KMP_MB();
657     if ( --(lck->lk.depth_locked) == 0 ) {
658         __kmp_release_futex_lock( lck, gtid );
659         return KMP_LOCK_RELEASED;
660     }
661     return KMP_LOCK_STILL_HELD;
662 }
663 
664 static int
665 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
666 {
667     char const * const func = "omp_unset_nest_lock";
668     KMP_MB();  /* in case another processor initialized lock */
669     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
670         KMP_FATAL( LockSimpleUsedAsNestable, func );
671     }
672     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
673         KMP_FATAL( LockUnsettingFree, func );
674     }
675     if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
676         KMP_FATAL( LockUnsettingSetByAnother, func );
677     }
678     return __kmp_release_nested_futex_lock( lck, gtid );
679 }
680 
681 void
682 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
683 {
684     __kmp_init_futex_lock( lck );
685     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
686 }
687 
688 static void
689 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
690 {
691     __kmp_init_nested_futex_lock( lck );
692 }
693 
694 void
695 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
696 {
697     __kmp_destroy_futex_lock( lck );
698     lck->lk.depth_locked = 0;
699 }
700 
701 static void
702 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
703 {
704     char const * const func = "omp_destroy_nest_lock";
705     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
706         KMP_FATAL( LockSimpleUsedAsNestable, func );
707     }
708     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
709         KMP_FATAL( LockStillOwned, func );
710     }
711     __kmp_destroy_nested_futex_lock( lck );
712 }
713 
714 #endif // KMP_USE_FUTEX
715 
716 
717 /* ------------------------------------------------------------------------ */
718 /* ticket (bakery) locks */
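
// A ticket (bakery) lock hands out monotonically increasing tickets with an atomic
// fetch-add on lk.next_ticket; each acquiring thread then spins until lk.now_serving
// reaches its ticket, and release simply increments lk.now_serving.  A minimal standalone
// sketch of the same idea, for illustration only (the real kmp_ticket_lock_t below also
// tracks owner_id, depth_locked, location, and an initialized/self consistency check):
//
//     struct simple_ticket_lock {
//         std::atomic<kmp_uint32> next_ticket{0};
//         std::atomic<kmp_uint32> now_serving{0};
//         void lock() {
//             kmp_uint32 my_ticket =
//                 next_ticket.fetch_add( 1, std::memory_order_relaxed );
//             while ( now_serving.load( std::memory_order_acquire ) != my_ticket )
//                 ;   // the real code below yields/backs off via KMP_WAIT_YIELD_PTR
//         }
//         void unlock() {
//             now_serving.fetch_add( 1, std::memory_order_release );
//         }
//     };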
719 
720 static kmp_int32
721 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
722 {
723     return std::atomic_load_explicit( &lck->lk.owner_id, std::memory_order_relaxed ) - 1;
724 }
725 
726 static inline bool
727 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
728 {
729     return std::atomic_load_explicit( &lck->lk.depth_locked, std::memory_order_relaxed ) != -1;
730 }
731 
732 static kmp_uint32
733 __kmp_bakery_check( void *now_serving, kmp_uint32 my_ticket )
734 {
735     return std::atomic_load_explicit( (std::atomic<unsigned> *)now_serving, std::memory_order_acquire ) == my_ticket;
736 }
737 
738 __forceinline static int
739 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
740 {
741     kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed );
742 
743 #ifdef USE_LOCK_PROFILE
744     if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) != my_ticket )
745         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
746     /* else __kmp_printf( "." );*/
747 #endif /* USE_LOCK_PROFILE */
748 
749     if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_acquire ) == my_ticket ) {
750         return KMP_LOCK_ACQUIRED_FIRST;
751     }
752     KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
753     return KMP_LOCK_ACQUIRED_FIRST;
754 }
755 
756 int
757 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
758 {
759     return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
760 }
761 
762 static int
763 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
764 {
765     char const * const func = "omp_set_lock";
766 
767     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
768         KMP_FATAL( LockIsUninitialized, func );
769     }
770     if ( lck->lk.self != lck ) {
771         KMP_FATAL( LockIsUninitialized, func );
772     }
773     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
774         KMP_FATAL( LockNestableUsedAsSimple, func );
775     }
776     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
777         KMP_FATAL( LockIsAlreadyOwned, func );
778     }
779 
780     __kmp_acquire_ticket_lock( lck, gtid );
781 
782     std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
783     return KMP_LOCK_ACQUIRED_FIRST;
784 }
785 
786 int
787 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
788 {
789     kmp_uint32 my_ticket = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed );
790 
791     if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) == my_ticket ) {
792         kmp_uint32 next_ticket = my_ticket + 1;
793         if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket,
794              &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) {
795             return TRUE;
796         }
797     }
798     return FALSE;
799 }
800 
801 static int
802 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
803 {
804     char const * const func = "omp_test_lock";
805 
806     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
807         KMP_FATAL( LockIsUninitialized, func );
808     }
809     if ( lck->lk.self != lck ) {
810         KMP_FATAL( LockIsUninitialized, func );
811     }
812     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
813         KMP_FATAL( LockNestableUsedAsSimple, func );
814     }
815 
816     int retval = __kmp_test_ticket_lock( lck, gtid );
817 
818     if ( retval ) {
819         std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
820     }
821     return retval;
822 }
823 
824 int
825 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
826 {
827     kmp_uint32 distance = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ) - std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed );
828 
829     std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release );
830 
831     KMP_YIELD( distance
832       > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
833     return KMP_LOCK_RELEASED;
834 }
835 
836 static int
837 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
838 {
839     char const * const func = "omp_unset_lock";
840 
841     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
842         KMP_FATAL( LockIsUninitialized, func );
843     }
844     if ( lck->lk.self != lck ) {
845         KMP_FATAL( LockIsUninitialized, func );
846     }
847     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
848         KMP_FATAL( LockNestableUsedAsSimple, func );
849     }
850     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
851         KMP_FATAL( LockUnsettingFree, func );
852     }
853     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
854       && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
855         KMP_FATAL( LockUnsettingSetByAnother, func );
856     }
857     std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
858     return __kmp_release_ticket_lock( lck, gtid );
859 }
860 
861 void
862 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
863 {
864     lck->lk.location = NULL;
865     lck->lk.self = lck;
866     std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
867     std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
868     std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); // no thread owns the lock.
869     std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); // -1 => not a nested lock.
870     std::atomic_store_explicit( &lck->lk.initialized, true, std::memory_order_release );
871 }
872 
873 static void
874 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
875 {
876     __kmp_init_ticket_lock( lck );
877 }
878 
879 void
880 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
881 {
882     std::atomic_store_explicit( &lck->lk.initialized, false, std::memory_order_release );
883     lck->lk.self = NULL;
884     lck->lk.location = NULL;
885     std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
886     std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
887     std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
888     std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed );
889 }
890 
891 static void
892 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
893 {
894     char const * const func = "omp_destroy_lock";
895 
896     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
897         KMP_FATAL( LockIsUninitialized, func );
898     }
899     if ( lck->lk.self != lck ) {
900         KMP_FATAL( LockIsUninitialized, func );
901     }
902     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
903         KMP_FATAL( LockNestableUsedAsSimple, func );
904     }
905     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
906         KMP_FATAL( LockStillOwned, func );
907     }
908     __kmp_destroy_ticket_lock( lck );
909 }
910 
911 
912 //
913 // nested ticket locks
914 //
915 
916 int
917 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
918 {
919     KMP_DEBUG_ASSERT( gtid >= 0 );
920 
921     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
922         std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
923         return KMP_LOCK_ACQUIRED_NEXT;
924     }
925     else {
926         __kmp_acquire_ticket_lock_timed_template( lck, gtid );
927         std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
928         std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
929         return KMP_LOCK_ACQUIRED_FIRST;
930     }
931 }
932 
933 static int
934 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
935 {
936     char const * const func = "omp_set_nest_lock";
937 
938     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
939         KMP_FATAL( LockIsUninitialized, func );
940     }
941     if ( lck->lk.self != lck ) {
942         KMP_FATAL( LockIsUninitialized, func );
943     }
944     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
945         KMP_FATAL( LockSimpleUsedAsNestable, func );
946     }
947     return __kmp_acquire_nested_ticket_lock( lck, gtid );
948 }
949 
950 int
951 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
952 {
953     int retval;
954 
955     KMP_DEBUG_ASSERT( gtid >= 0 );
956 
957     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
958         retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ) + 1;
959     }
960     else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
961         retval = 0;
962     }
963     else {
964         std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
965         std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
966         retval = 1;
967     }
968     return retval;
969 }
970 
971 static int
972 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
973   kmp_int32 gtid )
974 {
975     char const * const func = "omp_test_nest_lock";
976 
977     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
978         KMP_FATAL( LockIsUninitialized, func );
979     }
980     if ( lck->lk.self != lck ) {
981         KMP_FATAL( LockIsUninitialized, func );
982     }
983     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
984         KMP_FATAL( LockSimpleUsedAsNestable, func );
985     }
986     return __kmp_test_nested_ticket_lock( lck, gtid );
987 }
988 
989 int
990 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
991 {
992     KMP_DEBUG_ASSERT( gtid >= 0 );
993 
994     if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ) - 1 ) == 0 ) {
995         std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
996         __kmp_release_ticket_lock( lck, gtid );
997         return KMP_LOCK_RELEASED;
998     }
999     return KMP_LOCK_STILL_HELD;
1000 }
1001 
1002 static int
1003 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1004 {
1005     char const * const func = "omp_unset_nest_lock";
1006 
1007     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1008         KMP_FATAL( LockIsUninitialized, func );
1009     }
1010     if ( lck->lk.self != lck ) {
1011         KMP_FATAL( LockIsUninitialized, func );
1012     }
1013     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1014         KMP_FATAL( LockSimpleUsedAsNestable, func );
1015     }
1016     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1017         KMP_FATAL( LockUnsettingFree, func );
1018     }
1019     if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1020         KMP_FATAL( LockUnsettingSetByAnother, func );
1021     }
1022     return __kmp_release_nested_ticket_lock( lck, gtid );
1023 }
1024 
1025 void
1026 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1027 {
1028     __kmp_init_ticket_lock( lck );
1029     std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); // >= 0 for nestable locks, -1 for simple locks
1030 }
1031 
1032 static void
1033 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1034 {
1035     __kmp_init_nested_ticket_lock( lck );
1036 }
1037 
1038 void
1039 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1040 {
1041     __kmp_destroy_ticket_lock( lck );
1042     std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed );
1043 }
1044 
1045 static void
1046 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1047 {
1048     char const * const func = "omp_destroy_nest_lock";
1049 
1050     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1051         KMP_FATAL( LockIsUninitialized, func );
1052     }
1053     if ( lck->lk.self != lck ) {
1054         KMP_FATAL( LockIsUninitialized, func );
1055     }
1056     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1057         KMP_FATAL( LockSimpleUsedAsNestable, func );
1058     }
1059     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1060         KMP_FATAL( LockStillOwned, func );
1061     }
1062     __kmp_destroy_nested_ticket_lock( lck );
1063 }
1064 
1065 
1066 //
1067 // access functions to fields which don't exist for all lock kinds.
1068 //
1069 
1070 static int
1071 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1072 {
1073     return std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) && ( lck->lk.self == lck);
1074 }
1075 
1076 static const ident_t *
1077 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1078 {
1079     return lck->lk.location;
1080 }
1081 
1082 static void
1083 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1084 {
1085     lck->lk.location = loc;
1086 }
1087 
1088 static kmp_lock_flags_t
1089 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1090 {
1091     return lck->lk.flags;
1092 }
1093 
1094 static void
1095 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1096 {
1097     lck->lk.flags = flags;
1098 }
1099 
1100 /* ------------------------------------------------------------------------ */
1101 /* queuing locks */
1102 
1103 /*
1104  * First the states
1105  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1106  *   UINT_MAX or -1, 0  means lock is held, nobody on queue
1107  *                h, h  means lock is held or about to transition, 1 element on queue
1108  *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1109  *
1110  * Now the transitions
1111  *    Acquire(0,0)  = -1 ,0
1112  *    Release(0,0)  = Error
1113  *    Acquire(-1,0) =  h ,h    h > 0
1114  *    Release(-1,0) =  0 ,0
1115  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1116  *    Release(h,h)  = -1 ,0    h > 0
1117  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1118  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1119  *
1120  * And pictorially
1121  *
1122  *
1123  *          +-----+
1124  *          | 0, 0|------- release -------> Error
1125  *          +-----+
1126  *            |  ^
1127  *     acquire|  |release
1128  *            |  |
1129  *            |  |
1130  *            v  |
1131  *          +-----+
1132  *          |-1, 0|
1133  *          +-----+
1134  *            |  ^
1135  *     acquire|  |release
1136  *            |  |
1137  *            |  |
1138  *            v  |
1139  *          +-----+
1140  *          | h, h|
1141  *          +-----+
1142  *            |  ^
1143  *     acquire|  |release
1144  *            |  |
1145  *            |  |
1146  *            v  |
1147  *          +-----+
1148  *          | h, t|----- acquire, release loopback ---+
1149  *          +-----+                                   |
1150  *               ^                                    |
1151  *               |                                    |
1152  *               +------------------------------------+
1153  *
1154  */
1155 
1156 #ifdef DEBUG_QUEUING_LOCKS
1157 
1158 /* Stuff for circular trace buffer */
1159 #define TRACE_BUF_ELE	1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
1161 static int tc = 0;
1162 #define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128,  "t%d at %s\n", X, Y );
1163 #define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1164 #define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1165 
1166 static void
1167 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1168   kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1169 {
1170     kmp_int32 t, i;
1171 
1172     __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1173 
1174     i = tc % TRACE_BUF_ELE;
1175     __kmp_printf_no_lock( "%s\n", traces[i] );
1176     i = (i+1) % TRACE_BUF_ELE;
1177     while ( i != (tc % TRACE_BUF_ELE) ) {
1178         __kmp_printf_no_lock( "%s", traces[i] );
1179         i = (i+1) % TRACE_BUF_ELE;
1180     }
1181     __kmp_printf_no_lock( "\n" );
1182 
1183     __kmp_printf_no_lock(
1184              "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1185              gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1186              head_id, tail_id );
1187 
1188     __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1189 
1190     if ( lck->lk.head_id >= 1 ) {
1191         t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1192         while (t > 0) {
1193             __kmp_printf_no_lock( "-> %d ", t );
1194             t = __kmp_threads[t-1]->th.th_next_waiting;
1195         }
1196     }
1197     __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
1198     __kmp_printf_no_lock( "\n\n" );
1199 }
1200 
1201 #endif /* DEBUG_QUEUING_LOCKS */
1202 
1203 static kmp_int32
1204 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1205 {
1206     return TCR_4( lck->lk.owner_id ) - 1;
1207 }
1208 
1209 static inline bool
1210 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1211 {
1212     return lck->lk.depth_locked != -1;
1213 }
1214 
/* Acquire a lock using the queuing lock implementation */
1216 template <bool takeTime>
1217 /* [TLW] The unused template above is left behind because of what BEB believes is a
1218    potential compiler problem with __forceinline. */
1219 __forceinline static int
1220 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1221   kmp_int32 gtid )
1222 {
1223     register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
1224     volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
1225     volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
1226     volatile kmp_uint32 *spin_here_p;
1227     kmp_int32 need_mf = 1;
1228 
1229 #if OMPT_SUPPORT
1230     ompt_state_t prev_state = ompt_state_undefined;
1231 #endif
1232 
1233     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1234 
1235     KMP_FSYNC_PREPARE( lck );
1236     KMP_DEBUG_ASSERT( this_thr != NULL );
1237     spin_here_p = & this_thr->th.th_spin_here;
1238 
1239 #ifdef DEBUG_QUEUING_LOCKS
1240     TRACE_LOCK( gtid+1, "acq ent" );
1241     if ( *spin_here_p )
1242         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1243     if ( this_thr->th.th_next_waiting != 0 )
1244         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1245 #endif
1246     KMP_DEBUG_ASSERT( !*spin_here_p );
1247     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1248 
1249 
1250     /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1251        that may follow, not just in execution order, but also in visibility order.  This way,
1252        when a releasing thread observes the changes to the queue by this thread, it can
1253        rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1254        spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
1255        to FALSE before this thread sets it to TRUE, this thread will hang.
1256     */
1257     *spin_here_p = TRUE;  /* before enqueuing to prevent race */
1258 
1259     while( 1 ) {
1260         kmp_int32 enqueued;
1261         kmp_int32 head;
1262         kmp_int32 tail;
1263 
1264         head = *head_id_p;
1265 
1266         switch ( head ) {
1267 
1268             case -1:
1269             {
1270 #ifdef DEBUG_QUEUING_LOCKS
1271                 tail = *tail_id_p;
1272                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1273 #endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                           this assignment prevents us from entering the if ( tail > 0 )
                           condition in the enqueued case below, which is not necessary for
                           this state transition */
1278 
1279                 need_mf = 0;
1280                 /* try (-1,0)->(tid,tid) */
1281                 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1282                   KMP_PACK_64( -1, 0 ),
1283                   KMP_PACK_64( gtid+1, gtid+1 ) );
1284 #ifdef DEBUG_QUEUING_LOCKS
1285                   if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1286 #endif
1287             }
1288             break;
1289 
1290             default:
1291             {
1292                 tail = *tail_id_p;
1293                 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1294 
1295 #ifdef DEBUG_QUEUING_LOCKS
1296                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1297 #endif
1298 
1299                 if ( tail == 0 ) {
1300                     enqueued = FALSE;
1301                 }
1302                 else {
1303                     need_mf = 0;
1304                     /* try (h,t) or (h,h)->(h,tid) */
1305                     enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1306 
1307 #ifdef DEBUG_QUEUING_LOCKS
1308                         if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1309 #endif
1310                 }
1311             }
1312             break;
1313 
1314             case 0: /* empty queue */
1315             {
1316                 kmp_int32 grabbed_lock;
1317 
1318 #ifdef DEBUG_QUEUING_LOCKS
1319                 tail = *tail_id_p;
1320                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1321 #endif
1322                 /* try (0,0)->(-1,0) */
1323 
1324                 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1325                 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1326 
1327                 if ( grabbed_lock ) {
1328 
1329                     *spin_here_p = FALSE;
1330 
1331                     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1332                               lck, gtid ));
1333 #ifdef DEBUG_QUEUING_LOCKS
1334                     TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1335 #endif
1336 
1337 #if OMPT_SUPPORT
1338                     if (ompt_enabled && prev_state != ompt_state_undefined) {
1339                         /* change the state before clearing wait_id */
1340                         this_thr->th.ompt_thread_info.state = prev_state;
1341                         this_thr->th.ompt_thread_info.wait_id = 0;
1342                     }
1343 #endif
1344 
1345                     KMP_FSYNC_ACQUIRED( lck );
1346                     return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1347                 }
1348                 enqueued = FALSE;
1349             }
1350             break;
1351         }
1352 
1353 #if OMPT_SUPPORT
1354         if (ompt_enabled && prev_state == ompt_state_undefined) {
1355             /* this thread will spin; set wait_id before entering wait state */
1356             prev_state = this_thr->th.ompt_thread_info.state;
1357             this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1358             this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1359         }
1360 #endif
1361 
1362         if ( enqueued ) {
1363             if ( tail > 0 ) {
1364                 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1365                 KMP_ASSERT( tail_thr != NULL );
1366                 tail_thr->th.th_next_waiting = gtid+1;
1367                 /* corresponding wait for this write in release code */
1368             }
1369             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1370 
1371 
1372             /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
1373              *       throughput only here.
1374              */
1375             KMP_MB();
1376             KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1377 
1378 #ifdef DEBUG_QUEUING_LOCKS
1379             TRACE_LOCK( gtid+1, "acq spin" );
1380 
1381             if ( this_thr->th.th_next_waiting != 0 )
1382                 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1383 #endif
1384             KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1385             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1386                       lck, gtid ));
1387 
1388 #ifdef DEBUG_QUEUING_LOCKS
1389             TRACE_LOCK( gtid+1, "acq exit 2" );
1390 #endif
1391 
1392 #if OMPT_SUPPORT
1393             /* change the state before clearing wait_id */
1394             this_thr->th.ompt_thread_info.state = prev_state;
1395             this_thr->th.ompt_thread_info.wait_id = 0;
1396 #endif
1397 
1398             /* got lock, we were dequeued by the thread that released lock */
1399             return KMP_LOCK_ACQUIRED_FIRST;
1400         }
1401 
1402         /* Yield if number of threads > number of logical processors */
1403         /* ToDo: Not sure why this should only be in oversubscription case,
1404            maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1405         KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1406           __kmp_xproc ) );
1407 #ifdef DEBUG_QUEUING_LOCKS
1408         TRACE_LOCK( gtid+1, "acq retry" );
1409 #endif
1410 
1411     }
1412     KMP_ASSERT2( 0, "should not get here" );
1413     return KMP_LOCK_ACQUIRED_FIRST;
1414 }
1415 
1416 int
1417 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1418 {
1419     KMP_DEBUG_ASSERT( gtid >= 0 );
1420 
1421     return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1422 }
1423 
1424 static int
1425 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1426   kmp_int32 gtid )
1427 {
1428     char const * const func = "omp_set_lock";
1429     if ( lck->lk.initialized != lck ) {
1430         KMP_FATAL( LockIsUninitialized, func );
1431     }
1432     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1433         KMP_FATAL( LockNestableUsedAsSimple, func );
1434     }
1435     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1436         KMP_FATAL( LockIsAlreadyOwned, func );
1437     }
1438 
1439     __kmp_acquire_queuing_lock( lck, gtid );
1440 
1441     lck->lk.owner_id = gtid + 1;
1442     return KMP_LOCK_ACQUIRED_FIRST;
1443 }
1444 
1445 int
1446 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1447 {
1448     volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
1449     kmp_int32 head;
1450 #ifdef KMP_DEBUG
1451     kmp_info_t *this_thr;
1452 #endif
1453 
1454     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1455     KMP_DEBUG_ASSERT( gtid >= 0 );
1456 #ifdef KMP_DEBUG
1457     this_thr = __kmp_thread_from_gtid( gtid );
1458     KMP_DEBUG_ASSERT( this_thr != NULL );
1459     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1460 #endif
1461 
1462     head = *head_id_p;
1463 
1464     if ( head == 0 ) { /* nobody on queue, nobody holding */
1465 
1466         /* try (0,0)->(-1,0) */
1467 
1468         if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1469             KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1470             KMP_FSYNC_ACQUIRED(lck);
1471             return TRUE;
1472         }
1473     }
1474 
1475     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1476     return FALSE;
1477 }
1478 
1479 static int
1480 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1481 {
1482     char const * const func = "omp_test_lock";
1483     if ( lck->lk.initialized != lck ) {
1484         KMP_FATAL( LockIsUninitialized, func );
1485     }
1486     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1487         KMP_FATAL( LockNestableUsedAsSimple, func );
1488     }
1489 
1490     int retval = __kmp_test_queuing_lock( lck, gtid );
1491 
1492     if ( retval ) {
1493         lck->lk.owner_id = gtid + 1;
1494     }
1495     return retval;
1496 }
1497 
1498 int
1499 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1500 {
1501     register kmp_info_t *this_thr;
1502     volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1503     volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1504 
1505     KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1506     KMP_DEBUG_ASSERT( gtid >= 0 );
1507     this_thr    = __kmp_thread_from_gtid( gtid );
1508     KMP_DEBUG_ASSERT( this_thr != NULL );
1509 #ifdef DEBUG_QUEUING_LOCKS
1510     TRACE_LOCK( gtid+1, "rel ent" );
1511 
1512     if ( this_thr->th.th_spin_here )
1513         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1514     if ( this_thr->th.th_next_waiting != 0 )
1515         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1516 #endif
1517     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1518     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1519 
1520     KMP_FSYNC_RELEASING(lck);
1521 
1522     while( 1 ) {
1523         kmp_int32 dequeued;
1524         kmp_int32 head;
1525         kmp_int32 tail;
1526 
1527         head = *head_id_p;
1528 
1529 #ifdef DEBUG_QUEUING_LOCKS
1530         tail = *tail_id_p;
1531         TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1532         if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1533 #endif
1534         KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1535 
1536         if ( head == -1 ) { /* nobody on queue */
1537 
1538             /* try (-1,0)->(0,0) */
1539             if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1540                 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1541                           lck, gtid ));
1542 #ifdef DEBUG_QUEUING_LOCKS
1543                 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1544 #endif
1545 
1546 #if OMPT_SUPPORT
1547                 /* nothing to do - no other thread is trying to shift blame */
1548 #endif
1549 
1550                 return KMP_LOCK_RELEASED;
1551             }
1552             dequeued = FALSE;
1553 
1554         }
1555         else {
1556 
1557             tail = *tail_id_p;
1558             if ( head == tail ) {  /* only one thread on the queue */
1559 
1560 #ifdef DEBUG_QUEUING_LOCKS
1561                 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1562 #endif
1563                 KMP_DEBUG_ASSERT( head > 0 );
1564 
1565                 /* try (h,h)->(-1,0) */
1566                 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1567                   KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1568 #ifdef DEBUG_QUEUING_LOCKS
1569                 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1570 #endif
1571 
1572             }
1573             else {
1574                 volatile kmp_int32 *waiting_id_p;
1575                 kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
1576                 KMP_DEBUG_ASSERT( head_thr != NULL );
1577                 waiting_id_p = & head_thr->th.th_next_waiting;
1578 
1579                 /* Does this require synchronous reads? */
1580 #ifdef DEBUG_QUEUING_LOCKS
1581                 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1582 #endif
1583                 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1584 
1585                 /* try (h,t)->(h',t) or (t,t) */
1586 
1587                 KMP_MB();
1588                 /* make sure enqueuing thread has time to update next waiting thread field */
1589                 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
1590 #ifdef DEBUG_QUEUING_LOCKS
1591                 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1592 #endif
1593                 dequeued = TRUE;
1594             }
1595         }
1596 
1597         if ( dequeued ) {
1598             kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1599             KMP_DEBUG_ASSERT( head_thr != NULL );
1600 
1601             /* Does this require synchronous reads? */
1602 #ifdef DEBUG_QUEUING_LOCKS
1603             if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1604 #endif
1605             KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1606 
            /* For clean code only: clearing th_next_waiting here does not
             * release the waiting thread; it is not released until
             * th_spin_here is cleared below, which prevents a race with the
             * acquire code.
             */
1610             head_thr->th.th_next_waiting = 0;
1611 #ifdef DEBUG_QUEUING_LOCKS
1612             TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1613 #endif
1614 
1615             KMP_MB();
1616             /* reset spin value */
1617             head_thr->th.th_spin_here = FALSE;
1618 
1619             KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1620                       lck, gtid ));
1621 #ifdef DEBUG_QUEUING_LOCKS
1622             TRACE_LOCK( gtid+1, "rel exit 2" );
1623 #endif
1624             return KMP_LOCK_RELEASED;
1625         }
1626         /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */
1627 
1628 #ifdef DEBUG_QUEUING_LOCKS
1629         TRACE_LOCK( gtid+1, "rel retry" );
1630 #endif
1631 
1632     } /* while */
1633     KMP_ASSERT2( 0, "should not get here" );
1634     return KMP_LOCK_RELEASED;
1635 }
1636 
1637 static int
1638 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1639   kmp_int32 gtid )
1640 {
1641     char const * const func = "omp_unset_lock";
1642     KMP_MB();  /* in case another processor initialized lock */
1643     if ( lck->lk.initialized != lck ) {
1644         KMP_FATAL( LockIsUninitialized, func );
1645     }
1646     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1647         KMP_FATAL( LockNestableUsedAsSimple, func );
1648     }
1649     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1650         KMP_FATAL( LockUnsettingFree, func );
1651     }
1652     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1653         KMP_FATAL( LockUnsettingSetByAnother, func );
1654     }
1655     lck->lk.owner_id = 0;
1656     return __kmp_release_queuing_lock( lck, gtid );
1657 }
1658 
1659 void
1660 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1661 {
1662     lck->lk.location = NULL;
1663     lck->lk.head_id = 0;
1664     lck->lk.tail_id = 0;
1665     lck->lk.next_ticket = 0;
1666     lck->lk.now_serving = 0;
1667     lck->lk.owner_id = 0;      // no thread owns the lock.
1668     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1669     lck->lk.initialized = lck;
1670 
1671     KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1672 }
1673 
1674 static void
1675 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1676 {
1677     __kmp_init_queuing_lock( lck );
1678 }
1679 
1680 void
1681 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1682 {
1683     lck->lk.initialized = NULL;
1684     lck->lk.location = NULL;
1685     lck->lk.head_id = 0;
1686     lck->lk.tail_id = 0;
1687     lck->lk.next_ticket = 0;
1688     lck->lk.now_serving = 0;
1689     lck->lk.owner_id = 0;
1690     lck->lk.depth_locked = -1;
1691 }
1692 
1693 static void
1694 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1695 {
1696     char const * const func = "omp_destroy_lock";
1697     if ( lck->lk.initialized != lck ) {
1698         KMP_FATAL( LockIsUninitialized, func );
1699     }
1700     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1701         KMP_FATAL( LockNestableUsedAsSimple, func );
1702     }
1703     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1704         KMP_FATAL( LockStillOwned, func );
1705     }
1706     __kmp_destroy_queuing_lock( lck );
1707 }
1708 
1709 
1710 //
1711 // nested queuing locks
1712 //
1713 
1714 int
1715 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1716 {
1717     KMP_DEBUG_ASSERT( gtid >= 0 );
1718 
1719     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1720         lck->lk.depth_locked += 1;
1721         return KMP_LOCK_ACQUIRED_NEXT;
1722     }
1723     else {
1724         __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1725         KMP_MB();
1726         lck->lk.depth_locked = 1;
1727         KMP_MB();
1728         lck->lk.owner_id = gtid + 1;
1729         return KMP_LOCK_ACQUIRED_FIRST;
1730     }
1731 }
1732 
1733 static int
1734 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1735 {
1736     char const * const func = "omp_set_nest_lock";
1737     if ( lck->lk.initialized != lck ) {
1738         KMP_FATAL( LockIsUninitialized, func );
1739     }
1740     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1741         KMP_FATAL( LockSimpleUsedAsNestable, func );
1742     }
1743     return __kmp_acquire_nested_queuing_lock( lck, gtid );
1744 }
1745 
1746 int
1747 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1748 {
1749     int retval;
1750 
1751     KMP_DEBUG_ASSERT( gtid >= 0 );
1752 
1753     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1754         retval = ++lck->lk.depth_locked;
1755     }
1756     else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1757         retval = 0;
1758     }
1759     else {
1760         KMP_MB();
1761         retval = lck->lk.depth_locked = 1;
1762         KMP_MB();
1763         lck->lk.owner_id = gtid + 1;
1764     }
1765     return retval;
1766 }
1767 
1768 static int
1769 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1770   kmp_int32 gtid )
1771 {
1772     char const * const func = "omp_test_nest_lock";
1773     if ( lck->lk.initialized != lck ) {
1774         KMP_FATAL( LockIsUninitialized, func );
1775     }
1776     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1777         KMP_FATAL( LockSimpleUsedAsNestable, func );
1778     }
1779     return __kmp_test_nested_queuing_lock( lck, gtid );
1780 }
1781 
1782 int
1783 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1784 {
1785     KMP_DEBUG_ASSERT( gtid >= 0 );
1786 
1787     KMP_MB();
1788     if ( --(lck->lk.depth_locked) == 0 ) {
1789         KMP_MB();
1790         lck->lk.owner_id = 0;
1791         __kmp_release_queuing_lock( lck, gtid );
1792         return KMP_LOCK_RELEASED;
1793     }
1794     return KMP_LOCK_STILL_HELD;
1795 }
1796 
1797 static int
1798 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1799 {
1800     char const * const func = "omp_unset_nest_lock";
1801     KMP_MB();  /* in case another processor initialized lock */
1802     if ( lck->lk.initialized != lck ) {
1803         KMP_FATAL( LockIsUninitialized, func );
1804     }
1805     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1806         KMP_FATAL( LockSimpleUsedAsNestable, func );
1807     }
1808     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1809         KMP_FATAL( LockUnsettingFree, func );
1810     }
1811     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1812         KMP_FATAL( LockUnsettingSetByAnother, func );
1813     }
1814     return __kmp_release_nested_queuing_lock( lck, gtid );
1815 }
1816 
1817 void
1818 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1819 {
1820     __kmp_init_queuing_lock( lck );
1821     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1822 }
1823 
1824 static void
1825 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1826 {
1827     __kmp_init_nested_queuing_lock( lck );
1828 }
1829 
1830 void
1831 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1832 {
1833     __kmp_destroy_queuing_lock( lck );
1834     lck->lk.depth_locked = 0;
1835 }
1836 
1837 static void
1838 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1839 {
1840     char const * const func = "omp_destroy_nest_lock";
1841     if ( lck->lk.initialized != lck ) {
1842         KMP_FATAL( LockIsUninitialized, func );
1843     }
1844     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1845         KMP_FATAL( LockSimpleUsedAsNestable, func );
1846     }
1847     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1848         KMP_FATAL( LockStillOwned, func );
1849     }
1850     __kmp_destroy_nested_queuing_lock( lck );
1851 }
1852 
1853 
1854 //
1855 // access functions to fields which don't exist for all lock kinds.
1856 //
1857 
1858 static int
1859 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1860 {
1861     return lck == lck->lk.initialized;
1862 }
1863 
1864 static const ident_t *
1865 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1866 {
1867     return lck->lk.location;
1868 }
1869 
1870 static void
1871 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1872 {
1873     lck->lk.location = loc;
1874 }
1875 
1876 static kmp_lock_flags_t
1877 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1878 {
1879     return lck->lk.flags;
1880 }
1881 
1882 static void
1883 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1884 {
1885     lck->lk.flags = flags;
1886 }
1887 
1888 #if KMP_USE_ADAPTIVE_LOCKS
1889 
1890 /*
1891     RTM Adaptive locks
1892 */
1893 
1894 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1895 
1896 #include <immintrin.h>
1897 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1898 
1899 #else
1900 
1901 // Values from the status register after failed speculation.
1902 #define _XBEGIN_STARTED          (~0u)
1903 #define _XABORT_EXPLICIT         (1 << 0)
1904 #define _XABORT_RETRY            (1 << 1)
1905 #define _XABORT_CONFLICT         (1 << 2)
1906 #define _XABORT_CAPACITY         (1 << 3)
1907 #define _XABORT_DEBUG            (1 << 4)
1908 #define _XABORT_NESTED           (1 << 5)
1909 #define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))
1910 
1911 // Aborts for which it's worth trying again immediately
1912 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1913 
1914 #define STRINGIZE_INTERNAL(arg) #arg
1915 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1916 
1917 // Access to RTM instructions
1918 
1919 /*
1920   A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1921   This is the same definition as the compiler intrinsic that will be supported at some point.
1922 */
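/*
  On successful speculation res keeps its initial value of -1, which matches
  _XBEGIN_STARTED (~0u); only the abort path overwrites it with the status
  returned in EAX.
*/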
1923 static __inline int _xbegin()
1924 {
1925     int res = -1;
1926 
1927 #if KMP_OS_WINDOWS
1928 #if KMP_ARCH_X86_64
1929     _asm {
1930         _emit 0xC7
1931         _emit 0xF8
1932         _emit 2
1933         _emit 0
1934         _emit 0
1935         _emit 0
1936         jmp   L2
1937         mov   res, eax
1938     L2:
1939     }
1940 #else /* IA32 */
1941     _asm {
1942         _emit 0xC7
1943         _emit 0xF8
1944         _emit 2
1945         _emit 0
1946         _emit 0
1947         _emit 0
1948         jmp   L2
1949         mov   res, eax
1950     L2:
1951     }
1952 #endif // KMP_ARCH_X86_64
1953 #else
1954     /* Note that %eax must be noted as killed (clobbered), because
1955      * the XSR is returned in %eax(%rax) on abort.  Other register
1956      * values are restored, so don't need to be killed.
1957      *
1958      * We must also mark 'res' as an input and an output, since otherwise
1959      * 'res=-1' may be dropped as being dead, whereas we do need the
1960      * assignment on the successful (i.e., non-abort) path.
1961      */
1962     __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
1963                       "   .long  1f-1b-6\n"
1964                       "    jmp   2f\n"
1965                       "1:  movl  %%eax,%0\n"
1966                       "2:"
1967                       :"+r"(res)::"memory","%eax");
1968 #endif // KMP_OS_WINDOWS
1969     return res;
1970 }
1971 
1972 /*
1973   Transaction end
1974 */
1975 static __inline void _xend()
1976 {
1977 #if KMP_OS_WINDOWS
1978     __asm  {
1979         _emit 0x0f
1980         _emit 0x01
1981         _emit 0xd5
1982     }
1983 #else
1984     __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1985 #endif
1986 }
1987 
1988 /*
1989   This is a macro, the argument must be a single byte constant which
1990   can be evaluated by the inline assembler, since it is emitted as a
1991   byte into the assembly code.
1992 */
1993 #if KMP_OS_WINDOWS
1994 #define _xabort(ARG)                            \
1995     _asm _emit 0xc6                             \
1996     _asm _emit 0xf8                             \
1997     _asm _emit ARG
1998 #else
1999 #define _xabort(ARG) \
2000     __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2001 #endif
2002 
2003 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
2004 
2005 //
//    Statistics are collected for testing purposes
2007 //
2008 #if KMP_DEBUG_ADAPTIVE_LOCKS
2009 
2010 // We accumulate speculative lock statistics when the lock is destroyed.
2011 // We keep locks that haven't been destroyed in the liveLocks list
2012 // so that we can grab their statistics too.
2013 static kmp_adaptive_lock_statistics_t destroyedStats;
2014 
2015 // To hold the list of live locks.
2016 static kmp_adaptive_lock_info_t liveLocks;
2017 
2018 // A lock so we can safely update the list of locks.
2019 static kmp_bootstrap_lock_t chain_lock;
2020 
2021 // Initialize the list of stats.
2022 void
2023 __kmp_init_speculative_stats()
2024 {
2025     kmp_adaptive_lock_info_t *lck = &liveLocks;
2026 
2027     memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2028     lck->stats.next = lck;
2029     lck->stats.prev = lck;
2030 
2031     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2032     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2033 
2034     __kmp_init_bootstrap_lock( &chain_lock );
2035 
2036 }
2037 
2038 // Insert the lock into the circular list
2039 static void
2040 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
2041 {
2042     __kmp_acquire_bootstrap_lock( &chain_lock );
2043 
2044     lck->stats.next = liveLocks.stats.next;
2045     lck->stats.prev = &liveLocks;
2046 
2047     liveLocks.stats.next = lck;
2048     lck->stats.next->stats.prev  = lck;
2049 
2050     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2051     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2052 
2053     __kmp_release_bootstrap_lock( &chain_lock );
2054 }
2055 
2056 static void
2057 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2058 {
2059     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2060     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2061 
2062     kmp_adaptive_lock_info_t * n = lck->stats.next;
2063     kmp_adaptive_lock_info_t * p = lck->stats.prev;
2064 
2065     n->stats.prev = p;
2066     p->stats.next = n;
2067 }
2068 
2069 static void
2070 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2071 {
2072     memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2073     __kmp_remember_lock( lck );
2074 }
2075 
2076 static void
2077 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2078 {
2079     kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2080 
2081     t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2082     t->successfulSpeculations += s->successfulSpeculations;
2083     t->hardFailedSpeculations += s->hardFailedSpeculations;
2084     t->softFailedSpeculations += s->softFailedSpeculations;
2085     t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2086     t->lemmingYields          += s->lemmingYields;
2087 }
2088 
2089 static void
2090 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2091 {
2092     kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2093 
2094     __kmp_acquire_bootstrap_lock( &chain_lock );
2095 
    __kmp_add_stats( t, lck );
2097     __kmp_forget_lock( lck );
2098 
2099     __kmp_release_bootstrap_lock( &chain_lock );
2100 }
2101 
2102 static float
2103 percent (kmp_uint32 count, kmp_uint32 total)
2104 {
2105     return (total == 0) ? 0.0: (100.0 * count)/total;
2106 }
2107 
2108 static
2109 FILE * __kmp_open_stats_file()
2110 {
2111     if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2112         return stdout;
2113 
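    // The stats-file name is used as a printf-style format string, so a
    // conversion such as "%d" in it is replaced with the process id.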
2114     size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2115     char buffer[buffLen];
2116     KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2117       (kmp_int32)getpid());
2118     FILE * result = fopen(&buffer[0], "w");
2119 
2120     // Maybe we should issue a warning here...
2121     return result ? result : stdout;
2122 }
2123 
2124 void
2125 __kmp_print_speculative_stats()
2126 {
2127     if (__kmp_user_lock_kind != lk_adaptive)
2128         return;
2129 
2130     FILE * statsFile = __kmp_open_stats_file();
2131 
2132     kmp_adaptive_lock_statistics_t total = destroyedStats;
2133     kmp_adaptive_lock_info_t *lck;
2134 
2135     for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2136         __kmp_add_stats( &total, lck );
2137     }
2138     kmp_adaptive_lock_statistics_t *t = &total;
2139     kmp_uint32 totalSections     = t->nonSpeculativeAcquires + t->successfulSpeculations;
2140     kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2141                                    t->softFailedSpeculations;
2142 
2143     fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2144     fprintf ( statsFile, " Lock parameters: \n"
2145              "   max_soft_retries               : %10d\n"
2146              "   max_badness                    : %10d\n",
2147              __kmp_adaptive_backoff_params.max_soft_retries,
2148              __kmp_adaptive_backoff_params.max_badness);
2149     fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2150     fprintf( statsFile, " Total critical sections          : %10d\n", totalSections );
2151     fprintf( statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
2152              t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2153     fprintf( statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
2154              t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2155     fprintf( statsFile, " Lemming yields                   : %10d\n\n", t->lemmingYields );
2156 
2157     fprintf( statsFile, " Speculative acquire attempts     : %10d\n", totalSpeculations );
2158     fprintf( statsFile, " Successes                        : %10d (%5.1f%%)\n",
2159              t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2160     fprintf( statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
2161              t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2162     fprintf( statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
2163              t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2164 
2165     if (statsFile != stdout)
2166         fclose( statsFile );
2167 }
2168 
2169 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2170 #else
2171 # define KMP_INC_STAT(lck,stat)
2172 
2173 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2174 
2175 static inline bool
2176 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2177 {
2178     // It is enough to check that the head_id is zero.
2179     // We don't also need to check the tail.
2180     bool res = lck->lk.head_id == 0;
2181 
2182     // We need a fence here, since we must ensure that no memory operations
2183     // from later in this thread float above that read.
2184 #if KMP_COMPILER_ICC
2185     _mm_mfence();
2186 #else
2187     __sync_synchronize();
2188 #endif
2189 
2190     return res;
2191 }
2192 
2193 // Functions for manipulating the badness
2194 static __inline void
2195 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2196 {
2197     // Reset the badness to zero so we eagerly try to speculate again
2198     lck->lk.adaptive.badness = 0;
2199     KMP_INC_STAT(lck,successfulSpeculations);
2200 }
2201 
2202 // Create a bit mask with one more set bit.
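// Each failed speculation shifts another 1 into the badness mask (saturating
// at max_badness), so the lock backs off from speculation more strongly after
// repeated failures.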
2203 static __inline void
2204 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2205 {
2206     kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2207     if ( newBadness > lck->lk.adaptive.max_badness) {
2208         return;
2209     } else {
2210         lck->lk.adaptive.badness = newBadness;
2211     }
2212 }
2213 
2214 // Check whether speculation should be attempted.
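// Speculation is attempted only when (acquire_attempts & badness) == 0; the
// more bits that are set in badness, the smaller the fraction of acquire
// attempts that try to speculate.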
2215 static __inline int
2216 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2217 {
2218     kmp_uint32 badness = lck->lk.adaptive.badness;
    kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2220     int res = (attempts & badness) == 0;
2221     return res;
2222 }
2223 
2224 // Attempt to acquire only the speculative lock.
2225 // Does not back off to the non-speculative lock.
2226 //
2227 static int
2228 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2229 {
2230     int retries = lck->lk.adaptive.max_soft_retries;
2231 
    // We don't explicitly count the start of speculation; rather, we record
    // the results (success, hard fail, soft fail). The sum of all of those
    // is the total number of times we started speculation, since every
    // speculation must end in one of those ways.
2236     do
2237     {
2238         kmp_uint32 status = _xbegin();
2239         // Switch this in to disable actual speculation but exercise
2240         // at least some of the rest of the code. Useful for debugging...
2241         // kmp_uint32 status = _XABORT_NESTED;
2242 
2243         if (status == _XBEGIN_STARTED )
2244         { /* We have successfully started speculation
2245            * Check that no-one acquired the lock for real between when we last looked
2246            * and now. This also gets the lock cache line into our read-set,
2247            * which we need so that we'll abort if anyone later claims it for real.
2248            */
2249             if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2250             {
2251                 // Lock is now visibly acquired, so someone beat us to it.
2252                 // Abort the transaction so we'll restart from _xbegin with the
2253                 // failure status.
2254                 _xabort(0x01);
2255                 KMP_ASSERT2( 0, "should not get here" );
2256             }
2257             return 1;   // Lock has been acquired (speculatively)
2258         } else {
2259             // We have aborted, update the statistics
2260             if ( status & SOFT_ABORT_MASK)
2261             {
2262                 KMP_INC_STAT(lck,softFailedSpeculations);
2263                 // and loop round to retry.
2264             }
2265             else
2266             {
2267                 KMP_INC_STAT(lck,hardFailedSpeculations);
2268                 // Give up if we had a hard failure.
2269                 break;
2270             }
2271         }
2272     }  while( retries-- ); // Loop while we have retries, and didn't fail hard.
2273 
2274     // Either we had a hard failure or we didn't succeed softly after
2275     // the full set of attempts, so back off the badness.
2276     __kmp_step_badness( lck );
2277     return 0;
2278 }
2279 
2280 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2281 // if the speculative lock cannot be acquired.
2282 // We can succeed speculatively, non-speculatively, or fail.
2283 static int
2284 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2285 {
2286     // First try to acquire the lock speculatively
2287     if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2288         return 1;
2289 
2290     // Speculative acquisition failed, so try to acquire it non-speculatively.
2291     // Count the non-speculative acquire attempt
2292     lck->lk.adaptive.acquire_attempts++;
2293 
2294     // Use base, non-speculative lock.
2295     if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2296     {
2297         KMP_INC_STAT(lck,nonSpeculativeAcquires);
2298         return 1;       // Lock is acquired (non-speculatively)
2299     }
2300     else
2301     {
2302         return 0;       // Failed to acquire the lock, it's already visibly locked.
2303     }
2304 }
2305 
2306 static int
2307 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2308 {
2309     char const * const func = "omp_test_lock";
2310     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2311         KMP_FATAL( LockIsUninitialized, func );
2312     }
2313 
2314     int retval = __kmp_test_adaptive_lock( lck, gtid );
2315 
2316     if ( retval ) {
2317         lck->lk.qlk.owner_id = gtid + 1;
2318     }
2319     return retval;
2320 }
2321 
2322 // Block until we can acquire a speculative, adaptive lock.
2323 // We check whether we should be trying to speculate.
2324 // If we should be, we check the real lock to see if it is free,
2325 // and, if not, pause without attempting to acquire it until it is.
2326 // Then we try the speculative acquire.
// This means that although we suffer from lemmings a little (because we
// can't acquire the lock speculatively until the queue of waiting threads
// has cleared), we don't get into a state where we can never acquire the
// lock speculatively (because we force the queue to clear by preventing
// new arrivals from entering the queue).
// This does mean that when we're trying to break lemmings, the lock
// is no longer fair. However, OpenMP makes no guarantee that its
// locks are fair, so this isn't a real problem.
2336 static void
2337 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2338 {
2339     if ( __kmp_should_speculate( lck, gtid ) )
2340     {
2341         if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2342         {
2343             if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2344                 return;
2345             // We tried speculation and failed, so give up.
2346         }
2347         else
2348         {
            // We can't try speculation until the lock is free, so we
            // pause here (without suspending on the queuing lock) to
            // allow it to drain, then try again.
            // All other threads will also see the same result from
            // __kmp_should_speculate, so they will be doing the same
            // if they try to claim the lock from now on.
2355             while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2356             {
2357                 KMP_INC_STAT(lck,lemmingYields);
2358                 __kmp_yield (TRUE);
2359             }
2360 
2361             if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2362                 return;
2363         }
2364     }
2365 
2366     // Speculative acquisition failed, so acquire it non-speculatively.
2367     // Count the non-speculative acquire attempt
2368     lck->lk.adaptive.acquire_attempts++;
2369 
2370     __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2371     // We have acquired the base lock, so count that.
2372     KMP_INC_STAT(lck,nonSpeculativeAcquires );
2373 }
2374 
2375 static void
2376 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2377 {
2378     char const * const func = "omp_set_lock";
2379     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2380         KMP_FATAL( LockIsUninitialized, func );
2381     }
2382     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2383         KMP_FATAL( LockIsAlreadyOwned, func );
2384     }
2385 
2386     __kmp_acquire_adaptive_lock( lck, gtid );
2387 
2388     lck->lk.qlk.owner_id = gtid + 1;
2389 }
2390 
2391 static int
2392 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2393 {
2394     if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2395     {   // If the lock doesn't look claimed we must be speculating.
2396         // (Or the user's code is buggy and they're releasing without locking;
2397         // if we had XTEST we'd be able to check that case...)
2398         _xend();        // Exit speculation
2399         __kmp_update_badness_after_success( lck );
2400     }
2401     else
2402     {   // Since the lock *is* visibly locked we're not speculating,
2403         // so should use the underlying lock's release scheme.
2404         __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2405     }
2406     return KMP_LOCK_RELEASED;
2407 }
2408 
2409 static int
2410 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2411 {
2412     char const * const func = "omp_unset_lock";
2413     KMP_MB();  /* in case another processor initialized lock */
2414     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2415         KMP_FATAL( LockIsUninitialized, func );
2416     }
2417     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2418         KMP_FATAL( LockUnsettingFree, func );
2419     }
2420     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2421         KMP_FATAL( LockUnsettingSetByAnother, func );
2422     }
2423     lck->lk.qlk.owner_id = 0;
2424     __kmp_release_adaptive_lock( lck, gtid );
2425     return KMP_LOCK_RELEASED;
2426 }
2427 
2428 static void
2429 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2430 {
2431     __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2432     lck->lk.adaptive.badness = 0;
2433     lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2434     lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2435     lck->lk.adaptive.max_badness      = __kmp_adaptive_backoff_params.max_badness;
2436 #if KMP_DEBUG_ADAPTIVE_LOCKS
2437     __kmp_zero_speculative_stats( &lck->lk.adaptive );
2438 #endif
2439     KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2440 }
2441 
2442 static void
2443 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2444 {
2445     __kmp_init_adaptive_lock( lck );
2446 }
2447 
2448 static void
2449 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2450 {
2451 #if KMP_DEBUG_ADAPTIVE_LOCKS
2452     __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2453 #endif
2454     __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2455     // Nothing needed for the speculative part.
2456 }
2457 
2458 static void
2459 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2460 {
2461     char const * const func = "omp_destroy_lock";
2462     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2463         KMP_FATAL( LockIsUninitialized, func );
2464     }
2465     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2466         KMP_FATAL( LockStillOwned, func );
2467     }
2468     __kmp_destroy_adaptive_lock( lck );
2469 }
2470 
2471 
2472 #endif // KMP_USE_ADAPTIVE_LOCKS
2473 
2474 
2475 /* ------------------------------------------------------------------------ */
2476 /* DRDPA ticket locks                                                */
2477 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2478 
2479 static kmp_int32
2480 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2481 {
2482     return TCR_4( lck->lk.owner_id ) - 1;
2483 }
2484 
2485 static inline bool
2486 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2487 {
2488     return lck->lk.depth_locked != -1;
2489 }
2490 
2491 __forceinline static int
2492 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2493 {
2494     kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2495     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2496     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2497       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2498       TCR_PTR(lck->lk.polls);                           // volatile load
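    //
    // Basic protocol: each acquirer atomically takes a ticket from
    // next_ticket, then spins on polls[ticket & mask] until the releasing
    // thread stores that ticket into the corresponding poll location.
    //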
2499 
2500 #ifdef USE_LOCK_PROFILE
2501     if (TCR_8(polls[ticket & mask].poll) != ticket)
2502         __kmp_printf("LOCK CONTENTION: %p\n", lck);
2503     /* else __kmp_printf( "." );*/
2504 #endif /* USE_LOCK_PROFILE */
2505 
2506     //
2507     // Now spin-wait, but reload the polls pointer and mask, in case the
2508     // polling area has been reconfigured.  Unless it is reconfigured, the
2509     // reloads stay in L1 cache and are cheap.
2510     //
2511     // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2512     //
2513     // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2514     // and poll to be re-read every spin iteration.
2515     //
2516     kmp_uint32 spins;
2517 
2518     KMP_FSYNC_PREPARE(lck);
2519     KMP_INIT_YIELD(spins);
2520     while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2521         // If we are oversubscribed,
2522         // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2523         // CPU Pause is in the macros for yield.
2524         //
2525         KMP_YIELD(TCR_4(__kmp_nth)
2526           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2527         KMP_YIELD_SPIN(spins);
2528 
2529         // Re-read the mask and the poll pointer from the lock structure.
2530         //
2531         // Make certain that "mask" is read before "polls" !!!
2532         //
        // If another thread reconfigures the polling area and updates its
        // fields, and we pick up the new value of mask but the old polls
        // pointer, we could access memory beyond the end of the old polling
        // area.
2537         //
2538         mask = TCR_8(lck->lk.mask);                     // volatile load
2539         polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2540           TCR_PTR(lck->lk.polls);                       // volatile load
2541     }
2542 
2543     //
2544     // Critical section starts here
2545     //
2546     KMP_FSYNC_ACQUIRED(lck);
2547     KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2548       ticket, lck));
2549     lck->lk.now_serving = ticket;                       // non-volatile store
2550 
2551     //
2552     // Deallocate a garbage polling area if we know that we are the last
2553     // thread that could possibly access it.
2554     //
2555     // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2556     // ticket.
2557     //
2558     if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2559         __kmp_free((void *)lck->lk.old_polls);
2560         lck->lk.old_polls = NULL;
2561         lck->lk.cleanup_ticket = 0;
2562     }
2563 
2564     //
2565     // Check to see if we should reconfigure the polling area.
2566     // If there is still a garbage polling area to be deallocated from a
2567     // previous reconfiguration, let a later thread reconfigure it.
2568     //
2569     if (lck->lk.old_polls == NULL) {
2570         bool reconfigure = false;
2571         volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2572         kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2573 
2574         if (TCR_4(__kmp_nth)
2575           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2576             //
2577             // We are in oversubscription mode.  Contract the polling area
2578             // down to a single location, if that hasn't been done already.
2579             //
2580             if (num_polls > 1) {
2581                 reconfigure = true;
                mask = 0;
                num_polls = 1;
2585                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2586                   __kmp_allocate(num_polls * sizeof(*polls));
2587                 polls[0].poll = ticket;
2588             }
2589         }
2590         else {
2591             //
2592             // We are in under/fully subscribed mode.  Check the number of
2593             // threads waiting on the lock.  The size of the polling area
2594             // should be at least the number of threads waiting.
2595             //
2596             kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2597             if (num_waiting > num_polls) {
2598                 kmp_uint32 old_num_polls = num_polls;
2599                 reconfigure = true;
2600                 do {
2601                     mask = (mask << 1) | 1;
2602                     num_polls *= 2;
2603                 } while (num_polls <= num_waiting);
2604 
2605                 //
2606                 // Allocate the new polling area, and copy the relevant portion
2607                 // of the old polling area to the new area.  __kmp_allocate()
2608                 // zeroes the memory it allocates, and most of the old area is
2609                 // just zero padding, so we only copy the release counters.
2610                 //
2611                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2612                   __kmp_allocate(num_polls * sizeof(*polls));
2613                 kmp_uint32 i;
2614                 for (i = 0; i < old_num_polls; i++) {
2615                     polls[i].poll = old_polls[i].poll;
2616                 }
2617             }
2618         }
2619 
2620         if (reconfigure) {
2621             //
2622             // Now write the updated fields back to the lock structure.
2623             //
2624             // Make certain that "polls" is written before "mask" !!!
2625             //
2626             // If another thread picks up the new value of mask and the old
            // polls pointer, it could access memory beyond the end of the
2628             // old polling area.
2629             //
2630             // On x86, we need memory fences.
2631             //
2632             KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2633               ticket, lck, num_polls));
2634 
2635             lck->lk.old_polls = old_polls;              // non-volatile store
2636             lck->lk.polls = polls;                      // volatile store
2637 
2638             KMP_MB();
2639 
2640             lck->lk.num_polls = num_polls;              // non-volatile store
2641             lck->lk.mask = mask;                        // volatile store
2642 
2643             KMP_MB();
2644 
2645             //
2646             // Only after the new polling area and mask have been flushed
2647             // to main memory can we update the cleanup ticket field.
2648             //
2649             // volatile load / non-volatile store
2650             //
2651             lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2652         }
2653     }
2654     return KMP_LOCK_ACQUIRED_FIRST;
2655 }
2656 
2657 int
2658 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2659 {
2660     return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2661 }
2662 
2663 static int
2664 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2665 {
2666     char const * const func = "omp_set_lock";
2667     if ( lck->lk.initialized != lck ) {
2668         KMP_FATAL( LockIsUninitialized, func );
2669     }
2670     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2671         KMP_FATAL( LockNestableUsedAsSimple, func );
2672     }
2673     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2674         KMP_FATAL( LockIsAlreadyOwned, func );
2675     }
2676 
2677     __kmp_acquire_drdpa_lock( lck, gtid );
2678 
2679     lck->lk.owner_id = gtid + 1;
2680     return KMP_LOCK_ACQUIRED_FIRST;
2681 }
2682 
2683 int
2684 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2685 {
2686     //
2687     // First get a ticket, then read the polls pointer and the mask.
2688     // The polls pointer must be read before the mask!!! (See above)
2689     //
2690     kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
2691     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2692       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2693       TCR_PTR(lck->lk.polls);                           // volatile load
2694     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2695     if (TCR_8(polls[ticket & mask].poll) == ticket) {
2696         kmp_uint64 next_ticket = ticket + 1;
2697         if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2698           ticket, next_ticket)) {
2699             KMP_FSYNC_ACQUIRED(lck);
2700             KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2701                ticket, lck));
2702             lck->lk.now_serving = ticket;               // non-volatile store
2703 
2704             //
2705             // Since no threads are waiting, there is no possibility that
2706             // we would want to reconfigure the polling area.  We might
2707             // have the cleanup ticket value (which says that it is now
2708             // safe to deallocate old_polls), but we'll let a later thread
2709             // which calls __kmp_acquire_lock do that - this routine
2710             // isn't supposed to block, and we would risk blocks if we
2711             // called __kmp_free() to do the deallocation.
2712             //
2713             return TRUE;
2714         }
2715     }
2716     return FALSE;
2717 }
2718 
2719 static int
2720 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2721 {
2722     char const * const func = "omp_test_lock";
2723     if ( lck->lk.initialized != lck ) {
2724         KMP_FATAL( LockIsUninitialized, func );
2725     }
2726     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2727         KMP_FATAL( LockNestableUsedAsSimple, func );
2728     }
2729 
2730     int retval = __kmp_test_drdpa_lock( lck, gtid );
2731 
2732     if ( retval ) {
2733         lck->lk.owner_id = gtid + 1;
2734     }
2735     return retval;
2736 }
2737 
2738 int
2739 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2740 {
2741     //
2742     // Read the ticket value from the lock data struct, then the polls
2743     // pointer and the mask.  The polls pointer must be read before the
2744     // mask!!! (See above)
2745     //
2746     kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
2747     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2748       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2749       TCR_PTR(lck->lk.polls);                           // volatile load
2750     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2751     KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2752        ticket - 1, lck));
2753     KMP_FSYNC_RELEASING(lck);
2754     KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2755     return KMP_LOCK_RELEASED;
2756 }
2757 
2758 static int
2759 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2760 {
2761     char const * const func = "omp_unset_lock";
2762     KMP_MB();  /* in case another processor initialized lock */
2763     if ( lck->lk.initialized != lck ) {
2764         KMP_FATAL( LockIsUninitialized, func );
2765     }
2766     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2767         KMP_FATAL( LockNestableUsedAsSimple, func );
2768     }
2769     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2770         KMP_FATAL( LockUnsettingFree, func );
2771     }
2772     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2773       && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2774         KMP_FATAL( LockUnsettingSetByAnother, func );
2775     }
2776     lck->lk.owner_id = 0;
2777     return __kmp_release_drdpa_lock( lck, gtid );
2778 }
2779 
2780 void
2781 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2782 {
2783     lck->lk.location = NULL;
2784     lck->lk.mask = 0;
2785     lck->lk.num_polls = 1;
2786     lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2787       __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2788     lck->lk.cleanup_ticket = 0;
2789     lck->lk.old_polls = NULL;
2790     lck->lk.next_ticket = 0;
2791     lck->lk.now_serving = 0;
2792     lck->lk.owner_id = 0;      // no thread owns the lock.
2793     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2794     lck->lk.initialized = lck;
2795 
2796     KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2797 }
2798 
2799 static void
2800 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2801 {
2802     __kmp_init_drdpa_lock( lck );
2803 }
2804 
2805 void
2806 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2807 {
2808     lck->lk.initialized = NULL;
2809     lck->lk.location    = NULL;
2810     if (lck->lk.polls != NULL) {
2811         __kmp_free((void *)lck->lk.polls);
2812         lck->lk.polls = NULL;
2813     }
2814     if (lck->lk.old_polls != NULL) {
2815         __kmp_free((void *)lck->lk.old_polls);
2816         lck->lk.old_polls = NULL;
2817     }
2818     lck->lk.mask = 0;
2819     lck->lk.num_polls = 0;
2820     lck->lk.cleanup_ticket = 0;
2821     lck->lk.next_ticket = 0;
2822     lck->lk.now_serving = 0;
2823     lck->lk.owner_id = 0;
2824     lck->lk.depth_locked = -1;
2825 }
2826 
2827 static void
2828 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2829 {
2830     char const * const func = "omp_destroy_lock";
2831     if ( lck->lk.initialized != lck ) {
2832         KMP_FATAL( LockIsUninitialized, func );
2833     }
2834     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2835         KMP_FATAL( LockNestableUsedAsSimple, func );
2836     }
2837     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2838         KMP_FATAL( LockStillOwned, func );
2839     }
2840     __kmp_destroy_drdpa_lock( lck );
2841 }
2842 
2843 
2844 //
2845 // nested drdpa ticket locks
2846 //
2847 
2848 int
2849 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2850 {
2851     KMP_DEBUG_ASSERT( gtid >= 0 );
2852 
2853     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2854         lck->lk.depth_locked += 1;
2855         return KMP_LOCK_ACQUIRED_NEXT;
2856     }
2857     else {
2858         __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2859         KMP_MB();
2860         lck->lk.depth_locked = 1;
2861         KMP_MB();
2862         lck->lk.owner_id = gtid + 1;
2863         return KMP_LOCK_ACQUIRED_FIRST;
2864     }
2865 }
2866 
2867 static void
2868 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2869 {
2870     char const * const func = "omp_set_nest_lock";
2871     if ( lck->lk.initialized != lck ) {
2872         KMP_FATAL( LockIsUninitialized, func );
2873     }
2874     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2875         KMP_FATAL( LockSimpleUsedAsNestable, func );
2876     }
2877     __kmp_acquire_nested_drdpa_lock( lck, gtid );
2878 }
2879 
2880 int
2881 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2882 {
2883     int retval;
2884 
2885     KMP_DEBUG_ASSERT( gtid >= 0 );
2886 
2887     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2888         retval = ++lck->lk.depth_locked;
2889     }
2890     else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2891         retval = 0;
2892     }
2893     else {
2894         KMP_MB();
2895         retval = lck->lk.depth_locked = 1;
2896         KMP_MB();
2897         lck->lk.owner_id = gtid + 1;
2898     }
2899     return retval;
2900 }
2901 
2902 static int
2903 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2904 {
2905     char const * const func = "omp_test_nest_lock";
2906     if ( lck->lk.initialized != lck ) {
2907         KMP_FATAL( LockIsUninitialized, func );
2908     }
2909     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2910         KMP_FATAL( LockSimpleUsedAsNestable, func );
2911     }
2912     return __kmp_test_nested_drdpa_lock( lck, gtid );
2913 }
2914 
2915 int
2916 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2917 {
2918     KMP_DEBUG_ASSERT( gtid >= 0 );
2919 
2920     KMP_MB();
2921     if ( --(lck->lk.depth_locked) == 0 ) {
2922         KMP_MB();
2923         lck->lk.owner_id = 0;
2924         __kmp_release_drdpa_lock( lck, gtid );
2925         return KMP_LOCK_RELEASED;
2926     }
2927     return KMP_LOCK_STILL_HELD;
2928 }
2929 
2930 static int
2931 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2932 {
2933     char const * const func = "omp_unset_nest_lock";
2934     KMP_MB();  /* in case another processor initialized lock */
2935     if ( lck->lk.initialized != lck ) {
2936         KMP_FATAL( LockIsUninitialized, func );
2937     }
2938     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2939         KMP_FATAL( LockSimpleUsedAsNestable, func );
2940     }
2941     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2942         KMP_FATAL( LockUnsettingFree, func );
2943     }
2944     if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2945         KMP_FATAL( LockUnsettingSetByAnother, func );
2946     }
2947     return __kmp_release_nested_drdpa_lock( lck, gtid );
2948 }
2949 
2950 void
2951 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2952 {
2953     __kmp_init_drdpa_lock( lck );
2954     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2955 }
2956 
2957 static void
2958 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2959 {
2960     __kmp_init_nested_drdpa_lock( lck );
2961 }
2962 
2963 void
2964 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2965 {
2966     __kmp_destroy_drdpa_lock( lck );
2967     lck->lk.depth_locked = 0;
2968 }
2969 
2970 static void
2971 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2972 {
2973     char const * const func = "omp_destroy_nest_lock";
2974     if ( lck->lk.initialized != lck ) {
2975         KMP_FATAL( LockIsUninitialized, func );
2976     }
2977     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2978         KMP_FATAL( LockSimpleUsedAsNestable, func );
2979     }
2980     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2981         KMP_FATAL( LockStillOwned, func );
2982     }
2983     __kmp_destroy_nested_drdpa_lock( lck );
2984 }
2985 
2986 
2987 //
2988 // access functions to fields which don't exist for all lock kinds.
2989 //
2990 
2991 static int
2992 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2993 {
2994     return lck == lck->lk.initialized;
2995 }
2996 
2997 static const ident_t *
2998 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2999 {
3000     return lck->lk.location;
3001 }
3002 
3003 static void
3004 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3005 {
3006     lck->lk.location = loc;
3007 }
3008 
3009 static kmp_lock_flags_t
3010 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3011 {
3012     return lck->lk.flags;
3013 }
3014 
3015 static void
3016 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3017 {
3018     lck->lk.flags = flags;
3019 }
3020 
3021 // Time stamp counter
3022 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3023 # define __kmp_tsc() __kmp_hardware_timestamp()
3024 // Runtime's default backoff parameters
3025 kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 };
3026 #else
3027 // Use nanoseconds for other platforms
3028 extern kmp_uint64 __kmp_now_nsec();
3029 kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 };
3030 # define __kmp_tsc() __kmp_now_nsec()
3031 #endif
3032 
3033 // A useful predicate for dealing with timestamps that may wrap.
3034 // Is a before b?
3035 // Since the timestamps may wrap, this is asking whether it's
3036 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
// Times for which going clockwise is a shorter distance than going
// anti-clockwise are in the future; the others are in the past.
// e.g. a = MAX-1, b = MAX+1 (=0): the unsigned comparison a > b (true) does
//      not mean a has reached b, whereas signed(a) = -2, signed(b) = 0
//      captures the actual difference.
3041 static inline bool before(kmp_uint64 a, kmp_uint64 b)
3042 {
3043     return ((kmp_int64)b - (kmp_int64)a) > 0;
3044 }
3045 
3046 // Truncated binary exponential backoff function
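// Each call spins for boff->step intervals of boff->min_tick time units, then
// roughly doubles step for the next call, truncated by max_backoff (assumed to
// be a power of two so that max_backoff-1 acts as a mask).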
3047 void
3048 __kmp_spin_backoff(kmp_backoff_t *boff)
3049 {
    // We could flatten this loop, but making it a nested loop gives better results.
3051     kmp_uint32 i;
3052     for (i = boff->step; i > 0; i--) {
3053         kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
3054         do {
3055             KMP_CPU_PAUSE();
3056         } while (before(__kmp_tsc(), goal));
3057     }
3058     boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1);
3059 }
3060 
3061 #if KMP_USE_DYNAMIC_LOCK
3062 
// Direct lock initializer. It simply writes a tag to the low 8 bits of the lock word.
3064 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
3065 {
3066     TCW_4(*lck, KMP_GET_D_TAG(seq));
3067     KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
3068 }
3069 
3070 #if KMP_USE_TSX
3071 
3072 // HLE lock functions - imported from the testbed runtime.
3073 #define HLE_ACQUIRE ".byte 0xf2;"
3074 #define HLE_RELEASE ".byte 0xf3;"
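// These are the XACQUIRE (0xf2) and XRELEASE (0xf3) instruction prefixes used
// to elide the lock via Hardware Lock Elision.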
3075 
3076 static inline kmp_uint32
3077 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3078 {
3079     __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3080                     : "+r"(v), "+m"(*p)
3081                     :
3082                     : "memory");
3083     return v;
3084 }
3085 
3086 static void
3087 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3088 {
3089     TCW_4(*lck, 0);
3090 }
3091 
3092 static void
3093 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3094 {
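    // Try an elided exchange first; if the lock looks busy, spin reading it
    // (with an exponentially growing, capped pause) until it appears free,
    // then retry the exchange.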
3095     // Use gtid for KMP_LOCK_BUSY if necessary
3096     if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
3097         int delay = 1;
3098         do {
3099             while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
3100                 for (int i = delay; i != 0; --i)
3101                     KMP_CPU_PAUSE();
3102                 delay = ((delay << 1) | 1) & 7;
3103             }
3104         } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
3105     }
3106 }
3107 
3108 static void
3109 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3110 {
3111     __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3112 }
3113 
3114 static int
3115 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3116 {
3117     __asm__ volatile(HLE_RELEASE "movl %1,%0"
3118                     : "=m"(*lck)
3119                     : "r"(KMP_LOCK_FREE(hle))
3120                     : "memory");
3121     return KMP_LOCK_RELEASED;
3122 }
3123 
3124 static int
3125 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3126 {
3127     return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3128 }
3129 
3130 static int
3131 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3132 {
3133     return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
3134 }
3135 
3136 static int
3137 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3138 {
3139     return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3140 }
3141 
3142 static void
3143 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
3144 {
3145     __kmp_init_queuing_lock(lck);
3146 }
3147 
3148 static void
3149 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
3150 {
3151     __kmp_destroy_queuing_lock(lck);
3152 }
3153 
3154 static void
3155 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3156 {
3157     unsigned retries=3, status;
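    // Try speculative acquisition up to "retries" times. An explicit abort
    // with code 0xff means the lock was held inside the transaction, so wait
    // for it to become free before retrying; any other non-retryable abort
    // falls through to the queuing-lock acquisition below.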
3158     do {
3159         status = _xbegin();
3160         if (status == _XBEGIN_STARTED) {
3161             if (__kmp_is_unlocked_queuing_lock(lck))
3162                 return;
3163             _xabort(0xff);
3164         }
3165         if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
3166             // Wait until lock becomes free
3167             while (! __kmp_is_unlocked_queuing_lock(lck))
3168                 __kmp_yield(TRUE);
3169         }
3170         else if (!(status & _XABORT_RETRY))
3171             break;
3172     } while (retries--);
3173 
3174     // Fall-back non-speculative lock (xchg)
3175     __kmp_acquire_queuing_lock(lck, gtid);
3176 }
3177 
3178 static void
3179 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3180 {
3181     __kmp_acquire_rtm_lock(lck, gtid);
3182 }
3183 
3184 static int
3185 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3186 {
3187     if (__kmp_is_unlocked_queuing_lock(lck)) {
3188         // Releasing from speculation
3189         _xend();
3190     }
3191     else {
3192         // Releasing from a real lock
3193         __kmp_release_queuing_lock(lck, gtid);
3194     }
3195     return KMP_LOCK_RELEASED;
3196 }
3197 
3198 static int
3199 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3200 {
3201     return __kmp_release_rtm_lock(lck, gtid);
3202 }
3203 
3204 static int
3205 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3206 {
3207     unsigned retries=3, status;
3208     do {
3209         status = _xbegin();
3210         if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
3211             return 1;
3212         }
3213         if (!(status & _XABORT_RETRY))
3214             break;
3215     } while (retries--);
3216 
3217     return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
3218 }
3219 
3220 static int
3221 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3222 {
3223     return __kmp_test_rtm_lock(lck, gtid);
3224 }
3225 
3226 #endif // KMP_USE_TSX
3227 
3228 // Entry functions for indirect locks (first element of direct lock jump tables).
3229 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3230 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3231 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3232 static int  __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3233 static int  __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3234 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3235 static int  __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3236 static int  __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3237 
3238 //
3239 // Jump tables for the indirect lock functions.
3240 // Only fill in the odd entries, that avoids the need to shift out the low bit.
3241 //
3242 
3243 // init functions
3244 #define expand(l, op) 0,__kmp_init_direct_lock,
3245 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3246     = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
3247 #undef expand
3248 
3249 // destroy functions
3250 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3251 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
3252     = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
3253 #undef expand
3254 
3255 // set/acquire functions
3256 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3257 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
3258     = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3259 #undef expand
3260 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3261 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
3262     = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3263 #undef expand
3264 
3265 // unset/release and test functions
3266 #define expand(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3267 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
3268     = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
3269 static int  (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
3270     = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
3271 #undef expand
3272 #define expand(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3273 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
3274     = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
3275 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
3276     = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
3277 #undef expand
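
// Layout sketch for the direct tables above (illustrative; the actual entries come
// from KMP_FOREACH_D_LOCK and the build configuration -- here assuming tas, futex,
// and hle with KMP_USE_FUTEX and KMP_USE_TSX enabled, casts omitted):
//     direct_set = { __kmp_set_indirect_lock, 0,
//                    0, __kmp_acquire_tas_lock,
//                    0, __kmp_acquire_futex_lock,
//                    0, __kmp_acquire_hle_lock };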
3278 
// Exposes only one set of jump tables (*lock or *lock_with_checks); selected in __kmp_init_dynamic_user_locks().
3280 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
3281 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
3282 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
3283 
3284 //
3285 // Jump tables for the indirect lock functions.
3286 //
3287 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3288 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
3289 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
3290 #undef expand
3291 
3292 // set/acquire functions
3293 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3294 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3295 #undef expand
3296 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3297 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3298 #undef expand
3299 
3300 // unset/release and test functions
3301 #define expand(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3302 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3303 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3304 #undef expand
3305 #define expand(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3306 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3307 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3308 #undef expand
3309 
// Exposes only one set of jump tables (*lock or *lock_with_checks); selected in __kmp_init_dynamic_user_locks().
3311 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3312 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3313 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
3314 
3315 // Lock index table.
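// The table is a growable array of pointers to fixed-size blocks of KMP_I_LOCK_CHUNK
// kmp_indirect_lock_t objects: 'size' is the current capacity in locks, 'next' the
// next unused index, and 'table' the array of block pointers (see
// __kmp_allocate_indirect_lock below for the growth policy).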
3316 kmp_indirect_lock_table_t __kmp_i_lock_table;
3317 
3318 // Size of indirect locks.
3319 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
3320 
3321 // Jump tables for lock accessor/modifier.
3322 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3323 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3324 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3325 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3326 
3327 // Use different lock pools for different lock types.
3328 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
3329 
// User lock allocator for dynamically dispatched indirect locks.
// Every entry of the indirect lock table holds the address and type of the allocated indirect lock
// (kmp_indirect_lock_t), and the table doubles in size when it fills up. A destroyed indirect lock
// object is returned to the reusable pool of locks, unique to each lock type.
3334 kmp_indirect_lock_t *
3335 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3336 {
3337     kmp_indirect_lock_t *lck;
3338     kmp_lock_index_t idx;
3339 
3340     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3341 
3342     if (__kmp_indirect_lock_pool[tag] != NULL) {
3343         // Reuse the allocated and destroyed lock object
3344         lck = __kmp_indirect_lock_pool[tag];
3345         if (OMP_LOCK_T_SIZE < sizeof(void *))
3346             idx = lck->lock->pool.index;
3347         __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3348         KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
3349     } else {
3350         idx = __kmp_i_lock_table.next;
3351         // Check capacity and double the size if it is full
3352         if (idx == __kmp_i_lock_table.size) {
3353             // Double up the space for block pointers
3354             int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
3355             kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3356             __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
3357             KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
3358             __kmp_free(old_table);
3359             // Allocate new objects in the new blocks
3360             for (int i = row; i < 2*row; ++i)
3361                 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
3362                                                   __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3363             __kmp_i_lock_table.size = 2*idx;
3364         }
3365         __kmp_i_lock_table.next++;
3366         lck = KMP_GET_I_LOCK(idx);
3367         // Allocate a new base lock object
3368         lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3369         KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3370     }
3371 
3372     __kmp_release_lock(&__kmp_global_lock, gtid);
3373 
3374     lck->type = tag;
3375 
3376     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3377         *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3378     } else {
3379         *((kmp_indirect_lock_t **)user_lock) = lck;
3380     }
3381 
3382     return lck;
3383 }
3384 
3385 // User lock lookup for dynamically dispatched locks.
3386 static __forceinline
3387 kmp_indirect_lock_t *
3388 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3389 {
3390     if (__kmp_env_consistency_check) {
3391         kmp_indirect_lock_t *lck = NULL;
3392         if (user_lock == NULL) {
3393             KMP_FATAL(LockIsUninitialized, func);
3394         }
3395         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3396             kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3397             if (idx >= __kmp_i_lock_table.size) {
3398                 KMP_FATAL(LockIsUninitialized, func);
3399             }
3400             lck = KMP_GET_I_LOCK(idx);
3401         } else {
3402             lck = *((kmp_indirect_lock_t **)user_lock);
3403         }
3404         if (lck == NULL) {
3405             KMP_FATAL(LockIsUninitialized, func);
3406         }
3407         return lck;
3408     } else {
3409         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3410             return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3411         } else {
3412             return *((kmp_indirect_lock_t **)user_lock);
3413         }
3414     }
3415 }
3416 
3417 static void
3418 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3419 {
3420 #if KMP_USE_ADAPTIVE_LOCKS
3421     if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3422         KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3423         seq = lockseq_queuing;
3424     }
3425 #endif
3426 #if KMP_USE_TSX
3427     if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3428         seq = lockseq_queuing;
3429     }
3430 #endif
3431     kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3432     kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3433     KMP_I_LOCK_FUNC(l, init)(l->lock);
3434     KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
3435 }
3436 
3437 static void
3438 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3439 {
3440     kmp_uint32 gtid = __kmp_entry_gtid();
3441     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3442     KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3443     kmp_indirect_locktag_t tag = l->type;
3444 
3445     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3446 
3447     // Use the base lock's space to keep the pool chain.
3448     l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3449     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3450         l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3451     }
3452     __kmp_indirect_lock_pool[tag] = l;
3453 
3454     __kmp_release_lock(&__kmp_global_lock, gtid);
3455 }
3456 
3457 static void
3458 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3459 {
3460     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3461     KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3462 }
3463 
3464 static int
3465 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3466 {
3467     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3468     return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3469 }
3470 
3471 static int
3472 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3473 {
3474     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3475     return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3476 }
3477 
3478 static void
3479 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3480 {
3481     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3482     KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3483 }
3484 
3485 static int
3486 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3487 {
3488     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3489     return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3490 }
3491 
3492 static int
3493 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3494 {
3495     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3496     return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3497 }
3498 
3499 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3500 
// This is used only in kmp_error.cpp when consistency checking is on.
3502 kmp_int32
3503 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3504 {
3505     switch (seq) {
3506         case lockseq_tas:
3507         case lockseq_nested_tas:
3508             return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3509 #if KMP_USE_FUTEX
3510         case lockseq_futex:
3511         case lockseq_nested_futex:
3512             return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3513 #endif
3514         case lockseq_ticket:
3515         case lockseq_nested_ticket:
3516             return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3517         case lockseq_queuing:
3518         case lockseq_nested_queuing:
3519 #if KMP_USE_ADAPTIVE_LOCKS
3520         case lockseq_adaptive:
3521 #endif
3522             return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3523         case lockseq_drdpa:
3524         case lockseq_nested_drdpa:
3525             return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3526         default:
3527             return 0;
3528     }
3529 }
3530 
3531 // Initializes data for dynamic user locks.
3532 void
3533 __kmp_init_dynamic_user_locks()
3534 {
3535     // Initialize jump table for the lock functions
3536     if (__kmp_env_consistency_check) {
3537         __kmp_direct_set     = direct_set_check;
3538         __kmp_direct_unset   = direct_unset_check;
3539         __kmp_direct_test    = direct_test_check;
3540         __kmp_indirect_set   = indirect_set_check;
3541         __kmp_indirect_unset = indirect_unset_check;
3542         __kmp_indirect_test  = indirect_test_check;
3543     }
3544     else {
3545         __kmp_direct_set     = direct_set;
3546         __kmp_direct_unset   = direct_unset;
3547         __kmp_direct_test    = direct_test;
3548         __kmp_indirect_set   = indirect_set;
3549         __kmp_indirect_unset = indirect_unset;
3550         __kmp_indirect_test  = indirect_test;
3551     }
3552 
3553     // Initialize lock index table
3554     __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3555     __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3556     *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
3557                                   __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3558     __kmp_i_lock_table.next = 0;
3559 
3560     // Indirect lock size
3561     __kmp_indirect_lock_size[locktag_ticket]         = sizeof(kmp_ticket_lock_t);
3562     __kmp_indirect_lock_size[locktag_queuing]        = sizeof(kmp_queuing_lock_t);
3563 #if KMP_USE_ADAPTIVE_LOCKS
3564     __kmp_indirect_lock_size[locktag_adaptive]       = sizeof(kmp_adaptive_lock_t);
3565 #endif
3566     __kmp_indirect_lock_size[locktag_drdpa]          = sizeof(kmp_drdpa_lock_t);
3567 #if KMP_USE_TSX
3568     __kmp_indirect_lock_size[locktag_rtm]            = sizeof(kmp_queuing_lock_t);
3569 #endif
3570     __kmp_indirect_lock_size[locktag_nested_tas]     = sizeof(kmp_tas_lock_t);
3571 #if KMP_USE_FUTEX
3572     __kmp_indirect_lock_size[locktag_nested_futex]   = sizeof(kmp_futex_lock_t);
3573 #endif
3574     __kmp_indirect_lock_size[locktag_nested_ticket]  = sizeof(kmp_ticket_lock_t);
3575     __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3576     __kmp_indirect_lock_size[locktag_nested_drdpa]   = sizeof(kmp_drdpa_lock_t);
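    // Direct lock types (tas, futex, hle) do not appear in the size table because they
    // live entirely in the lock word; their nested variants, however, are dispatched as
    // indirect locks and therefore need the size entries above.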
3577 
3578     // Initialize lock accessor/modifier
3579 #define fill_jumps(table, expand, sep) {            \
3580     table[locktag##sep##ticket]  = expand(ticket);  \
3581     table[locktag##sep##queuing] = expand(queuing); \
3582     table[locktag##sep##drdpa]   = expand(drdpa);   \
3583 }
3584 
3585 #if KMP_USE_ADAPTIVE_LOCKS
3586 # define fill_table(table, expand) {           \
3587     fill_jumps(table, expand, _);              \
3588     table[locktag_adaptive] = expand(queuing); \
3589     fill_jumps(table, expand, _nested_);       \
3590 }
3591 #else
3592 # define fill_table(table, expand) {           \
3593     fill_jumps(table, expand, _);              \
3594     fill_jumps(table, expand, _nested_);       \
3595 }
3596 #endif // KMP_USE_ADAPTIVE_LOCKS
3597 
3598 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3599     fill_table(__kmp_indirect_set_location, expand);
3600 #undef expand
3601 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3602     fill_table(__kmp_indirect_set_flags, expand);
3603 #undef expand
3604 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3605     fill_table(__kmp_indirect_get_location, expand);
3606 #undef expand
3607 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3608     fill_table(__kmp_indirect_get_flags, expand);
3609 #undef expand
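    // Illustrative expansion of the first fill_table call above (casts omitted):
    //     __kmp_indirect_set_location[locktag_ticket]  = __kmp_set_ticket_lock_location;
    //     __kmp_indirect_set_location[locktag_queuing] = __kmp_set_queuing_lock_location;
    //     __kmp_indirect_set_location[locktag_drdpa]   = __kmp_set_drdpa_lock_location;
    // with the adaptive slot reusing the queuing accessors and the nested tags filled
    // with the same non-nested functions.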
3610 
3611     __kmp_init_user_locks = TRUE;
3612 }
3613 
3614 // Clean up the lock table.
3615 void
3616 __kmp_cleanup_indirect_user_locks()
3617 {
3618     kmp_lock_index_t i;
3619     int k;
3620 
3621     // Clean up locks in the pools first (they were already destroyed before going into the pools).
3622     for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3623         kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3624         while (l != NULL) {
3625             kmp_indirect_lock_t *ll = l;
3626             l = (kmp_indirect_lock_t *)l->lock->pool.next;
3627             KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
3628             __kmp_free(ll->lock);
3629             ll->lock = NULL;
3630         }
3631         __kmp_indirect_lock_pool[k] = NULL;
3632     }
3633     // Clean up the remaining undestroyed locks.
3634     for (i = 0; i < __kmp_i_lock_table.next; i++) {
3635         kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3636         if (l->lock != NULL) {
3637             // Locks not destroyed explicitly need to be destroyed here.
3638             KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3639             KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
3640             __kmp_free(l->lock);
3641         }
3642     }
3643     // Free the table
3644     for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3645         __kmp_free(__kmp_i_lock_table.table[i]);
3646     __kmp_free(__kmp_i_lock_table.table);
3647 
3648     __kmp_init_user_locks = FALSE;
3649 }
3650 
3651 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3652 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3653 
3654 #else // KMP_USE_DYNAMIC_LOCK
3655 
3656 /* ------------------------------------------------------------------------ */
3657 /* user locks
3658  *
3659  * They are implemented as a table of function pointers which are set to the
3660  * lock functions of the appropriate kind, once that has been determined.
3661  */
3662 
3663 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3664 
3665 size_t __kmp_base_user_lock_size = 0;
3666 size_t __kmp_user_lock_size = 0;
3667 
3668 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3669 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3670 
3671 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3672 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3673 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3674 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3675 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3676 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3677 
3678 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3679 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3680 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3681 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3682 
3683 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3684 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3685 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3686 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3687 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3688 
3689 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3690 {
3691     switch ( user_lock_kind ) {
3692         case lk_default:
3693         default:
3694         KMP_ASSERT( 0 );
3695 
3696         case lk_tas: {
3697             __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3698             __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3699 
3700             __kmp_get_user_lock_owner_ =
3701               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3702               ( &__kmp_get_tas_lock_owner );
3703 
3704             if ( __kmp_env_consistency_check ) {
3705                 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3706                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3707             }
3708             else {
3709                 KMP_BIND_USER_LOCK(tas);
3710                 KMP_BIND_NESTED_USER_LOCK(tas);
3711             }
3712 
3713             __kmp_destroy_user_lock_ =
3714               ( void ( * )( kmp_user_lock_p ) )
3715               ( &__kmp_destroy_tas_lock );
3716 
3717              __kmp_is_user_lock_initialized_ =
3718                ( int ( * )( kmp_user_lock_p ) ) NULL;
3719 
3720              __kmp_get_user_lock_location_ =
3721                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3722 
3723              __kmp_set_user_lock_location_ =
3724                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3725 
3726              __kmp_get_user_lock_flags_ =
3727                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3728 
3729              __kmp_set_user_lock_flags_ =
3730                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3731         }
3732         break;
3733 
3734 #if KMP_USE_FUTEX
3735 
3736         case lk_futex: {
3737             __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3738             __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3739 
3740             __kmp_get_user_lock_owner_ =
3741               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3742               ( &__kmp_get_futex_lock_owner );
3743 
3744             if ( __kmp_env_consistency_check ) {
3745                 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3746                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3747             }
3748             else {
3749                 KMP_BIND_USER_LOCK(futex);
3750                 KMP_BIND_NESTED_USER_LOCK(futex);
3751             }
3752 
3753             __kmp_destroy_user_lock_ =
3754               ( void ( * )( kmp_user_lock_p ) )
3755               ( &__kmp_destroy_futex_lock );
3756 
3757              __kmp_is_user_lock_initialized_ =
3758                ( int ( * )( kmp_user_lock_p ) ) NULL;
3759 
3760              __kmp_get_user_lock_location_ =
3761                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3762 
3763              __kmp_set_user_lock_location_ =
3764                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3765 
3766              __kmp_get_user_lock_flags_ =
3767                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3768 
3769              __kmp_set_user_lock_flags_ =
3770                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3771         }
3772         break;
3773 
3774 #endif // KMP_USE_FUTEX
3775 
3776         case lk_ticket: {
3777             __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3778             __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3779 
3780             __kmp_get_user_lock_owner_ =
3781               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3782               ( &__kmp_get_ticket_lock_owner );
3783 
3784             if ( __kmp_env_consistency_check ) {
3785                 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3786                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3787             }
3788             else {
3789                 KMP_BIND_USER_LOCK(ticket);
3790                 KMP_BIND_NESTED_USER_LOCK(ticket);
3791             }
3792 
3793             __kmp_destroy_user_lock_ =
3794               ( void ( * )( kmp_user_lock_p ) )
3795               ( &__kmp_destroy_ticket_lock );
3796 
3797              __kmp_is_user_lock_initialized_ =
3798                ( int ( * )( kmp_user_lock_p ) )
3799                ( &__kmp_is_ticket_lock_initialized );
3800 
3801              __kmp_get_user_lock_location_ =
3802                ( const ident_t * ( * )( kmp_user_lock_p ) )
3803                ( &__kmp_get_ticket_lock_location );
3804 
3805              __kmp_set_user_lock_location_ =
3806                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3807                ( &__kmp_set_ticket_lock_location );
3808 
3809              __kmp_get_user_lock_flags_ =
3810                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3811                ( &__kmp_get_ticket_lock_flags );
3812 
3813              __kmp_set_user_lock_flags_ =
3814                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3815                ( &__kmp_set_ticket_lock_flags );
3816         }
3817         break;
3818 
3819         case lk_queuing: {
3820             __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3821             __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3822 
3823             __kmp_get_user_lock_owner_ =
3824               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3825               ( &__kmp_get_queuing_lock_owner );
3826 
3827             if ( __kmp_env_consistency_check ) {
3828                 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3829                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3830             }
3831             else {
3832                 KMP_BIND_USER_LOCK(queuing);
3833                 KMP_BIND_NESTED_USER_LOCK(queuing);
3834             }
3835 
3836             __kmp_destroy_user_lock_ =
3837               ( void ( * )( kmp_user_lock_p ) )
3838               ( &__kmp_destroy_queuing_lock );
3839 
3840              __kmp_is_user_lock_initialized_ =
3841                ( int ( * )( kmp_user_lock_p ) )
3842                ( &__kmp_is_queuing_lock_initialized );
3843 
3844              __kmp_get_user_lock_location_ =
3845                ( const ident_t * ( * )( kmp_user_lock_p ) )
3846                ( &__kmp_get_queuing_lock_location );
3847 
3848              __kmp_set_user_lock_location_ =
3849                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3850                ( &__kmp_set_queuing_lock_location );
3851 
3852              __kmp_get_user_lock_flags_ =
3853                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3854                ( &__kmp_get_queuing_lock_flags );
3855 
3856              __kmp_set_user_lock_flags_ =
3857                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3858                ( &__kmp_set_queuing_lock_flags );
3859         }
3860         break;
3861 
3862 #if KMP_USE_ADAPTIVE_LOCKS
3863         case lk_adaptive: {
3864             __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3865             __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3866 
3867             __kmp_get_user_lock_owner_ =
3868               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3869               ( &__kmp_get_queuing_lock_owner );
3870 
3871             if ( __kmp_env_consistency_check ) {
3872                 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3873             }
3874             else {
3875                 KMP_BIND_USER_LOCK(adaptive);
3876             }
3877 
3878             __kmp_destroy_user_lock_ =
3879               ( void ( * )( kmp_user_lock_p ) )
3880               ( &__kmp_destroy_adaptive_lock );
3881 
3882             __kmp_is_user_lock_initialized_ =
3883               ( int ( * )( kmp_user_lock_p ) )
3884               ( &__kmp_is_queuing_lock_initialized );
3885 
3886             __kmp_get_user_lock_location_ =
3887               ( const ident_t * ( * )( kmp_user_lock_p ) )
3888               ( &__kmp_get_queuing_lock_location );
3889 
3890             __kmp_set_user_lock_location_ =
3891               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3892               ( &__kmp_set_queuing_lock_location );
3893 
3894             __kmp_get_user_lock_flags_ =
3895               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3896               ( &__kmp_get_queuing_lock_flags );
3897 
3898             __kmp_set_user_lock_flags_ =
3899               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3900               ( &__kmp_set_queuing_lock_flags );
3901 
3902         }
3903         break;
3904 #endif // KMP_USE_ADAPTIVE_LOCKS
3905 
3906         case lk_drdpa: {
3907             __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3908             __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3909 
3910             __kmp_get_user_lock_owner_ =
3911               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3912               ( &__kmp_get_drdpa_lock_owner );
3913 
3914             if ( __kmp_env_consistency_check ) {
3915                 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3916                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3917             }
3918             else {
3919                 KMP_BIND_USER_LOCK(drdpa);
3920                 KMP_BIND_NESTED_USER_LOCK(drdpa);
3921             }
3922 
3923             __kmp_destroy_user_lock_ =
3924               ( void ( * )( kmp_user_lock_p ) )
3925               ( &__kmp_destroy_drdpa_lock );
3926 
3927              __kmp_is_user_lock_initialized_ =
3928                ( int ( * )( kmp_user_lock_p ) )
3929                ( &__kmp_is_drdpa_lock_initialized );
3930 
3931              __kmp_get_user_lock_location_ =
3932                ( const ident_t * ( * )( kmp_user_lock_p ) )
3933                ( &__kmp_get_drdpa_lock_location );
3934 
3935              __kmp_set_user_lock_location_ =
3936                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3937                ( &__kmp_set_drdpa_lock_location );
3938 
3939              __kmp_get_user_lock_flags_ =
3940                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3941                ( &__kmp_get_drdpa_lock_flags );
3942 
3943              __kmp_set_user_lock_flags_ =
3944                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3945                ( &__kmp_set_drdpa_lock_flags );
3946         }
3947         break;
3948     }
3949 }
3950 
3951 
3952 // ----------------------------------------------------------------------------
3953 // User lock table & lock allocation
3954 
3955 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3956 kmp_user_lock_p __kmp_lock_pool = NULL;
3957 
3958 // Lock block-allocation support.
3959 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3960 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3961 
3962 static kmp_lock_index_t
3963 __kmp_lock_table_insert( kmp_user_lock_p lck )
3964 {
3965     // Assume that kmp_global_lock is held upon entry/exit.
3966     kmp_lock_index_t index;
3967     if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3968         kmp_lock_index_t size;
3969         kmp_user_lock_p *table;
3970         // Reallocate lock table.
3971         if ( __kmp_user_lock_table.allocated == 0 ) {
3972             size = 1024;
3973         }
3974         else {
3975             size = __kmp_user_lock_table.allocated * 2;
3976         }
3977         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3978         KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3979         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
            // We cannot free the previous table now, since it may still be in use by
            // other threads. So save the pointer to the previous table in the first
            // element of the new table. All the tables are linked into a list this way
            // and are freed when the library shuts down.
3984         __kmp_user_lock_table.table = table;
3985         __kmp_user_lock_table.allocated = size;
3986     }
3987     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3988     index = __kmp_user_lock_table.used;
3989     __kmp_user_lock_table.table[ index ] = lck;
3990     ++ __kmp_user_lock_table.used;
3991     return index;
3992 }
3993 
3994 static kmp_user_lock_p
3995 __kmp_lock_block_allocate()
3996 {
3997     // Assume that kmp_global_lock is held upon entry/exit.
3998     static int last_index = 0;
3999     if ( ( last_index >= __kmp_num_locks_in_block )
4000       || ( __kmp_lock_blocks == NULL ) ) {
4001         // Restart the index.
4002         last_index = 0;
4003         // Need to allocate a new block.
4004         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4005         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
4006         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
4007         // Set up the new block.
4008         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
4009         new_block->next_block = __kmp_lock_blocks;
4010         new_block->locks = (void *)buffer;
4011         // Publish the new block.
4012         KMP_MB();
4013         __kmp_lock_blocks = new_block;
4014     }
4015     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
4016       [ last_index * __kmp_user_lock_size ] ) );
4017     last_index++;
4018     return ret;
4019 }
4020 
4021 //
4022 // Get memory for a lock. It may be freshly allocated memory or reused memory
4023 // from lock pool.
4024 //
4025 kmp_user_lock_p
4026 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
4027   kmp_lock_flags_t flags )
4028 {
4029     kmp_user_lock_p lck;
4030     kmp_lock_index_t index;
4031     KMP_DEBUG_ASSERT( user_lock );
4032 
4033     __kmp_acquire_lock( &__kmp_global_lock, gtid );
4034 
4035     if ( __kmp_lock_pool == NULL ) {
4036         // Lock pool is empty. Allocate new memory.
4037         if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
4038             lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
4039         }
4040         else {
4041             lck = __kmp_lock_block_allocate();
4042         }
4043 
        // Insert the lock into the table so that it can be freed in __kmp_cleanup,
        // and so the debugger has info on all allocated locks.
4046         index = __kmp_lock_table_insert( lck );
4047     }
4048     else {
4049         // Pick up lock from pool.
4050         lck = __kmp_lock_pool;
4051         index = __kmp_lock_pool->pool.index;
4052         __kmp_lock_pool = __kmp_lock_pool->pool.next;
4053     }
4054 
4055     //
4056     // We could potentially differentiate between nested and regular locks
4057     // here, and do the lock table lookup for regular locks only.
4058     //
4059     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4060         * ( (kmp_lock_index_t *) user_lock ) = index;
4061     }
4062     else {
4063         * ( (kmp_user_lock_p *) user_lock ) = lck;
4064     }
4065 
    // Mark the lock if it is a critical section lock.
4067     __kmp_set_user_lock_flags( lck, flags );
4068 
    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this release earlier
4070 
4071     return lck;
4072 }
4073 
4074 // Put lock's memory to pool for reusing.
4075 void
4076 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
4077 {
4078     KMP_DEBUG_ASSERT( user_lock != NULL );
4079     KMP_DEBUG_ASSERT( lck != NULL );
4080 
4081     __kmp_acquire_lock( & __kmp_global_lock, gtid );
4082 
4083     lck->pool.next = __kmp_lock_pool;
4084     __kmp_lock_pool = lck;
4085     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4086         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4087         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4088         lck->pool.index = index;
4089     }
4090 
4091     __kmp_release_lock( & __kmp_global_lock, gtid );
4092 }
4093 
4094 kmp_user_lock_p
4095 __kmp_lookup_user_lock( void **user_lock, char const *func )
4096 {
4097     kmp_user_lock_p lck = NULL;
4098 
4099     if ( __kmp_env_consistency_check ) {
4100         if ( user_lock == NULL ) {
4101             KMP_FATAL( LockIsUninitialized, func );
4102         }
4103     }
4104 
4105     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4106         kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4107         if ( __kmp_env_consistency_check ) {
4108             if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4109                 KMP_FATAL( LockIsUninitialized, func );
4110             }
4111         }
4112         KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4113         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4114         lck = __kmp_user_lock_table.table[index];
4115     }
4116     else {
4117         lck = *( (kmp_user_lock_p *)user_lock );
4118     }
4119 
4120     if ( __kmp_env_consistency_check ) {
4121         if ( lck == NULL ) {
4122             KMP_FATAL( LockIsUninitialized, func );
4123         }
4124     }
4125 
4126     return lck;
4127 }
4128 
4129 void
4130 __kmp_cleanup_user_locks( void )
4131 {
4132     //
4133     // Reset lock pool. Do not worry about lock in the pool -- we will free
4134     // them when iterating through lock table (it includes all the locks,
4135     // dead or alive).
4136     //
4137     __kmp_lock_pool = NULL;
4138 
4139 #define IS_CRITICAL(lck) \
4140         ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4141         ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4142 
4143     //
4144     // Loop through lock table, free all locks.
4145     //
4146     // Do not free item [0], it is reserved for lock tables list.
4147     //
4148     // FIXME - we are iterating through a list of (pointers to) objects of
4149     // type union kmp_user_lock, but we have no way of knowing whether the
4150     // base type is currently "pool" or whatever the global user lock type
4151     // is.
4152     //
4153     // We are relying on the fact that for all of the user lock types
4154     // (except "tas"), the first field in the lock struct is the "initialized"
4155     // field, which is set to the address of the lock object itself when
4156     // the lock is initialized.  When the union is of type "pool", the
4157     // first field is a pointer to the next object in the free list, which
4158     // will not be the same address as the object itself.
4159     //
4160     // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4161     // will fail for "pool" objects on the free list.  This must happen as
4162     // the "location" field of real user locks overlaps the "index" field
4163     // of "pool" objects.
4164     //
4165     // It would be better to run through the free list, and remove all "pool"
4166     // objects from the lock table before executing this loop.  However,
4167     // "pool" objects do not always have their index field set (only on
4168     // lin_32e), and I don't want to search the lock table for the address
4169     // of every "pool" object on the free list.
4170     //
4171     while ( __kmp_user_lock_table.used > 1 ) {
4172         const ident *loc;
4173 
4174         //
4175         // reduce __kmp_user_lock_table.used before freeing the lock,
4176         // so that state of locks is consistent
4177         //
4178         kmp_user_lock_p lck = __kmp_user_lock_table.table[
4179           --__kmp_user_lock_table.used ];
4180 
4181         if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4182           ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4183             //
4184             // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4185             // initialized AND it is NOT a critical section (user is not
4186             // responsible for destroying criticals) AND we know source
4187             // location to report.
4188             //
4189             if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4190               ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4191               ( loc->psource != NULL ) ) {
4192                 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4193                 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4194                 __kmp_str_loc_free( &str_loc);
4195             }
4196 
4197 #ifdef KMP_DEBUG
4198             if ( IS_CRITICAL( lck ) ) {
4199                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4200             }
4201             else {
4202                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4203             }
4204 #endif // KMP_DEBUG
4205 
4206             //
4207             // Cleanup internal lock dynamic resources
4208             // (for drdpa locks particularly).
4209             //
4210             __kmp_destroy_user_lock( lck );
4211         }
4212 
4213         //
4214         // Free the lock if block allocation of locks is not used.
4215         //
4216         if ( __kmp_lock_blocks == NULL ) {
4217             __kmp_free( lck );
4218         }
4219     }
4220 
4221 #undef IS_CRITICAL
4222 
4223     //
4224     // delete lock table(s).
4225     //
4226     kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4227     __kmp_user_lock_table.table = NULL;
4228     __kmp_user_lock_table.allocated = 0;
4229 
4230     while ( table_ptr != NULL ) {
4231         //
4232         // In the first element we saved the pointer to the previous
4233         // (smaller) lock table.
4234         //
4235         kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4236         __kmp_free( table_ptr );
4237         table_ptr = next;
4238     }
4239 
4240     //
4241     // Free buffers allocated for blocks of locks.
4242     //
4243     kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4244     __kmp_lock_blocks = NULL;
4245 
4246     while ( block_ptr != NULL ) {
4247         kmp_block_of_locks_t *next = block_ptr->next_block;
4248         __kmp_free( block_ptr->locks );
4249         //
4250         // *block_ptr itself was allocated at the end of the locks vector.
4251         //
        block_ptr = next;
4253     }
4254 
4255     TCW_4(__kmp_init_user_locks, FALSE);
4256 }
4257 
4258 #endif // KMP_USE_DYNAMIC_LOCK
4259