1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 
18 #include "kmp.h"
19 #include "kmp_itt.h"
20 #include "kmp_i18n.h"
21 #include "kmp_lock.h"
22 #include "kmp_io.h"
23 
24 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
25 # include <unistd.h>
26 # include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions, which either require that you also include <pci/types.h> or
// break when you try to include it.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't
// change), we just define the constants here and don't include <futex.h>.
32 # ifndef FUTEX_WAIT
33 #  define FUTEX_WAIT    0
34 # endif
35 # ifndef FUTEX_WAKE
36 #  define FUTEX_WAKE    1
37 # endif
38 #endif
39 
40 /* Implement spin locks for internal library use.             */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
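/* In outline, the ticket (bakery) scheme used by the ticket locks later in this
   file works as follows (a sketch only, not the exact code; casts and some
   arguments are omitted):

       my_ticket = KMP_TEST_THEN_INC32( &lck->lk.next_ticket );       // take a ticket
       KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, ... );        // wait to be served
       ... critical section ...
       KMP_ST_REL32( &lck->lk.now_serving, lck->lk.now_serving + 1 ); // serve next ticket

   Correct behavior across ticket-counter overflow is why __kmp_validate_locks()
   below checks that unsigned arithmetic wraps as expected. */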
42 
43 void
44 __kmp_validate_locks( void )
45 {
46     int i;
47     kmp_uint32  x, y;
48 
    /* Check to make sure unsigned arithmetic wraps properly */
50     x = ~((kmp_uint32) 0) - 2;
51     y = x - 2;
52 
53     for (i = 0; i < 8; ++i, ++x, ++y) {
54         kmp_uint32 z = (x - y);
55         KMP_ASSERT( z == 2 );
56     }
57 
58     KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
59 }
60 
61 
62 /* ------------------------------------------------------------------------ */
63 /* test and set locks */
64 
65 //
66 // For the non-nested locks, we can only assume that the first 4 bytes were
67 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
68 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
69 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
70 //
71 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
72 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
73 //
74 
75 static kmp_int32
76 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
77 {
78     return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
79 }
80 
81 static inline bool
82 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
83 {
84     return lck->lk.depth_locked != -1;
85 }
86 
87 __forceinline static int
88 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
89 {
90     KMP_MB();
91 
92 #ifdef USE_LOCK_PROFILE
93     kmp_uint32 curr = TCR_4( lck->lk.poll );
94     if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
95         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
96     /* else __kmp_printf( "." );*/
97 #endif /* USE_LOCK_PROFILE */
98 
99     if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
100       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
101         KMP_FSYNC_ACQUIRED(lck);
102         return KMP_LOCK_ACQUIRED_FIRST;
103     }
104 
105     kmp_uint32 spins;
106     KMP_FSYNC_PREPARE( lck );
107     KMP_INIT_YIELD( spins );
108     if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
109       __kmp_xproc ) ) {
110         KMP_YIELD( TRUE );
111     }
112     else {
113         KMP_YIELD_SPIN( spins );
114     }
115 
116     while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
117       ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
118         //
119         // FIXME - use exponential backoff here
120         //
121         if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
122           __kmp_xproc ) ) {
123             KMP_YIELD( TRUE );
124         }
125         else {
126             KMP_YIELD_SPIN( spins );
127         }
128     }
129     KMP_FSYNC_ACQUIRED( lck );
130     return KMP_LOCK_ACQUIRED_FIRST;
131 }
132 
133 int
134 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
135 {
136     return __kmp_acquire_tas_lock_timed_template( lck, gtid );
137 }
138 
139 static int
140 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
141 {
142     char const * const func = "omp_set_lock";
143     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
144       && __kmp_is_tas_lock_nestable( lck ) ) {
145         KMP_FATAL( LockNestableUsedAsSimple, func );
146     }
147     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
148         KMP_FATAL( LockIsAlreadyOwned, func );
149     }
150     return __kmp_acquire_tas_lock( lck, gtid );
151 }
152 
153 int
154 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
155 {
156     if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
157       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
158         KMP_FSYNC_ACQUIRED( lck );
159         return TRUE;
160     }
161     return FALSE;
162 }
163 
164 static int
165 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
166 {
167     char const * const func = "omp_test_lock";
168     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
169       && __kmp_is_tas_lock_nestable( lck ) ) {
170         KMP_FATAL( LockNestableUsedAsSimple, func );
171     }
172     return __kmp_test_tas_lock( lck, gtid );
173 }
174 
175 int
176 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
177 {
178     KMP_MB();       /* Flush all pending memory write invalidates.  */
179 
180     KMP_FSYNC_RELEASING(lck);
181     KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
182     KMP_MB();       /* Flush all pending memory write invalidates.  */
183 
184     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
185       __kmp_xproc ) );
186     return KMP_LOCK_RELEASED;
187 }
188 
189 static int
190 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
191 {
192     char const * const func = "omp_unset_lock";
193     KMP_MB();  /* in case another processor initialized lock */
194     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
195       && __kmp_is_tas_lock_nestable( lck ) ) {
196         KMP_FATAL( LockNestableUsedAsSimple, func );
197     }
198     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
199         KMP_FATAL( LockUnsettingFree, func );
200     }
201     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
202       && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
203         KMP_FATAL( LockUnsettingSetByAnother, func );
204     }
205     return __kmp_release_tas_lock( lck, gtid );
206 }
207 
208 void
209 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
210 {
211     TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
212 }
213 
214 static void
215 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
216 {
217     __kmp_init_tas_lock( lck );
218 }
219 
220 void
221 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
222 {
223     lck->lk.poll = 0;
224 }
225 
226 static void
227 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
228 {
229     char const * const func = "omp_destroy_lock";
230     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
231       && __kmp_is_tas_lock_nestable( lck ) ) {
232         KMP_FATAL( LockNestableUsedAsSimple, func );
233     }
234     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
235         KMP_FATAL( LockStillOwned, func );
236     }
237     __kmp_destroy_tas_lock( lck );
238 }
239 
240 
241 //
242 // nested test and set locks
243 //
244 
245 int
246 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
247 {
248     KMP_DEBUG_ASSERT( gtid >= 0 );
249 
250     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
251         lck->lk.depth_locked += 1;
252         return KMP_LOCK_ACQUIRED_NEXT;
253     }
254     else {
255         __kmp_acquire_tas_lock_timed_template( lck, gtid );
256         lck->lk.depth_locked = 1;
257         return KMP_LOCK_ACQUIRED_FIRST;
258     }
259 }
260 
261 static int
262 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
263 {
264     char const * const func = "omp_set_nest_lock";
265     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
266         KMP_FATAL( LockSimpleUsedAsNestable, func );
267     }
268     return __kmp_acquire_nested_tas_lock( lck, gtid );
269 }
270 
271 int
272 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
273 {
274     int retval;
275 
276     KMP_DEBUG_ASSERT( gtid >= 0 );
277 
278     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
279         retval = ++lck->lk.depth_locked;
280     }
281     else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
282         retval = 0;
283     }
284     else {
285         KMP_MB();
286         retval = lck->lk.depth_locked = 1;
287     }
288     return retval;
289 }
290 
291 static int
292 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
293 {
294     char const * const func = "omp_test_nest_lock";
295     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
296         KMP_FATAL( LockSimpleUsedAsNestable, func );
297     }
298     return __kmp_test_nested_tas_lock( lck, gtid );
299 }
300 
301 int
302 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
303 {
304     KMP_DEBUG_ASSERT( gtid >= 0 );
305 
306     KMP_MB();
307     if ( --(lck->lk.depth_locked) == 0 ) {
308         __kmp_release_tas_lock( lck, gtid );
309         return KMP_LOCK_RELEASED;
310     }
311     return KMP_LOCK_STILL_HELD;
312 }
313 
314 static int
315 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
316 {
317     char const * const func = "omp_unset_nest_lock";
318     KMP_MB();  /* in case another processor initialized lock */
319     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
320         KMP_FATAL( LockSimpleUsedAsNestable, func );
321     }
322     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
323         KMP_FATAL( LockUnsettingFree, func );
324     }
325     if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
326         KMP_FATAL( LockUnsettingSetByAnother, func );
327     }
328     return __kmp_release_nested_tas_lock( lck, gtid );
329 }
330 
331 void
332 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
333 {
334     __kmp_init_tas_lock( lck );
335     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
336 }
337 
338 static void
339 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
340 {
341     __kmp_init_nested_tas_lock( lck );
342 }
343 
344 void
345 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
346 {
347     __kmp_destroy_tas_lock( lck );
348     lck->lk.depth_locked = 0;
349 }
350 
351 static void
352 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
353 {
354     char const * const func = "omp_destroy_nest_lock";
355     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
356         KMP_FATAL( LockSimpleUsedAsNestable, func );
357     }
358     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
359         KMP_FATAL( LockStillOwned, func );
360     }
361     __kmp_destroy_nested_tas_lock( lck );
362 }
363 
364 
365 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
366 
367 /* ------------------------------------------------------------------------ */
368 /* futex locks */
369 
370 // futex locks are really just test and set locks, with a different method
371 // of handling contention.  They take the same amount of space as test and
372 // set locks, and are allocated the same way (i.e. use the area allocated by
373 // the compiler for non-nested locks / allocate nested locks on the heap).
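//
// Layout of lk.poll as used by the code below:
//     0                       lock free
//     (gtid+1) << 1           lock held by gtid, no known waiters
//     ((gtid+1) << 1) | 1     lock held by gtid, and at least one thread may be
//                             blocked in futex_wait, so the releasing thread
//                             must issue a futex_wake.
//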
374 
375 static kmp_int32
376 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
377 {
378     return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
379 }
380 
381 static inline bool
382 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
383 {
384     return lck->lk.depth_locked != -1;
385 }
386 
387 __forceinline static int
388 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
389 {
390     kmp_int32 gtid_code = ( gtid + 1 ) << 1;
391 
392     KMP_MB();
393 
394 #ifdef USE_LOCK_PROFILE
395     kmp_uint32 curr = TCR_4( lck->lk.poll );
396     if ( ( curr != 0 ) && ( curr != gtid_code ) )
397         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
398     /* else __kmp_printf( "." );*/
399 #endif /* USE_LOCK_PROFILE */
400 
401     KMP_FSYNC_PREPARE( lck );
402     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
403       lck, lck->lk.poll, gtid ) );
404 
405     kmp_int32 poll_val;
406 
407     while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
408              KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
409 
410         kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
411         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
412            lck, gtid, poll_val, cond ) );
413 
414         //
415         // NOTE: if you try to use the following condition for this branch
416         //
417         // if ( poll_val & 1 == 0 )
418         //
419         // Then the 12.0 compiler has a bug where the following block will
420         // always be skipped, regardless of the value of the LSB of poll_val.
421         //
422         if ( ! cond ) {
423             //
424             // Try to set the lsb in the poll to indicate to the owner
425             // thread that they need to wake this thread up.
426             //
427             if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
428                 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
429                   lck, lck->lk.poll, gtid ) );
430                 continue;
431             }
432             poll_val |= KMP_LOCK_BUSY(1, futex);
433 
434             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
435               lck, lck->lk.poll, gtid ) );
436         }
437 
438         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
439            lck, gtid, poll_val ) );
440 
441         kmp_int32 rc;
442         if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
443           poll_val, NULL, NULL, 0 ) ) != 0 ) {
444             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
445                lck, gtid, poll_val, rc, errno ) );
446             continue;
447         }
448 
449         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
450            lck, gtid, poll_val ) );
451         //
452         // This thread has now done a successful futex wait call and was
453         // entered on the OS futex queue.  We must now perform a futex
454         // wake call when releasing the lock, as we have no idea how many
455         // other threads are in the queue.
456         //
457         gtid_code |= 1;
458     }
459 
460     KMP_FSYNC_ACQUIRED( lck );
461     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
462       lck, lck->lk.poll, gtid ) );
463     return KMP_LOCK_ACQUIRED_FIRST;
464 }
465 
466 int
467 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
468 {
    return __kmp_acquire_futex_lock_timed_template( lck, gtid );
470 }
471 
472 static int
473 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
474 {
475     char const * const func = "omp_set_lock";
476     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
477       && __kmp_is_futex_lock_nestable( lck ) ) {
478         KMP_FATAL( LockNestableUsedAsSimple, func );
479     }
480     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
481         KMP_FATAL( LockIsAlreadyOwned, func );
482     }
483     return __kmp_acquire_futex_lock( lck, gtid );
484 }
485 
486 int
487 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
488 {
489     if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
490         KMP_FSYNC_ACQUIRED( lck );
491         return TRUE;
492     }
493     return FALSE;
494 }
495 
496 static int
497 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
498 {
499     char const * const func = "omp_test_lock";
500     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
501       && __kmp_is_futex_lock_nestable( lck ) ) {
502         KMP_FATAL( LockNestableUsedAsSimple, func );
503     }
504     return __kmp_test_futex_lock( lck, gtid );
505 }
506 
507 int
508 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
509 {
510     KMP_MB();       /* Flush all pending memory write invalidates.  */
511 
512     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
513       lck, lck->lk.poll, gtid ) );
514 
515     KMP_FSYNC_RELEASING(lck);
516 
517     kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
518 
519     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
520        lck, gtid, poll_val ) );
521 
522     if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
523         KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
524            lck, gtid ) );
525         syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
526     }
527 
528     KMP_MB();       /* Flush all pending memory write invalidates.  */
529 
530     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
531       lck, lck->lk.poll, gtid ) );
532 
533     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
534       __kmp_xproc ) );
535     return KMP_LOCK_RELEASED;
536 }
537 
538 static int
539 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
540 {
541     char const * const func = "omp_unset_lock";
542     KMP_MB();  /* in case another processor initialized lock */
543     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
544       && __kmp_is_futex_lock_nestable( lck ) ) {
545         KMP_FATAL( LockNestableUsedAsSimple, func );
546     }
547     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
548         KMP_FATAL( LockUnsettingFree, func );
549     }
550     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
551       && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
552         KMP_FATAL( LockUnsettingSetByAnother, func );
553     }
554     return __kmp_release_futex_lock( lck, gtid );
555 }
556 
557 void
558 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
559 {
560     TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
561 }
562 
563 static void
564 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
565 {
566     __kmp_init_futex_lock( lck );
567 }
568 
569 void
570 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
571 {
572     lck->lk.poll = 0;
573 }
574 
575 static void
576 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
577 {
578     char const * const func = "omp_destroy_lock";
579     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
580       && __kmp_is_futex_lock_nestable( lck ) ) {
581         KMP_FATAL( LockNestableUsedAsSimple, func );
582     }
583     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
584         KMP_FATAL( LockStillOwned, func );
585     }
586     __kmp_destroy_futex_lock( lck );
587 }
588 
589 
590 //
591 // nested futex locks
592 //
593 
594 int
595 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
596 {
597     KMP_DEBUG_ASSERT( gtid >= 0 );
598 
599     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
600         lck->lk.depth_locked += 1;
601         return KMP_LOCK_ACQUIRED_NEXT;
602     }
603     else {
604         __kmp_acquire_futex_lock_timed_template( lck, gtid );
605         lck->lk.depth_locked = 1;
606         return KMP_LOCK_ACQUIRED_FIRST;
607     }
608 }
609 
610 static int
611 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
612 {
613     char const * const func = "omp_set_nest_lock";
614     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
615         KMP_FATAL( LockSimpleUsedAsNestable, func );
616     }
617     return __kmp_acquire_nested_futex_lock( lck, gtid );
618 }
619 
620 int
621 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
622 {
623     int retval;
624 
625     KMP_DEBUG_ASSERT( gtid >= 0 );
626 
627     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
628         retval = ++lck->lk.depth_locked;
629     }
630     else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
631         retval = 0;
632     }
633     else {
634         KMP_MB();
635         retval = lck->lk.depth_locked = 1;
636     }
637     return retval;
638 }
639 
640 static int
641 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
642 {
643     char const * const func = "omp_test_nest_lock";
644     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
645         KMP_FATAL( LockSimpleUsedAsNestable, func );
646     }
647     return __kmp_test_nested_futex_lock( lck, gtid );
648 }
649 
650 int
651 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
652 {
653     KMP_DEBUG_ASSERT( gtid >= 0 );
654 
655     KMP_MB();
656     if ( --(lck->lk.depth_locked) == 0 ) {
657         __kmp_release_futex_lock( lck, gtid );
658         return KMP_LOCK_RELEASED;
659     }
660     return KMP_LOCK_STILL_HELD;
661 }
662 
663 static int
664 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
665 {
666     char const * const func = "omp_unset_nest_lock";
667     KMP_MB();  /* in case another processor initialized lock */
668     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
669         KMP_FATAL( LockSimpleUsedAsNestable, func );
670     }
671     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
672         KMP_FATAL( LockUnsettingFree, func );
673     }
674     if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
675         KMP_FATAL( LockUnsettingSetByAnother, func );
676     }
677     return __kmp_release_nested_futex_lock( lck, gtid );
678 }
679 
680 void
681 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
682 {
683     __kmp_init_futex_lock( lck );
684     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
685 }
686 
687 static void
688 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
689 {
690     __kmp_init_nested_futex_lock( lck );
691 }
692 
693 void
694 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
695 {
696     __kmp_destroy_futex_lock( lck );
697     lck->lk.depth_locked = 0;
698 }
699 
700 static void
701 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
702 {
703     char const * const func = "omp_destroy_nest_lock";
704     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
705         KMP_FATAL( LockSimpleUsedAsNestable, func );
706     }
707     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
708         KMP_FATAL( LockStillOwned, func );
709     }
710     __kmp_destroy_nested_futex_lock( lck );
711 }
712 
#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
714 
715 
716 /* ------------------------------------------------------------------------ */
717 /* ticket (bakery) locks */
718 
719 static kmp_int32
720 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
721 {
722     return TCR_4( lck->lk.owner_id ) - 1;
723 }
724 
725 static inline bool
726 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
727 {
728     return lck->lk.depth_locked != -1;
729 }
730 
731 static kmp_uint32
732 __kmp_bakery_check(kmp_uint32 value, kmp_uint32 checker)
733 {
734     register kmp_uint32 pause;
735 
736     if (value == checker) {
737         return TRUE;
738     }
739     for (pause = checker - value; pause != 0; --pause);
740     return FALSE;
741 }
742 
743 __forceinline static int
744 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
745 {
746     kmp_uint32 my_ticket;
747     KMP_MB();
748 
749     my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
750 
751 #ifdef USE_LOCK_PROFILE
752     if ( TCR_4( lck->lk.now_serving ) != my_ticket )
753         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
754     /* else __kmp_printf( "." );*/
755 #endif /* USE_LOCK_PROFILE */
756 
757     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
758         KMP_FSYNC_ACQUIRED(lck);
759         return KMP_LOCK_ACQUIRED_FIRST;
760     }
761     KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
762     KMP_FSYNC_ACQUIRED(lck);
763     return KMP_LOCK_ACQUIRED_FIRST;
764 }
765 
766 int
767 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
768 {
769     return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
770 }
771 
772 static int
773 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
774 {
775     char const * const func = "omp_set_lock";
776     if ( lck->lk.initialized != lck ) {
777         KMP_FATAL( LockIsUninitialized, func );
778     }
779     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
780         KMP_FATAL( LockNestableUsedAsSimple, func );
781     }
782     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
783         KMP_FATAL( LockIsAlreadyOwned, func );
784     }
785 
786     __kmp_acquire_ticket_lock( lck, gtid );
787 
788     lck->lk.owner_id = gtid + 1;
789     return KMP_LOCK_ACQUIRED_FIRST;
790 }
791 
792 int
793 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
794 {
795     kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
796     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
797         kmp_uint32 next_ticket = my_ticket + 1;
798         if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
799           my_ticket, next_ticket ) ) {
800             KMP_FSYNC_ACQUIRED( lck );
801             return TRUE;
802         }
803     }
804     return FALSE;
805 }
806 
807 static int
808 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
809 {
810     char const * const func = "omp_test_lock";
811     if ( lck->lk.initialized != lck ) {
812         KMP_FATAL( LockIsUninitialized, func );
813     }
814     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
815         KMP_FATAL( LockNestableUsedAsSimple, func );
816     }
817 
818     int retval = __kmp_test_ticket_lock( lck, gtid );
819 
820     if ( retval ) {
821         lck->lk.owner_id = gtid + 1;
822     }
823     return retval;
824 }
825 
826 int
827 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
828 {
829     kmp_uint32  distance;
830 
831     KMP_MB();       /* Flush all pending memory write invalidates.  */
832 
833     KMP_FSYNC_RELEASING(lck);
834     distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
835 
836     KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
837 
838     KMP_MB();       /* Flush all pending memory write invalidates.  */
839 
840     KMP_YIELD( distance
841       > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
842     return KMP_LOCK_RELEASED;
843 }
844 
845 static int
846 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
847 {
848     char const * const func = "omp_unset_lock";
849     KMP_MB();  /* in case another processor initialized lock */
850     if ( lck->lk.initialized != lck ) {
851         KMP_FATAL( LockIsUninitialized, func );
852     }
853     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
854         KMP_FATAL( LockNestableUsedAsSimple, func );
855     }
856     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
857         KMP_FATAL( LockUnsettingFree, func );
858     }
859     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
860       && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
861         KMP_FATAL( LockUnsettingSetByAnother, func );
862     }
863     lck->lk.owner_id = 0;
864     return __kmp_release_ticket_lock( lck, gtid );
865 }
866 
867 void
868 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
869 {
870     lck->lk.location = NULL;
871     TCW_4( lck->lk.next_ticket, 0 );
872     TCW_4( lck->lk.now_serving, 0 );
873     lck->lk.owner_id = 0;      // no thread owns the lock.
874     lck->lk.depth_locked = -1; // -1 => not a nested lock.
875     lck->lk.initialized = (kmp_ticket_lock *)lck;
876 }
877 
878 static void
879 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
880 {
881     __kmp_init_ticket_lock( lck );
882 }
883 
884 void
885 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
886 {
887     lck->lk.initialized = NULL;
888     lck->lk.location    = NULL;
889     lck->lk.next_ticket = 0;
890     lck->lk.now_serving = 0;
891     lck->lk.owner_id = 0;
892     lck->lk.depth_locked = -1;
893 }
894 
895 static void
896 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
897 {
898     char const * const func = "omp_destroy_lock";
899     if ( lck->lk.initialized != lck ) {
900         KMP_FATAL( LockIsUninitialized, func );
901     }
902     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
903         KMP_FATAL( LockNestableUsedAsSimple, func );
904     }
905     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
906         KMP_FATAL( LockStillOwned, func );
907     }
908     __kmp_destroy_ticket_lock( lck );
909 }
910 
911 
912 //
913 // nested ticket locks
914 //
915 
916 int
917 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
918 {
919     KMP_DEBUG_ASSERT( gtid >= 0 );
920 
921     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
922         lck->lk.depth_locked += 1;
923         return KMP_LOCK_ACQUIRED_NEXT;
924     }
925     else {
926         __kmp_acquire_ticket_lock_timed_template( lck, gtid );
927         KMP_MB();
928         lck->lk.depth_locked = 1;
929         KMP_MB();
930         lck->lk.owner_id = gtid + 1;
931         return KMP_LOCK_ACQUIRED_FIRST;
932     }
933 }
934 
935 static int
936 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
937 {
938     char const * const func = "omp_set_nest_lock";
939     if ( lck->lk.initialized != lck ) {
940         KMP_FATAL( LockIsUninitialized, func );
941     }
942     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
943         KMP_FATAL( LockSimpleUsedAsNestable, func );
944     }
945     return __kmp_acquire_nested_ticket_lock( lck, gtid );
946 }
947 
948 int
949 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
950 {
951     int retval;
952 
953     KMP_DEBUG_ASSERT( gtid >= 0 );
954 
955     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
956         retval = ++lck->lk.depth_locked;
957     }
958     else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
959         retval = 0;
960     }
961     else {
962         KMP_MB();
963         retval = lck->lk.depth_locked = 1;
964         KMP_MB();
965         lck->lk.owner_id = gtid + 1;
966     }
967     return retval;
968 }
969 
970 static int
971 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
972   kmp_int32 gtid )
973 {
974     char const * const func = "omp_test_nest_lock";
975     if ( lck->lk.initialized != lck ) {
976         KMP_FATAL( LockIsUninitialized, func );
977     }
978     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
979         KMP_FATAL( LockSimpleUsedAsNestable, func );
980     }
981     return __kmp_test_nested_ticket_lock( lck, gtid );
982 }
983 
984 int
985 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
986 {
987     KMP_DEBUG_ASSERT( gtid >= 0 );
988 
989     KMP_MB();
990     if ( --(lck->lk.depth_locked) == 0 ) {
991         KMP_MB();
992         lck->lk.owner_id = 0;
993         __kmp_release_ticket_lock( lck, gtid );
994         return KMP_LOCK_RELEASED;
995     }
996     return KMP_LOCK_STILL_HELD;
997 }
998 
999 static int
1000 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1001 {
1002     char const * const func = "omp_unset_nest_lock";
1003     KMP_MB();  /* in case another processor initialized lock */
1004     if ( lck->lk.initialized != lck ) {
1005         KMP_FATAL( LockIsUninitialized, func );
1006     }
1007     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1008         KMP_FATAL( LockSimpleUsedAsNestable, func );
1009     }
1010     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1011         KMP_FATAL( LockUnsettingFree, func );
1012     }
1013     if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1014         KMP_FATAL( LockUnsettingSetByAnother, func );
1015     }
1016     return __kmp_release_nested_ticket_lock( lck, gtid );
1017 }
1018 
1019 void
1020 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1021 {
1022     __kmp_init_ticket_lock( lck );
1023     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1024 }
1025 
1026 static void
1027 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1028 {
1029     __kmp_init_nested_ticket_lock( lck );
1030 }
1031 
1032 void
1033 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1034 {
1035     __kmp_destroy_ticket_lock( lck );
1036     lck->lk.depth_locked = 0;
1037 }
1038 
1039 static void
1040 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1041 {
1042     char const * const func = "omp_destroy_nest_lock";
1043     if ( lck->lk.initialized != lck ) {
1044         KMP_FATAL( LockIsUninitialized, func );
1045     }
1046     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1047         KMP_FATAL( LockSimpleUsedAsNestable, func );
1048     }
1049     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1050         KMP_FATAL( LockStillOwned, func );
1051     }
1052     __kmp_destroy_nested_ticket_lock( lck );
1053 }
1054 
1055 
1056 //
1057 // access functions to fields which don't exist for all lock kinds.
1058 //
1059 
1060 static int
1061 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1062 {
1063     return lck == lck->lk.initialized;
1064 }
1065 
1066 static const ident_t *
1067 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1068 {
1069     return lck->lk.location;
1070 }
1071 
1072 static void
1073 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1074 {
1075     lck->lk.location = loc;
1076 }
1077 
1078 static kmp_lock_flags_t
1079 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1080 {
1081     return lck->lk.flags;
1082 }
1083 
1084 static void
1085 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1086 {
1087     lck->lk.flags = flags;
1088 }
1089 
1090 /* ------------------------------------------------------------------------ */
1091 /* queuing locks */
1092 
1093 /*
1094  * First the states
1095  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1096  *   UINT_MAX or -1, 0  means lock is held, nobody on queue
1097  *                h, h  means lock is held or about to transition, 1 element on queue
1098  *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1099  *
1100  * Now the transitions
1101  *    Acquire(0,0)  = -1 ,0
1102  *    Release(0,0)  = Error
1103  *    Acquire(-1,0) =  h ,h    h > 0
1104  *    Release(-1,0) =  0 ,0
1105  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1106  *    Release(h,h)  = -1 ,0    h > 0
1107  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1108  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1109  *
1110  * And pictorially
1111  *
1112  *
1113  *          +-----+
1114  *          | 0, 0|------- release -------> Error
1115  *          +-----+
1116  *            |  ^
1117  *     acquire|  |release
1118  *            |  |
1119  *            |  |
1120  *            v  |
1121  *          +-----+
1122  *          |-1, 0|
1123  *          +-----+
1124  *            |  ^
1125  *     acquire|  |release
1126  *            |  |
1127  *            |  |
1128  *            v  |
1129  *          +-----+
1130  *          | h, h|
1131  *          +-----+
1132  *            |  ^
1133  *     acquire|  |release
1134  *            |  |
1135  *            |  |
1136  *            v  |
1137  *          +-----+
1138  *          | h, t|----- acquire, release loopback ---+
1139  *          +-----+                                   |
1140  *               ^                                    |
1141  *               |                                    |
1142  *               +------------------------------------+
1143  *
1144  */
1145 
1146 #ifdef DEBUG_QUEUING_LOCKS
1147 
1148 /* Stuff for circular trace buffer */
1149 #define TRACE_BUF_ELE	1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
1151 static int tc = 0;
1152 #define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128,  "t%d at %s\n", X, Y );
1153 #define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1154 #define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1155 
1156 static void
1157 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1158   kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1159 {
1160     kmp_int32 t, i;
1161 
1162     __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1163 
1164     i = tc % TRACE_BUF_ELE;
1165     __kmp_printf_no_lock( "%s\n", traces[i] );
1166     i = (i+1) % TRACE_BUF_ELE;
1167     while ( i != (tc % TRACE_BUF_ELE) ) {
1168         __kmp_printf_no_lock( "%s", traces[i] );
1169         i = (i+1) % TRACE_BUF_ELE;
1170     }
1171     __kmp_printf_no_lock( "\n" );
1172 
1173     __kmp_printf_no_lock(
1174              "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1175              gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1176              head_id, tail_id );
1177 
1178     __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1179 
1180     if ( lck->lk.head_id >= 1 ) {
1181         t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1182         while (t > 0) {
1183             __kmp_printf_no_lock( "-> %d ", t );
1184             t = __kmp_threads[t-1]->th.th_next_waiting;
1185         }
1186     }
1187     __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
1188     __kmp_printf_no_lock( "\n\n" );
1189 }
1190 
1191 #endif /* DEBUG_QUEUING_LOCKS */
1192 
1193 static kmp_int32
1194 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1195 {
1196     return TCR_4( lck->lk.owner_id ) - 1;
1197 }
1198 
1199 static inline bool
1200 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1201 {
1202     return lck->lk.depth_locked != -1;
1203 }
1204 
/* Acquire a lock using the queuing lock implementation */
1206 template <bool takeTime>
/* [TLW] The unused template parameter above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
1209 __forceinline static int
1210 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1211   kmp_int32 gtid )
1212 {
1213     register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
1214     volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
1215     volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
1216     volatile kmp_uint32 *spin_here_p;
1217     kmp_int32 need_mf = 1;
1218 
1219 #if OMPT_SUPPORT
1220     ompt_state_t prev_state = ompt_state_undefined;
1221 #endif
1222 
1223     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1224 
1225     KMP_FSYNC_PREPARE( lck );
1226     KMP_DEBUG_ASSERT( this_thr != NULL );
1227     spin_here_p = & this_thr->th.th_spin_here;
1228 
1229 #ifdef DEBUG_QUEUING_LOCKS
1230     TRACE_LOCK( gtid+1, "acq ent" );
1231     if ( *spin_here_p )
1232         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1233     if ( this_thr->th.th_next_waiting != 0 )
1234         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1235 #endif
1236     KMP_DEBUG_ASSERT( !*spin_here_p );
1237     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1238 
1239 
1240     /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1241        that may follow, not just in execution order, but also in visibility order.  This way,
1242        when a releasing thread observes the changes to the queue by this thread, it can
1243        rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1244        spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
1245        to FALSE before this thread sets it to TRUE, this thread will hang.
1246     */
1247     *spin_here_p = TRUE;  /* before enqueuing to prevent race */
1248 
1249     while( 1 ) {
1250         kmp_int32 enqueued;
1251         kmp_int32 head;
1252         kmp_int32 tail;
1253 
1254         head = *head_id_p;
1255 
1256         switch ( head ) {
1257 
1258             case -1:
1259             {
1260 #ifdef DEBUG_QUEUING_LOCKS
1261                 tail = *tail_id_p;
1262                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1263 #endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                           this assignment prevents us from entering the if ( tail > 0 )
                           condition in the enqueued case below, which is not necessary for
                           this state transition */
1268 
1269                 need_mf = 0;
1270                 /* try (-1,0)->(tid,tid) */
1271                 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1272                   KMP_PACK_64( -1, 0 ),
1273                   KMP_PACK_64( gtid+1, gtid+1 ) );
1274 #ifdef DEBUG_QUEUING_LOCKS
1275                   if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1276 #endif
1277             }
1278             break;
1279 
1280             default:
1281             {
1282                 tail = *tail_id_p;
1283                 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1284 
1285 #ifdef DEBUG_QUEUING_LOCKS
1286                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1287 #endif
1288 
1289                 if ( tail == 0 ) {
1290                     enqueued = FALSE;
1291                 }
1292                 else {
1293                     need_mf = 0;
1294                     /* try (h,t) or (h,h)->(h,tid) */
1295                     enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1296 
1297 #ifdef DEBUG_QUEUING_LOCKS
1298                         if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1299 #endif
1300                 }
1301             }
1302             break;
1303 
1304             case 0: /* empty queue */
1305             {
1306                 kmp_int32 grabbed_lock;
1307 
1308 #ifdef DEBUG_QUEUING_LOCKS
1309                 tail = *tail_id_p;
1310                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1311 #endif
1312                 /* try (0,0)->(-1,0) */
1313 
1314                 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1315                 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1316 
1317                 if ( grabbed_lock ) {
1318 
1319                     *spin_here_p = FALSE;
1320 
1321                     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1322                               lck, gtid ));
1323 #ifdef DEBUG_QUEUING_LOCKS
1324                     TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1325 #endif
1326 
1327 #if OMPT_SUPPORT
1328                     if (ompt_enabled && prev_state != ompt_state_undefined) {
1329                         /* change the state before clearing wait_id */
1330                         this_thr->th.ompt_thread_info.state = prev_state;
1331                         this_thr->th.ompt_thread_info.wait_id = 0;
1332                     }
1333 #endif
1334 
1335                     KMP_FSYNC_ACQUIRED( lck );
1336                     return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1337                 }
1338                 enqueued = FALSE;
1339             }
1340             break;
1341         }
1342 
1343 #if OMPT_SUPPORT
1344         if (ompt_enabled && prev_state == ompt_state_undefined) {
1345             /* this thread will spin; set wait_id before entering wait state */
1346             prev_state = this_thr->th.ompt_thread_info.state;
1347             this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1348             this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1349         }
1350 #endif
1351 
1352         if ( enqueued ) {
1353             if ( tail > 0 ) {
1354                 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1355                 KMP_ASSERT( tail_thr != NULL );
1356                 tail_thr->th.th_next_waiting = gtid+1;
1357                 /* corresponding wait for this write in release code */
1358             }
1359             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1360 
1361 
1362             /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
1363              *       throughput only here.
1364              */
1365             KMP_MB();
1366             KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1367 
1368 #ifdef DEBUG_QUEUING_LOCKS
1369             TRACE_LOCK( gtid+1, "acq spin" );
1370 
1371             if ( this_thr->th.th_next_waiting != 0 )
1372                 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1373 #endif
1374             KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1375             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1376                       lck, gtid ));
1377 
1378 #ifdef DEBUG_QUEUING_LOCKS
1379             TRACE_LOCK( gtid+1, "acq exit 2" );
1380 #endif
1381 
1382 #if OMPT_SUPPORT
1383             /* change the state before clearing wait_id */
1384             this_thr->th.ompt_thread_info.state = prev_state;
1385             this_thr->th.ompt_thread_info.wait_id = 0;
1386 #endif
1387 
1388             /* got lock, we were dequeued by the thread that released lock */
1389             return KMP_LOCK_ACQUIRED_FIRST;
1390         }
1391 
1392         /* Yield if number of threads > number of logical processors */
1393         /* ToDo: Not sure why this should only be in oversubscription case,
1394            maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1395         KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1396           __kmp_xproc ) );
1397 #ifdef DEBUG_QUEUING_LOCKS
1398         TRACE_LOCK( gtid+1, "acq retry" );
1399 #endif
1400 
1401     }
1402     KMP_ASSERT2( 0, "should not get here" );
1403     return KMP_LOCK_ACQUIRED_FIRST;
1404 }
1405 
1406 int
1407 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1408 {
1409     KMP_DEBUG_ASSERT( gtid >= 0 );
1410 
1411     return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1412 }
1413 
1414 static int
1415 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1416   kmp_int32 gtid )
1417 {
1418     char const * const func = "omp_set_lock";
1419     if ( lck->lk.initialized != lck ) {
1420         KMP_FATAL( LockIsUninitialized, func );
1421     }
1422     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1423         KMP_FATAL( LockNestableUsedAsSimple, func );
1424     }
1425     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1426         KMP_FATAL( LockIsAlreadyOwned, func );
1427     }
1428 
1429     __kmp_acquire_queuing_lock( lck, gtid );
1430 
1431     lck->lk.owner_id = gtid + 1;
1432     return KMP_LOCK_ACQUIRED_FIRST;
1433 }
1434 
1435 int
1436 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1437 {
1438     volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
1439     kmp_int32 head;
1440 #ifdef KMP_DEBUG
1441     kmp_info_t *this_thr;
1442 #endif
1443 
1444     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1445     KMP_DEBUG_ASSERT( gtid >= 0 );
1446 #ifdef KMP_DEBUG
1447     this_thr = __kmp_thread_from_gtid( gtid );
1448     KMP_DEBUG_ASSERT( this_thr != NULL );
1449     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1450 #endif
1451 
1452     head = *head_id_p;
1453 
1454     if ( head == 0 ) { /* nobody on queue, nobody holding */
1455 
1456         /* try (0,0)->(-1,0) */
1457 
1458         if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1459             KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1460             KMP_FSYNC_ACQUIRED(lck);
1461             return TRUE;
1462         }
1463     }
1464 
1465     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1466     return FALSE;
1467 }
1468 
1469 static int
1470 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1471 {
1472     char const * const func = "omp_test_lock";
1473     if ( lck->lk.initialized != lck ) {
1474         KMP_FATAL( LockIsUninitialized, func );
1475     }
1476     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1477         KMP_FATAL( LockNestableUsedAsSimple, func );
1478     }
1479 
1480     int retval = __kmp_test_queuing_lock( lck, gtid );
1481 
1482     if ( retval ) {
1483         lck->lk.owner_id = gtid + 1;
1484     }
1485     return retval;
1486 }
1487 
1488 int
1489 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1490 {
1491     register kmp_info_t *this_thr;
1492     volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1493     volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1494 
1495     KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1496     KMP_DEBUG_ASSERT( gtid >= 0 );
1497     this_thr    = __kmp_thread_from_gtid( gtid );
1498     KMP_DEBUG_ASSERT( this_thr != NULL );
1499 #ifdef DEBUG_QUEUING_LOCKS
1500     TRACE_LOCK( gtid+1, "rel ent" );
1501 
1502     if ( this_thr->th.th_spin_here )
1503         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1504     if ( this_thr->th.th_next_waiting != 0 )
1505         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1506 #endif
1507     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1508     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1509 
1510     KMP_FSYNC_RELEASING(lck);
1511 
1512     while( 1 ) {
1513         kmp_int32 dequeued;
1514         kmp_int32 head;
1515         kmp_int32 tail;
1516 
1517         head = *head_id_p;
1518 
1519 #ifdef DEBUG_QUEUING_LOCKS
1520         tail = *tail_id_p;
1521         TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1522         if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1523 #endif
1524         KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1525 
1526         if ( head == -1 ) { /* nobody on queue */
1527 
1528             /* try (-1,0)->(0,0) */
1529             if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1530                 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1531                           lck, gtid ));
1532 #ifdef DEBUG_QUEUING_LOCKS
1533                 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1534 #endif
1535 
1536 #if OMPT_SUPPORT
1537                 /* nothing to do - no other thread is trying to shift blame */
1538 #endif
1539 
1540                 return KMP_LOCK_RELEASED;
1541             }
1542             dequeued = FALSE;
1543 
1544         }
1545         else {
1546 
1547             tail = *tail_id_p;
1548             if ( head == tail ) {  /* only one thread on the queue */
1549 
1550 #ifdef DEBUG_QUEUING_LOCKS
1551                 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1552 #endif
1553                 KMP_DEBUG_ASSERT( head > 0 );
1554 
1555                 /* try (h,h)->(-1,0) */
1556                 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1557                   KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1558 #ifdef DEBUG_QUEUING_LOCKS
1559                 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1560 #endif
1561 
1562             }
1563             else {
1564                 volatile kmp_int32 *waiting_id_p;
1565                 kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
1566                 KMP_DEBUG_ASSERT( head_thr != NULL );
1567                 waiting_id_p = & head_thr->th.th_next_waiting;
1568 
1569                 /* Does this require synchronous reads? */
1570 #ifdef DEBUG_QUEUING_LOCKS
1571                 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1572 #endif
1573                 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1574 
1575                 /* try (h,t)->(h',t) or (t,t) */
1576 
1577                 KMP_MB();
1578                 /* make sure enqueuing thread has time to update next waiting thread field */
1579                 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
1580 #ifdef DEBUG_QUEUING_LOCKS
1581                 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1582 #endif
1583                 dequeued = TRUE;
1584             }
1585         }
1586 
1587         if ( dequeued ) {
1588             kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1589             KMP_DEBUG_ASSERT( head_thr != NULL );
1590 
1591             /* Does this require synchronous reads? */
1592 #ifdef DEBUG_QUEUING_LOCKS
1593             if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1594 #endif
1595             KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1596 
1597             /* For clean code only.
1598              * Thread not released until next statement prevents race with acquire code.
1599              */
1600             head_thr->th.th_next_waiting = 0;
1601 #ifdef DEBUG_QUEUING_LOCKS
1602             TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1603 #endif
1604 
1605             KMP_MB();
1606             /* reset spin value */
1607             head_thr->th.th_spin_here = FALSE;
1608 
1609             KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1610                       lck, gtid ));
1611 #ifdef DEBUG_QUEUING_LOCKS
1612             TRACE_LOCK( gtid+1, "rel exit 2" );
1613 #endif
1614             return KMP_LOCK_RELEASED;
1615         }
1616         /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */
1617 
1618 #ifdef DEBUG_QUEUING_LOCKS
1619         TRACE_LOCK( gtid+1, "rel retry" );
1620 #endif
1621 
1622     } /* while */
1623     KMP_ASSERT2( 0, "should not get here" );
1624     return KMP_LOCK_RELEASED;
1625 }
1626 
1627 static int
1628 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1629   kmp_int32 gtid )
1630 {
1631     char const * const func = "omp_unset_lock";
1632     KMP_MB();  /* in case another processor initialized lock */
1633     if ( lck->lk.initialized != lck ) {
1634         KMP_FATAL( LockIsUninitialized, func );
1635     }
1636     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1637         KMP_FATAL( LockNestableUsedAsSimple, func );
1638     }
1639     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1640         KMP_FATAL( LockUnsettingFree, func );
1641     }
1642     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1643         KMP_FATAL( LockUnsettingSetByAnother, func );
1644     }
1645     lck->lk.owner_id = 0;
1646     return __kmp_release_queuing_lock( lck, gtid );
1647 }
1648 
1649 void
1650 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1651 {
1652     lck->lk.location = NULL;
1653     lck->lk.head_id = 0;
1654     lck->lk.tail_id = 0;
1655     lck->lk.next_ticket = 0;
1656     lck->lk.now_serving = 0;
1657     lck->lk.owner_id = 0;      // no thread owns the lock.
1658     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1659     lck->lk.initialized = lck;
1660 
1661     KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1662 }
1663 
1664 static void
1665 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1666 {
1667     __kmp_init_queuing_lock( lck );
1668 }
1669 
1670 void
1671 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1672 {
1673     lck->lk.initialized = NULL;
1674     lck->lk.location = NULL;
1675     lck->lk.head_id = 0;
1676     lck->lk.tail_id = 0;
1677     lck->lk.next_ticket = 0;
1678     lck->lk.now_serving = 0;
1679     lck->lk.owner_id = 0;
1680     lck->lk.depth_locked = -1;
1681 }
1682 
1683 static void
1684 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1685 {
1686     char const * const func = "omp_destroy_lock";
1687     if ( lck->lk.initialized != lck ) {
1688         KMP_FATAL( LockIsUninitialized, func );
1689     }
1690     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1691         KMP_FATAL( LockNestableUsedAsSimple, func );
1692     }
1693     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1694         KMP_FATAL( LockStillOwned, func );
1695     }
1696     __kmp_destroy_queuing_lock( lck );
1697 }
1698 
1699 
1700 //
1701 // nested queuing locks
1702 //
1703 
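// For nested locks, lk.depth_locked counts how many times the owning thread
// has acquired the lock.  Re-acquisition by the owner just bumps the depth;
// only the release that brings the depth back to zero hands the lock back to
// the underlying queuing lock.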
1704 int
1705 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1706 {
1707     KMP_DEBUG_ASSERT( gtid >= 0 );
1708 
1709     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1710         lck->lk.depth_locked += 1;
1711         return KMP_LOCK_ACQUIRED_NEXT;
1712     }
1713     else {
1714         __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1715         KMP_MB();
1716         lck->lk.depth_locked = 1;
1717         KMP_MB();
1718         lck->lk.owner_id = gtid + 1;
1719         return KMP_LOCK_ACQUIRED_FIRST;
1720     }
1721 }
1722 
1723 static int
1724 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1725 {
1726     char const * const func = "omp_set_nest_lock";
1727     if ( lck->lk.initialized != lck ) {
1728         KMP_FATAL( LockIsUninitialized, func );
1729     }
1730     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1731         KMP_FATAL( LockSimpleUsedAsNestable, func );
1732     }
1733     return __kmp_acquire_nested_queuing_lock( lck, gtid );
1734 }
1735 
1736 int
1737 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1738 {
1739     int retval;
1740 
1741     KMP_DEBUG_ASSERT( gtid >= 0 );
1742 
1743     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1744         retval = ++lck->lk.depth_locked;
1745     }
1746     else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1747         retval = 0;
1748     }
1749     else {
1750         KMP_MB();
1751         retval = lck->lk.depth_locked = 1;
1752         KMP_MB();
1753         lck->lk.owner_id = gtid + 1;
1754     }
1755     return retval;
1756 }
1757 
1758 static int
1759 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1760   kmp_int32 gtid )
1761 {
1762     char const * const func = "omp_test_nest_lock";
1763     if ( lck->lk.initialized != lck ) {
1764         KMP_FATAL( LockIsUninitialized, func );
1765     }
1766     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1767         KMP_FATAL( LockSimpleUsedAsNestable, func );
1768     }
1769     return __kmp_test_nested_queuing_lock( lck, gtid );
1770 }
1771 
1772 int
1773 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1774 {
1775     KMP_DEBUG_ASSERT( gtid >= 0 );
1776 
1777     KMP_MB();
1778     if ( --(lck->lk.depth_locked) == 0 ) {
1779         KMP_MB();
1780         lck->lk.owner_id = 0;
1781         __kmp_release_queuing_lock( lck, gtid );
1782         return KMP_LOCK_RELEASED;
1783     }
1784     return KMP_LOCK_STILL_HELD;
1785 }
1786 
1787 static int
1788 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1789 {
1790     char const * const func = "omp_unset_nest_lock";
1791     KMP_MB();  /* in case another processor initialized lock */
1792     if ( lck->lk.initialized != lck ) {
1793         KMP_FATAL( LockIsUninitialized, func );
1794     }
1795     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1796         KMP_FATAL( LockSimpleUsedAsNestable, func );
1797     }
1798     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1799         KMP_FATAL( LockUnsettingFree, func );
1800     }
1801     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1802         KMP_FATAL( LockUnsettingSetByAnother, func );
1803     }
1804     return __kmp_release_nested_queuing_lock( lck, gtid );
1805 }
1806 
1807 void
1808 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1809 {
1810     __kmp_init_queuing_lock( lck );
1811     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1812 }
1813 
1814 static void
1815 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1816 {
1817     __kmp_init_nested_queuing_lock( lck );
1818 }
1819 
1820 void
1821 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1822 {
1823     __kmp_destroy_queuing_lock( lck );
1824     lck->lk.depth_locked = 0;
1825 }
1826 
1827 static void
1828 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1829 {
1830     char const * const func = "omp_destroy_nest_lock";
1831     if ( lck->lk.initialized != lck ) {
1832         KMP_FATAL( LockIsUninitialized, func );
1833     }
1834     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1835         KMP_FATAL( LockSimpleUsedAsNestable, func );
1836     }
1837     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1838         KMP_FATAL( LockStillOwned, func );
1839     }
1840     __kmp_destroy_nested_queuing_lock( lck );
1841 }
1842 
1843 
1844 //
1845 // access functions to fields which don't exist for all lock kinds.
1846 //
1847 
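// These accessors are only reached through the generic user-lock function
// pointers, which are wired up elsewhere in the runtime.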
1848 static int
1849 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1850 {
1851     return lck == lck->lk.initialized;
1852 }
1853 
1854 static const ident_t *
1855 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1856 {
1857     return lck->lk.location;
1858 }
1859 
1860 static void
1861 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1862 {
1863     lck->lk.location = loc;
1864 }
1865 
1866 static kmp_lock_flags_t
1867 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1868 {
1869     return lck->lk.flags;
1870 }
1871 
1872 static void
1873 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1874 {
1875     lck->lk.flags = flags;
1876 }
1877 
1878 #if KMP_USE_ADAPTIVE_LOCKS
1879 
1880 /*
1881     RTM Adaptive locks
1882 */
1883 
1884 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1885 
1886 #include <immintrin.h>
1887 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1888 
1889 #else
1890 
1891 // Values from the status register after failed speculation.
1892 #define _XBEGIN_STARTED          (~0u)
1893 #define _XABORT_EXPLICIT         (1 << 0)
1894 #define _XABORT_RETRY            (1 << 1)
1895 #define _XABORT_CONFLICT         (1 << 2)
1896 #define _XABORT_CAPACITY         (1 << 3)
1897 #define _XABORT_DEBUG            (1 << 4)
1898 #define _XABORT_NESTED           (1 << 5)
1899 #define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))
1900 
1901 // Aborts for which it's worth trying again immediately
1902 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1903 
1904 #define STRINGIZE_INTERNAL(arg) #arg
1905 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1906 
1907 // Access to RTM instructions
1908 
1909 /*
1910   A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1911   This is the same definition as the compiler intrinsic that will be supported at some point.
1912 */
1913 static __inline int _xbegin()
1914 {
1915     int res = -1;
1916 
1917 #if KMP_OS_WINDOWS
1918 #if KMP_ARCH_X86_64
1919     _asm {
1920         _emit 0xC7
1921         _emit 0xF8
1922         _emit 2
1923         _emit 0
1924         _emit 0
1925         _emit 0
1926         jmp   L2
1927         mov   res, eax
1928     L2:
1929     }
1930 #else /* IA32 */
1931     _asm {
1932         _emit 0xC7
1933         _emit 0xF8
1934         _emit 2
1935         _emit 0
1936         _emit 0
1937         _emit 0
1938         jmp   L2
1939         mov   res, eax
1940     L2:
1941     }
1942 #endif // KMP_ARCH_X86_64
1943 #else
1944     /* Note that %eax must be noted as killed (clobbered), because
1945      * the XSR is returned in %eax(%rax) on abort.  Other register
1946      * values are restored, so don't need to be killed.
1947      *
1948      * We must also mark 'res' as an input and an output, since otherwise
1949      * 'res=-1' may be dropped as being dead, whereas we do need the
1950      * assignment on the successful (i.e., non-abort) path.
1951      */
1952     __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
1953                       "   .long  1f-1b-6\n"
1954                       "    jmp   2f\n"
1955                       "1:  movl  %%eax,%0\n"
1956                       "2:"
1957                       :"+r"(res)::"memory","%eax");
1958 #endif // KMP_OS_WINDOWS
1959     return res;
1960 }
1961 
1962 /*
1963   Transaction end
1964 */
1965 static __inline void _xend()
1966 {
1967 #if KMP_OS_WINDOWS
1968     __asm  {
1969         _emit 0x0f
1970         _emit 0x01
1971         _emit 0xd5
1972     }
1973 #else
1974     __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1975 #endif
1976 }
1977 
1978 /*
  This is a macro; the argument must be a single-byte constant that
  can be evaluated by the inline assembler, since it is emitted as a
  byte into the assembly code.
1982 */
1983 #if KMP_OS_WINDOWS
1984 #define _xabort(ARG)                            \
1985     _asm _emit 0xc6                             \
1986     _asm _emit 0xf8                             \
1987     _asm _emit ARG
1988 #else
1989 #define _xabort(ARG) \
1990     __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1991 #endif
1992 
1993 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1994 
1995 //
//    Statistics are collected for testing purposes
1997 //
1998 #if KMP_DEBUG_ADAPTIVE_LOCKS
1999 
2000 // We accumulate speculative lock statistics when the lock is destroyed.
2001 // We keep locks that haven't been destroyed in the liveLocks list
2002 // so that we can grab their statistics too.
2003 static kmp_adaptive_lock_statistics_t destroyedStats;
2004 
2005 // To hold the list of live locks.
2006 static kmp_adaptive_lock_info_t liveLocks;
2007 
2008 // A lock so we can safely update the list of locks.
2009 static kmp_bootstrap_lock_t chain_lock;
2010 
2011 // Initialize the list of stats.
2012 void
2013 __kmp_init_speculative_stats()
2014 {
2015     kmp_adaptive_lock_info_t *lck = &liveLocks;
2016 
2017     memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2018     lck->stats.next = lck;
2019     lck->stats.prev = lck;
2020 
2021     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2022     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2023 
2024     __kmp_init_bootstrap_lock( &chain_lock );
2025 
2026 }
2027 
2028 // Insert the lock into the circular list
2029 static void
2030 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
2031 {
2032     __kmp_acquire_bootstrap_lock( &chain_lock );
2033 
2034     lck->stats.next = liveLocks.stats.next;
2035     lck->stats.prev = &liveLocks;
2036 
2037     liveLocks.stats.next = lck;
2038     lck->stats.next->stats.prev  = lck;
2039 
2040     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2041     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2042 
2043     __kmp_release_bootstrap_lock( &chain_lock );
2044 }
2045 
2046 static void
2047 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2048 {
2049     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2050     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2051 
2052     kmp_adaptive_lock_info_t * n = lck->stats.next;
2053     kmp_adaptive_lock_info_t * p = lck->stats.prev;
2054 
2055     n->stats.prev = p;
2056     p->stats.next = n;
2057 }
2058 
2059 static void
2060 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2061 {
2062     memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2063     __kmp_remember_lock( lck );
2064 }
2065 
2066 static void
2067 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2068 {
2069     kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2070 
2071     t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2072     t->successfulSpeculations += s->successfulSpeculations;
2073     t->hardFailedSpeculations += s->hardFailedSpeculations;
2074     t->softFailedSpeculations += s->softFailedSpeculations;
2075     t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2076     t->lemmingYields          += s->lemmingYields;
2077 }
2078 
2079 static void
2080 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2081 {
2082     kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2083 
2084     __kmp_acquire_bootstrap_lock( &chain_lock );
2085 
2086     __kmp_add_stats( &destroyedStats, lck );
2087     __kmp_forget_lock( lck );
2088 
2089     __kmp_release_bootstrap_lock( &chain_lock );
2090 }
2091 
2092 static float
2093 percent (kmp_uint32 count, kmp_uint32 total)
2094 {
2095     return (total == 0) ? 0.0: (100.0 * count)/total;
2096 }
2097 
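// Open the statistics output file.  "-" means stdout; otherwise the
// configured name is used as a printf-style format, with the pid passed as
// its single argument.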
2098 static
2099 FILE * __kmp_open_stats_file()
2100 {
2101     if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2102         return stdout;
2103 
2104     size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2105     char buffer[buffLen];
2106     KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2107       (kmp_int32)getpid());
2108     FILE * result = fopen(&buffer[0], "w");
2109 
2110     // Maybe we should issue a warning here...
2111     return result ? result : stdout;
2112 }
2113 
2114 void
2115 __kmp_print_speculative_stats()
2116 {
2117     if (__kmp_user_lock_kind != lk_adaptive)
2118         return;
2119 
2120     FILE * statsFile = __kmp_open_stats_file();
2121 
2122     kmp_adaptive_lock_statistics_t total = destroyedStats;
2123     kmp_adaptive_lock_info_t *lck;
2124 
2125     for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2126         __kmp_add_stats( &total, lck );
2127     }
2128     kmp_adaptive_lock_statistics_t *t = &total;
2129     kmp_uint32 totalSections     = t->nonSpeculativeAcquires + t->successfulSpeculations;
2130     kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2131                                    t->softFailedSpeculations;
2132 
2133     fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2134     fprintf ( statsFile, " Lock parameters: \n"
2135              "   max_soft_retries               : %10d\n"
2136              "   max_badness                    : %10d\n",
2137              __kmp_adaptive_backoff_params.max_soft_retries,
2138              __kmp_adaptive_backoff_params.max_badness);
2139     fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2140     fprintf( statsFile, " Total critical sections          : %10d\n", totalSections );
2141     fprintf( statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
2142              t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2143     fprintf( statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
2144              t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2145     fprintf( statsFile, " Lemming yields                   : %10d\n\n", t->lemmingYields );
2146 
2147     fprintf( statsFile, " Speculative acquire attempts     : %10d\n", totalSpeculations );
2148     fprintf( statsFile, " Successes                        : %10d (%5.1f%%)\n",
2149              t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2150     fprintf( statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
2151              t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2152     fprintf( statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
2153              t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2154 
2155     if (statsFile != stdout)
2156         fclose( statsFile );
2157 }
2158 
2159 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2160 #else
2161 # define KMP_INC_STAT(lck,stat)
2162 
2163 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2164 
2165 static inline bool
2166 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2167 {
    // It is enough to check that the head_id is zero.
    // There is no need to check the tail as well.
2170     bool res = lck->lk.head_id == 0;
2171 
2172     // We need a fence here, since we must ensure that no memory operations
2173     // from later in this thread float above that read.
2174 #if KMP_COMPILER_ICC
2175     _mm_mfence();
2176 #else
2177     __sync_synchronize();
2178 #endif
2179 
2180     return res;
2181 }
2182 
2183 // Functions for manipulating the badness
2184 static __inline void
2185 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2186 {
2187     // Reset the badness to zero so we eagerly try to speculate again
2188     lck->lk.adaptive.badness = 0;
2189     KMP_INC_STAT(lck,successfulSpeculations);
2190 }
2191 
2192 // Create a bit mask with one more set bit.
2193 static __inline void
2194 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2195 {
2196     kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2197     if ( newBadness > lck->lk.adaptive.max_badness) {
2198         return;
2199     } else {
2200         lck->lk.adaptive.badness = newBadness;
2201     }
2202 }
2203 
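// The badness field is a mask of low-order bits: each failed speculation
// shifts in another 1 bit (see __kmp_step_badness), so speculation is only
// attempted when the acquire-attempt counter has all of those bits clear.
// The larger the badness, the more rarely we speculate.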
2204 // Check whether speculation should be attempted.
2205 static __inline int
2206 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2207 {
2208     kmp_uint32 badness = lck->lk.adaptive.badness;
2209     kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2210     int res = (attempts & badness) == 0;
2211     return res;
2212 }
2213 
2214 // Attempt to acquire only the speculative lock.
2215 // Does not back off to the non-speculative lock.
2216 //
2217 static int
2218 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2219 {
2220     int retries = lck->lk.adaptive.max_soft_retries;
2221 
2222     // We don't explicitly count the start of speculation, rather we record
2223     // the results (success, hard fail, soft fail). The sum of all of those
2224     // is the total number of times we started speculation since all
2225     // speculations must end one of those ways.
2226     do
2227     {
2228         kmp_uint32 status = _xbegin();
2229         // Switch this in to disable actual speculation but exercise
2230         // at least some of the rest of the code. Useful for debugging...
2231         // kmp_uint32 status = _XABORT_NESTED;
2232 
2233         if (status == _XBEGIN_STARTED )
2234         { /* We have successfully started speculation
2235            * Check that no-one acquired the lock for real between when we last looked
2236            * and now. This also gets the lock cache line into our read-set,
2237            * which we need so that we'll abort if anyone later claims it for real.
2238            */
2239             if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2240             {
2241                 // Lock is now visibly acquired, so someone beat us to it.
2242                 // Abort the transaction so we'll restart from _xbegin with the
2243                 // failure status.
2244                 _xabort(0x01);
2245                 KMP_ASSERT2( 0, "should not get here" );
2246             }
2247             return 1;   // Lock has been acquired (speculatively)
2248         } else {
2249             // We have aborted, update the statistics
2250             if ( status & SOFT_ABORT_MASK)
2251             {
2252                 KMP_INC_STAT(lck,softFailedSpeculations);
2253                 // and loop round to retry.
2254             }
2255             else
2256             {
2257                 KMP_INC_STAT(lck,hardFailedSpeculations);
2258                 // Give up if we had a hard failure.
2259                 break;
2260             }
2261         }
2262     }  while( retries-- ); // Loop while we have retries, and didn't fail hard.
2263 
2264     // Either we had a hard failure or we didn't succeed softly after
2265     // the full set of attempts, so back off the badness.
2266     __kmp_step_badness( lck );
2267     return 0;
2268 }
2269 
2270 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2271 // if the speculative lock cannot be acquired.
2272 // We can succeed speculatively, non-speculatively, or fail.
2273 static int
2274 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2275 {
2276     // First try to acquire the lock speculatively
2277     if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2278         return 1;
2279 
2280     // Speculative acquisition failed, so try to acquire it non-speculatively.
2281     // Count the non-speculative acquire attempt
2282     lck->lk.adaptive.acquire_attempts++;
2283 
2284     // Use base, non-speculative lock.
2285     if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2286     {
2287         KMP_INC_STAT(lck,nonSpeculativeAcquires);
2288         return 1;       // Lock is acquired (non-speculatively)
2289     }
2290     else
2291     {
2292         return 0;       // Failed to acquire the lock, it's already visibly locked.
2293     }
2294 }
2295 
2296 static int
2297 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2298 {
2299     char const * const func = "omp_test_lock";
2300     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2301         KMP_FATAL( LockIsUninitialized, func );
2302     }
2303 
2304     int retval = __kmp_test_adaptive_lock( lck, gtid );
2305 
2306     if ( retval ) {
2307         lck->lk.qlk.owner_id = gtid + 1;
2308     }
2309     return retval;
2310 }
2311 
2312 // Block until we can acquire a speculative, adaptive lock.
2313 // We check whether we should be trying to speculate.
2314 // If we should be, we check the real lock to see if it is free,
2315 // and, if not, pause without attempting to acquire it until it is.
2316 // Then we try the speculative acquire.
// This means that although we suffer from lemmings a little (because we
// can't acquire the lock speculatively until the queue of waiting threads
// has cleared), we don't get into a state where we can never acquire the
// lock speculatively (because we force the queue to clear by preventing
// new arrivals from entering the queue).
// This does mean that when we're trying to break lemmings, the lock is no
// longer fair. However, OpenMP makes no guarantee that its locks are fair,
// so this isn't a real problem.
2326 static void
2327 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2328 {
2329     if ( __kmp_should_speculate( lck, gtid ) )
2330     {
2331         if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2332         {
2333             if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2334                 return;
2335             // We tried speculation and failed, so give up.
2336         }
2337         else
2338         {
            // We can't try speculation until the lock is free, so we
            // pause here (without suspending on the queuing lock) to
            // allow it to drain, then try again.
            // All other threads will also see the same result from
            // __kmp_should_speculate, so they will be doing the same if
            // they try to claim the lock from now on.
2345             while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2346             {
2347                 KMP_INC_STAT(lck,lemmingYields);
2348                 __kmp_yield (TRUE);
2349             }
2350 
2351             if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2352                 return;
2353         }
2354     }
2355 
2356     // Speculative acquisition failed, so acquire it non-speculatively.
2357     // Count the non-speculative acquire attempt
2358     lck->lk.adaptive.acquire_attempts++;
2359 
2360     __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2361     // We have acquired the base lock, so count that.
2362     KMP_INC_STAT(lck,nonSpeculativeAcquires );
2363 }
2364 
2365 static void
2366 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2367 {
2368     char const * const func = "omp_set_lock";
2369     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2370         KMP_FATAL( LockIsUninitialized, func );
2371     }
2372     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2373         KMP_FATAL( LockIsAlreadyOwned, func );
2374     }
2375 
2376     __kmp_acquire_adaptive_lock( lck, gtid );
2377 
2378     lck->lk.qlk.owner_id = gtid + 1;
2379 }
2380 
2381 static int
2382 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2383 {
2384     if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2385     {   // If the lock doesn't look claimed we must be speculating.
2386         // (Or the user's code is buggy and they're releasing without locking;
2387         // if we had XTEST we'd be able to check that case...)
2388         _xend();        // Exit speculation
2389         __kmp_update_badness_after_success( lck );
2390     }
2391     else
2392     {   // Since the lock *is* visibly locked we're not speculating,
2393         // so should use the underlying lock's release scheme.
2394         __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2395     }
2396     return KMP_LOCK_RELEASED;
2397 }
2398 
2399 static int
2400 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2401 {
2402     char const * const func = "omp_unset_lock";
2403     KMP_MB();  /* in case another processor initialized lock */
2404     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2405         KMP_FATAL( LockIsUninitialized, func );
2406     }
2407     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2408         KMP_FATAL( LockUnsettingFree, func );
2409     }
2410     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2411         KMP_FATAL( LockUnsettingSetByAnother, func );
2412     }
2413     lck->lk.qlk.owner_id = 0;
2414     __kmp_release_adaptive_lock( lck, gtid );
2415     return KMP_LOCK_RELEASED;
2416 }
2417 
2418 static void
2419 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2420 {
2421     __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2422     lck->lk.adaptive.badness = 0;
2423     lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2424     lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2425     lck->lk.adaptive.max_badness      = __kmp_adaptive_backoff_params.max_badness;
2426 #if KMP_DEBUG_ADAPTIVE_LOCKS
2427     __kmp_zero_speculative_stats( &lck->lk.adaptive );
2428 #endif
2429     KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2430 }
2431 
2432 static void
2433 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2434 {
2435     __kmp_init_adaptive_lock( lck );
2436 }
2437 
2438 static void
2439 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2440 {
2441 #if KMP_DEBUG_ADAPTIVE_LOCKS
2442     __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2443 #endif
2444     __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2445     // Nothing needed for the speculative part.
2446 }
2447 
2448 static void
2449 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2450 {
2451     char const * const func = "omp_destroy_lock";
2452     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2453         KMP_FATAL( LockIsUninitialized, func );
2454     }
2455     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2456         KMP_FATAL( LockStillOwned, func );
2457     }
2458     __kmp_destroy_adaptive_lock( lck );
2459 }
2460 
2461 
2462 #endif // KMP_USE_ADAPTIVE_LOCKS
2463 
2464 
2465 /* ------------------------------------------------------------------------ */
2466 /* DRDPA ticket locks                                                */
2467 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2468 
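// Each thread takes a ticket and spins on its own slot of the polling area
// (indexed by ticket & mask) until the releasing thread stores that ticket
// there.  The polling area is resized on acquire according to the number of
// waiters, which is what makes the lock "dynamically reconfigurable".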
2469 static kmp_int32
2470 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2471 {
2472     return TCR_4( lck->lk.owner_id ) - 1;
2473 }
2474 
2475 static inline bool
2476 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2477 {
2478     return lck->lk.depth_locked != -1;
2479 }
2480 
2481 __forceinline static int
2482 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2483 {
2484     kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2485     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2486     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2487       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2488       TCR_PTR(lck->lk.polls);                           // volatile load
2489 
2490 #ifdef USE_LOCK_PROFILE
2491     if (TCR_8(polls[ticket & mask].poll) != ticket)
2492         __kmp_printf("LOCK CONTENTION: %p\n", lck);
2493     /* else __kmp_printf( "." );*/
2494 #endif /* USE_LOCK_PROFILE */
2495 
2496     //
2497     // Now spin-wait, but reload the polls pointer and mask, in case the
2498     // polling area has been reconfigured.  Unless it is reconfigured, the
2499     // reloads stay in L1 cache and are cheap.
2500     //
2501     // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2502     //
2503     // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2504     // and poll to be re-read every spin iteration.
2505     //
2506     kmp_uint32 spins;
2507 
2508     KMP_FSYNC_PREPARE(lck);
2509     KMP_INIT_YIELD(spins);
    while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2511         // If we are oversubscribed,
2512         // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2513         // CPU Pause is in the macros for yield.
2514         //
2515         KMP_YIELD(TCR_4(__kmp_nth)
2516           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2517         KMP_YIELD_SPIN(spins);
2518 
2519         // Re-read the mask and the poll pointer from the lock structure.
2520         //
2521         // Make certain that "mask" is read before "polls" !!!
2522         //
        // If another thread reconfigures the polling area and updates its
        // values, and we get the new value of mask and the old polls
        // pointer, we could access memory beyond the end of the old polling
        // area.
2527         //
2528         mask = TCR_8(lck->lk.mask);                     // volatile load
2529         polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2530           TCR_PTR(lck->lk.polls);                       // volatile load
2531     }
2532 
2533     //
2534     // Critical section starts here
2535     //
2536     KMP_FSYNC_ACQUIRED(lck);
2537     KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2538       ticket, lck));
2539     lck->lk.now_serving = ticket;                       // non-volatile store
2540 
2541     //
2542     // Deallocate a garbage polling area if we know that we are the last
2543     // thread that could possibly access it.
2544     //
2545     // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2546     // ticket.
2547     //
2548     if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2549         __kmp_free((void *)lck->lk.old_polls);
2550         lck->lk.old_polls = NULL;
2551         lck->lk.cleanup_ticket = 0;
2552     }
2553 
2554     //
2555     // Check to see if we should reconfigure the polling area.
2556     // If there is still a garbage polling area to be deallocated from a
2557     // previous reconfiguration, let a later thread reconfigure it.
2558     //
2559     if (lck->lk.old_polls == NULL) {
2560         bool reconfigure = false;
2561         volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2562         kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2563 
2564         if (TCR_4(__kmp_nth)
2565           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2566             //
2567             // We are in oversubscription mode.  Contract the polling area
2568             // down to a single location, if that hasn't been done already.
2569             //
2570             if (num_polls > 1) {
2571                 reconfigure = true;
2572                 num_polls = TCR_4(lck->lk.num_polls);
2573                 mask = 0;
2574                 num_polls = 1;
2575                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2576                   __kmp_allocate(num_polls * sizeof(*polls));
2577                 polls[0].poll = ticket;
2578             }
2579         }
2580         else {
2581             //
2582             // We are in under/fully subscribed mode.  Check the number of
2583             // threads waiting on the lock.  The size of the polling area
2584             // should be at least the number of threads waiting.
2585             //
2586             kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2587             if (num_waiting > num_polls) {
2588                 kmp_uint32 old_num_polls = num_polls;
2589                 reconfigure = true;
2590                 do {
2591                     mask = (mask << 1) | 1;
2592                     num_polls *= 2;
2593                 } while (num_polls <= num_waiting);
2594 
2595                 //
2596                 // Allocate the new polling area, and copy the relevant portion
2597                 // of the old polling area to the new area.  __kmp_allocate()
2598                 // zeroes the memory it allocates, and most of the old area is
2599                 // just zero padding, so we only copy the release counters.
2600                 //
2601                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2602                   __kmp_allocate(num_polls * sizeof(*polls));
2603                 kmp_uint32 i;
2604                 for (i = 0; i < old_num_polls; i++) {
2605                     polls[i].poll = old_polls[i].poll;
2606                 }
2607             }
2608         }
2609 
2610         if (reconfigure) {
2611             //
2612             // Now write the updated fields back to the lock structure.
2613             //
2614             // Make certain that "polls" is written before "mask" !!!
2615             //
2616             // If another thread picks up the new value of mask and the old
            // polls pointer, it could access memory beyond the end of the
2618             // old polling area.
2619             //
2620             // On x86, we need memory fences.
2621             //
2622             KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2623               ticket, lck, num_polls));
2624 
2625             lck->lk.old_polls = old_polls;              // non-volatile store
2626             lck->lk.polls = polls;                      // volatile store
2627 
2628             KMP_MB();
2629 
2630             lck->lk.num_polls = num_polls;              // non-volatile store
2631             lck->lk.mask = mask;                        // volatile store
2632 
2633             KMP_MB();
2634 
2635             //
2636             // Only after the new polling area and mask have been flushed
2637             // to main memory can we update the cleanup ticket field.
2638             //
2639             // volatile load / non-volatile store
2640             //
2641             lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2642         }
2643     }
2644     return KMP_LOCK_ACQUIRED_FIRST;
2645 }
2646 
2647 int
2648 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2649 {
2650     return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2651 }
2652 
2653 static int
2654 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2655 {
2656     char const * const func = "omp_set_lock";
2657     if ( lck->lk.initialized != lck ) {
2658         KMP_FATAL( LockIsUninitialized, func );
2659     }
2660     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2661         KMP_FATAL( LockNestableUsedAsSimple, func );
2662     }
2663     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2664         KMP_FATAL( LockIsAlreadyOwned, func );
2665     }
2666 
2667     __kmp_acquire_drdpa_lock( lck, gtid );
2668 
2669     lck->lk.owner_id = gtid + 1;
2670     return KMP_LOCK_ACQUIRED_FIRST;
2671 }
2672 
2673 int
2674 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2675 {
2676     //
2677     // First get a ticket, then read the polls pointer and the mask.
2678     // The polls pointer must be read before the mask!!! (See above)
2679     //
2680     kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
2681     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2682       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2683       TCR_PTR(lck->lk.polls);                           // volatile load
2684     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2685     if (TCR_8(polls[ticket & mask].poll) == ticket) {
2686         kmp_uint64 next_ticket = ticket + 1;
2687         if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2688           ticket, next_ticket)) {
2689             KMP_FSYNC_ACQUIRED(lck);
2690             KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2691                ticket, lck));
2692             lck->lk.now_serving = ticket;               // non-volatile store
2693 
2694             //
2695             // Since no threads are waiting, there is no possibility that
2696             // we would want to reconfigure the polling area.  We might
2697             // have the cleanup ticket value (which says that it is now
2698             // safe to deallocate old_polls), but we'll let a later thread
2699             // which calls __kmp_acquire_lock do that - this routine
2700             // isn't supposed to block, and we would risk blocks if we
2701             // called __kmp_free() to do the deallocation.
2702             //
2703             return TRUE;
2704         }
2705     }
2706     return FALSE;
2707 }
2708 
2709 static int
2710 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2711 {
2712     char const * const func = "omp_test_lock";
2713     if ( lck->lk.initialized != lck ) {
2714         KMP_FATAL( LockIsUninitialized, func );
2715     }
2716     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2717         KMP_FATAL( LockNestableUsedAsSimple, func );
2718     }
2719 
2720     int retval = __kmp_test_drdpa_lock( lck, gtid );
2721 
2722     if ( retval ) {
2723         lck->lk.owner_id = gtid + 1;
2724     }
2725     return retval;
2726 }
2727 
2728 int
2729 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2730 {
2731     //
2732     // Read the ticket value from the lock data struct, then the polls
2733     // pointer and the mask.  The polls pointer must be read before the
2734     // mask!!! (See above)
2735     //
2736     kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
2737     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2738       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2739       TCR_PTR(lck->lk.polls);                           // volatile load
2740     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2741     KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2742        ticket - 1, lck));
2743     KMP_FSYNC_RELEASING(lck);
2744     KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2745     return KMP_LOCK_RELEASED;
2746 }
2747 
2748 static int
2749 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2750 {
2751     char const * const func = "omp_unset_lock";
2752     KMP_MB();  /* in case another processor initialized lock */
2753     if ( lck->lk.initialized != lck ) {
2754         KMP_FATAL( LockIsUninitialized, func );
2755     }
2756     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2757         KMP_FATAL( LockNestableUsedAsSimple, func );
2758     }
2759     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2760         KMP_FATAL( LockUnsettingFree, func );
2761     }
2762     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2763       && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2764         KMP_FATAL( LockUnsettingSetByAnother, func );
2765     }
2766     lck->lk.owner_id = 0;
2767     return __kmp_release_drdpa_lock( lck, gtid );
2768 }
2769 
2770 void
2771 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2772 {
2773     lck->lk.location = NULL;
2774     lck->lk.mask = 0;
2775     lck->lk.num_polls = 1;
2776     lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2777       __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2778     lck->lk.cleanup_ticket = 0;
2779     lck->lk.old_polls = NULL;
2780     lck->lk.next_ticket = 0;
2781     lck->lk.now_serving = 0;
2782     lck->lk.owner_id = 0;      // no thread owns the lock.
2783     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2784     lck->lk.initialized = lck;
2785 
2786     KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2787 }
2788 
2789 static void
2790 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2791 {
2792     __kmp_init_drdpa_lock( lck );
2793 }
2794 
2795 void
2796 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2797 {
2798     lck->lk.initialized = NULL;
2799     lck->lk.location    = NULL;
2800     if (lck->lk.polls != NULL) {
2801         __kmp_free((void *)lck->lk.polls);
2802         lck->lk.polls = NULL;
2803     }
2804     if (lck->lk.old_polls != NULL) {
2805         __kmp_free((void *)lck->lk.old_polls);
2806         lck->lk.old_polls = NULL;
2807     }
2808     lck->lk.mask = 0;
2809     lck->lk.num_polls = 0;
2810     lck->lk.cleanup_ticket = 0;
2811     lck->lk.next_ticket = 0;
2812     lck->lk.now_serving = 0;
2813     lck->lk.owner_id = 0;
2814     lck->lk.depth_locked = -1;
2815 }
2816 
2817 static void
2818 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2819 {
2820     char const * const func = "omp_destroy_lock";
2821     if ( lck->lk.initialized != lck ) {
2822         KMP_FATAL( LockIsUninitialized, func );
2823     }
2824     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2825         KMP_FATAL( LockNestableUsedAsSimple, func );
2826     }
2827     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2828         KMP_FATAL( LockStillOwned, func );
2829     }
2830     __kmp_destroy_drdpa_lock( lck );
2831 }
2832 
2833 
2834 //
2835 // nested drdpa ticket locks
2836 //
2837 
2838 int
2839 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2840 {
2841     KMP_DEBUG_ASSERT( gtid >= 0 );
2842 
2843     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2844         lck->lk.depth_locked += 1;
2845         return KMP_LOCK_ACQUIRED_NEXT;
2846     }
2847     else {
2848         __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2849         KMP_MB();
2850         lck->lk.depth_locked = 1;
2851         KMP_MB();
2852         lck->lk.owner_id = gtid + 1;
2853         return KMP_LOCK_ACQUIRED_FIRST;
2854     }
2855 }
2856 
2857 static void
2858 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2859 {
2860     char const * const func = "omp_set_nest_lock";
2861     if ( lck->lk.initialized != lck ) {
2862         KMP_FATAL( LockIsUninitialized, func );
2863     }
2864     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2865         KMP_FATAL( LockSimpleUsedAsNestable, func );
2866     }
2867     __kmp_acquire_nested_drdpa_lock( lck, gtid );
2868 }
2869 
2870 int
2871 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2872 {
2873     int retval;
2874 
2875     KMP_DEBUG_ASSERT( gtid >= 0 );
2876 
2877     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2878         retval = ++lck->lk.depth_locked;
2879     }
2880     else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2881         retval = 0;
2882     }
2883     else {
2884         KMP_MB();
2885         retval = lck->lk.depth_locked = 1;
2886         KMP_MB();
2887         lck->lk.owner_id = gtid + 1;
2888     }
2889     return retval;
2890 }
2891 
2892 static int
2893 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2894 {
2895     char const * const func = "omp_test_nest_lock";
2896     if ( lck->lk.initialized != lck ) {
2897         KMP_FATAL( LockIsUninitialized, func );
2898     }
2899     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2900         KMP_FATAL( LockSimpleUsedAsNestable, func );
2901     }
2902     return __kmp_test_nested_drdpa_lock( lck, gtid );
2903 }
2904 
2905 int
2906 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2907 {
2908     KMP_DEBUG_ASSERT( gtid >= 0 );
2909 
2910     KMP_MB();
2911     if ( --(lck->lk.depth_locked) == 0 ) {
2912         KMP_MB();
2913         lck->lk.owner_id = 0;
2914         __kmp_release_drdpa_lock( lck, gtid );
2915         return KMP_LOCK_RELEASED;
2916     }
2917     return KMP_LOCK_STILL_HELD;
2918 }
2919 
2920 static int
2921 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2922 {
2923     char const * const func = "omp_unset_nest_lock";
2924     KMP_MB();  /* in case another processor initialized lock */
2925     if ( lck->lk.initialized != lck ) {
2926         KMP_FATAL( LockIsUninitialized, func );
2927     }
2928     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2929         KMP_FATAL( LockSimpleUsedAsNestable, func );
2930     }
2931     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2932         KMP_FATAL( LockUnsettingFree, func );
2933     }
2934     if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2935         KMP_FATAL( LockUnsettingSetByAnother, func );
2936     }
2937     return __kmp_release_nested_drdpa_lock( lck, gtid );
2938 }
2939 
2940 void
2941 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2942 {
2943     __kmp_init_drdpa_lock( lck );
2944     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2945 }
2946 
2947 static void
2948 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2949 {
2950     __kmp_init_nested_drdpa_lock( lck );
2951 }
2952 
2953 void
2954 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2955 {
2956     __kmp_destroy_drdpa_lock( lck );
2957     lck->lk.depth_locked = 0;
2958 }
2959 
2960 static void
2961 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2962 {
2963     char const * const func = "omp_destroy_nest_lock";
2964     if ( lck->lk.initialized != lck ) {
2965         KMP_FATAL( LockIsUninitialized, func );
2966     }
2967     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2968         KMP_FATAL( LockSimpleUsedAsNestable, func );
2969     }
2970     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2971         KMP_FATAL( LockStillOwned, func );
2972     }
2973     __kmp_destroy_nested_drdpa_lock( lck );
2974 }
2975 
2976 
2977 //
2978 // access functions to fields which don't exist for all lock kinds.
2979 //
2980 
2981 static int
2982 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2983 {
2984     return lck == lck->lk.initialized;
2985 }
2986 
2987 static const ident_t *
2988 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2989 {
2990     return lck->lk.location;
2991 }
2992 
2993 static void
2994 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2995 {
2996     lck->lk.location = loc;
2997 }
2998 
2999 static kmp_lock_flags_t
3000 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3001 {
3002     return lck->lk.flags;
3003 }
3004 
3005 static void
3006 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3007 {
3008     lck->lk.flags = flags;
3009 }
3010 
3011 #if KMP_USE_DYNAMIC_LOCK
3012 
// Direct lock initializer. It simply writes a tag into the low 8 bits of the lock word.
3014 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
3015 {
3016     TCW_4(*lck, KMP_GET_D_TAG(seq));
3017     KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
3018 }
3019 
3020 #if KMP_USE_TSX
3021 
3022 // HLE lock functions - imported from the testbed runtime.
3023 #define HLE_ACQUIRE ".byte 0xf2;"
3024 #define HLE_RELEASE ".byte 0xf3;"
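// 0xf2/0xf3 are the XACQUIRE/XRELEASE prefixes: on HLE-capable hardware the
// prefixed exchange/store elide the lock, and on older hardware the prefixes
// are simply ignored.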
3025 
3026 static inline kmp_uint32
3027 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3028 {
3029     __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3030                     : "+r"(v), "+m"(*p)
3031                     :
3032                     : "memory");
3033     return v;
3034 }
3035 
3036 static void
3037 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3038 {
3039     TCW_4(*lck, 0);
3040 }
3041 
3042 static void
3043 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3044 {
3045     // Use gtid for KMP_LOCK_BUSY if necessary
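    // If the initial elided exchange fails, spin reading the lock word with a
    // small capped backoff (1, 3, 7, 7, ... pauses) until it reads free, then
    // retry the exchange.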
3046     if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
3047         int delay = 1;
3048         do {
3049             while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
3050                 for (int i = delay; i != 0; --i)
3051                     KMP_CPU_PAUSE();
3052                 delay = ((delay << 1) | 1) & 7;
3053             }
3054         } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
3055     }
3056 }
3057 
3058 static void
3059 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3060 {
3061     __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3062 }
3063 
3064 static int
3065 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3066 {
3067     __asm__ volatile(HLE_RELEASE "movl %1,%0"
3068                     : "=m"(*lck)
3069                     : "r"(KMP_LOCK_FREE(hle))
3070                     : "memory");
3071     return KMP_LOCK_RELEASED;
3072 }
3073 
3074 static int
3075 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3076 {
3077     return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3078 }
3079 
3080 static int
3081 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3082 {
3083     return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
3084 }
3085 
3086 static int
3087 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3088 {
3089     return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3090 }
3091 
3092 static void
3093 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
3094 {
3095     __kmp_init_queuing_lock(lck);
3096 }
3097 
3098 static void
3099 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
3100 {
3101     __kmp_destroy_queuing_lock(lck);
3102 }
3103 
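// Try to execute the critical section transactionally.  On an explicit abort
// with code 0xff (the lock was visibly held) we wait for the lock to drain
// before retrying; after a few retries, or on a non-retryable abort, we fall
// back to acquiring the queuing lock for real.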
3104 static void
3105 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3106 {
3107     unsigned retries=3, status;
3108     do {
3109         status = _xbegin();
3110         if (status == _XBEGIN_STARTED) {
3111             if (__kmp_is_unlocked_queuing_lock(lck))
3112                 return;
3113             _xabort(0xff);
3114         }
3115         if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
3116             // Wait until lock becomes free
3117             while (! __kmp_is_unlocked_queuing_lock(lck))
3118                 __kmp_yield(TRUE);
3119         }
3120         else if (!(status & _XABORT_RETRY))
3121             break;
3122     } while (retries--);
3123 
    // Fall back to the non-speculative queuing lock.
3125     __kmp_acquire_queuing_lock(lck, gtid);
3126 }
3127 
3128 static void
3129 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3130 {
3131     __kmp_acquire_rtm_lock(lck, gtid);
3132 }
3133 
3134 static int
3135 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3136 {
3137     if (__kmp_is_unlocked_queuing_lock(lck)) {
3138         // Releasing from speculation
3139         _xend();
3140     }
3141     else {
3142         // Releasing from a real lock
3143         __kmp_release_queuing_lock(lck, gtid);
3144     }
3145     return KMP_LOCK_RELEASED;
3146 }
3147 
3148 static int
3149 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3150 {
3151     return __kmp_release_rtm_lock(lck, gtid);
3152 }
3153 
3154 static int
3155 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3156 {
3157     unsigned retries=3, status;
3158     do {
3159         status = _xbegin();
3160         if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
3161             return 1;
3162         }
3163         if (!(status & _XABORT_RETRY))
3164             break;
3165     } while (retries--);
3166 
3167     return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
3168 }
3169 
3170 static int
3171 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3172 {
3173     return __kmp_test_rtm_lock(lck, gtid);
3174 }
3175 
3176 #endif // KMP_USE_TSX
3177 
3178 // Entry functions for indirect locks (first element of direct lock jump tables).
3179 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3180 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3181 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3182 static int  __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3183 static int  __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3184 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3185 static int  __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3186 static int  __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3187 
//
// Jump tables for the direct lock functions.
// Only the odd entries are filled in; a direct lock's tag is an odd value, so
// it indexes the table directly without needing to shift out the low bit.
// Entry 0 dispatches to the corresponding indirect-lock entry function.
//
3192 
3193 // init functions
3194 #define expand(l, op) 0,__kmp_init_direct_lock,
3195 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3196     = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
3197 #undef expand
3198 
3199 // destroy functions
3200 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3201 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
3202     = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
3203 #undef expand
3204 
3205 // set/acquire functions
3206 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3207 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
3208     = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3209 #undef expand
3210 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3211 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
3212     = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3213 #undef expand
3214 
3215 // unset/release and test functions
3216 #define expand(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3217 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
3218     = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
3219 static int  (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
3220     = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
3221 #undef expand
3222 #define expand(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3223 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
3224     = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
3225 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
3226     = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
3227 #undef expand
3228 
3229 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3230 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
3231 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
3232 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
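// These pointers (and the __kmp_indirect_* counterparts below) are assigned in
// __kmp_init_dynamic_user_locks(), based on __kmp_env_consistency_check.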
3233 
3234 //
3235 // Jump tables for the indirect lock functions.
3236 //
3237 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3238 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
3239 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
3240 #undef expand
3241 
3242 // set/acquire functions
3243 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3244 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3245 #undef expand
3246 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3247 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3248 #undef expand
3249 
3250 // unset/release and test functions
3251 #define expand(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3252 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3253 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3254 #undef expand
3255 #define expand(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3256 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3257 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3258 #undef expand
3259 
// Exposes only one set of jump tables (*lock or *lock_with_checks).
3261 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3262 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3263 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
3264 
3265 // Lock index table.
3266 kmp_indirect_lock_table_t __kmp_i_lock_table;
3267 
3268 // Size of indirect locks.
3269 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
3270 
3271 // Jump tables for lock accessor/modifier.
3272 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3273 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3274 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3275 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3276 
3277 // Use different lock pools for different lock types.
3278 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
3279 
3280 // User lock allocator for dynamically dispatched indirect locks.
// Every entry of the indirect lock table holds the address and type of the allocated indirect lock
// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
// object is returned to the reusable pool of locks, one pool per lock type.
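// The table is an array of row pointers, each row holding KMP_I_LOCK_CHUNK
// kmp_indirect_lock_t objects; a lock index therefore maps to row
// idx/KMP_I_LOCK_CHUNK, slot idx%KMP_I_LOCK_CHUNK (see KMP_GET_I_LOCK).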
3284 kmp_indirect_lock_t *
3285 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3286 {
3287     kmp_indirect_lock_t *lck;
3288     kmp_lock_index_t idx;
3289 
3290     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3291 
3292     if (__kmp_indirect_lock_pool[tag] != NULL) {
3293         // Reuse the allocated and destroyed lock object
3294         lck = __kmp_indirect_lock_pool[tag];
3295         if (OMP_LOCK_T_SIZE < sizeof(void *))
3296             idx = lck->lock->pool.index;
3297         __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3298         KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
3299     } else {
3300         idx = __kmp_i_lock_table.next;
3301         // Check capacity and double the size if it is full
3302         if (idx == __kmp_i_lock_table.size) {
3303             // Double up the space for block pointers
3304             int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
3305             kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3306             __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
3307             KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
3308             __kmp_free(old_table);
3309             // Allocate new objects in the new blocks
3310             for (int i = row; i < 2*row; ++i)
3311                 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
3312                                                   __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3313             __kmp_i_lock_table.size = 2*idx;
3314         }
3315         __kmp_i_lock_table.next++;
3316         lck = KMP_GET_I_LOCK(idx);
3317         // Allocate a new base lock object
3318         lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3319         KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3320     }
3321 
3322     __kmp_release_lock(&__kmp_global_lock, gtid);
3323 
3324     lck->type = tag;
3325 
3326     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3327         *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3328     } else {
3329         *((kmp_indirect_lock_t **)user_lock) = lck;
3330     }
3331 
3332     return lck;
3333 }
3334 
3335 // User lock lookup for dynamically dispatched locks.
3336 static __forceinline
3337 kmp_indirect_lock_t *
3338 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3339 {
3340     if (__kmp_env_consistency_check) {
3341         kmp_indirect_lock_t *lck = NULL;
3342         if (user_lock == NULL) {
3343             KMP_FATAL(LockIsUninitialized, func);
3344         }
3345         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3346             kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3347             if (idx >= __kmp_i_lock_table.size) {
3348                 KMP_FATAL(LockIsUninitialized, func);
3349             }
3350             lck = KMP_GET_I_LOCK(idx);
3351         } else {
3352             lck = *((kmp_indirect_lock_t **)user_lock);
3353         }
3354         if (lck == NULL) {
3355             KMP_FATAL(LockIsUninitialized, func);
3356         }
3357         return lck;
3358     } else {
3359         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3360             return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3361         } else {
3362             return *((kmp_indirect_lock_t **)user_lock);
3363         }
3364     }
3365 }
3366 
3367 static void
3368 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3369 {
3370 #if KMP_USE_ADAPTIVE_LOCKS
3371     if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3372         KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3373         seq = lockseq_queuing;
3374     }
3375 #endif
3376 #if KMP_USE_TSX
3377     if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3378         seq = lockseq_queuing;
3379     }
3380 #endif
3381     kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3382     kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3383     KMP_I_LOCK_FUNC(l, init)(l->lock);
3384     KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
3385 }
3386 
3387 static void
3388 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3389 {
3390     kmp_uint32 gtid = __kmp_entry_gtid();
3391     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3392     KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3393     kmp_indirect_locktag_t tag = l->type;
3394 
3395     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3396 
3397     // Use the base lock's space to keep the pool chain.
3398     l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3399     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3400         l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3401     }
3402     __kmp_indirect_lock_pool[tag] = l;
3403 
3404     __kmp_release_lock(&__kmp_global_lock, gtid);
3405 }
3406 
3407 static void
3408 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3409 {
3410     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3411     KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3412 }
3413 
3414 static int
3415 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3416 {
3417     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3418     return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3419 }
3420 
3421 static int
3422 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3423 {
3424     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3425     return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3426 }
3427 
3428 static void
3429 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3430 {
3431     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3432     KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3433 }
3434 
3435 static int
3436 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3437 {
3438     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3439     return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3440 }
3441 
3442 static int
3443 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3444 {
3445     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3446     return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3447 }
3448 
3449 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3450 
3451 // This is used only in kmp_error.c when consistency checking is on.
3452 kmp_int32
3453 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3454 {
3455     switch (seq) {
3456         case lockseq_tas:
3457         case lockseq_nested_tas:
3458             return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3459 #if KMP_HAS_FUTEX
3460         case lockseq_futex:
3461         case lockseq_nested_futex:
3462             return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3463 #endif
3464         case lockseq_ticket:
3465         case lockseq_nested_ticket:
3466             return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3467         case lockseq_queuing:
3468         case lockseq_nested_queuing:
3469 #if KMP_USE_ADAPTIVE_LOCKS
3470         case lockseq_adaptive:
3471 #endif
3472             return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3473         case lockseq_drdpa:
3474         case lockseq_nested_drdpa:
3475             return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3476         default:
3477             return 0;
3478     }
3479 }
3480 
3481 // Initializes data for dynamic user locks.
3482 void
3483 __kmp_init_dynamic_user_locks()
3484 {
3485     // Initialize jump table for the lock functions
3486     if (__kmp_env_consistency_check) {
3487         __kmp_direct_set     = direct_set_check;
3488         __kmp_direct_unset   = direct_unset_check;
3489         __kmp_direct_test    = direct_test_check;
3490         __kmp_indirect_set   = indirect_set_check;
3491         __kmp_indirect_unset = indirect_unset_check;
3492         __kmp_indirect_test  = indirect_test_check;
3493     }
3494     else {
3495         __kmp_direct_set     = direct_set;
3496         __kmp_direct_unset   = direct_unset;
3497         __kmp_direct_test    = direct_test;
3498         __kmp_indirect_set   = indirect_set;
3499         __kmp_indirect_unset = indirect_unset;
3500         __kmp_indirect_test  = indirect_test;
3501     }
3502 
3503     // Initialize lock index table
3504     __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3505     __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3506     *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
3507                                   __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3508     __kmp_i_lock_table.next = 0;
3509 
3510     // Indirect lock size
3511     __kmp_indirect_lock_size[locktag_ticket]         = sizeof(kmp_ticket_lock_t);
3512     __kmp_indirect_lock_size[locktag_queuing]        = sizeof(kmp_queuing_lock_t);
3513 #if KMP_USE_ADAPTIVE_LOCKS
3514     __kmp_indirect_lock_size[locktag_adaptive]       = sizeof(kmp_adaptive_lock_t);
3515 #endif
3516     __kmp_indirect_lock_size[locktag_drdpa]          = sizeof(kmp_drdpa_lock_t);
3517 #if KMP_USE_TSX
3518     __kmp_indirect_lock_size[locktag_rtm]            = sizeof(kmp_queuing_lock_t);
3519 #endif
3520     __kmp_indirect_lock_size[locktag_nested_tas]     = sizeof(kmp_tas_lock_t);
3521 #if KMP_USE_FUTEX
3522     __kmp_indirect_lock_size[locktag_nested_futex]   = sizeof(kmp_futex_lock_t);
3523 #endif
3524     __kmp_indirect_lock_size[locktag_nested_ticket]  = sizeof(kmp_ticket_lock_t);
3525     __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3526     __kmp_indirect_lock_size[locktag_nested_drdpa]   = sizeof(kmp_drdpa_lock_t);
3527 
3528     // Initialize lock accessor/modifier
3529 #define fill_jumps(table, expand, sep) {            \
3530     table[locktag##sep##ticket]  = expand(ticket);  \
3531     table[locktag##sep##queuing] = expand(queuing); \
3532     table[locktag##sep##drdpa]   = expand(drdpa);   \
3533 }
3534 
3535 #if KMP_USE_ADAPTIVE_LOCKS
3536 # define fill_table(table, expand) {           \
3537     fill_jumps(table, expand, _);              \
3538     table[locktag_adaptive] = expand(queuing); \
3539     fill_jumps(table, expand, _nested_);       \
3540 }
3541 #else
3542 # define fill_table(table, expand) {           \
3543     fill_jumps(table, expand, _);              \
3544     fill_jumps(table, expand, _nested_);       \
3545 }
3546 #endif // KMP_USE_ADAPTIVE_LOCKS
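// For example, fill_table(__kmp_indirect_set_location, expand) below assigns
// __kmp_set_ticket_lock_location, __kmp_set_queuing_lock_location and
// __kmp_set_drdpa_lock_location to the matching locktag_* slots (plus the
// nested variants, and an adaptive entry that reuses the queuing accessors
// when KMP_USE_ADAPTIVE_LOCKS is on).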
3547 
3548 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3549     fill_table(__kmp_indirect_set_location, expand);
3550 #undef expand
3551 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3552     fill_table(__kmp_indirect_set_flags, expand);
3553 #undef expand
3554 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3555     fill_table(__kmp_indirect_get_location, expand);
3556 #undef expand
3557 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3558     fill_table(__kmp_indirect_get_flags, expand);
3559 #undef expand
3560 
3561     __kmp_init_user_locks = TRUE;
3562 }
3563 
3564 // Clean up the lock table.
3565 void
3566 __kmp_cleanup_indirect_user_locks()
3567 {
3568     kmp_lock_index_t i;
3569     int k;
3570 
3571     // Clean up locks in the pools first (they were already destroyed before going into the pools).
3572     for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3573         kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3574         while (l != NULL) {
3575             kmp_indirect_lock_t *ll = l;
3576             l = (kmp_indirect_lock_t *)l->lock->pool.next;
3577             KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
3578             __kmp_free(ll->lock);
3579             ll->lock = NULL;
3580         }
3581         __kmp_indirect_lock_pool[k] = NULL;
3582     }
3583     // Clean up the remaining undestroyed locks.
3584     for (i = 0; i < __kmp_i_lock_table.next; i++) {
3585         kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3586         if (l->lock != NULL) {
3587             // Locks not destroyed explicitly need to be destroyed here.
3588             KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3589             KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
3590             __kmp_free(l->lock);
3591         }
3592     }
3593     // Free the table
3594     for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3595         __kmp_free(__kmp_i_lock_table.table[i]);
3596     __kmp_free(__kmp_i_lock_table.table);
3597 
3598     __kmp_init_user_locks = FALSE;
3599 }
3600 
3601 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3602 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3603 
3604 #else // KMP_USE_DYNAMIC_LOCK
3605 
3606 /* ------------------------------------------------------------------------ */
3607 /* user locks
3608  *
3609  * They are implemented as a table of function pointers which are set to the
3610  * lock functions of the appropriate kind, once that has been determined.
3611  */
3612 
3613 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3614 
3615 size_t __kmp_base_user_lock_size = 0;
3616 size_t __kmp_user_lock_size = 0;
3617 
3618 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3619 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3620 
3621 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3622 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3623 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3624 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3625 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3626 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3627 
3628 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3629 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3630 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3631 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3632 
3633 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3634 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3635 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3636 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3637 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
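
// Binds the generic user-lock function pointers above to the implementation of
// the chosen lock kind.  The KMP_BIND_USER_LOCK* macros (see kmp_lock.h) bind
// the common entry points (acquire/release/test, etc.), with or without
// consistency checking; the owner, destroy, and location/flags accessors are
// set explicitly in each case below.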
3638 
3639 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3640 {
3641     switch ( user_lock_kind ) {
3642         case lk_default:
3643         default:
3644         KMP_ASSERT( 0 );
3645 
3646         case lk_tas: {
3647             __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3648             __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3649 
3650             __kmp_get_user_lock_owner_ =
3651               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3652               ( &__kmp_get_tas_lock_owner );
3653 
3654             if ( __kmp_env_consistency_check ) {
3655                 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3656                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3657             }
3658             else {
3659                 KMP_BIND_USER_LOCK(tas);
3660                 KMP_BIND_NESTED_USER_LOCK(tas);
3661             }
3662 
3663             __kmp_destroy_user_lock_ =
3664               ( void ( * )( kmp_user_lock_p ) )
3665               ( &__kmp_destroy_tas_lock );
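
             // TAS locks keep no "initialized" marker and no location/flags
             // storage, so these accessors are left unset (NULL).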
3666 
3667              __kmp_is_user_lock_initialized_ =
3668                ( int ( * )( kmp_user_lock_p ) ) NULL;
3669 
3670              __kmp_get_user_lock_location_ =
3671                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3672 
3673              __kmp_set_user_lock_location_ =
3674                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3675 
3676              __kmp_get_user_lock_flags_ =
3677                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3678 
3679              __kmp_set_user_lock_flags_ =
3680                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3681         }
3682         break;
3683 
3684 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3685 
3686         case lk_futex: {
3687             __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3688             __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3689 
3690             __kmp_get_user_lock_owner_ =
3691               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3692               ( &__kmp_get_futex_lock_owner );
3693 
3694             if ( __kmp_env_consistency_check ) {
3695                 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3696                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3697             }
3698             else {
3699                 KMP_BIND_USER_LOCK(futex);
3700                 KMP_BIND_NESTED_USER_LOCK(futex);
3701             }
3702 
3703             __kmp_destroy_user_lock_ =
3704               ( void ( * )( kmp_user_lock_p ) )
3705               ( &__kmp_destroy_futex_lock );
3706 
3707              __kmp_is_user_lock_initialized_ =
3708                ( int ( * )( kmp_user_lock_p ) ) NULL;
3709 
3710              __kmp_get_user_lock_location_ =
3711                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3712 
3713              __kmp_set_user_lock_location_ =
3714                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3715 
3716              __kmp_get_user_lock_flags_ =
3717                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3718 
3719              __kmp_set_user_lock_flags_ =
3720                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3721         }
3722         break;
3723 
3724 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3725 
3726         case lk_ticket: {
3727             __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3728             __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3729 
3730             __kmp_get_user_lock_owner_ =
3731               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3732               ( &__kmp_get_ticket_lock_owner );
3733 
3734             if ( __kmp_env_consistency_check ) {
3735                 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3736                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3737             }
3738             else {
3739                 KMP_BIND_USER_LOCK(ticket);
3740                 KMP_BIND_NESTED_USER_LOCK(ticket);
3741             }
3742 
3743             __kmp_destroy_user_lock_ =
3744               ( void ( * )( kmp_user_lock_p ) )
3745               ( &__kmp_destroy_ticket_lock );
3746 
3747              __kmp_is_user_lock_initialized_ =
3748                ( int ( * )( kmp_user_lock_p ) )
3749                ( &__kmp_is_ticket_lock_initialized );
3750 
3751              __kmp_get_user_lock_location_ =
3752                ( const ident_t * ( * )( kmp_user_lock_p ) )
3753                ( &__kmp_get_ticket_lock_location );
3754 
3755              __kmp_set_user_lock_location_ =
3756                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3757                ( &__kmp_set_ticket_lock_location );
3758 
3759              __kmp_get_user_lock_flags_ =
3760                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3761                ( &__kmp_get_ticket_lock_flags );
3762 
3763              __kmp_set_user_lock_flags_ =
3764                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3765                ( &__kmp_set_ticket_lock_flags );
3766         }
3767         break;
3768 
3769         case lk_queuing: {
3770             __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3771             __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3772 
3773             __kmp_get_user_lock_owner_ =
3774               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3775               ( &__kmp_get_queuing_lock_owner );
3776 
3777             if ( __kmp_env_consistency_check ) {
3778                 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3779                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3780             }
3781             else {
3782                 KMP_BIND_USER_LOCK(queuing);
3783                 KMP_BIND_NESTED_USER_LOCK(queuing);
3784             }
3785 
3786             __kmp_destroy_user_lock_ =
3787               ( void ( * )( kmp_user_lock_p ) )
3788               ( &__kmp_destroy_queuing_lock );
3789 
3790              __kmp_is_user_lock_initialized_ =
3791                ( int ( * )( kmp_user_lock_p ) )
3792                ( &__kmp_is_queuing_lock_initialized );
3793 
3794              __kmp_get_user_lock_location_ =
3795                ( const ident_t * ( * )( kmp_user_lock_p ) )
3796                ( &__kmp_get_queuing_lock_location );
3797 
3798              __kmp_set_user_lock_location_ =
3799                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3800                ( &__kmp_set_queuing_lock_location );
3801 
3802              __kmp_get_user_lock_flags_ =
3803                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3804                ( &__kmp_get_queuing_lock_flags );
3805 
3806              __kmp_set_user_lock_flags_ =
3807                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3808                ( &__kmp_set_queuing_lock_flags );
3809         }
3810         break;
3811 
3812 #if KMP_USE_ADAPTIVE_LOCKS
3813         case lk_adaptive: {
3814             __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3815             __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3816 
3817             __kmp_get_user_lock_owner_ =
3818               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3819               ( &__kmp_get_queuing_lock_owner );
3820 
3821             if ( __kmp_env_consistency_check ) {
3822                 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3823             }
3824             else {
3825                 KMP_BIND_USER_LOCK(adaptive);
3826             }
3827 
3828             __kmp_destroy_user_lock_ =
3829               ( void ( * )( kmp_user_lock_p ) )
3830               ( &__kmp_destroy_adaptive_lock );
3831 
3832             __kmp_is_user_lock_initialized_ =
3833               ( int ( * )( kmp_user_lock_p ) )
3834               ( &__kmp_is_queuing_lock_initialized );
3835 
3836             __kmp_get_user_lock_location_ =
3837               ( const ident_t * ( * )( kmp_user_lock_p ) )
3838               ( &__kmp_get_queuing_lock_location );
3839 
3840             __kmp_set_user_lock_location_ =
3841               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3842               ( &__kmp_set_queuing_lock_location );
3843 
3844             __kmp_get_user_lock_flags_ =
3845               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3846               ( &__kmp_get_queuing_lock_flags );
3847 
3848             __kmp_set_user_lock_flags_ =
3849               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3850               ( &__kmp_set_queuing_lock_flags );
3851 
3852         }
3853         break;
3854 #endif // KMP_USE_ADAPTIVE_LOCKS
3855 
3856         case lk_drdpa: {
3857             __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3858             __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3859 
3860             __kmp_get_user_lock_owner_ =
3861               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3862               ( &__kmp_get_drdpa_lock_owner );
3863 
3864             if ( __kmp_env_consistency_check ) {
3865                 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3866                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3867             }
3868             else {
3869                 KMP_BIND_USER_LOCK(drdpa);
3870                 KMP_BIND_NESTED_USER_LOCK(drdpa);
3871             }
3872 
3873             __kmp_destroy_user_lock_ =
3874               ( void ( * )( kmp_user_lock_p ) )
3875               ( &__kmp_destroy_drdpa_lock );
3876 
3877              __kmp_is_user_lock_initialized_ =
3878                ( int ( * )( kmp_user_lock_p ) )
3879                ( &__kmp_is_drdpa_lock_initialized );
3880 
3881              __kmp_get_user_lock_location_ =
3882                ( const ident_t * ( * )( kmp_user_lock_p ) )
3883                ( &__kmp_get_drdpa_lock_location );
3884 
3885              __kmp_set_user_lock_location_ =
3886                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3887                ( &__kmp_set_drdpa_lock_location );
3888 
3889              __kmp_get_user_lock_flags_ =
3890                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3891                ( &__kmp_get_drdpa_lock_flags );
3892 
3893              __kmp_set_user_lock_flags_ =
3894                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3895                ( &__kmp_set_drdpa_lock_flags );
3896         }
3897         break;
3898     }
3899 }
3900 
3901 
3902 // ----------------------------------------------------------------------------
3903 // User lock table & lock allocation
3904 
3905 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
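// Entry [0] of each lock table is reserved: after a reallocation it stores the
// pointer to the previous (smaller) table, so real locks start at index 1
// (see __kmp_lock_table_insert and __kmp_cleanup_user_locks).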
3906 kmp_user_lock_p __kmp_lock_pool = NULL;
3907 
3908 // Lock block-allocation support.
3909 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3910 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3911 
3912 static kmp_lock_index_t
3913 __kmp_lock_table_insert( kmp_user_lock_p lck )
3914 {
3915     // Assume that kmp_global_lock is held upon entry/exit.
3916     kmp_lock_index_t index;
3917     if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3918         kmp_lock_index_t size;
3919         kmp_user_lock_p *table;
3920         // Reallocate lock table.
3921         if ( __kmp_user_lock_table.allocated == 0 ) {
3922             size = 1024;
3923         }
3924         else {
3925             size = __kmp_user_lock_table.allocated * 2;
3926         }
3927         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3928         KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3929         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
            // We cannot free the previous table now, since it may be in use by other
            // threads. So save the pointer to the previous table in the first element of
            // the new table. All the tables are linked into a list this way, and can be
            // freed when the library shuts down.
3934         __kmp_user_lock_table.table = table;
3935         __kmp_user_lock_table.allocated = size;
3936     }
3937     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3938     index = __kmp_user_lock_table.used;
3939     __kmp_user_lock_table.table[ index ] = lck;
3940     ++ __kmp_user_lock_table.used;
3941     return index;
3942 }
3943 
3944 static kmp_user_lock_p
3945 __kmp_lock_block_allocate()
3946 {
3947     // Assume that kmp_global_lock is held upon entry/exit.
3948     static int last_index = 0;
3949     if ( ( last_index >= __kmp_num_locks_in_block )
3950       || ( __kmp_lock_blocks == NULL ) ) {
3951         // Restart the index.
3952         last_index = 0;
3953         // Need to allocate a new block.
3954         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3955         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3956         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
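        // Layout of the buffer: the locks come first, followed by the
        // kmp_block_of_locks header at offset space_for_locks; cleanup frees
        // only block_ptr->locks (see __kmp_cleanup_user_locks).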
3957         // Set up the new block.
3958         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3959         new_block->next_block = __kmp_lock_blocks;
3960         new_block->locks = (void *)buffer;
3961         // Publish the new block.
3962         KMP_MB();
3963         __kmp_lock_blocks = new_block;
3964     }
3965     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3966       [ last_index * __kmp_user_lock_size ] ) );
3967     last_index++;
3968     return ret;
3969 }
3970 
3971 //
3972 // Get memory for a lock. It may be freshly allocated memory or reused memory
3973 // from lock pool.
3974 //
3975 kmp_user_lock_p
3976 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3977   kmp_lock_flags_t flags )
3978 {
3979     kmp_user_lock_p lck;
3980     kmp_lock_index_t index;
3981     KMP_DEBUG_ASSERT( user_lock );
3982 
3983     __kmp_acquire_lock( &__kmp_global_lock, gtid );
3984 
3985     if ( __kmp_lock_pool == NULL ) {
3986         // Lock pool is empty. Allocate new memory.
3987         if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3988             lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3989         }
3990         else {
3991             lck = __kmp_lock_block_allocate();
3992         }
3993 
3994         // Insert lock in the table so that it can be freed in __kmp_cleanup,
3995         // and debugger has info on all allocated locks.
3996         index = __kmp_lock_table_insert( lck );
3997     }
3998     else {
3999         // Pick up lock from pool.
4000         lck = __kmp_lock_pool;
4001         index = __kmp_lock_pool->pool.index;
4002         __kmp_lock_pool = __kmp_lock_pool->pool.next;
4003     }
4004 
4005     //
4006     // We could potentially differentiate between nested and regular locks
4007     // here, and do the lock table lookup for regular locks only.
4008     //
4009     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4010         * ( (kmp_lock_index_t *) user_lock ) = index;
4011     }
4012     else {
4013         * ( (kmp_user_lock_p *) user_lock ) = lck;
4014     }
4015 
    // Mark the lock if it is a critical section lock.
4017     __kmp_set_user_lock_flags( lck, flags );
4018 
    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
4020 
4021     return lck;
4022 }
4023 
4024 // Put lock's memory to pool for reusing.
4025 void
4026 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
4027 {
4028     KMP_DEBUG_ASSERT( user_lock != NULL );
4029     KMP_DEBUG_ASSERT( lck != NULL );
4030 
4031     __kmp_acquire_lock( & __kmp_global_lock, gtid );
4032 
4033     lck->pool.next = __kmp_lock_pool;
4034     __kmp_lock_pool = lck;
4035     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4036         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4037         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4038         lck->pool.index = index;
4039     }
4040 
4041     __kmp_release_lock( & __kmp_global_lock, gtid );
4042 }
4043 
4044 kmp_user_lock_p
4045 __kmp_lookup_user_lock( void **user_lock, char const *func )
4046 {
4047     kmp_user_lock_p lck = NULL;
4048 
4049     if ( __kmp_env_consistency_check ) {
4050         if ( user_lock == NULL ) {
4051             KMP_FATAL( LockIsUninitialized, func );
4052         }
4053     }
4054 
4055     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4056         kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4057         if ( __kmp_env_consistency_check ) {
4058             if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4059                 KMP_FATAL( LockIsUninitialized, func );
4060             }
4061         }
4062         KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4063         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4064         lck = __kmp_user_lock_table.table[index];
4065     }
4066     else {
4067         lck = *( (kmp_user_lock_p *)user_lock );
4068     }
4069 
4070     if ( __kmp_env_consistency_check ) {
4071         if ( lck == NULL ) {
4072             KMP_FATAL( LockIsUninitialized, func );
4073         }
4074     }
4075 
4076     return lck;
4077 }
4078 
4079 void
4080 __kmp_cleanup_user_locks( void )
4081 {
4082     //
    // Reset lock pool. Do not worry about locks in the pool -- we will free
    // them when iterating through the lock table (it includes all the locks,
    // dead or alive).
4086     //
4087     __kmp_lock_pool = NULL;
4088 
4089 #define IS_CRITICAL(lck) \
4090         ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4091         ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4092 
4093     //
4094     // Loop through lock table, free all locks.
4095     //
4096     // Do not free item [0], it is reserved for lock tables list.
4097     //
4098     // FIXME - we are iterating through a list of (pointers to) objects of
4099     // type union kmp_user_lock, but we have no way of knowing whether the
4100     // base type is currently "pool" or whatever the global user lock type
4101     // is.
4102     //
4103     // We are relying on the fact that for all of the user lock types
4104     // (except "tas"), the first field in the lock struct is the "initialized"
4105     // field, which is set to the address of the lock object itself when
4106     // the lock is initialized.  When the union is of type "pool", the
4107     // first field is a pointer to the next object in the free list, which
4108     // will not be the same address as the object itself.
4109     //
4110     // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4111     // will fail for "pool" objects on the free list.  This must happen as
4112     // the "location" field of real user locks overlaps the "index" field
4113     // of "pool" objects.
4114     //
4115     // It would be better to run through the free list, and remove all "pool"
4116     // objects from the lock table before executing this loop.  However,
4117     // "pool" objects do not always have their index field set (only on
4118     // lin_32e), and I don't want to search the lock table for the address
4119     // of every "pool" object on the free list.
4120     //
4121     while ( __kmp_user_lock_table.used > 1 ) {
4122         const ident *loc;
4123 
4124         //
4125         // reduce __kmp_user_lock_table.used before freeing the lock,
4126         // so that state of locks is consistent
4127         //
4128         kmp_user_lock_p lck = __kmp_user_lock_table.table[
4129           --__kmp_user_lock_table.used ];
4130 
4131         if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4132           ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4133             //
4134             // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4135             // initialized AND it is NOT a critical section (user is not
4136             // responsible for destroying criticals) AND we know source
4137             // location to report.
4138             //
4139             if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4140               ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4141               ( loc->psource != NULL ) ) {
4142                 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4143                 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4144                 __kmp_str_loc_free( &str_loc);
4145             }
4146 
4147 #ifdef KMP_DEBUG
4148             if ( IS_CRITICAL( lck ) ) {
4149                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4150             }
4151             else {
4152                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4153             }
4154 #endif // KMP_DEBUG
4155 
4156             //
4157             // Cleanup internal lock dynamic resources
4158             // (for drdpa locks particularly).
4159             //
4160             __kmp_destroy_user_lock( lck );
4161         }
4162 
4163         //
4164         // Free the lock if block allocation of locks is not used.
4165         //
4166         if ( __kmp_lock_blocks == NULL ) {
4167             __kmp_free( lck );
4168         }
4169     }
4170 
4171 #undef IS_CRITICAL
4172 
4173     //
4174     // delete lock table(s).
4175     //
4176     kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4177     __kmp_user_lock_table.table = NULL;
4178     __kmp_user_lock_table.allocated = 0;
4179 
4180     while ( table_ptr != NULL ) {
4181         //
4182         // In the first element we saved the pointer to the previous
4183         // (smaller) lock table.
4184         //
4185         kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4186         __kmp_free( table_ptr );
4187         table_ptr = next;
4188     }
4189 
4190     //
4191     // Free buffers allocated for blocks of locks.
4192     //
4193     kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4194     __kmp_lock_blocks = NULL;
4195 
4196     while ( block_ptr != NULL ) {
4197         kmp_block_of_locks_t *next = block_ptr->next_block;
4198         __kmp_free( block_ptr->locks );
4199         //
4200         // *block_ptr itself was allocated at the end of the locks vector.
4201         //
        block_ptr = next;
4203     }
4204 
4205     TCW_4(__kmp_init_user_locks, FALSE);
4206 }
4207 
4208 #endif // KMP_USE_DYNAMIC_LOCK
4209