1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 
18 #include "kmp.h"
19 #include "kmp_itt.h"
20 #include "kmp_i18n.h"
21 #include "kmp_lock.h"
22 #include "kmp_io.h"
23 
24 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
25 # include <unistd.h>
26 # include <sys/syscall.h>
27 // We should really include <futex.h>, but that causes compatibility problems on different
28 // Linux* OS distributions that either require that you include (or break when you try to include)
29 // <pci/types.h>.
// Since all we need are the two macros below (which are part of the kernel ABI, so can't change),
// we just define the constants here and don't include <futex.h>.
32 # ifndef FUTEX_WAIT
33 #  define FUTEX_WAIT    0
34 # endif
35 # ifndef FUTEX_WAKE
36 #  define FUTEX_WAKE    1
37 # endif
38 #endif
39 
40 /* Implement spin locks for internal library use.             */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
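/* The ticket variant of the bakery scheme used by kmp_ticket_lock_t further below
   hands each arriving thread a ticket and serves tickets in order.  A minimal,
   hypothetical sketch in C++11 atomics, for illustration only -- the runtime itself
   uses the KMP_* atomic macros and KMP_WAIT_YIELD() rather than std::atomic:

       #include <atomic>
       #include <cstdint>

       struct ticket_lock_sketch {
           std::atomic<std::uint32_t> next_ticket{0};   // next ticket to hand out
           std::atomic<std::uint32_t> now_serving{0};   // ticket currently allowed in

           void acquire() {
               // take a ticket, then spin until that ticket is being served
               std::uint32_t my = next_ticket.fetch_add(1, std::memory_order_relaxed);
               while (now_serving.load(std::memory_order_acquire) != my)
                   ;   // the real code yields / backs off here
           }
           void release() {
               // serve the next ticket; unsigned wrap-around is benign, which is
               // exactly the property __kmp_validate_locks() asserts below
               now_serving.fetch_add(1, std::memory_order_release);
           }
       };
*/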
42 
43 void
44 __kmp_validate_locks( void )
45 {
46     int i;
47     kmp_uint32  x, y;
48 
    /* Check to make sure unsigned arithmetic wraps properly */
50     x = ~((kmp_uint32) 0) - 2;
51     y = x - 2;
52 
53     for (i = 0; i < 8; ++i, ++x, ++y) {
54         kmp_uint32 z = (x - y);
55         KMP_ASSERT( z == 2 );
56     }
57 
58     KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
59 }
60 
61 
62 /* ------------------------------------------------------------------------ */
63 /* test and set locks */
64 
65 //
66 // For the non-nested locks, we can only assume that the first 4 bytes were
67 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
68 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
69 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
70 //
71 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
72 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
73 //
74 
75 static kmp_int32
76 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
77 {
78     return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
79 }
80 
81 static inline bool
82 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
83 {
84     return lck->lk.depth_locked != -1;
85 }
86 
87 __forceinline static int
88 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
89 {
90     KMP_MB();
91 
92 #ifdef USE_LOCK_PROFILE
93     kmp_uint32 curr = TCR_4( lck->lk.poll );
94     if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
95         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
96     /* else __kmp_printf( "." );*/
97 #endif /* USE_LOCK_PROFILE */
98 
99     if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
100       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
101         KMP_FSYNC_ACQUIRED(lck);
102         return KMP_LOCK_ACQUIRED_FIRST;
103     }
104 
105     kmp_uint32 spins;
106     KMP_FSYNC_PREPARE( lck );
107     KMP_INIT_YIELD( spins );
108     if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
109       __kmp_xproc ) ) {
110         KMP_YIELD( TRUE );
111     }
112     else {
113         KMP_YIELD_SPIN( spins );
114     }
115 
116     while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
117       ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
118         //
119         // FIXME - use exponential backoff here
120         //
121         if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
122           __kmp_xproc ) ) {
123             KMP_YIELD( TRUE );
124         }
125         else {
126             KMP_YIELD_SPIN( spins );
127         }
128     }
129     KMP_FSYNC_ACQUIRED( lck );
130     return KMP_LOCK_ACQUIRED_FIRST;
131 }
132 
133 int
134 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
135 {
136     return __kmp_acquire_tas_lock_timed_template( lck, gtid );
137 }
138 
139 static int
140 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
141 {
142     char const * const func = "omp_set_lock";
143     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
144       && __kmp_is_tas_lock_nestable( lck ) ) {
145         KMP_FATAL( LockNestableUsedAsSimple, func );
146     }
147     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
148         KMP_FATAL( LockIsAlreadyOwned, func );
149     }
150     return __kmp_acquire_tas_lock( lck, gtid );
151 }
152 
153 int
154 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
155 {
156     if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
157       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
158         KMP_FSYNC_ACQUIRED( lck );
159         return TRUE;
160     }
161     return FALSE;
162 }
163 
164 static int
165 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
166 {
167     char const * const func = "omp_test_lock";
168     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
169       && __kmp_is_tas_lock_nestable( lck ) ) {
170         KMP_FATAL( LockNestableUsedAsSimple, func );
171     }
172     return __kmp_test_tas_lock( lck, gtid );
173 }
174 
175 int
176 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
177 {
178     KMP_MB();       /* Flush all pending memory write invalidates.  */
179 
180     KMP_FSYNC_RELEASING(lck);
181     KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
182     KMP_MB();       /* Flush all pending memory write invalidates.  */
183 
184     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
185       __kmp_xproc ) );
186     return KMP_LOCK_RELEASED;
187 }
188 
189 static int
190 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
191 {
192     char const * const func = "omp_unset_lock";
193     KMP_MB();  /* in case another processor initialized lock */
194     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
195       && __kmp_is_tas_lock_nestable( lck ) ) {
196         KMP_FATAL( LockNestableUsedAsSimple, func );
197     }
198     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
199         KMP_FATAL( LockUnsettingFree, func );
200     }
201     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
202       && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
203         KMP_FATAL( LockUnsettingSetByAnother, func );
204     }
205     return __kmp_release_tas_lock( lck, gtid );
206 }
207 
208 void
209 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
210 {
211     TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) );
212 }
213 
214 static void
215 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
216 {
217     __kmp_init_tas_lock( lck );
218 }
219 
220 void
221 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
222 {
223     lck->lk.poll = 0;
224 }
225 
226 static void
227 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
228 {
229     char const * const func = "omp_destroy_lock";
230     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
231       && __kmp_is_tas_lock_nestable( lck ) ) {
232         KMP_FATAL( LockNestableUsedAsSimple, func );
233     }
234     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
235         KMP_FATAL( LockStillOwned, func );
236     }
237     __kmp_destroy_tas_lock( lck );
238 }
239 
240 
241 //
242 // nested test and set locks
243 //
244 
245 int
246 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
247 {
248     KMP_DEBUG_ASSERT( gtid >= 0 );
249 
250     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
251         lck->lk.depth_locked += 1;
252         return KMP_LOCK_ACQUIRED_NEXT;
253     }
254     else {
255         __kmp_acquire_tas_lock_timed_template( lck, gtid );
256         lck->lk.depth_locked = 1;
257         return KMP_LOCK_ACQUIRED_FIRST;
258     }
259 }
260 
261 static int
262 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
263 {
264     char const * const func = "omp_set_nest_lock";
265     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
266         KMP_FATAL( LockSimpleUsedAsNestable, func );
267     }
268     return __kmp_acquire_nested_tas_lock( lck, gtid );
269 }
270 
271 int
272 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
273 {
274     int retval;
275 
276     KMP_DEBUG_ASSERT( gtid >= 0 );
277 
278     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
279         retval = ++lck->lk.depth_locked;
280     }
281     else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
282         retval = 0;
283     }
284     else {
285         KMP_MB();
286         retval = lck->lk.depth_locked = 1;
287     }
288     return retval;
289 }
290 
291 static int
292 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
293 {
294     char const * const func = "omp_test_nest_lock";
295     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
296         KMP_FATAL( LockSimpleUsedAsNestable, func );
297     }
298     return __kmp_test_nested_tas_lock( lck, gtid );
299 }
300 
301 int
302 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
303 {
304     KMP_DEBUG_ASSERT( gtid >= 0 );
305 
306     KMP_MB();
307     if ( --(lck->lk.depth_locked) == 0 ) {
308         __kmp_release_tas_lock( lck, gtid );
309         return KMP_LOCK_RELEASED;
310     }
311     return KMP_LOCK_STILL_HELD;
312 }
313 
314 static int
315 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
316 {
317     char const * const func = "omp_unset_nest_lock";
318     KMP_MB();  /* in case another processor initialized lock */
319     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
320         KMP_FATAL( LockSimpleUsedAsNestable, func );
321     }
322     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
323         KMP_FATAL( LockUnsettingFree, func );
324     }
325     if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
326         KMP_FATAL( LockUnsettingSetByAnother, func );
327     }
328     return __kmp_release_nested_tas_lock( lck, gtid );
329 }
330 
331 void
332 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
333 {
334     __kmp_init_tas_lock( lck );
335     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
336 }
337 
338 static void
339 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
340 {
341     __kmp_init_nested_tas_lock( lck );
342 }
343 
344 void
345 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
346 {
347     __kmp_destroy_tas_lock( lck );
348     lck->lk.depth_locked = 0;
349 }
350 
351 static void
352 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
353 {
354     char const * const func = "omp_destroy_nest_lock";
355     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
356         KMP_FATAL( LockSimpleUsedAsNestable, func );
357     }
358     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
359         KMP_FATAL( LockStillOwned, func );
360     }
361     __kmp_destroy_nested_tas_lock( lck );
362 }
363 
364 
365 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
366 
367 /* ------------------------------------------------------------------------ */
368 /* futex locks */
369 
370 // futex locks are really just test and set locks, with a different method
371 // of handling contention.  They take the same amount of space as test and
372 // set locks, and are allocated the same way (i.e. use the area allocated by
373 // the compiler for non-nested locks / allocate nested locks on the heap).
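//
// The poll word is encoded as: 0 = free, (gtid+1) << 1 = held with no sleeping
// waiters, and the low bit set = held with at least one waiter parked in the
// kernel, so the releaser must issue a FUTEX_WAKE (the DYNA_LOCK_* macros may
// add further tag bits).  A hypothetical sketch of the two kernel calls
// involved -- the helper names are illustrative only; the real code below
// invokes syscall(__NR_futex, ...) inline:
//
//     // sleep until *addr no longer holds the value 'seen'; returns at once
//     // if it already changed, which is what makes the race with the
//     // releasing thread benign
//     static void futex_wait_sketch( volatile kmp_int32 *addr, kmp_int32 seen )
//     {
//         syscall( __NR_futex, addr, FUTEX_WAIT, seen, NULL, NULL, 0 );
//     }
//
//     // wake at most one thread parked on addr
//     static void futex_wake_one_sketch( volatile kmp_int32 *addr )
//     {
//         syscall( __NR_futex, addr, FUTEX_WAKE, 1, NULL, NULL, 0 );
//     }
//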
374 
375 static kmp_int32
376 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
377 {
378     return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
379 }
380 
381 static inline bool
382 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
383 {
384     return lck->lk.depth_locked != -1;
385 }
386 
387 __forceinline static int
388 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
389 {
390     kmp_int32 gtid_code = ( gtid + 1 ) << 1;
391 
392     KMP_MB();
393 
394 #ifdef USE_LOCK_PROFILE
395     kmp_uint32 curr = TCR_4( lck->lk.poll );
396     if ( ( curr != 0 ) && ( curr != gtid_code ) )
397         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
398     /* else __kmp_printf( "." );*/
399 #endif /* USE_LOCK_PROFILE */
400 
401     KMP_FSYNC_PREPARE( lck );
402     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
403       lck, lck->lk.poll, gtid ) );
404 
405     kmp_int32 poll_val;
406 
407     while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex),
408              DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) {
409 
410         kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;
411         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
412            lck, gtid, poll_val, cond ) );
413 
414         //
415         // NOTE: if you try to use the following condition for this branch
416         //
417         // if ( poll_val & 1 == 0 )
418         //
419         // Then the 12.0 compiler has a bug where the following block will
420         // always be skipped, regardless of the value of the LSB of poll_val.
421         //
422         if ( ! cond ) {
423             //
424             // Try to set the lsb in the poll to indicate to the owner
425             // thread that they need to wake this thread up.
426             //
427             if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) {
428                 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
429                   lck, lck->lk.poll, gtid ) );
430                 continue;
431             }
432             poll_val |= DYNA_LOCK_BUSY(1, futex);
433 
434             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
435               lck, lck->lk.poll, gtid ) );
436         }
437 
438         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
439            lck, gtid, poll_val ) );
440 
441         kmp_int32 rc;
442         if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
443           poll_val, NULL, NULL, 0 ) ) != 0 ) {
444             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
445                lck, gtid, poll_val, rc, errno ) );
446             continue;
447         }
448 
449         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
450            lck, gtid, poll_val ) );
451         //
452         // This thread has now done a successful futex wait call and was
453         // entered on the OS futex queue.  We must now perform a futex
454         // wake call when releasing the lock, as we have no idea how many
455         // other threads are in the queue.
456         //
457         gtid_code |= 1;
458     }
459 
460     KMP_FSYNC_ACQUIRED( lck );
461     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
462       lck, lck->lk.poll, gtid ) );
463     return KMP_LOCK_ACQUIRED_FIRST;
464 }
465 
466 int
467 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
468 {
469    return __kmp_acquire_futex_lock_timed_template( lck, gtid );
470 }
471 
472 static int
473 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
474 {
475     char const * const func = "omp_set_lock";
476     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
477       && __kmp_is_futex_lock_nestable( lck ) ) {
478         KMP_FATAL( LockNestableUsedAsSimple, func );
479     }
480     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
481         KMP_FATAL( LockIsAlreadyOwned, func );
482     }
483     return __kmp_acquire_futex_lock( lck, gtid );
484 }
485 
486 int
487 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
488 {
489     if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
490         KMP_FSYNC_ACQUIRED( lck );
491         return TRUE;
492     }
493     return FALSE;
494 }
495 
496 static int
497 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
498 {
499     char const * const func = "omp_test_lock";
500     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
501       && __kmp_is_futex_lock_nestable( lck ) ) {
502         KMP_FATAL( LockNestableUsedAsSimple, func );
503     }
504     return __kmp_test_futex_lock( lck, gtid );
505 }
506 
507 int
508 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
509 {
510     KMP_MB();       /* Flush all pending memory write invalidates.  */
511 
512     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
513       lck, lck->lk.poll, gtid ) );
514 
515     KMP_FSYNC_RELEASING(lck);
516 
517     kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) );
518 
519     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
520        lck, gtid, poll_val ) );
521 
522     if ( DYNA_LOCK_STRIP(poll_val) & 1 ) {
523         KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
524            lck, gtid ) );
525         syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 );
526     }
527 
528     KMP_MB();       /* Flush all pending memory write invalidates.  */
529 
530     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
531       lck, lck->lk.poll, gtid ) );
532 
533     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
534       __kmp_xproc ) );
535     return KMP_LOCK_RELEASED;
536 }
537 
538 static int
539 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
540 {
541     char const * const func = "omp_unset_lock";
542     KMP_MB();  /* in case another processor initialized lock */
543     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
544       && __kmp_is_futex_lock_nestable( lck ) ) {
545         KMP_FATAL( LockNestableUsedAsSimple, func );
546     }
547     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
548         KMP_FATAL( LockUnsettingFree, func );
549     }
550     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
551       && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
552         KMP_FATAL( LockUnsettingSetByAnother, func );
553     }
554     return __kmp_release_futex_lock( lck, gtid );
555 }
556 
557 void
558 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
559 {
560     TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) );
561 }
562 
563 static void
564 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
565 {
566     __kmp_init_futex_lock( lck );
567 }
568 
569 void
570 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
571 {
572     lck->lk.poll = 0;
573 }
574 
575 static void
576 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
577 {
578     char const * const func = "omp_destroy_lock";
579     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
580       && __kmp_is_futex_lock_nestable( lck ) ) {
581         KMP_FATAL( LockNestableUsedAsSimple, func );
582     }
583     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
584         KMP_FATAL( LockStillOwned, func );
585     }
586     __kmp_destroy_futex_lock( lck );
587 }
588 
589 
590 //
591 // nested futex locks
592 //
593 
594 int
595 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
596 {
597     KMP_DEBUG_ASSERT( gtid >= 0 );
598 
599     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
600         lck->lk.depth_locked += 1;
601         return KMP_LOCK_ACQUIRED_NEXT;
602     }
603     else {
604         __kmp_acquire_futex_lock_timed_template( lck, gtid );
605         lck->lk.depth_locked = 1;
606         return KMP_LOCK_ACQUIRED_FIRST;
607     }
608 }
609 
610 static int
611 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
612 {
613     char const * const func = "omp_set_nest_lock";
614     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
615         KMP_FATAL( LockSimpleUsedAsNestable, func );
616     }
617     return __kmp_acquire_nested_futex_lock( lck, gtid );
618 }
619 
620 int
621 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
622 {
623     int retval;
624 
625     KMP_DEBUG_ASSERT( gtid >= 0 );
626 
627     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
628         retval = ++lck->lk.depth_locked;
629     }
630     else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
631         retval = 0;
632     }
633     else {
634         KMP_MB();
635         retval = lck->lk.depth_locked = 1;
636     }
637     return retval;
638 }
639 
640 static int
641 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
642 {
643     char const * const func = "omp_test_nest_lock";
644     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
645         KMP_FATAL( LockSimpleUsedAsNestable, func );
646     }
647     return __kmp_test_nested_futex_lock( lck, gtid );
648 }
649 
650 int
651 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
652 {
653     KMP_DEBUG_ASSERT( gtid >= 0 );
654 
655     KMP_MB();
656     if ( --(lck->lk.depth_locked) == 0 ) {
657         __kmp_release_futex_lock( lck, gtid );
658         return KMP_LOCK_RELEASED;
659     }
660     return KMP_LOCK_STILL_HELD;
661 }
662 
663 static int
664 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
665 {
666     char const * const func = "omp_unset_nest_lock";
667     KMP_MB();  /* in case another processor initialized lock */
668     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
669         KMP_FATAL( LockSimpleUsedAsNestable, func );
670     }
671     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
672         KMP_FATAL( LockUnsettingFree, func );
673     }
674     if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
675         KMP_FATAL( LockUnsettingSetByAnother, func );
676     }
677     return __kmp_release_nested_futex_lock( lck, gtid );
678 }
679 
680 void
681 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
682 {
683     __kmp_init_futex_lock( lck );
684     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
685 }
686 
687 static void
688 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
689 {
690     __kmp_init_nested_futex_lock( lck );
691 }
692 
693 void
694 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
695 {
696     __kmp_destroy_futex_lock( lck );
697     lck->lk.depth_locked = 0;
698 }
699 
700 static void
701 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
702 {
703     char const * const func = "omp_destroy_nest_lock";
704     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
705         KMP_FATAL( LockSimpleUsedAsNestable, func );
706     }
707     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
708         KMP_FATAL( LockStillOwned, func );
709     }
710     __kmp_destroy_nested_futex_lock( lck );
711 }
712 
#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
714 
715 
716 /* ------------------------------------------------------------------------ */
717 /* ticket (bakery) locks */
718 
719 static kmp_int32
720 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
721 {
722     return TCR_4( lck->lk.owner_id ) - 1;
723 }
724 
725 static inline bool
726 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
727 {
728     return lck->lk.depth_locked != -1;
729 }
730 
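//
// __kmp_bakery_check() is the spin predicate handed to KMP_WAIT_YIELD() in the
// acquire path below: 'value' is the lock's now_serving counter and 'checker'
// is the caller's ticket.  It returns TRUE once the caller's ticket is being
// served; otherwise the empty for loop burns a number of iterations roughly
// proportional to how many tickets are still ahead, as a crude backoff.
//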
731 static kmp_uint32
732 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
733 {
734     register kmp_uint32 pause;
735 
736     if (value == checker) {
737         return TRUE;
738     }
739     for (pause = checker - value; pause != 0; --pause);
740     return FALSE;
741 }
742 
743 __forceinline static int
744 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
745 {
746     kmp_uint32 my_ticket;
747     KMP_MB();
748 
749     my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
750 
751 #ifdef USE_LOCK_PROFILE
752     if ( TCR_4( lck->lk.now_serving ) != my_ticket )
753         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
754     /* else __kmp_printf( "." );*/
755 #endif /* USE_LOCK_PROFILE */
756 
757     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
758         KMP_FSYNC_ACQUIRED(lck);
759         return KMP_LOCK_ACQUIRED_FIRST;
760     }
761     KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
762     KMP_FSYNC_ACQUIRED(lck);
763     return KMP_LOCK_ACQUIRED_FIRST;
764 }
765 
766 int
767 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
768 {
769     return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
770 }
771 
772 static int
773 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
774 {
775     char const * const func = "omp_set_lock";
776     if ( lck->lk.initialized != lck ) {
777         KMP_FATAL( LockIsUninitialized, func );
778     }
779     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
780         KMP_FATAL( LockNestableUsedAsSimple, func );
781     }
782     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
783         KMP_FATAL( LockIsAlreadyOwned, func );
784     }
785 
786     __kmp_acquire_ticket_lock( lck, gtid );
787 
788     lck->lk.owner_id = gtid + 1;
789     return KMP_LOCK_ACQUIRED_FIRST;
790 }
791 
792 int
793 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
794 {
795     kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
796     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
797         kmp_uint32 next_ticket = my_ticket + 1;
798         if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
799           my_ticket, next_ticket ) ) {
800             KMP_FSYNC_ACQUIRED( lck );
801             return TRUE;
802         }
803     }
804     return FALSE;
805 }
806 
807 static int
808 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
809 {
810     char const * const func = "omp_test_lock";
811     if ( lck->lk.initialized != lck ) {
812         KMP_FATAL( LockIsUninitialized, func );
813     }
814     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
815         KMP_FATAL( LockNestableUsedAsSimple, func );
816     }
817 
818     int retval = __kmp_test_ticket_lock( lck, gtid );
819 
820     if ( retval ) {
821         lck->lk.owner_id = gtid + 1;
822     }
823     return retval;
824 }
825 
826 int
827 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
828 {
829     kmp_uint32  distance;
830 
831     KMP_MB();       /* Flush all pending memory write invalidates.  */
832 
833     KMP_FSYNC_RELEASING(lck);
834     distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
835 
836     KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
837 
838     KMP_MB();       /* Flush all pending memory write invalidates.  */
839 
840     KMP_YIELD( distance
841       > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
842     return KMP_LOCK_RELEASED;
843 }
844 
845 static int
846 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
847 {
848     char const * const func = "omp_unset_lock";
849     KMP_MB();  /* in case another processor initialized lock */
850     if ( lck->lk.initialized != lck ) {
851         KMP_FATAL( LockIsUninitialized, func );
852     }
853     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
854         KMP_FATAL( LockNestableUsedAsSimple, func );
855     }
856     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
857         KMP_FATAL( LockUnsettingFree, func );
858     }
859     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
860       && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
861         KMP_FATAL( LockUnsettingSetByAnother, func );
862     }
863     lck->lk.owner_id = 0;
864     return __kmp_release_ticket_lock( lck, gtid );
865 }
866 
867 void
868 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
869 {
870     lck->lk.location = NULL;
871     TCW_4( lck->lk.next_ticket, 0 );
872     TCW_4( lck->lk.now_serving, 0 );
873     lck->lk.owner_id = 0;      // no thread owns the lock.
874     lck->lk.depth_locked = -1; // -1 => not a nested lock.
875     lck->lk.initialized = (kmp_ticket_lock *)lck;
876 }
877 
878 static void
879 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
880 {
881     __kmp_init_ticket_lock( lck );
882 }
883 
884 void
885 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
886 {
887     lck->lk.initialized = NULL;
888     lck->lk.location    = NULL;
889     lck->lk.next_ticket = 0;
890     lck->lk.now_serving = 0;
891     lck->lk.owner_id = 0;
892     lck->lk.depth_locked = -1;
893 }
894 
895 static void
896 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
897 {
898     char const * const func = "omp_destroy_lock";
899     if ( lck->lk.initialized != lck ) {
900         KMP_FATAL( LockIsUninitialized, func );
901     }
902     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
903         KMP_FATAL( LockNestableUsedAsSimple, func );
904     }
905     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
906         KMP_FATAL( LockStillOwned, func );
907     }
908     __kmp_destroy_ticket_lock( lck );
909 }
910 
911 
912 //
913 // nested ticket locks
914 //
915 
916 int
917 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
918 {
919     KMP_DEBUG_ASSERT( gtid >= 0 );
920 
921     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
922         lck->lk.depth_locked += 1;
923         return KMP_LOCK_ACQUIRED_NEXT;
924     }
925     else {
926         __kmp_acquire_ticket_lock_timed_template( lck, gtid );
927         KMP_MB();
928         lck->lk.depth_locked = 1;
929         KMP_MB();
930         lck->lk.owner_id = gtid + 1;
931         return KMP_LOCK_ACQUIRED_FIRST;
932     }
933 }
934 
935 static int
936 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
937 {
938     char const * const func = "omp_set_nest_lock";
939     if ( lck->lk.initialized != lck ) {
940         KMP_FATAL( LockIsUninitialized, func );
941     }
942     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
943         KMP_FATAL( LockSimpleUsedAsNestable, func );
944     }
945     return __kmp_acquire_nested_ticket_lock( lck, gtid );
946 }
947 
948 int
949 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
950 {
951     int retval;
952 
953     KMP_DEBUG_ASSERT( gtid >= 0 );
954 
955     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
956         retval = ++lck->lk.depth_locked;
957     }
958     else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
959         retval = 0;
960     }
961     else {
962         KMP_MB();
963         retval = lck->lk.depth_locked = 1;
964         KMP_MB();
965         lck->lk.owner_id = gtid + 1;
966     }
967     return retval;
968 }
969 
970 static int
971 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
972   kmp_int32 gtid )
973 {
974     char const * const func = "omp_test_nest_lock";
975     if ( lck->lk.initialized != lck ) {
976         KMP_FATAL( LockIsUninitialized, func );
977     }
978     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
979         KMP_FATAL( LockSimpleUsedAsNestable, func );
980     }
981     return __kmp_test_nested_ticket_lock( lck, gtid );
982 }
983 
984 int
985 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
986 {
987     KMP_DEBUG_ASSERT( gtid >= 0 );
988 
989     KMP_MB();
990     if ( --(lck->lk.depth_locked) == 0 ) {
991         KMP_MB();
992         lck->lk.owner_id = 0;
993         __kmp_release_ticket_lock( lck, gtid );
994         return KMP_LOCK_RELEASED;
995     }
996     return KMP_LOCK_STILL_HELD;
997 }
998 
999 static int
1000 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1001 {
1002     char const * const func = "omp_unset_nest_lock";
1003     KMP_MB();  /* in case another processor initialized lock */
1004     if ( lck->lk.initialized != lck ) {
1005         KMP_FATAL( LockIsUninitialized, func );
1006     }
1007     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1008         KMP_FATAL( LockSimpleUsedAsNestable, func );
1009     }
1010     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1011         KMP_FATAL( LockUnsettingFree, func );
1012     }
1013     if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1014         KMP_FATAL( LockUnsettingSetByAnother, func );
1015     }
1016     return __kmp_release_nested_ticket_lock( lck, gtid );
1017 }
1018 
1019 void
1020 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1021 {
1022     __kmp_init_ticket_lock( lck );
1023     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1024 }
1025 
1026 static void
1027 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1028 {
1029     __kmp_init_nested_ticket_lock( lck );
1030 }
1031 
1032 void
1033 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1034 {
1035     __kmp_destroy_ticket_lock( lck );
1036     lck->lk.depth_locked = 0;
1037 }
1038 
1039 static void
1040 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1041 {
1042     char const * const func = "omp_destroy_nest_lock";
1043     if ( lck->lk.initialized != lck ) {
1044         KMP_FATAL( LockIsUninitialized, func );
1045     }
1046     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1047         KMP_FATAL( LockSimpleUsedAsNestable, func );
1048     }
1049     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1050         KMP_FATAL( LockStillOwned, func );
1051     }
1052     __kmp_destroy_nested_ticket_lock( lck );
1053 }
1054 
1055 
1056 //
1057 // access functions to fields which don't exist for all lock kinds.
1058 //
1059 
1060 static int
1061 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1062 {
1063     return lck == lck->lk.initialized;
1064 }
1065 
1066 static const ident_t *
1067 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1068 {
1069     return lck->lk.location;
1070 }
1071 
1072 static void
1073 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1074 {
1075     lck->lk.location = loc;
1076 }
1077 
1078 static kmp_lock_flags_t
1079 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1080 {
1081     return lck->lk.flags;
1082 }
1083 
1084 static void
1085 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1086 {
1087     lck->lk.flags = flags;
1088 }
1089 
1090 /* ------------------------------------------------------------------------ */
1091 /* queuing locks */
1092 
1093 /*
1094  * First the states
1095  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1096  *   UINT_MAX or -1, 0  means lock is held, nobody on queue
1097  *                h, h  means lock is held or about to transition, 1 element on queue
1098  *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1099  *
1100  * Now the transitions
1101  *    Acquire(0,0)  = -1 ,0
1102  *    Release(0,0)  = Error
1103  *    Acquire(-1,0) =  h ,h    h > 0
1104  *    Release(-1,0) =  0 ,0
1105  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1106  *    Release(h,h)  = -1 ,0    h > 0
1107  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1108  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1109  *
1110  * And pictorially
1111  *
1112  *
1113  *          +-----+
1114  *          | 0, 0|------- release -------> Error
1115  *          +-----+
1116  *            |  ^
1117  *     acquire|  |release
1118  *            |  |
1119  *            |  |
1120  *            v  |
1121  *          +-----+
1122  *          |-1, 0|
1123  *          +-----+
1124  *            |  ^
1125  *     acquire|  |release
1126  *            |  |
1127  *            |  |
1128  *            v  |
1129  *          +-----+
1130  *          | h, h|
1131  *          +-----+
1132  *            |  ^
1133  *     acquire|  |release
1134  *            |  |
1135  *            |  |
1136  *            v  |
1137  *          +-----+
1138  *          | h, t|----- acquire, release loopback ---+
1139  *          +-----+                                   |
1140  *               ^                                    |
1141  *               |                                    |
1142  *               +------------------------------------+
1143  *
1144  */
1145 
1146 #ifdef DEBUG_QUEUING_LOCKS
1147 
1148 /* Stuff for circular trace buffer */
1149 #define TRACE_BUF_ELE	1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
1151 static int tc = 0;
1152 #define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128,  "t%d at %s\n", X, Y );
1153 #define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1154 #define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1155 
1156 static void
1157 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1158   kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1159 {
1160     kmp_int32 t, i;
1161 
1162     __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1163 
1164     i = tc % TRACE_BUF_ELE;
1165     __kmp_printf_no_lock( "%s\n", traces[i] );
1166     i = (i+1) % TRACE_BUF_ELE;
1167     while ( i != (tc % TRACE_BUF_ELE) ) {
1168         __kmp_printf_no_lock( "%s", traces[i] );
1169         i = (i+1) % TRACE_BUF_ELE;
1170     }
1171     __kmp_printf_no_lock( "\n" );
1172 
1173     __kmp_printf_no_lock(
1174              "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1175              gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1176              head_id, tail_id );
1177 
1178     __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1179 
1180     if ( lck->lk.head_id >= 1 ) {
1181         t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1182         while (t > 0) {
1183             __kmp_printf_no_lock( "-> %d ", t );
1184             t = __kmp_threads[t-1]->th.th_next_waiting;
1185         }
1186     }
1187     __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
1188     __kmp_printf_no_lock( "\n\n" );
1189 }
1190 
1191 #endif /* DEBUG_QUEUING_LOCKS */
1192 
1193 static kmp_int32
1194 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1195 {
1196     return TCR_4( lck->lk.owner_id ) - 1;
1197 }
1198 
1199 static inline bool
1200 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1201 {
1202     return lck->lk.depth_locked != -1;
1203 }
1204 
/* Acquire a lock using the queuing lock implementation */
1206 template <bool takeTime>
1207 /* [TLW] The unused template above is left behind because of what BEB believes is a
1208    potential compiler problem with __forceinline. */
1209 __forceinline static int
1210 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1211   kmp_int32 gtid )
1212 {
1213     register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
1214     volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
1215     volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
1216     volatile kmp_uint32 *spin_here_p;
1217     kmp_int32 need_mf = 1;
1218 
1219 #if OMPT_SUPPORT
1220     ompt_state_t prev_state = ompt_state_undefined;
1221 #endif
1222 
1223     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1224 
1225     KMP_FSYNC_PREPARE( lck );
1226     KMP_DEBUG_ASSERT( this_thr != NULL );
1227     spin_here_p = & this_thr->th.th_spin_here;
1228 
1229 #ifdef DEBUG_QUEUING_LOCKS
1230     TRACE_LOCK( gtid+1, "acq ent" );
1231     if ( *spin_here_p )
1232         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1233     if ( this_thr->th.th_next_waiting != 0 )
1234         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1235 #endif
1236     KMP_DEBUG_ASSERT( !*spin_here_p );
1237     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1238 
1239 
1240     /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1241        that may follow, not just in execution order, but also in visibility order.  This way,
1242        when a releasing thread observes the changes to the queue by this thread, it can
1243        rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1244        spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
1245        to FALSE before this thread sets it to TRUE, this thread will hang.
1246     */
1247     *spin_here_p = TRUE;  /* before enqueuing to prevent race */
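
    /* Expressed in C++11 atomics terms (a sketch only -- the runtime uses KMP_MB()
       and the KMP_COMPARE_AND_STORE_* macros rather than std::atomic), the
       requirement above is that the write which publishes the enqueue carry
       release semantics and the releasing thread observe the queue with acquire
       semantics:

           spin_here.store(true, std::memory_order_relaxed);
           // ... build up 'expected' from the observed tail ...
           tail.compare_exchange_strong(expected, my_id,
                                        std::memory_order_acq_rel);   // enqueue

       so a thread that sees the new tail (or the updated th_next_waiting link)
       is also guaranteed to see spin_here == TRUE before it clears it.
    */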
1248 
1249     while( 1 ) {
1250         kmp_int32 enqueued;
1251         kmp_int32 head;
1252         kmp_int32 tail;
1253 
1254         head = *head_id_p;
1255 
1256         switch ( head ) {
1257 
1258             case -1:
1259             {
1260 #ifdef DEBUG_QUEUING_LOCKS
1261                 tail = *tail_id_p;
1262                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1263 #endif
1264                 tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                           this assignment prevents us from entering the if ( tail > 0 )
1266                            condition in the enqueued case below, which is not necessary for
1267                            this state transition */
1268 
1269                 need_mf = 0;
1270                 /* try (-1,0)->(tid,tid) */
1271                 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1272                   KMP_PACK_64( -1, 0 ),
1273                   KMP_PACK_64( gtid+1, gtid+1 ) );
1274 #ifdef DEBUG_QUEUING_LOCKS
1275                   if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1276 #endif
1277             }
1278             break;
1279 
1280             default:
1281             {
1282                 tail = *tail_id_p;
1283                 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1284 
1285 #ifdef DEBUG_QUEUING_LOCKS
1286                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1287 #endif
1288 
1289                 if ( tail == 0 ) {
1290                     enqueued = FALSE;
1291                 }
1292                 else {
1293                     need_mf = 0;
1294                     /* try (h,t) or (h,h)->(h,tid) */
1295                     enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1296 
1297 #ifdef DEBUG_QUEUING_LOCKS
1298                         if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1299 #endif
1300                 }
1301             }
1302             break;
1303 
1304             case 0: /* empty queue */
1305             {
1306                 kmp_int32 grabbed_lock;
1307 
1308 #ifdef DEBUG_QUEUING_LOCKS
1309                 tail = *tail_id_p;
1310                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1311 #endif
1312                 /* try (0,0)->(-1,0) */
1313 
1314                 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1315                 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1316 
1317                 if ( grabbed_lock ) {
1318 
1319                     *spin_here_p = FALSE;
1320 
1321                     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1322                               lck, gtid ));
1323 #ifdef DEBUG_QUEUING_LOCKS
1324                     TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1325 #endif
1326 
1327 #if OMPT_SUPPORT
1328                     if (ompt_enabled && prev_state != ompt_state_undefined) {
1329                         /* change the state before clearing wait_id */
1330                         this_thr->th.ompt_thread_info.state = prev_state;
1331                         this_thr->th.ompt_thread_info.wait_id = 0;
1332                     }
1333 #endif
1334 
1335                     KMP_FSYNC_ACQUIRED( lck );
1336                     return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1337                 }
1338                 enqueued = FALSE;
1339             }
1340             break;
1341         }
1342 
1343 #if OMPT_SUPPORT
1344         if (ompt_enabled && prev_state == ompt_state_undefined) {
1345             /* this thread will spin; set wait_id before entering wait state */
1346             prev_state = this_thr->th.ompt_thread_info.state;
1347             this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1348             this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1349         }
1350 #endif
1351 
1352         if ( enqueued ) {
1353             if ( tail > 0 ) {
1354                 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1355                 KMP_ASSERT( tail_thr != NULL );
1356                 tail_thr->th.th_next_waiting = gtid+1;
1357                 /* corresponding wait for this write in release code */
1358             }
1359             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1360 
1361 
1362             /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
1363              *       throughput only here.
1364              */
1365             KMP_MB();
1366             KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1367 
1368 #ifdef DEBUG_QUEUING_LOCKS
1369             TRACE_LOCK( gtid+1, "acq spin" );
1370 
1371             if ( this_thr->th.th_next_waiting != 0 )
1372                 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1373 #endif
1374             KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1375             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1376                       lck, gtid ));
1377 
1378 #ifdef DEBUG_QUEUING_LOCKS
1379             TRACE_LOCK( gtid+1, "acq exit 2" );
1380 #endif
1381 
1382 #if OMPT_SUPPORT
1383             /* change the state before clearing wait_id */
1384             this_thr->th.ompt_thread_info.state = prev_state;
1385             this_thr->th.ompt_thread_info.wait_id = 0;
1386 #endif
1387 
1388             /* got lock, we were dequeued by the thread that released lock */
1389             return KMP_LOCK_ACQUIRED_FIRST;
1390         }
1391 
1392         /* Yield if number of threads > number of logical processors */
1393         /* ToDo: Not sure why this should only be in oversubscription case,
1394            maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1395         KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1396           __kmp_xproc ) );
1397 #ifdef DEBUG_QUEUING_LOCKS
1398         TRACE_LOCK( gtid+1, "acq retry" );
1399 #endif
1400 
1401     }
1402     KMP_ASSERT2( 0, "should not get here" );
1403     return KMP_LOCK_ACQUIRED_FIRST;
1404 }
1405 
1406 int
1407 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1408 {
1409     KMP_DEBUG_ASSERT( gtid >= 0 );
1410 
1411     return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1412 }
1413 
1414 static int
1415 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1416   kmp_int32 gtid )
1417 {
1418     char const * const func = "omp_set_lock";
1419     if ( lck->lk.initialized != lck ) {
1420         KMP_FATAL( LockIsUninitialized, func );
1421     }
1422     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1423         KMP_FATAL( LockNestableUsedAsSimple, func );
1424     }
1425     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1426         KMP_FATAL( LockIsAlreadyOwned, func );
1427     }
1428 
1429     __kmp_acquire_queuing_lock( lck, gtid );
1430 
1431     lck->lk.owner_id = gtid + 1;
1432     return KMP_LOCK_ACQUIRED_FIRST;
1433 }
1434 
1435 int
1436 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1437 {
1438     volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
1439     kmp_int32 head;
1440 #ifdef KMP_DEBUG
1441     kmp_info_t *this_thr;
1442 #endif
1443 
1444     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1445     KMP_DEBUG_ASSERT( gtid >= 0 );
1446 #ifdef KMP_DEBUG
1447     this_thr = __kmp_thread_from_gtid( gtid );
1448     KMP_DEBUG_ASSERT( this_thr != NULL );
1449     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1450 #endif
1451 
1452     head = *head_id_p;
1453 
1454     if ( head == 0 ) { /* nobody on queue, nobody holding */
1455 
1456         /* try (0,0)->(-1,0) */
1457 
1458         if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1459             KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1460             KMP_FSYNC_ACQUIRED(lck);
1461             return TRUE;
1462         }
1463     }
1464 
1465     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1466     return FALSE;
1467 }
1468 
1469 static int
1470 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1471 {
1472     char const * const func = "omp_test_lock";
1473     if ( lck->lk.initialized != lck ) {
1474         KMP_FATAL( LockIsUninitialized, func );
1475     }
1476     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1477         KMP_FATAL( LockNestableUsedAsSimple, func );
1478     }
1479 
1480     int retval = __kmp_test_queuing_lock( lck, gtid );
1481 
1482     if ( retval ) {
1483         lck->lk.owner_id = gtid + 1;
1484     }
1485     return retval;
1486 }
1487 
1488 int
1489 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1490 {
1491     register kmp_info_t *this_thr;
1492     volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1493     volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1494 
1495     KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1496     KMP_DEBUG_ASSERT( gtid >= 0 );
1497     this_thr    = __kmp_thread_from_gtid( gtid );
1498     KMP_DEBUG_ASSERT( this_thr != NULL );
1499 #ifdef DEBUG_QUEUING_LOCKS
1500     TRACE_LOCK( gtid+1, "rel ent" );
1501 
1502     if ( this_thr->th.th_spin_here )
1503         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1504     if ( this_thr->th.th_next_waiting != 0 )
1505         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1506 #endif
1507     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1508     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1509 
1510     KMP_FSYNC_RELEASING(lck);
1511 
1512     while( 1 ) {
1513         kmp_int32 dequeued;
1514         kmp_int32 head;
1515         kmp_int32 tail;
1516 
1517         head = *head_id_p;
1518 
1519 #ifdef DEBUG_QUEUING_LOCKS
1520         tail = *tail_id_p;
1521         TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1522         if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1523 #endif
1524         KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1525 
1526         if ( head == -1 ) { /* nobody on queue */
1527 
1528             /* try (-1,0)->(0,0) */
1529             if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1530                 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1531                           lck, gtid ));
1532 #ifdef DEBUG_QUEUING_LOCKS
1533                 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1534 #endif
1535 
1536 #if OMPT_SUPPORT
1537                 /* nothing to do - no other thread is trying to shift blame */
1538 #endif
1539 
1540                 return KMP_LOCK_RELEASED;
1541             }
1542             dequeued = FALSE;
1543 
1544         }
1545         else {
1546 
1547             tail = *tail_id_p;
1548             if ( head == tail ) {  /* only one thread on the queue */
1549 
1550 #ifdef DEBUG_QUEUING_LOCKS
1551                 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1552 #endif
1553                 KMP_DEBUG_ASSERT( head > 0 );
1554 
1555                 /* try (h,h)->(-1,0) */
1556                 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1557                   KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1558 #ifdef DEBUG_QUEUING_LOCKS
1559                 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1560 #endif
1561 
1562             }
1563             else {
1564                 volatile kmp_int32 *waiting_id_p;
1565                 kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
1566                 KMP_DEBUG_ASSERT( head_thr != NULL );
1567                 waiting_id_p = & head_thr->th.th_next_waiting;
1568 
1569                 /* Does this require synchronous reads? */
1570 #ifdef DEBUG_QUEUING_LOCKS
1571                 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1572 #endif
1573                 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1574 
1575                 /* try (h,t)->(h',t) or (t,t) */
1576 
1577                 KMP_MB();
1578                 /* make sure enqueuing thread has time to update next waiting thread field */
1579                 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1580 #ifdef DEBUG_QUEUING_LOCKS
1581                 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1582 #endif
1583                 dequeued = TRUE;
1584             }
1585         }
1586 
1587         if ( dequeued ) {
1588             kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1589             KMP_DEBUG_ASSERT( head_thr != NULL );
1590 
1591             /* Does this require synchronous reads? */
1592 #ifdef DEBUG_QUEUING_LOCKS
1593             if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1594 #endif
1595             KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1596 
1597             /* For clean code only.
1598              * Thread not released until next statement prevents race with acquire code.
1599              */
1600             head_thr->th.th_next_waiting = 0;
1601 #ifdef DEBUG_QUEUING_LOCKS
1602             TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1603 #endif
1604 
1605             KMP_MB();
1606             /* reset spin value */
1607             head_thr->th.th_spin_here = FALSE;
1608 
1609             KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1610                       lck, gtid ));
1611 #ifdef DEBUG_QUEUING_LOCKS
1612             TRACE_LOCK( gtid+1, "rel exit 2" );
1613 #endif
1614             return KMP_LOCK_RELEASED;
1615         }
1616         /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */
1617 
1618 #ifdef DEBUG_QUEUING_LOCKS
1619         TRACE_LOCK( gtid+1, "rel retry" );
1620 #endif
1621 
1622     } /* while */
1623     KMP_ASSERT2( 0, "should not get here" );
1624     return KMP_LOCK_RELEASED;
1625 }
1626 
1627 static int
1628 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1629   kmp_int32 gtid )
1630 {
1631     char const * const func = "omp_unset_lock";
1632     KMP_MB();  /* in case another processor initialized lock */
1633     if ( lck->lk.initialized != lck ) {
1634         KMP_FATAL( LockIsUninitialized, func );
1635     }
1636     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1637         KMP_FATAL( LockNestableUsedAsSimple, func );
1638     }
1639     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1640         KMP_FATAL( LockUnsettingFree, func );
1641     }
1642     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1643         KMP_FATAL( LockUnsettingSetByAnother, func );
1644     }
1645     lck->lk.owner_id = 0;
1646     return __kmp_release_queuing_lock( lck, gtid );
1647 }
1648 
1649 void
1650 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1651 {
1652     lck->lk.location = NULL;
1653     lck->lk.head_id = 0;
1654     lck->lk.tail_id = 0;
1655     lck->lk.next_ticket = 0;
1656     lck->lk.now_serving = 0;
1657     lck->lk.owner_id = 0;      // no thread owns the lock.
1658     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1659     lck->lk.initialized = lck;
1660 
1661     KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1662 }
1663 
1664 static void
1665 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1666 {
1667     __kmp_init_queuing_lock( lck );
1668 }
1669 
1670 void
1671 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1672 {
1673     lck->lk.initialized = NULL;
1674     lck->lk.location = NULL;
1675     lck->lk.head_id = 0;
1676     lck->lk.tail_id = 0;
1677     lck->lk.next_ticket = 0;
1678     lck->lk.now_serving = 0;
1679     lck->lk.owner_id = 0;
1680     lck->lk.depth_locked = -1;
1681 }
1682 
1683 static void
1684 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1685 {
1686     char const * const func = "omp_destroy_lock";
1687     if ( lck->lk.initialized != lck ) {
1688         KMP_FATAL( LockIsUninitialized, func );
1689     }
1690     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1691         KMP_FATAL( LockNestableUsedAsSimple, func );
1692     }
1693     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1694         KMP_FATAL( LockStillOwned, func );
1695     }
1696     __kmp_destroy_queuing_lock( lck );
1697 }
1698 
1699 
1700 //
1701 // nested queuing locks
1702 //
1703 
1704 int
1705 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1706 {
1707     KMP_DEBUG_ASSERT( gtid >= 0 );
1708 
1709     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1710         lck->lk.depth_locked += 1;
1711         return KMP_LOCK_ACQUIRED_NEXT;
1712     }
1713     else {
1714         __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1715         KMP_MB();
1716         lck->lk.depth_locked = 1;
1717         KMP_MB();
1718         lck->lk.owner_id = gtid + 1;
1719         return KMP_LOCK_ACQUIRED_FIRST;
1720     }
1721 }
1722 
1723 static int
1724 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1725 {
1726     char const * const func = "omp_set_nest_lock";
1727     if ( lck->lk.initialized != lck ) {
1728         KMP_FATAL( LockIsUninitialized, func );
1729     }
1730     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1731         KMP_FATAL( LockSimpleUsedAsNestable, func );
1732     }
1733     return __kmp_acquire_nested_queuing_lock( lck, gtid );
1734 }
1735 
1736 int
1737 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1738 {
1739     int retval;
1740 
1741     KMP_DEBUG_ASSERT( gtid >= 0 );
1742 
1743     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1744         retval = ++lck->lk.depth_locked;
1745     }
1746     else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1747         retval = 0;
1748     }
1749     else {
1750         KMP_MB();
1751         retval = lck->lk.depth_locked = 1;
1752         KMP_MB();
1753         lck->lk.owner_id = gtid + 1;
1754     }
1755     return retval;
1756 }
1757 
1758 static int
1759 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1760   kmp_int32 gtid )
1761 {
1762     char const * const func = "omp_test_nest_lock";
1763     if ( lck->lk.initialized != lck ) {
1764         KMP_FATAL( LockIsUninitialized, func );
1765     }
1766     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1767         KMP_FATAL( LockSimpleUsedAsNestable, func );
1768     }
1769     return __kmp_test_nested_queuing_lock( lck, gtid );
1770 }
1771 
1772 int
1773 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1774 {
1775     KMP_DEBUG_ASSERT( gtid >= 0 );
1776 
1777     KMP_MB();
1778     if ( --(lck->lk.depth_locked) == 0 ) {
1779         KMP_MB();
1780         lck->lk.owner_id = 0;
1781         __kmp_release_queuing_lock( lck, gtid );
1782         return KMP_LOCK_RELEASED;
1783     }
1784     return KMP_LOCK_STILL_HELD;
1785 }
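/*
 * Illustrative sketch (not compiled): the nesting semantics implemented by
 * depth_locked.  Each re-acquisition by the owner bumps the depth, and the
 * lock is only handed on once the depth drops back to zero.
 *
 *     omp_nest_lock_t l;
 *     omp_init_nest_lock( &l );
 *     omp_set_nest_lock( &l );    // depth_locked == 1 (KMP_LOCK_ACQUIRED_FIRST)
 *     omp_set_nest_lock( &l );    // same owner:   depth_locked == 2
 *     omp_unset_nest_lock( &l );  // depth_locked == 1, KMP_LOCK_STILL_HELD
 *     omp_unset_nest_lock( &l );  // depth_locked == 0, lock actually released
 */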
1786 
1787 static int
1788 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1789 {
1790     char const * const func = "omp_unset_nest_lock";
1791     KMP_MB();  /* in case another processor initialized lock */
1792     if ( lck->lk.initialized != lck ) {
1793         KMP_FATAL( LockIsUninitialized, func );
1794     }
1795     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1796         KMP_FATAL( LockSimpleUsedAsNestable, func );
1797     }
1798     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1799         KMP_FATAL( LockUnsettingFree, func );
1800     }
1801     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1802         KMP_FATAL( LockUnsettingSetByAnother, func );
1803     }
1804     return __kmp_release_nested_queuing_lock( lck, gtid );
1805 }
1806 
1807 void
1808 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1809 {
1810     __kmp_init_queuing_lock( lck );
1811     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1812 }
1813 
1814 static void
1815 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1816 {
1817     __kmp_init_nested_queuing_lock( lck );
1818 }
1819 
1820 void
1821 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1822 {
1823     __kmp_destroy_queuing_lock( lck );
1824     lck->lk.depth_locked = 0;
1825 }
1826 
1827 static void
1828 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1829 {
1830     char const * const func = "omp_destroy_nest_lock";
1831     if ( lck->lk.initialized != lck ) {
1832         KMP_FATAL( LockIsUninitialized, func );
1833     }
1834     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1835         KMP_FATAL( LockSimpleUsedAsNestable, func );
1836     }
1837     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1838         KMP_FATAL( LockStillOwned, func );
1839     }
1840     __kmp_destroy_nested_queuing_lock( lck );
1841 }
1842 
1843 
1844 //
1845 // access functions to fields which don't exist for all lock kinds.
1846 //
1847 
1848 static int
1849 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1850 {
1851     return lck == lck->lk.initialized;
1852 }
1853 
1854 static const ident_t *
1855 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1856 {
1857     return lck->lk.location;
1858 }
1859 
1860 static void
1861 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1862 {
1863     lck->lk.location = loc;
1864 }
1865 
1866 static kmp_lock_flags_t
1867 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1868 {
1869     return lck->lk.flags;
1870 }
1871 
1872 static void
1873 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1874 {
1875     lck->lk.flags = flags;
1876 }
1877 
1878 #if KMP_USE_ADAPTIVE_LOCKS
1879 
1880 /*
1881     RTM Adaptive locks
1882 */
1883 
1884 // TODO: Use the header for intrinsics below with the compiler 13.0
1885 //#include <immintrin.h>
1886 
1887 // Values from the status register after failed speculation.
1888 #define _XBEGIN_STARTED          (~0u)
1889 #define _XABORT_EXPLICIT         (1 << 0)
1890 #define _XABORT_RETRY            (1 << 1)
1891 #define _XABORT_CONFLICT         (1 << 2)
1892 #define _XABORT_CAPACITY         (1 << 3)
1893 #define _XABORT_DEBUG            (1 << 4)
1894 #define _XABORT_NESTED           (1 << 5)
1895 #define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))
1896 
1897 // Aborts for which it's worth trying again immediately
1898 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
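/*
 * Illustrative sketch (not compiled): classifying an abort status with the
 * masks above.  A conflict, an explicit abort, or a "retry" hint counts as a
 * soft abort worth retrying immediately; a capacity overflow does not.
 *
 *     kmp_uint32 status = _XABORT_CONFLICT;      // another thread touched our
 *                                                // read/write-set
 *     // (status & SOFT_ABORT_MASK) != 0   ->  soft abort, retry
 *
 *     status = _XABORT_CAPACITY;                 // transaction footprint too big
 *     // (status & SOFT_ABORT_MASK) == 0   ->  hard failure, give up
 *
 *     // The explicit _xabort(0x01) used below produces a status with
 *     // _XABORT_EXPLICIT set and _XABORT_CODE(status) == 0x01.
 */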
1899 
1900 #define STRINGIZE_INTERNAL(arg) #arg
1901 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1902 
1903 // Access to RTM instructions
1904 
1905 /*
1906   A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1907   This is the same definition as the compiler intrinsic that will be supported at some point.
1908 */
1909 static __inline int _xbegin()
1910 {
1911     int res = -1;
1912 
1913 #if KMP_OS_WINDOWS
1914 #if KMP_ARCH_X86_64
1915     _asm {
1916         _emit 0xC7
1917         _emit 0xF8
1918         _emit 2
1919         _emit 0
1920         _emit 0
1921         _emit 0
1922         jmp   L2
1923         mov   res, eax
1924     L2:
1925     }
1926 #else /* IA32 */
1927     _asm {
1928         _emit 0xC7
1929         _emit 0xF8
1930         _emit 2
1931         _emit 0
1932         _emit 0
1933         _emit 0
1934         jmp   L2
1935         mov   res, eax
1936     L2:
1937     }
1938 #endif // KMP_ARCH_X86_64
1939 #else
1940     /* Note that %eax must be noted as killed (clobbered), because
1941      * the XSR is returned in %eax(%rax) on abort.  Other register
1942      * values are restored, so don't need to be killed.
1943      *
1944      * We must also mark 'res' as an input and an output, since otherwise
1945      * 'res=-1' may be dropped as being dead, whereas we do need the
1946      * assignment on the successful (i.e., non-abort) path.
1947      */
1948     __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
1949                       "   .long  1f-1b-6\n"
1950                       "    jmp   2f\n"
1951                       "1:  movl  %%eax,%0\n"
1952                       "2:"
1953                       :"+r"(res)::"memory","%eax");
1954 #endif // KMP_OS_WINDOWS
1955     return res;
1956 }
1957 
1958 /*
1959   Transaction end
1960 */
1961 static __inline void _xend()
1962 {
1963 #if KMP_OS_WINDOWS
1964     __asm  {
1965         _emit 0x0f
1966         _emit 0x01
1967         _emit 0xd5
1968     }
1969 #else
1970     __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1971 #endif
1972 }
1973 
1974 /*
1975   This is a macro, the argument must be a single byte constant which
1976   can be evaluated by the inline assembler, since it is emitted as a
1977   byte into the assembly code.
1978 */
1979 #if KMP_OS_WINDOWS
1980 #define _xabort(ARG)                            \
1981     _asm _emit 0xc6                             \
1982     _asm _emit 0xf8                             \
1983     _asm _emit ARG
1984 #else
1985 #define _xabort(ARG) \
1986     __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1987 #endif
1988 
1989 //
//    Statistics are collected for testing purposes.
1991 //
1992 #if KMP_DEBUG_ADAPTIVE_LOCKS
1993 
1994 // We accumulate speculative lock statistics when the lock is destroyed.
1995 // We keep locks that haven't been destroyed in the liveLocks list
1996 // so that we can grab their statistics too.
1997 static kmp_adaptive_lock_statistics_t destroyedStats;
1998 
1999 // To hold the list of live locks.
2000 static kmp_adaptive_lock_info_t liveLocks;
2001 
2002 // A lock so we can safely update the list of locks.
2003 static kmp_bootstrap_lock_t chain_lock;
2004 
2005 // Initialize the list of stats.
2006 void
2007 __kmp_init_speculative_stats()
2008 {
2009     kmp_adaptive_lock_info_t *lck = &liveLocks;
2010 
2011     memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2012     lck->stats.next = lck;
2013     lck->stats.prev = lck;
2014 
2015     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2016     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2017 
2018     __kmp_init_bootstrap_lock( &chain_lock );
2019 
2020 }
2021 
2022 // Insert the lock into the circular list
2023 static void
2024 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
2025 {
2026     __kmp_acquire_bootstrap_lock( &chain_lock );
2027 
2028     lck->stats.next = liveLocks.stats.next;
2029     lck->stats.prev = &liveLocks;
2030 
2031     liveLocks.stats.next = lck;
2032     lck->stats.next->stats.prev  = lck;
2033 
2034     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2035     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2036 
2037     __kmp_release_bootstrap_lock( &chain_lock );
2038 }
2039 
2040 static void
2041 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2042 {
2043     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2044     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2045 
2046     kmp_adaptive_lock_info_t * n = lck->stats.next;
2047     kmp_adaptive_lock_info_t * p = lck->stats.prev;
2048 
2049     n->stats.prev = p;
2050     p->stats.next = n;
2051 }
2052 
2053 static void
2054 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2055 {
2056     memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2057     __kmp_remember_lock( lck );
2058 }
2059 
2060 static void
2061 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2062 {
2063     kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2064 
2065     t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2066     t->successfulSpeculations += s->successfulSpeculations;
2067     t->hardFailedSpeculations += s->hardFailedSpeculations;
2068     t->softFailedSpeculations += s->softFailedSpeculations;
2069     t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2070     t->lemmingYields          += s->lemmingYields;
2071 }
2072 
2073 static void
2074 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2075 {
2076     kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2077 
2078     __kmp_acquire_bootstrap_lock( &chain_lock );
2079 
    __kmp_add_stats( t, lck );
2081     __kmp_forget_lock( lck );
2082 
2083     __kmp_release_bootstrap_lock( &chain_lock );
2084 }
2085 
2086 static float
2087 percent (kmp_uint32 count, kmp_uint32 total)
2088 {
2089     return (total == 0) ? 0.0: (100.0 * count)/total;
2090 }
2091 
static FILE *
__kmp_open_stats_file()
2094 {
2095     if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2096         return stdout;
2097 
2098     size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2099     char buffer[buffLen];
2100     KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2101       (kmp_int32)getpid());
2102     FILE * result = fopen(&buffer[0], "w");
2103 
2104     // Maybe we should issue a warning here...
2105     return result ? result : stdout;
2106 }
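/*
 * Illustrative note: the stats file name is used as the format string with
 * the pid as its only argument, so a (hypothetical) setting of
 * __kmp_speculative_statsfile == "adaptive_stats_%d.txt" yields
 * "adaptive_stats_1234.txt" for pid 1234, while "-" sends the report to
 * stdout.
 */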
2107 
2108 void
2109 __kmp_print_speculative_stats()
2110 {
2111     if (__kmp_user_lock_kind != lk_adaptive)
2112         return;
2113 
2114     FILE * statsFile = __kmp_open_stats_file();
2115 
2116     kmp_adaptive_lock_statistics_t total = destroyedStats;
2117     kmp_adaptive_lock_info_t *lck;
2118 
2119     for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2120         __kmp_add_stats( &total, lck );
2121     }
2122     kmp_adaptive_lock_statistics_t *t = &total;
2123     kmp_uint32 totalSections     = t->nonSpeculativeAcquires + t->successfulSpeculations;
2124     kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2125                                    t->softFailedSpeculations;
2126 
2127     fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2128     fprintf ( statsFile, " Lock parameters: \n"
2129              "   max_soft_retries               : %10d\n"
2130              "   max_badness                    : %10d\n",
2131              __kmp_adaptive_backoff_params.max_soft_retries,
2132              __kmp_adaptive_backoff_params.max_badness);
2133     fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2134     fprintf( statsFile, " Total critical sections          : %10d\n", totalSections );
2135     fprintf( statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
2136              t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2137     fprintf( statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
2138              t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2139     fprintf( statsFile, " Lemming yields                   : %10d\n\n", t->lemmingYields );
2140 
2141     fprintf( statsFile, " Speculative acquire attempts     : %10d\n", totalSpeculations );
2142     fprintf( statsFile, " Successes                        : %10d (%5.1f%%)\n",
2143              t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2144     fprintf( statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
2145              t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2146     fprintf( statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
2147              t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2148 
2149     if (statsFile != stdout)
2150         fclose( statsFile );
2151 }
2152 
2153 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2154 #else
2155 # define KMP_INC_STAT(lck,stat)
2156 
2157 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2158 
2159 static inline bool
2160 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2161 {
    // It is enough to check that the head_id is zero;
    // we don't need to check the tail as well.
2164     bool res = lck->lk.head_id == 0;
2165 
2166     // We need a fence here, since we must ensure that no memory operations
2167     // from later in this thread float above that read.
2168 #if KMP_COMPILER_ICC
2169     _mm_mfence();
2170 #else
2171     __sync_synchronize();
2172 #endif
2173 
2174     return res;
2175 }
2176 
2177 // Functions for manipulating the badness
2178 static __inline void
2179 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2180 {
2181     // Reset the badness to zero so we eagerly try to speculate again
2182     lck->lk.adaptive.badness = 0;
2183     KMP_INC_STAT(lck,successfulSpeculations);
2184 }
2185 
2186 // Create a bit mask with one more set bit.
2187 static __inline void
2188 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2189 {
2190     kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2191     if ( newBadness > lck->lk.adaptive.max_badness) {
2192         return;
2193     } else {
2194         lck->lk.adaptive.badness = newBadness;
2195     }
2196 }
2197 
2198 // Check whether speculation should be attempted.
2199 static __inline int
2200 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2201 {
2202     kmp_uint32 badness = lck->lk.adaptive.badness;
    kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2204     int res = (attempts & badness) == 0;
2205     return res;
2206 }
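/*
 * Illustrative sketch (not compiled): how badness and acquire_attempts give
 * exponential backoff from speculation.
 *
 *     // badness == 0x0 : (attempts & 0x0) == 0 always -> speculate on every
 *     //                  acquire attempt
 *     // badness == 0x1 : speculate on every 2nd acquire attempt
 *     // badness == 0x7 : speculate on every 8th acquire attempt
 *
 * __kmp_step_badness() shifts one more bit in after a failed round
 * (0x0 -> 0x1 -> 0x3 -> 0x7 ...), capped at max_badness, and
 * __kmp_update_badness_after_success() resets it to zero.
 */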
2207 
2208 // Attempt to acquire only the speculative lock.
2209 // Does not back off to the non-speculative lock.
2210 //
2211 static int
2212 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2213 {
2214     int retries = lck->lk.adaptive.max_soft_retries;
2215 
2216     // We don't explicitly count the start of speculation, rather we record
2217     // the results (success, hard fail, soft fail). The sum of all of those
2218     // is the total number of times we started speculation since all
2219     // speculations must end one of those ways.
2220     do
2221     {
2222         kmp_uint32 status = _xbegin();
2223         // Switch this in to disable actual speculation but exercise
2224         // at least some of the rest of the code. Useful for debugging...
2225         // kmp_uint32 status = _XABORT_NESTED;
2226 
2227         if (status == _XBEGIN_STARTED )
2228         { /* We have successfully started speculation
2229            * Check that no-one acquired the lock for real between when we last looked
2230            * and now. This also gets the lock cache line into our read-set,
2231            * which we need so that we'll abort if anyone later claims it for real.
2232            */
2233             if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2234             {
2235                 // Lock is now visibly acquired, so someone beat us to it.
2236                 // Abort the transaction so we'll restart from _xbegin with the
2237                 // failure status.
                _xabort(0x01);
2239                 KMP_ASSERT2( 0, "should not get here" );
2240             }
2241             return 1;   // Lock has been acquired (speculatively)
2242         } else {
2243             // We have aborted, update the statistics
2244             if ( status & SOFT_ABORT_MASK)
2245             {
2246                 KMP_INC_STAT(lck,softFailedSpeculations);
2247                 // and loop round to retry.
2248             }
2249             else
2250             {
2251                 KMP_INC_STAT(lck,hardFailedSpeculations);
2252                 // Give up if we had a hard failure.
2253                 break;
2254             }
2255         }
2256     }  while( retries-- ); // Loop while we have retries, and didn't fail hard.
2257 
2258     // Either we had a hard failure or we didn't succeed softly after
2259     // the full set of attempts, so back off the badness.
2260     __kmp_step_badness( lck );
2261     return 0;
2262 }
2263 
2264 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2265 // if the speculative lock cannot be acquired.
2266 // We can succeed speculatively, non-speculatively, or fail.
2267 static int
2268 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2269 {
2270     // First try to acquire the lock speculatively
2271     if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2272         return 1;
2273 
2274     // Speculative acquisition failed, so try to acquire it non-speculatively.
2275     // Count the non-speculative acquire attempt
2276     lck->lk.adaptive.acquire_attempts++;
2277 
2278     // Use base, non-speculative lock.
2279     if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2280     {
2281         KMP_INC_STAT(lck,nonSpeculativeAcquires);
2282         return 1;       // Lock is acquired (non-speculatively)
2283     }
2284     else
2285     {
2286         return 0;       // Failed to acquire the lock, it's already visibly locked.
2287     }
2288 }
2289 
2290 static int
2291 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2292 {
2293     char const * const func = "omp_test_lock";
2294     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2295         KMP_FATAL( LockIsUninitialized, func );
2296     }
2297 
2298     int retval = __kmp_test_adaptive_lock( lck, gtid );
2299 
2300     if ( retval ) {
2301         lck->lk.qlk.owner_id = gtid + 1;
2302     }
2303     return retval;
2304 }
2305 
2306 // Block until we can acquire a speculative, adaptive lock.
2307 // We check whether we should be trying to speculate.
2308 // If we should be, we check the real lock to see if it is free,
2309 // and, if not, pause without attempting to acquire it until it is.
2310 // Then we try the speculative acquire.
// This means that although we suffer from lemmings a little (because we
// can't acquire the lock speculatively until the queue of waiting threads
// has cleared), we don't get into a state where we can never acquire the
// lock speculatively (because we force the queue to clear by preventing
// new arrivals from entering the queue).
2317 // This does mean that when we're trying to break lemmings, the lock
2318 // is no longer fair. However OpenMP makes no guarantee that its
2319 // locks are fair, so this isn't a real problem.
2320 static void
2321 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2322 {
2323     if ( __kmp_should_speculate( lck, gtid ) )
2324     {
2325         if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2326         {
2327             if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2328                 return;
2329             // We tried speculation and failed, so give up.
2330         }
2331         else
2332         {
            // We can't try speculation until the lock is free, so we
            // pause here (without suspending on the queuing lock)
            // to allow it to drain, then try again.
            // All other threads will also see the same result from
            // __kmp_should_speculate(), so they will be doing the same
            // thing if they try to claim the lock from now on.
2339             while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2340             {
2341                 KMP_INC_STAT(lck,lemmingYields);
2342                 __kmp_yield (TRUE);
2343             }
2344 
2345             if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2346                 return;
2347         }
2348     }
2349 
2350     // Speculative acquisition failed, so acquire it non-speculatively.
2351     // Count the non-speculative acquire attempt
2352     lck->lk.adaptive.acquire_attempts++;
2353 
2354     __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2355     // We have acquired the base lock, so count that.
2356     KMP_INC_STAT(lck,nonSpeculativeAcquires );
2357 }
2358 
2359 static void
2360 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2361 {
2362     char const * const func = "omp_set_lock";
2363     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2364         KMP_FATAL( LockIsUninitialized, func );
2365     }
2366     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2367         KMP_FATAL( LockIsAlreadyOwned, func );
2368     }
2369 
2370     __kmp_acquire_adaptive_lock( lck, gtid );
2371 
2372     lck->lk.qlk.owner_id = gtid + 1;
2373 }
2374 
2375 static int
2376 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2377 {
2378     if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2379     {   // If the lock doesn't look claimed we must be speculating.
2380         // (Or the user's code is buggy and they're releasing without locking;
2381         // if we had XTEST we'd be able to check that case...)
2382         _xend();        // Exit speculation
2383         __kmp_update_badness_after_success( lck );
2384     }
2385     else
2386     {   // Since the lock *is* visibly locked we're not speculating,
2387         // so should use the underlying lock's release scheme.
2388         __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2389     }
2390     return KMP_LOCK_RELEASED;
2391 }
2392 
2393 static int
2394 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2395 {
2396     char const * const func = "omp_unset_lock";
2397     KMP_MB();  /* in case another processor initialized lock */
2398     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2399         KMP_FATAL( LockIsUninitialized, func );
2400     }
2401     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2402         KMP_FATAL( LockUnsettingFree, func );
2403     }
2404     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2405         KMP_FATAL( LockUnsettingSetByAnother, func );
2406     }
2407     lck->lk.qlk.owner_id = 0;
2408     __kmp_release_adaptive_lock( lck, gtid );
2409     return KMP_LOCK_RELEASED;
2410 }
2411 
2412 static void
2413 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2414 {
2415     __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2416     lck->lk.adaptive.badness = 0;
2417     lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2418     lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2419     lck->lk.adaptive.max_badness      = __kmp_adaptive_backoff_params.max_badness;
2420 #if KMP_DEBUG_ADAPTIVE_LOCKS
2421     __kmp_zero_speculative_stats( &lck->lk.adaptive );
2422 #endif
2423     KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2424 }
2425 
2426 static void
2427 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2428 {
2429     __kmp_init_adaptive_lock( lck );
2430 }
2431 
2432 static void
2433 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2434 {
2435 #if KMP_DEBUG_ADAPTIVE_LOCKS
2436     __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2437 #endif
2438     __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2439     // Nothing needed for the speculative part.
2440 }
2441 
2442 static void
2443 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2444 {
2445     char const * const func = "omp_destroy_lock";
2446     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2447         KMP_FATAL( LockIsUninitialized, func );
2448     }
2449     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2450         KMP_FATAL( LockStillOwned, func );
2451     }
2452     __kmp_destroy_adaptive_lock( lck );
2453 }
2454 
2455 
2456 #endif // KMP_USE_ADAPTIVE_LOCKS
2457 
2458 
2459 /* ------------------------------------------------------------------------ */
2460 /* DRDPA ticket locks                                                */
2461 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
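//
// Illustrative sketch (not compiled) of the scheme implemented below: each
// acquirer takes a ticket and spins on its own (padded) slot of a polling
// array, polls[ticket & mask], until the previous owner's release writes the
// ticket value into that slot.  Because each waiter spins on a different
// slot, the polling area can be grown to match the number of waiters, or
// contracted to a single slot under oversubscription.
//
//     ticket = next_ticket++;                         // my place in line
//     while (polls[ticket & mask].poll < ticket)
//         ;                                           // spin on my own slot
//     /* ... critical section ... */
//     polls[(ticket + 1) & mask].poll = ticket + 1;   // release: wake successor
//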
2462 
2463 static kmp_int32
2464 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2465 {
2466     return TCR_4( lck->lk.owner_id ) - 1;
2467 }
2468 
2469 static inline bool
2470 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2471 {
2472     return lck->lk.depth_locked != -1;
2473 }
2474 
2475 __forceinline static int
2476 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2477 {
2478     kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2479     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2480     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2481       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2482       TCR_PTR(lck->lk.polls);                           // volatile load
2483 
2484 #ifdef USE_LOCK_PROFILE
2485     if (TCR_8(polls[ticket & mask].poll) != ticket)
2486         __kmp_printf("LOCK CONTENTION: %p\n", lck);
2487     /* else __kmp_printf( "." );*/
2488 #endif /* USE_LOCK_PROFILE */
2489 
2490     //
2491     // Now spin-wait, but reload the polls pointer and mask, in case the
2492     // polling area has been reconfigured.  Unless it is reconfigured, the
2493     // reloads stay in L1 cache and are cheap.
2494     //
2495     // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2496     //
2497     // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2498     // and poll to be re-read every spin iteration.
2499     //
2500     kmp_uint32 spins;
2501 
2502     KMP_FSYNC_PREPARE(lck);
2503     KMP_INIT_YIELD(spins);
    while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2505         // If we are oversubscribed,
2506         // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2507         // CPU Pause is in the macros for yield.
2508         //
2509         KMP_YIELD(TCR_4(__kmp_nth)
2510           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2511         KMP_YIELD_SPIN(spins);
2512 
2513         // Re-read the mask and the poll pointer from the lock structure.
2514         //
2515         // Make certain that "mask" is read before "polls" !!!
2516         //
        // If another thread reconfigures the polling area and updates these
        // values, and we then use the new value of mask with the old polls
        // pointer, we could access memory beyond the end of the old polling
        // area.
2521         //
2522         mask = TCR_8(lck->lk.mask);                     // volatile load
2523         polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2524           TCR_PTR(lck->lk.polls);                       // volatile load
2525     }
2526 
2527     //
2528     // Critical section starts here
2529     //
2530     KMP_FSYNC_ACQUIRED(lck);
2531     KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2532       ticket, lck));
2533     lck->lk.now_serving = ticket;                       // non-volatile store
2534 
2535     //
2536     // Deallocate a garbage polling area if we know that we are the last
2537     // thread that could possibly access it.
2538     //
2539     // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2540     // ticket.
2541     //
2542     if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2543         __kmp_free((void *)lck->lk.old_polls);
2544         lck->lk.old_polls = NULL;
2545         lck->lk.cleanup_ticket = 0;
2546     }
2547 
2548     //
2549     // Check to see if we should reconfigure the polling area.
2550     // If there is still a garbage polling area to be deallocated from a
2551     // previous reconfiguration, let a later thread reconfigure it.
2552     //
2553     if (lck->lk.old_polls == NULL) {
2554         bool reconfigure = false;
2555         volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2556         kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2557 
2558         if (TCR_4(__kmp_nth)
2559           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2560             //
2561             // We are in oversubscription mode.  Contract the polling area
2562             // down to a single location, if that hasn't been done already.
2563             //
2564             if (num_polls > 1) {
2565                 reconfigure = true;
2566                 num_polls = TCR_4(lck->lk.num_polls);
2567                 mask = 0;
2568                 num_polls = 1;
2569                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2570                   __kmp_allocate(num_polls * sizeof(*polls));
2571                 polls[0].poll = ticket;
2572             }
2573         }
2574         else {
2575             //
2576             // We are in under/fully subscribed mode.  Check the number of
2577             // threads waiting on the lock.  The size of the polling area
2578             // should be at least the number of threads waiting.
2579             //
2580             kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2581             if (num_waiting > num_polls) {
2582                 kmp_uint32 old_num_polls = num_polls;
2583                 reconfigure = true;
2584                 do {
2585                     mask = (mask << 1) | 1;
2586                     num_polls *= 2;
2587                 } while (num_polls <= num_waiting);
2588 
2589                 //
2590                 // Allocate the new polling area, and copy the relevant portion
2591                 // of the old polling area to the new area.  __kmp_allocate()
2592                 // zeroes the memory it allocates, and most of the old area is
2593                 // just zero padding, so we only copy the release counters.
2594                 //
2595                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2596                   __kmp_allocate(num_polls * sizeof(*polls));
2597                 kmp_uint32 i;
2598                 for (i = 0; i < old_num_polls; i++) {
2599                     polls[i].poll = old_polls[i].poll;
2600                 }
2601             }
2602         }
2603 
2604         if (reconfigure) {
2605             //
2606             // Now write the updated fields back to the lock structure.
2607             //
2608             // Make certain that "polls" is written before "mask" !!!
2609             //
2610             // If another thread picks up the new value of mask and the old
            // polls pointer, it could access memory beyond the end of the
2612             // old polling area.
2613             //
2614             // On x86, we need memory fences.
2615             //
2616             KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2617               ticket, lck, num_polls));
2618 
2619             lck->lk.old_polls = old_polls;              // non-volatile store
2620             lck->lk.polls = polls;                      // volatile store
2621 
2622             KMP_MB();
2623 
2624             lck->lk.num_polls = num_polls;              // non-volatile store
2625             lck->lk.mask = mask;                        // volatile store
2626 
2627             KMP_MB();
2628 
2629             //
2630             // Only after the new polling area and mask have been flushed
2631             // to main memory can we update the cleanup ticket field.
2632             //
2633             // volatile load / non-volatile store
2634             //
2635             lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2636         }
2637     }
2638     return KMP_LOCK_ACQUIRED_FIRST;
2639 }
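/*
 * Illustrative sketch (not compiled): the reconfiguration arithmetic above.
 * Suppose num_polls == 4, mask == 0x3, our ticket is 100 and next_ticket has
 * reached 106.  Then num_waiting == 106 - 100 - 1 == 5 > 4, so the polling
 * area is doubled until it covers all waiters:
 *
 *     mask: 0x3 -> 0x7,  num_polls: 4 -> 8      // one doubling suffices here
 *
 * The old area is parked in old_polls and freed later by whichever thread
 * first acquires the lock with a ticket >= cleanup_ticket.
 */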
2640 
2641 int
2642 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2643 {
2644     return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2645 }
2646 
2647 static int
2648 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2649 {
2650     char const * const func = "omp_set_lock";
2651     if ( lck->lk.initialized != lck ) {
2652         KMP_FATAL( LockIsUninitialized, func );
2653     }
2654     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2655         KMP_FATAL( LockNestableUsedAsSimple, func );
2656     }
2657     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2658         KMP_FATAL( LockIsAlreadyOwned, func );
2659     }
2660 
2661     __kmp_acquire_drdpa_lock( lck, gtid );
2662 
2663     lck->lk.owner_id = gtid + 1;
2664     return KMP_LOCK_ACQUIRED_FIRST;
2665 }
2666 
2667 int
2668 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2669 {
2670     //
2671     // First get a ticket, then read the polls pointer and the mask.
2672     // The polls pointer must be read before the mask!!! (See above)
2673     //
2674     kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
2675     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2676       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2677       TCR_PTR(lck->lk.polls);                           // volatile load
2678     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2679     if (TCR_8(polls[ticket & mask].poll) == ticket) {
2680         kmp_uint64 next_ticket = ticket + 1;
2681         if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2682           ticket, next_ticket)) {
2683             KMP_FSYNC_ACQUIRED(lck);
2684             KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2685                ticket, lck));
2686             lck->lk.now_serving = ticket;               // non-volatile store
2687 
2688             //
2689             // Since no threads are waiting, there is no possibility that
2690             // we would want to reconfigure the polling area.  We might
2691             // have the cleanup ticket value (which says that it is now
2692             // safe to deallocate old_polls), but we'll let a later thread
            // which calls __kmp_acquire_lock do that - this routine
            // isn't supposed to block, and we would risk blocking if we
            // called __kmp_free() to do the deallocation.
2696             //
2697             return TRUE;
2698         }
2699     }
2700     return FALSE;
2701 }
2702 
2703 static int
2704 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2705 {
2706     char const * const func = "omp_test_lock";
2707     if ( lck->lk.initialized != lck ) {
2708         KMP_FATAL( LockIsUninitialized, func );
2709     }
2710     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2711         KMP_FATAL( LockNestableUsedAsSimple, func );
2712     }
2713 
2714     int retval = __kmp_test_drdpa_lock( lck, gtid );
2715 
2716     if ( retval ) {
2717         lck->lk.owner_id = gtid + 1;
2718     }
2719     return retval;
2720 }
2721 
2722 int
2723 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2724 {
2725     //
2726     // Read the ticket value from the lock data struct, then the polls
2727     // pointer and the mask.  The polls pointer must be read before the
2728     // mask!!! (See above)
2729     //
2730     kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
2731     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2732       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2733       TCR_PTR(lck->lk.polls);                           // volatile load
2734     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2735     KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2736        ticket - 1, lck));
2737     KMP_FSYNC_RELEASING(lck);
2738     KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2739     return KMP_LOCK_RELEASED;
2740 }
2741 
2742 static int
2743 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2744 {
2745     char const * const func = "omp_unset_lock";
2746     KMP_MB();  /* in case another processor initialized lock */
2747     if ( lck->lk.initialized != lck ) {
2748         KMP_FATAL( LockIsUninitialized, func );
2749     }
2750     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2751         KMP_FATAL( LockNestableUsedAsSimple, func );
2752     }
2753     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2754         KMP_FATAL( LockUnsettingFree, func );
2755     }
2756     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2757       && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2758         KMP_FATAL( LockUnsettingSetByAnother, func );
2759     }
2760     lck->lk.owner_id = 0;
2761     return __kmp_release_drdpa_lock( lck, gtid );
2762 }
2763 
2764 void
2765 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2766 {
2767     lck->lk.location = NULL;
2768     lck->lk.mask = 0;
2769     lck->lk.num_polls = 1;
2770     lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2771       __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2772     lck->lk.cleanup_ticket = 0;
2773     lck->lk.old_polls = NULL;
2774     lck->lk.next_ticket = 0;
2775     lck->lk.now_serving = 0;
2776     lck->lk.owner_id = 0;      // no thread owns the lock.
2777     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2778     lck->lk.initialized = lck;
2779 
2780     KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2781 }
2782 
2783 static void
2784 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2785 {
2786     __kmp_init_drdpa_lock( lck );
2787 }
2788 
2789 void
2790 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2791 {
2792     lck->lk.initialized = NULL;
2793     lck->lk.location    = NULL;
2794     if (lck->lk.polls != NULL) {
2795         __kmp_free((void *)lck->lk.polls);
2796         lck->lk.polls = NULL;
2797     }
2798     if (lck->lk.old_polls != NULL) {
2799         __kmp_free((void *)lck->lk.old_polls);
2800         lck->lk.old_polls = NULL;
2801     }
2802     lck->lk.mask = 0;
2803     lck->lk.num_polls = 0;
2804     lck->lk.cleanup_ticket = 0;
2805     lck->lk.next_ticket = 0;
2806     lck->lk.now_serving = 0;
2807     lck->lk.owner_id = 0;
2808     lck->lk.depth_locked = -1;
2809 }
2810 
2811 static void
2812 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2813 {
2814     char const * const func = "omp_destroy_lock";
2815     if ( lck->lk.initialized != lck ) {
2816         KMP_FATAL( LockIsUninitialized, func );
2817     }
2818     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2819         KMP_FATAL( LockNestableUsedAsSimple, func );
2820     }
2821     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2822         KMP_FATAL( LockStillOwned, func );
2823     }
2824     __kmp_destroy_drdpa_lock( lck );
2825 }
2826 
2827 
2828 //
2829 // nested drdpa ticket locks
2830 //
2831 
2832 int
2833 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2834 {
2835     KMP_DEBUG_ASSERT( gtid >= 0 );
2836 
2837     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2838         lck->lk.depth_locked += 1;
2839         return KMP_LOCK_ACQUIRED_NEXT;
2840     }
2841     else {
2842         __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2843         KMP_MB();
2844         lck->lk.depth_locked = 1;
2845         KMP_MB();
2846         lck->lk.owner_id = gtid + 1;
2847         return KMP_LOCK_ACQUIRED_FIRST;
2848     }
2849 }
2850 
static int
__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2853 {
2854     char const * const func = "omp_set_nest_lock";
2855     if ( lck->lk.initialized != lck ) {
2856         KMP_FATAL( LockIsUninitialized, func );
2857     }
2858     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2859         KMP_FATAL( LockSimpleUsedAsNestable, func );
2860     }
    return __kmp_acquire_nested_drdpa_lock( lck, gtid );
2862 }
2863 
2864 int
2865 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2866 {
2867     int retval;
2868 
2869     KMP_DEBUG_ASSERT( gtid >= 0 );
2870 
2871     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2872         retval = ++lck->lk.depth_locked;
2873     }
2874     else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2875         retval = 0;
2876     }
2877     else {
2878         KMP_MB();
2879         retval = lck->lk.depth_locked = 1;
2880         KMP_MB();
2881         lck->lk.owner_id = gtid + 1;
2882     }
2883     return retval;
2884 }
2885 
2886 static int
2887 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2888 {
2889     char const * const func = "omp_test_nest_lock";
2890     if ( lck->lk.initialized != lck ) {
2891         KMP_FATAL( LockIsUninitialized, func );
2892     }
2893     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2894         KMP_FATAL( LockSimpleUsedAsNestable, func );
2895     }
2896     return __kmp_test_nested_drdpa_lock( lck, gtid );
2897 }
2898 
2899 int
2900 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2901 {
2902     KMP_DEBUG_ASSERT( gtid >= 0 );
2903 
2904     KMP_MB();
2905     if ( --(lck->lk.depth_locked) == 0 ) {
2906         KMP_MB();
2907         lck->lk.owner_id = 0;
2908         __kmp_release_drdpa_lock( lck, gtid );
2909         return KMP_LOCK_RELEASED;
2910     }
2911     return KMP_LOCK_STILL_HELD;
2912 }
2913 
2914 static int
2915 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2916 {
2917     char const * const func = "omp_unset_nest_lock";
2918     KMP_MB();  /* in case another processor initialized lock */
2919     if ( lck->lk.initialized != lck ) {
2920         KMP_FATAL( LockIsUninitialized, func );
2921     }
2922     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2923         KMP_FATAL( LockSimpleUsedAsNestable, func );
2924     }
2925     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2926         KMP_FATAL( LockUnsettingFree, func );
2927     }
2928     if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2929         KMP_FATAL( LockUnsettingSetByAnother, func );
2930     }
2931     return __kmp_release_nested_drdpa_lock( lck, gtid );
2932 }
2933 
2934 void
2935 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2936 {
2937     __kmp_init_drdpa_lock( lck );
2938     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2939 }
2940 
2941 static void
2942 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2943 {
2944     __kmp_init_nested_drdpa_lock( lck );
2945 }
2946 
2947 void
2948 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2949 {
2950     __kmp_destroy_drdpa_lock( lck );
2951     lck->lk.depth_locked = 0;
2952 }
2953 
2954 static void
2955 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2956 {
2957     char const * const func = "omp_destroy_nest_lock";
2958     if ( lck->lk.initialized != lck ) {
2959         KMP_FATAL( LockIsUninitialized, func );
2960     }
2961     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2962         KMP_FATAL( LockSimpleUsedAsNestable, func );
2963     }
2964     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2965         KMP_FATAL( LockStillOwned, func );
2966     }
2967     __kmp_destroy_nested_drdpa_lock( lck );
2968 }
2969 
2970 
2971 //
2972 // access functions to fields which don't exist for all lock kinds.
2973 //
2974 
2975 static int
2976 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2977 {
2978     return lck == lck->lk.initialized;
2979 }
2980 
2981 static const ident_t *
2982 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2983 {
2984     return lck->lk.location;
2985 }
2986 
2987 static void
2988 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2989 {
2990     lck->lk.location = loc;
2991 }
2992 
2993 static kmp_lock_flags_t
2994 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2995 {
2996     return lck->lk.flags;
2997 }
2998 
2999 static void
3000 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3001 {
3002     lck->lk.flags = flags;
3003 }
3004 
3005 #if KMP_USE_DYNAMIC_LOCK
3006 
3007 // Definitions of lock hints.
3008 # ifndef __OMP_H
3009 typedef enum kmp_lock_hint_t {
3010     kmp_lock_hint_none = 0,
3011     kmp_lock_hint_contended,
3012     kmp_lock_hint_uncontended,
3013     kmp_lock_hint_nonspeculative,
3014     kmp_lock_hint_speculative,
3015     kmp_lock_hint_adaptive,
3016 } kmp_lock_hint_t;
3017 # endif
3018 
// Direct lock initializers.  Each one simply writes its tag into the low 8 bits of the lock word.
3020 #define expand_init_lock(l, a)                                              \
3021 static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \
3022     *lck = DYNA_LOCK_FREE(l);                                               \
3023     KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck));            \
3024 }
3025 FOREACH_D_LOCK(expand_init_lock, 0)
3026 #undef expand_init_lock
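//
// Illustrative sketch (not compiled): for a direct lock kind such as "tas"
// (taking tas to be one of the FOREACH_D_LOCK entries), the macro above
// expands to roughly
//
//     static void init_tas_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) {
//         *lck = DYNA_LOCK_FREE(tas);
//         KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck));
//     }
//
// i.e. initializing a direct lock is a single store of that lock kind's tag.
//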
3027 
3028 #if DYNA_HAS_HLE
3029 
3030 // HLE lock functions - imported from the testbed runtime.
3031 #if KMP_MIC
3032 # define machine_pause() _mm_delay_32(10) // TODO: find the right argument
3033 #else
3034 # define machine_pause() _mm_pause()
3035 #endif
3036 #define HLE_ACQUIRE ".byte 0xf2;"
3037 #define HLE_RELEASE ".byte 0xf3;"
3038 
3039 static inline kmp_uint32
3040 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3041 {
3042     __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3043                     : "+r"(v), "+m"(*p)
3044                     :
3045                     : "memory");
3046     return v;
3047 }
3048 
3049 static void
3050 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3051 {
3052     *lck = 0;
3053 }
3054 
3055 static void
3056 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3057 {
3058     // Use gtid for DYNA_LOCK_BUSY if necessary
3059     if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) {
3060         int delay = 1;
3061         do {
3062             while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) {
3063                 for (int i = delay; i != 0; --i)
3064                     machine_pause();
3065                 delay = ((delay << 1) | 1) & 7;
3066             }
3067         } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle));
3068     }
3069 }
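/*
 * Illustrative note: the retry loop above polls the lock word with a small,
 * bounded exponential backoff; the pause count grows as 1, 3, 7 and is then
 * clamped to 7 by the "& 7":
 *
 *     delay = ((delay << 1) | 1) & 7;    // 1 -> 3 -> 7 -> 7 -> ...
 */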
3070 
3071 static void
3072 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3073 {
3074     __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3075 }
3076 
3077 static void
3078 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3079 {
3080     __asm__ volatile(HLE_RELEASE "movl %1,%0"
3081                     : "=m"(*lck)
3082                     : "r"(DYNA_LOCK_FREE(hle))
3083                     : "memory");
3084 }
3085 
3086 static void
3087 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3088 {
3089     __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3090 }
3091 
3092 static int
3093 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3094 {
3095     return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle);
3096 }
3097 
3098 static int
3099 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3100 {
3101     return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3102 }
3103 
3104 #endif // DYNA_HAS_HLE
3105 
3106 // Entry functions for indirect locks (first element of direct_*_ops[]).
3107 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3108 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3109 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3110 static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3111 static int  __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3112 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3113 static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3114 static int  __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3115 
3116 //
3117 // Jump tables for the indirect lock functions.
// Only fill in the odd entries; that avoids the need to shift out the low bit.
3119 //
3120 #define expand_func0(l, op) 0,op##_##l##_##lock,
3121 void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3122     = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) };
3123 
3124 #define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock,
3125 void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *)
3126     = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) };
3127 
3128 // Differentiates *lock and *lock_with_checks.
3129 #define expand_func2(l, op)  0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3130 #define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3131 static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3132     = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire)  },
3133         { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } };
3134 static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3135     = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release)  },
3136         { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } };
3137 
3138 #define expand_func3(l, op)  0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3139 #define expand_func3c(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3140 static int  (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3141     = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test)  },
3142         { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } };
3143 
3144 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3145 void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3146 void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3147 int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3148 
3149 //
3150 // Jump tables for the indirect lock functions.
3151 //
3152 #define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3153 void (*__kmp_indirect_init_ops[])(kmp_user_lock_p)
3154     = { FOREACH_I_LOCK(expand_func4, init) };
3155 void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p)
3156     = { FOREACH_I_LOCK(expand_func4, destroy) };
3157 
3158 // Differentiates *lock and *lock_with_checks.
3159 #define expand_func5(l, op)  (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3160 #define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3161 static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3162     = { { FOREACH_I_LOCK(expand_func5, acquire)  },
3163         { FOREACH_I_LOCK(expand_func5c, acquire) } };
3164 static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3165     = { { FOREACH_I_LOCK(expand_func5, release)  },
3166         { FOREACH_I_LOCK(expand_func5c, release) } };
3167 
3168 #define expand_func6(l, op)  (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3169 #define expand_func6c(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3170 static int  (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3171     = { { FOREACH_I_LOCK(expand_func6, test)  },
3172         { FOREACH_I_LOCK(expand_func6c, test) } };
3173 
3174 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3175 void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0;
3176 void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0;
3177 int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0;
3178 
3179 // Lock index table.
3180 kmp_indirect_lock_t **__kmp_indirect_lock_table;
3181 kmp_lock_index_t __kmp_indirect_lock_table_size;
3182 kmp_lock_index_t __kmp_indirect_lock_table_next;
3183 
3184 // Size of indirect locks.
3185 static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = {
3186     sizeof(kmp_ticket_lock_t),      sizeof(kmp_queuing_lock_t),
3187 #if KMP_USE_ADAPTIVE_LOCKS
3188     sizeof(kmp_adaptive_lock_t),
3189 #endif
3190     sizeof(kmp_drdpa_lock_t),
3191     sizeof(kmp_tas_lock_t),
3192 #if DYNA_HAS_FUTEX
3193     sizeof(kmp_futex_lock_t),
3194 #endif
3195     sizeof(kmp_ticket_lock_t),      sizeof(kmp_queuing_lock_t),
3196     sizeof(kmp_drdpa_lock_t)
3197 };
3198 
3199 // Jump tables for lock accessor/modifier.
3200 void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3201 void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3202 const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3203 kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
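//
// Only the lock types that implement accessors (ticket, queuing, adaptive,
// drdpa -- see init_lock_func below) get non-NULL entries in these tables;
// the remaining slots stay zero-initialized.  A defensive call through them
// would therefore look like this sketch (illustrative only; `tag`, `lck`, and
// `loc` stand for whatever the caller has in hand):
//
//   if (__kmp_indirect_set_location[tag] != NULL)
//       __kmp_indirect_set_location[tag](lck->lock, loc);
//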
3204 
3205 // Use different lock pools for different lock types.
3206 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 };
3207 
// Inserts the given lock pointer into the lock table.

3209 kmp_lock_index_t
3210 __kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
3211 {
3212     kmp_lock_index_t next = __kmp_indirect_lock_table_next;
3213     // Check capacity and double the size if required
3214     if (next >= __kmp_indirect_lock_table_size) {
3217         kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
3218         __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
3219         KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
3220         __kmp_free(old_table);
3221         __kmp_indirect_lock_table_size = 2*next;
3222     }
    // Insert lck into the table and return its index.
3224     __kmp_indirect_lock_table[next] = lck;
3225     __kmp_indirect_lock_table_next++;
3226     return next;
3227 }
3228 
3229 // User lock allocator for dynamically dispatched locks.
3230 kmp_indirect_lock_t *
3231 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3232 {
3233     kmp_indirect_lock_t *lck;
3234     kmp_lock_index_t idx;
3235 
3236     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3237 
3238     if (__kmp_indirect_lock_pool[tag] != NULL) {
3239         lck = __kmp_indirect_lock_pool[tag];
3240         if (OMP_LOCK_T_SIZE < sizeof(void *))
3241             idx = lck->lock->pool.index;
3242         __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3243     } else {
3244         lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
3245         lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3246         if (OMP_LOCK_T_SIZE < sizeof(void *))
3247             idx = __kmp_insert_indirect_lock(lck);
3248     }
3249 
3250     __kmp_release_lock(&__kmp_global_lock, gtid);
3251 
3252     lck->type = tag;
3253 
3254     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3255         *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3256     } else {
3257         *((kmp_indirect_lock_t **)user_lock) = lck;
3258     }
3259 
3260     return lck;
3261 }
3262 
3263 // User lock lookup for dynamically dispatched locks.
3264 static __forceinline
3265 kmp_indirect_lock_t *
3266 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3267 {
3268     if (__kmp_env_consistency_check) {
3269         kmp_indirect_lock_t *lck = NULL;
3270         if (user_lock == NULL) {
3271             KMP_FATAL(LockIsUninitialized, func);
3272         }
3273         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3274             kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock);
3275             if (idx < 0 || idx >= __kmp_indirect_lock_table_size) {
3276                 KMP_FATAL(LockIsUninitialized, func);
3277             }
3278             lck = __kmp_indirect_lock_table[idx];
3279         } else {
3280             lck = *((kmp_indirect_lock_t **)user_lock);
3281         }
3282         if (lck == NULL) {
3283             KMP_FATAL(LockIsUninitialized, func);
3284         }
3285         return lck;
3286     } else {
3287         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3288             return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)];
3289         } else {
3290             return *((kmp_indirect_lock_t **)user_lock);
3291         }
3292     }
3293 }
3294 
3295 static void
3296 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3297 {
3298 #if KMP_USE_ADAPTIVE_LOCKS
3299     if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3300         KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3301         seq = lockseq_queuing;
3302     }
3303 #endif
3304     kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
3305     kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3306     DYNA_I_LOCK_FUNC(l, init)(l->lock);
3307     KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
3308 }
3309 
3310 static void
3311 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3312 {
    kmp_int32 gtid = __kmp_entry_gtid();
3314     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3315     DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3316     kmp_indirect_locktag_t tag = l->type;
3317 
3318     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3319 
3320     // Use the base lock's space to keep the pool chain.
3321     l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3322     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3323         l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock);
3324     }
3325     __kmp_indirect_lock_pool[tag] = l;
3326 
3327     __kmp_release_lock(&__kmp_global_lock, gtid);
3328 }
3329 
3330 static void
3331 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3332 {
3333     kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3334     DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3335 }
3336 
3337 static void
3338 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3339 {
3340     kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3341     DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3342 }
3343 
3344 static int
3345 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3346 {
3347     kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3348     return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3349 }
3350 
3351 static void
3352 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3353 {
3354     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3355     DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3356 }
3357 
3358 static void
3359 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3360 {
3361     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3362     DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3363 }
3364 
3365 static int
3366 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3367 {
3368     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3369     return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3370 }
3371 
3372 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3373 
3374 // Initialize a hinted lock.
3375 void
3376 __kmp_init_lock_hinted(void **lock, int hint)
3377 {
3378     kmp_dyna_lockseq_t seq;
3379     switch (hint) {
3380         case kmp_lock_hint_uncontended:
3381             seq = lockseq_tas;
3382             break;
3383         case kmp_lock_hint_speculative:
3384 #if DYNA_HAS_HLE
3385             seq = lockseq_hle;
3386 #else
3387             seq = lockseq_tas;
3388 #endif
3389             break;
3390         case kmp_lock_hint_adaptive:
3391 #if KMP_USE_ADAPTIVE_LOCKS
3392             seq = lockseq_adaptive;
3393 #else
3394             seq = lockseq_queuing;
3395 #endif
3396             break;
3397         // Defaults to queuing locks.
3398         case kmp_lock_hint_contended:
3399         case kmp_lock_hint_nonspeculative:
3400         default:
3401             seq = lockseq_queuing;
3402             break;
3403     }
3404     if (DYNA_IS_D_LOCK(seq)) {
3405         DYNA_INIT_D_LOCK(lock, seq);
3406 #if USE_ITT_BUILD
3407         __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
3408 #endif
3409     } else {
3410         DYNA_INIT_I_LOCK(lock, seq);
3411 #if USE_ITT_BUILD
3412         kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3413         __kmp_itt_lock_creating(ilk->lock, NULL);
3414 #endif
3415     }
3416 }
3417 
3418 // This is used only in kmp_error.c when consistency checking is on.
3419 kmp_int32
3420 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3421 {
3422     switch (seq) {
3423         case lockseq_tas:
3424         case lockseq_nested_tas:
3425             return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3426 #if DYNA_HAS_FUTEX
3427         case lockseq_futex:
3428         case lockseq_nested_futex:
3429             return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3430 #endif
3431         case lockseq_ticket:
3432         case lockseq_nested_ticket:
3433             return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
        case lockseq_queuing:
        case lockseq_nested_queuing:
#if KMP_USE_ADAPTIVE_LOCKS
        case lockseq_adaptive:
#endif
            return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3440         case lockseq_drdpa:
3441         case lockseq_nested_drdpa:
3442             return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3443         default:
3444             return 0;
3445     }
3446 }
3447 
3448 // The value initialized from KMP_LOCK_KIND needs to be translated to its
3449 // nested version.
3450 void
3451 __kmp_init_nest_lock_hinted(void **lock, int hint)
3452 {
3453     kmp_dyna_lockseq_t seq;
3454     switch (hint) {
3455         case kmp_lock_hint_uncontended:
3456             seq = lockseq_nested_tas;
3457             break;
3458         // Defaults to queuing locks.
3459         case kmp_lock_hint_contended:
3460         case kmp_lock_hint_nonspeculative:
3461         default:
3462             seq = lockseq_nested_queuing;
3463             break;
3464     }
3465     DYNA_INIT_I_LOCK(lock, seq);
3466 #if USE_ITT_BUILD
3467     kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3468     __kmp_itt_lock_creating(ilk->lock, NULL);
3469 #endif
3470 }
3471 
3472 // Initializes the lock table for indirect locks.
3473 static void
3474 __kmp_init_indirect_lock_table()
3475 {
3476     __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
3477     __kmp_indirect_lock_table_size = 1024;
3478     __kmp_indirect_lock_table_next = 0;
3479 }
3480 
3481 #if KMP_USE_ADAPTIVE_LOCKS
3482 # define init_lock_func(table, expand) {             \
3483     table[locktag_ticket]         = expand(ticket);  \
3484     table[locktag_queuing]        = expand(queuing); \
3485     table[locktag_adaptive]       = expand(queuing); \
3486     table[locktag_drdpa]          = expand(drdpa);   \
3487     table[locktag_nested_ticket]  = expand(ticket);  \
3488     table[locktag_nested_queuing] = expand(queuing); \
3489     table[locktag_nested_drdpa]   = expand(drdpa);   \
3490 }
3491 #else
3492 # define init_lock_func(table, expand) {             \
3493     table[locktag_ticket]         = expand(ticket);  \
3494     table[locktag_queuing]        = expand(queuing); \
3495     table[locktag_drdpa]          = expand(drdpa);   \
3496     table[locktag_nested_ticket]  = expand(ticket);  \
3497     table[locktag_nested_queuing] = expand(queuing); \
3498     table[locktag_nested_drdpa]   = expand(drdpa);   \
3499 }
3500 #endif // KMP_USE_ADAPTIVE_LOCKS
3501 
3502 // Initializes data for dynamic user locks.
3503 void
3504 __kmp_init_dynamic_user_locks()
3505 {
    // Select the *lock or *lock_with_checks row of the jump tables.
    int offset = (__kmp_env_consistency_check) ? 1 : 0;
3508     __kmp_direct_set_ops = direct_set_tab[offset];
3509     __kmp_direct_unset_ops = direct_unset_tab[offset];
3510     __kmp_direct_test_ops = direct_test_tab[offset];
3511     __kmp_indirect_set_ops = indirect_set_tab[offset];
3512     __kmp_indirect_unset_ops = indirect_unset_tab[offset];
3513     __kmp_indirect_test_ops = indirect_test_tab[offset];
3514     __kmp_init_indirect_lock_table();
3515 
3516     // Initialize lock accessor/modifier
3517     // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
3518 #define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3519     init_lock_func(__kmp_indirect_set_location, expand_func);
3520 #undef expand_func
3521 #define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3522     init_lock_func(__kmp_indirect_set_flags, expand_func);
3523 #undef expand_func
3524 #define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3525     init_lock_func(__kmp_indirect_get_location, expand_func);
3526 #undef expand_func
3527 #define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3528     init_lock_func(__kmp_indirect_get_flags, expand_func);
3529 #undef expand_func
3530 
3531     __kmp_init_user_locks = TRUE;
3532 }
3533 
// Clean up dynamically allocated indirect locks and the lock table.
3535 void
3536 __kmp_cleanup_indirect_user_locks()
3537 {
3538     kmp_lock_index_t i;
3539     int k;
3540 
3541     // Clean up locks in the pools first (they were already destroyed before going into the pools).
3542     for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) {
3543         kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3544         while (l != NULL) {
3545             kmp_indirect_lock_t *ll = l;
3546             l = (kmp_indirect_lock_t *)l->lock->pool.next;
3547             if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3548                 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
3549             }
3550             __kmp_free(ll->lock);
3551             __kmp_free(ll);
3552         }
3553     }
3554     // Clean up the remaining undestroyed locks.
3555     for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
3556         kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
3557         if (l != NULL) {
3558             // Locks not destroyed explicitly need to be destroyed here.
3559             DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3560             __kmp_free(l->lock);
3561             __kmp_free(l);
3562         }
3563     }
3564     // Free the table
3565     __kmp_free(__kmp_indirect_lock_table);
3566 
3567     __kmp_init_user_locks = FALSE;
3568 }
3569 
3570 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3571 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3572 
3573 #else // KMP_USE_DYNAMIC_LOCK
3574 
3575 /* ------------------------------------------------------------------------ */
3576 /* user locks
3577  *
3578  * They are implemented as a table of function pointers which are set to the
3579  * lock functions of the appropriate kind, once that has been determined.
3580  */
3581 
3582 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3583 
3584 size_t __kmp_base_user_lock_size = 0;
3585 size_t __kmp_user_lock_size = 0;
3586 
3587 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3588 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3589 
3590 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3591 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3592 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3593 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3594 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3595 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3596 
3597 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3598 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3599 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3600 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3601 
3602 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3603 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3604 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3605 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3606 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3607 
3608 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3609 {
3610     switch ( user_lock_kind ) {
3611         case lk_default:
3612         default:
3613         KMP_ASSERT( 0 );
3614 
3615         case lk_tas: {
3616             __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3617             __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3618 
3619             __kmp_get_user_lock_owner_ =
3620               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3621               ( &__kmp_get_tas_lock_owner );
3622 
3623             if ( __kmp_env_consistency_check ) {
3624                 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3625                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3626             }
3627             else {
3628                 KMP_BIND_USER_LOCK(tas);
3629                 KMP_BIND_NESTED_USER_LOCK(tas);
3630             }
3631 
3632             __kmp_destroy_user_lock_ =
3633               ( void ( * )( kmp_user_lock_p ) )
3634               ( &__kmp_destroy_tas_lock );
3635 
3636              __kmp_is_user_lock_initialized_ =
3637                ( int ( * )( kmp_user_lock_p ) ) NULL;
3638 
3639              __kmp_get_user_lock_location_ =
3640                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3641 
3642              __kmp_set_user_lock_location_ =
3643                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3644 
3645              __kmp_get_user_lock_flags_ =
3646                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3647 
3648              __kmp_set_user_lock_flags_ =
3649                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3650         }
3651         break;
3652 
3653 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3654 
3655         case lk_futex: {
3656             __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3657             __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3658 
3659             __kmp_get_user_lock_owner_ =
3660               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3661               ( &__kmp_get_futex_lock_owner );
3662 
3663             if ( __kmp_env_consistency_check ) {
3664                 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3665                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3666             }
3667             else {
3668                 KMP_BIND_USER_LOCK(futex);
3669                 KMP_BIND_NESTED_USER_LOCK(futex);
3670             }
3671 
3672             __kmp_destroy_user_lock_ =
3673               ( void ( * )( kmp_user_lock_p ) )
3674               ( &__kmp_destroy_futex_lock );
3675 
3676              __kmp_is_user_lock_initialized_ =
3677                ( int ( * )( kmp_user_lock_p ) ) NULL;
3678 
3679              __kmp_get_user_lock_location_ =
3680                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3681 
3682              __kmp_set_user_lock_location_ =
3683                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3684 
3685              __kmp_get_user_lock_flags_ =
3686                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3687 
3688              __kmp_set_user_lock_flags_ =
3689                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3690         }
3691         break;
3692 
3693 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3694 
3695         case lk_ticket: {
3696             __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3697             __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3698 
3699             __kmp_get_user_lock_owner_ =
3700               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3701               ( &__kmp_get_ticket_lock_owner );
3702 
3703             if ( __kmp_env_consistency_check ) {
3704                 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3705                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3706             }
3707             else {
3708                 KMP_BIND_USER_LOCK(ticket);
3709                 KMP_BIND_NESTED_USER_LOCK(ticket);
3710             }
3711 
3712             __kmp_destroy_user_lock_ =
3713               ( void ( * )( kmp_user_lock_p ) )
3714               ( &__kmp_destroy_ticket_lock );
3715 
3716              __kmp_is_user_lock_initialized_ =
3717                ( int ( * )( kmp_user_lock_p ) )
3718                ( &__kmp_is_ticket_lock_initialized );
3719 
3720              __kmp_get_user_lock_location_ =
3721                ( const ident_t * ( * )( kmp_user_lock_p ) )
3722                ( &__kmp_get_ticket_lock_location );
3723 
3724              __kmp_set_user_lock_location_ =
3725                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3726                ( &__kmp_set_ticket_lock_location );
3727 
3728              __kmp_get_user_lock_flags_ =
3729                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3730                ( &__kmp_get_ticket_lock_flags );
3731 
3732              __kmp_set_user_lock_flags_ =
3733                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3734                ( &__kmp_set_ticket_lock_flags );
3735         }
3736         break;
3737 
3738         case lk_queuing: {
3739             __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3740             __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3741 
3742             __kmp_get_user_lock_owner_ =
3743               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3744               ( &__kmp_get_queuing_lock_owner );
3745 
3746             if ( __kmp_env_consistency_check ) {
3747                 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3748                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3749             }
3750             else {
3751                 KMP_BIND_USER_LOCK(queuing);
3752                 KMP_BIND_NESTED_USER_LOCK(queuing);
3753             }
3754 
3755             __kmp_destroy_user_lock_ =
3756               ( void ( * )( kmp_user_lock_p ) )
3757               ( &__kmp_destroy_queuing_lock );
3758 
3759              __kmp_is_user_lock_initialized_ =
3760                ( int ( * )( kmp_user_lock_p ) )
3761                ( &__kmp_is_queuing_lock_initialized );
3762 
3763              __kmp_get_user_lock_location_ =
3764                ( const ident_t * ( * )( kmp_user_lock_p ) )
3765                ( &__kmp_get_queuing_lock_location );
3766 
3767              __kmp_set_user_lock_location_ =
3768                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3769                ( &__kmp_set_queuing_lock_location );
3770 
3771              __kmp_get_user_lock_flags_ =
3772                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3773                ( &__kmp_get_queuing_lock_flags );
3774 
3775              __kmp_set_user_lock_flags_ =
3776                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3777                ( &__kmp_set_queuing_lock_flags );
3778         }
3779         break;
3780 
3781 #if KMP_USE_ADAPTIVE_LOCKS
3782         case lk_adaptive: {
3783             __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3784             __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3785 
3786             __kmp_get_user_lock_owner_ =
3787               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3788               ( &__kmp_get_queuing_lock_owner );
3789 
3790             if ( __kmp_env_consistency_check ) {
3791                 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3792             }
3793             else {
3794                 KMP_BIND_USER_LOCK(adaptive);
3795             }
3796 
3797             __kmp_destroy_user_lock_ =
3798               ( void ( * )( kmp_user_lock_p ) )
3799               ( &__kmp_destroy_adaptive_lock );
3800 
3801             __kmp_is_user_lock_initialized_ =
3802               ( int ( * )( kmp_user_lock_p ) )
3803               ( &__kmp_is_queuing_lock_initialized );
3804 
3805             __kmp_get_user_lock_location_ =
3806               ( const ident_t * ( * )( kmp_user_lock_p ) )
3807               ( &__kmp_get_queuing_lock_location );
3808 
3809             __kmp_set_user_lock_location_ =
3810               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3811               ( &__kmp_set_queuing_lock_location );
3812 
3813             __kmp_get_user_lock_flags_ =
3814               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3815               ( &__kmp_get_queuing_lock_flags );
3816 
3817             __kmp_set_user_lock_flags_ =
3818               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3819               ( &__kmp_set_queuing_lock_flags );
3820 
3821         }
3822         break;
3823 #endif // KMP_USE_ADAPTIVE_LOCKS
3824 
3825         case lk_drdpa: {
3826             __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3827             __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3828 
3829             __kmp_get_user_lock_owner_ =
3830               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3831               ( &__kmp_get_drdpa_lock_owner );
3832 
3833             if ( __kmp_env_consistency_check ) {
3834                 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3835                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3836             }
3837             else {
3838                 KMP_BIND_USER_LOCK(drdpa);
3839                 KMP_BIND_NESTED_USER_LOCK(drdpa);
3840             }
3841 
3842             __kmp_destroy_user_lock_ =
3843               ( void ( * )( kmp_user_lock_p ) )
3844               ( &__kmp_destroy_drdpa_lock );
3845 
3846              __kmp_is_user_lock_initialized_ =
3847                ( int ( * )( kmp_user_lock_p ) )
3848                ( &__kmp_is_drdpa_lock_initialized );
3849 
3850              __kmp_get_user_lock_location_ =
3851                ( const ident_t * ( * )( kmp_user_lock_p ) )
3852                ( &__kmp_get_drdpa_lock_location );
3853 
3854              __kmp_set_user_lock_location_ =
3855                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3856                ( &__kmp_set_drdpa_lock_location );
3857 
3858              __kmp_get_user_lock_flags_ =
3859                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3860                ( &__kmp_get_drdpa_lock_flags );
3861 
3862              __kmp_set_user_lock_flags_ =
3863                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3864                ( &__kmp_set_drdpa_lock_flags );
3865         }
3866         break;
3867     }
3868 }
3869 
3870 
3871 // ----------------------------------------------------------------------------
3872 // User lock table & lock allocation
3873 
3874 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
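// Note: "used" starts at 1 so that index 0 is never handed out as a lock
// index; after a resize, slot 0 of each new table instead stores the pointer
// to the previous (smaller) table (see __kmp_lock_table_insert below), and
// __kmp_cleanup_user_locks() walks that chain at shutdown.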
3875 kmp_user_lock_p __kmp_lock_pool = NULL;
3876 
3877 // Lock block-allocation support.
3878 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3879 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3880 
3881 static kmp_lock_index_t
3882 __kmp_lock_table_insert( kmp_user_lock_p lck )
3883 {
3884     // Assume that kmp_global_lock is held upon entry/exit.
3885     kmp_lock_index_t index;
3886     if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3887         kmp_lock_index_t size;
3888         kmp_user_lock_p *table;
3889         // Reallocate lock table.
3890         if ( __kmp_user_lock_table.allocated == 0 ) {
3891             size = 1024;
3892         }
3893         else {
3894             size = __kmp_user_lock_table.allocated * 2;
3895         }
3896         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3897         KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3898         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
        // We cannot free the previous table now, since it may be in use by other
        // threads. So save the pointer to the previous table in the first element
        // of the new table. All the tables are organized into a list and can be
        // freed when the library is shutting down.
3903         __kmp_user_lock_table.table = table;
3904         __kmp_user_lock_table.allocated = size;
3905     }
3906     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3907     index = __kmp_user_lock_table.used;
3908     __kmp_user_lock_table.table[ index ] = lck;
3909     ++ __kmp_user_lock_table.used;
3910     return index;
3911 }
3912 
3913 static kmp_user_lock_p
3914 __kmp_lock_block_allocate()
3915 {
3916     // Assume that kmp_global_lock is held upon entry/exit.
3917     static int last_index = 0;
3918     if ( ( last_index >= __kmp_num_locks_in_block )
3919       || ( __kmp_lock_blocks == NULL ) ) {
3920         // Restart the index.
3921         last_index = 0;
3922         // Need to allocate a new block.
3923         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3924         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3925         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3926         // Set up the new block.
3927         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3928         new_block->next_block = __kmp_lock_blocks;
3929         new_block->locks = (void *)buffer;
3930         // Publish the new block.
3931         KMP_MB();
3932         __kmp_lock_blocks = new_block;
3933     }
3934     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3935       [ last_index * __kmp_user_lock_size ] ) );
3936     last_index++;
3937     return ret;
3938 }
3939 
3940 //
3941 // Get memory for a lock. It may be freshly allocated memory or reused memory
3942 // from lock pool.
3943 //
3944 kmp_user_lock_p
3945 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3946   kmp_lock_flags_t flags )
3947 {
3948     kmp_user_lock_p lck;
3949     kmp_lock_index_t index;
3950     KMP_DEBUG_ASSERT( user_lock );
3951 
3952     __kmp_acquire_lock( &__kmp_global_lock, gtid );
3953 
3954     if ( __kmp_lock_pool == NULL ) {
3955         // Lock pool is empty. Allocate new memory.
3956         if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3957             lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3958         }
3959         else {
3960             lck = __kmp_lock_block_allocate();
3961         }
3962 
        // Insert lock in the table so that it can be freed in __kmp_cleanup,
        // and so that the debugger has info on all allocated locks.
3965         index = __kmp_lock_table_insert( lck );
3966     }
3967     else {
3968         // Pick up lock from pool.
3969         lck = __kmp_lock_pool;
3970         index = __kmp_lock_pool->pool.index;
3971         __kmp_lock_pool = __kmp_lock_pool->pool.next;
3972     }
3973 
3974     //
3975     // We could potentially differentiate between nested and regular locks
3976     // here, and do the lock table lookup for regular locks only.
3977     //
3978     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3979         * ( (kmp_lock_index_t *) user_lock ) = index;
3980     }
3981     else {
3982         * ( (kmp_user_lock_p *) user_lock ) = lck;
3983     }
3984 
    // Mark the lock if it is a critical section lock.
3986     __kmp_set_user_lock_flags( lck, flags );
3987 
    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3989 
3990     return lck;
3991 }
3992 
// Return the lock's memory to the pool for reuse.
3994 void
3995 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3996 {
3997     KMP_DEBUG_ASSERT( user_lock != NULL );
3998     KMP_DEBUG_ASSERT( lck != NULL );
3999 
4000     __kmp_acquire_lock( & __kmp_global_lock, gtid );
4001 
4002     lck->pool.next = __kmp_lock_pool;
4003     __kmp_lock_pool = lck;
4004     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4005         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4006         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4007         lck->pool.index = index;
4008     }
4009 
4010     __kmp_release_lock( & __kmp_global_lock, gtid );
4011 }
4012 
4013 kmp_user_lock_p
4014 __kmp_lookup_user_lock( void **user_lock, char const *func )
4015 {
4016     kmp_user_lock_p lck = NULL;
4017 
4018     if ( __kmp_env_consistency_check ) {
4019         if ( user_lock == NULL ) {
4020             KMP_FATAL( LockIsUninitialized, func );
4021         }
4022     }
4023 
4024     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4025         kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4026         if ( __kmp_env_consistency_check ) {
4027             if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4028                 KMP_FATAL( LockIsUninitialized, func );
4029             }
4030         }
4031         KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4032         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4033         lck = __kmp_user_lock_table.table[index];
4034     }
4035     else {
4036         lck = *( (kmp_user_lock_p *)user_lock );
4037     }
4038 
4039     if ( __kmp_env_consistency_check ) {
4040         if ( lck == NULL ) {
4041             KMP_FATAL( LockIsUninitialized, func );
4042         }
4043     }
4044 
4045     return lck;
4046 }
4047 
4048 void
4049 __kmp_cleanup_user_locks( void )
4050 {
4051     //
4052     // Reset lock pool. Do not worry about lock in the pool -- we will free
4053     // them when iterating through lock table (it includes all the locks,
4054     // dead or alive).
4055     //
4056     __kmp_lock_pool = NULL;
4057 
4058 #define IS_CRITICAL(lck) \
4059         ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4060         ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4061 
4062     //
4063     // Loop through lock table, free all locks.
4064     //
4065     // Do not free item [0], it is reserved for lock tables list.
4066     //
4067     // FIXME - we are iterating through a list of (pointers to) objects of
4068     // type union kmp_user_lock, but we have no way of knowing whether the
4069     // base type is currently "pool" or whatever the global user lock type
4070     // is.
4071     //
4072     // We are relying on the fact that for all of the user lock types
4073     // (except "tas"), the first field in the lock struct is the "initialized"
4074     // field, which is set to the address of the lock object itself when
4075     // the lock is initialized.  When the union is of type "pool", the
4076     // first field is a pointer to the next object in the free list, which
4077     // will not be the same address as the object itself.
4078     //
4079     // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4080     // will fail for "pool" objects on the free list.  This must happen as
4081     // the "location" field of real user locks overlaps the "index" field
4082     // of "pool" objects.
4083     //
4084     // It would be better to run through the free list, and remove all "pool"
4085     // objects from the lock table before executing this loop.  However,
4086     // "pool" objects do not always have their index field set (only on
4087     // lin_32e), and I don't want to search the lock table for the address
4088     // of every "pool" object on the free list.
4089     //
4090     while ( __kmp_user_lock_table.used > 1 ) {
        const ident_t *loc;
4092 
4093         //
4094         // reduce __kmp_user_lock_table.used before freeing the lock,
4095         // so that state of locks is consistent
4096         //
4097         kmp_user_lock_p lck = __kmp_user_lock_table.table[
4098           --__kmp_user_lock_table.used ];
4099 
4100         if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4101           ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4102             //
4103             // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4104             // initialized AND it is NOT a critical section (user is not
4105             // responsible for destroying criticals) AND we know source
4106             // location to report.
4107             //
4108             if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4109               ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4110               ( loc->psource != NULL ) ) {
4111                 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4112                 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4113                 __kmp_str_loc_free( &str_loc);
4114             }
4115 
4116 #ifdef KMP_DEBUG
4117             if ( IS_CRITICAL( lck ) ) {
4118                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4119             }
4120             else {
4121                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4122             }
4123 #endif // KMP_DEBUG
4124 
4125             //
4126             // Cleanup internal lock dynamic resources
4127             // (for drdpa locks particularly).
4128             //
4129             __kmp_destroy_user_lock( lck );
4130         }
4131 
4132         //
4133         // Free the lock if block allocation of locks is not used.
4134         //
4135         if ( __kmp_lock_blocks == NULL ) {
4136             __kmp_free( lck );
4137         }
4138     }
4139 
4140 #undef IS_CRITICAL
4141 
4142     //
4143     // delete lock table(s).
4144     //
4145     kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4146     __kmp_user_lock_table.table = NULL;
4147     __kmp_user_lock_table.allocated = 0;
4148 
4149     while ( table_ptr != NULL ) {
4150         //
4151         // In the first element we saved the pointer to the previous
4152         // (smaller) lock table.
4153         //
4154         kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4155         __kmp_free( table_ptr );
4156         table_ptr = next;
4157     }
4158 
4159     //
4160     // Free buffers allocated for blocks of locks.
4161     //
4162     kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4163     __kmp_lock_blocks = NULL;
4164 
4165     while ( block_ptr != NULL ) {
4166         kmp_block_of_locks_t *next = block_ptr->next_block;
4167         __kmp_free( block_ptr->locks );
4168         //
4169         // *block_ptr itself was allocated at the end of the locks vector.
4170         //
        block_ptr = next;
4172     }
4173 
4174     TCW_4(__kmp_init_user_locks, FALSE);
4175 }
4176 
4177 #endif // KMP_USE_DYNAMIC_LOCK
4178