1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 
18 #include "kmp.h"
19 #include "kmp_itt.h"
20 #include "kmp_i18n.h"
21 #include "kmp_lock.h"
22 #include "kmp_io.h"
23 
24 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
25 # include <unistd.h>
26 # include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need are the two macros below (which are part of the kernel ABI, so they can't change),
// we just define the constants here and don't include <futex.h>.
32 # ifndef FUTEX_WAIT
33 #  define FUTEX_WAIT    0
34 # endif
35 # ifndef FUTEX_WAKE
36 #  define FUTEX_WAKE    1
37 # endif
38 #endif
39 
40 /* Implement spin locks for internal library use.             */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
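/* A quick illustration (example values only) of the wrap-around property the
   check below relies on: with 32-bit unsigned arithmetic, x = 0xFFFFFFFE and
   y = 0xFFFFFFFC keep satisfying x - y == 2 even after both increment past 0,
   so differences such as ( next_ticket - now_serving ) in the ticket lock
   code stay correct across overflow. */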
42 
43 void
44 __kmp_validate_locks( void )
45 {
46     int i;
47     kmp_uint32  x, y;
48 
    /* Check to make sure unsigned arithmetic wraps properly */
50     x = ~((kmp_uint32) 0) - 2;
51     y = x - 2;
52 
53     for (i = 0; i < 8; ++i, ++x, ++y) {
54         kmp_uint32 z = (x - y);
55         KMP_ASSERT( z == 2 );
56     }
57 
58     KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
59 }
60 
61 
62 /* ------------------------------------------------------------------------ */
63 /* test and set locks */
64 
65 //
66 // For the non-nested locks, we can only assume that the first 4 bytes were
67 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
68 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
69 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
70 //
71 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
72 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
73 //
74 
75 static kmp_int32
76 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
77 {
78     return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
79 }
80 
81 static inline bool
82 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
83 {
84     return lck->lk.depth_locked != -1;
85 }
86 
87 __forceinline static int
88 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
89 {
90     KMP_MB();
91 
92 #ifdef USE_LOCK_PROFILE
93     kmp_uint32 curr = TCR_4( lck->lk.poll );
94     if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
95         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
96     /* else __kmp_printf( "." );*/
97 #endif /* USE_LOCK_PROFILE */
98 
99     if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
100       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
101         KMP_FSYNC_ACQUIRED(lck);
102         return KMP_LOCK_ACQUIRED_FIRST;
103     }
104 
105     kmp_uint32 spins;
106     KMP_FSYNC_PREPARE( lck );
107     KMP_INIT_YIELD( spins );
108     if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
109       __kmp_xproc ) ) {
110         KMP_YIELD( TRUE );
111     }
112     else {
113         KMP_YIELD_SPIN( spins );
114     }
115 
116     kmp_backoff_t backoff = __kmp_spin_backoff_params;
117     while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
118       ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
119 
120         __kmp_spin_backoff(&backoff);
121         if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
122           __kmp_xproc ) ) {
123             KMP_YIELD( TRUE );
124         }
125         else {
126             KMP_YIELD_SPIN( spins );
127         }
128     }
129     KMP_FSYNC_ACQUIRED( lck );
130     return KMP_LOCK_ACQUIRED_FIRST;
131 }
132 
133 int
134 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
135 {
136     return __kmp_acquire_tas_lock_timed_template( lck, gtid );
137 }
138 
139 static int
140 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
141 {
142     char const * const func = "omp_set_lock";
143     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
144       && __kmp_is_tas_lock_nestable( lck ) ) {
145         KMP_FATAL( LockNestableUsedAsSimple, func );
146     }
147     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
148         KMP_FATAL( LockIsAlreadyOwned, func );
149     }
150     return __kmp_acquire_tas_lock( lck, gtid );
151 }
152 
153 int
154 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
155 {
156     if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
157       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
158         KMP_FSYNC_ACQUIRED( lck );
159         return TRUE;
160     }
161     return FALSE;
162 }
163 
164 static int
165 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
166 {
167     char const * const func = "omp_test_lock";
168     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
169       && __kmp_is_tas_lock_nestable( lck ) ) {
170         KMP_FATAL( LockNestableUsedAsSimple, func );
171     }
172     return __kmp_test_tas_lock( lck, gtid );
173 }
174 
175 int
176 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
177 {
178     KMP_MB();       /* Flush all pending memory write invalidates.  */
179 
180     KMP_FSYNC_RELEASING(lck);
181     KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
182     KMP_MB();       /* Flush all pending memory write invalidates.  */
183 
184     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
185       __kmp_xproc ) );
186     return KMP_LOCK_RELEASED;
187 }
188 
189 static int
190 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
191 {
192     char const * const func = "omp_unset_lock";
193     KMP_MB();  /* in case another processor initialized lock */
194     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
195       && __kmp_is_tas_lock_nestable( lck ) ) {
196         KMP_FATAL( LockNestableUsedAsSimple, func );
197     }
198     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
199         KMP_FATAL( LockUnsettingFree, func );
200     }
201     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
202       && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
203         KMP_FATAL( LockUnsettingSetByAnother, func );
204     }
205     return __kmp_release_tas_lock( lck, gtid );
206 }
207 
208 void
209 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
210 {
211     TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
212 }
213 
214 static void
215 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
216 {
217     __kmp_init_tas_lock( lck );
218 }
219 
220 void
221 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
222 {
223     lck->lk.poll = 0;
224 }
225 
226 static void
227 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
228 {
229     char const * const func = "omp_destroy_lock";
230     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
231       && __kmp_is_tas_lock_nestable( lck ) ) {
232         KMP_FATAL( LockNestableUsedAsSimple, func );
233     }
234     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
235         KMP_FATAL( LockStillOwned, func );
236     }
237     __kmp_destroy_tas_lock( lck );
238 }
239 
240 
241 //
242 // nested test and set locks
243 //
244 
245 int
246 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
247 {
248     KMP_DEBUG_ASSERT( gtid >= 0 );
249 
250     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
251         lck->lk.depth_locked += 1;
252         return KMP_LOCK_ACQUIRED_NEXT;
253     }
254     else {
255         __kmp_acquire_tas_lock_timed_template( lck, gtid );
256         lck->lk.depth_locked = 1;
257         return KMP_LOCK_ACQUIRED_FIRST;
258     }
259 }
260 
261 static int
262 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
263 {
264     char const * const func = "omp_set_nest_lock";
265     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
266         KMP_FATAL( LockSimpleUsedAsNestable, func );
267     }
268     return __kmp_acquire_nested_tas_lock( lck, gtid );
269 }
270 
271 int
272 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
273 {
274     int retval;
275 
276     KMP_DEBUG_ASSERT( gtid >= 0 );
277 
278     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
279         retval = ++lck->lk.depth_locked;
280     }
281     else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
282         retval = 0;
283     }
284     else {
285         KMP_MB();
286         retval = lck->lk.depth_locked = 1;
287     }
288     return retval;
289 }
290 
291 static int
292 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
293 {
294     char const * const func = "omp_test_nest_lock";
295     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
296         KMP_FATAL( LockSimpleUsedAsNestable, func );
297     }
298     return __kmp_test_nested_tas_lock( lck, gtid );
299 }
300 
301 int
302 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
303 {
304     KMP_DEBUG_ASSERT( gtid >= 0 );
305 
306     KMP_MB();
307     if ( --(lck->lk.depth_locked) == 0 ) {
308         __kmp_release_tas_lock( lck, gtid );
309         return KMP_LOCK_RELEASED;
310     }
311     return KMP_LOCK_STILL_HELD;
312 }
313 
314 static int
315 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
316 {
317     char const * const func = "omp_unset_nest_lock";
318     KMP_MB();  /* in case another processor initialized lock */
319     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
320         KMP_FATAL( LockSimpleUsedAsNestable, func );
321     }
322     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
323         KMP_FATAL( LockUnsettingFree, func );
324     }
325     if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
326         KMP_FATAL( LockUnsettingSetByAnother, func );
327     }
328     return __kmp_release_nested_tas_lock( lck, gtid );
329 }
330 
331 void
332 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
333 {
334     __kmp_init_tas_lock( lck );
335     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
336 }
337 
338 static void
339 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
340 {
341     __kmp_init_nested_tas_lock( lck );
342 }
343 
344 void
345 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
346 {
347     __kmp_destroy_tas_lock( lck );
348     lck->lk.depth_locked = 0;
349 }
350 
351 static void
352 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
353 {
354     char const * const func = "omp_destroy_nest_lock";
355     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
356         KMP_FATAL( LockSimpleUsedAsNestable, func );
357     }
358     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
359         KMP_FATAL( LockStillOwned, func );
360     }
361     __kmp_destroy_nested_tas_lock( lck );
362 }
363 
364 
365 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
366 
367 /* ------------------------------------------------------------------------ */
368 /* futex locks */
369 
370 // futex locks are really just test and set locks, with a different method
371 // of handling contention.  They take the same amount of space as test and
372 // set locks, and are allocated the same way (i.e. use the area allocated by
373 // the compiler for non-nested locks / allocate nested locks on the heap).
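//
// Poll word layout assumed by the routines below (as read from the code,
// not a separate spec): bits 1..31 hold gtid+1 of the owner (gtid_code =
// (gtid+1) << 1 on acquire, >> 1 in __kmp_get_futex_lock_owner), and bit 0
// is set once at least one waiter has parked in futex_wait and therefore
// must be woken with futex_wake on release.
//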
374 
375 static kmp_int32
376 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
377 {
378     return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
379 }
380 
381 static inline bool
382 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
383 {
384     return lck->lk.depth_locked != -1;
385 }
386 
387 __forceinline static int
388 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
389 {
390     kmp_int32 gtid_code = ( gtid + 1 ) << 1;
391 
392     KMP_MB();
393 
394 #ifdef USE_LOCK_PROFILE
395     kmp_uint32 curr = TCR_4( lck->lk.poll );
396     if ( ( curr != 0 ) && ( curr != gtid_code ) )
397         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
398     /* else __kmp_printf( "." );*/
399 #endif /* USE_LOCK_PROFILE */
400 
401     KMP_FSYNC_PREPARE( lck );
402     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
403       lck, lck->lk.poll, gtid ) );
404 
405     kmp_int32 poll_val;
406 
407     while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
408              KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
409 
410         kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
411         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
412            lck, gtid, poll_val, cond ) );
413 
414         //
415         // NOTE: if you try to use the following condition for this branch
416         //
417         // if ( poll_val & 1 == 0 )
418         //
419         // Then the 12.0 compiler has a bug where the following block will
420         // always be skipped, regardless of the value of the LSB of poll_val.
421         //
422         if ( ! cond ) {
423             //
424             // Try to set the lsb in the poll to indicate to the owner
425             // thread that they need to wake this thread up.
426             //
427             if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
428                 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
429                   lck, lck->lk.poll, gtid ) );
430                 continue;
431             }
432             poll_val |= KMP_LOCK_BUSY(1, futex);
433 
434             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
435               lck, lck->lk.poll, gtid ) );
436         }
437 
438         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
439            lck, gtid, poll_val ) );
440 
441         kmp_int32 rc;
442         if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
443           poll_val, NULL, NULL, 0 ) ) != 0 ) {
444             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
445                lck, gtid, poll_val, rc, errno ) );
446             continue;
447         }
448 
449         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
450            lck, gtid, poll_val ) );
451         //
452         // This thread has now done a successful futex wait call and was
453         // entered on the OS futex queue.  We must now perform a futex
454         // wake call when releasing the lock, as we have no idea how many
455         // other threads are in the queue.
456         //
457         gtid_code |= 1;
458     }
459 
460     KMP_FSYNC_ACQUIRED( lck );
461     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
462       lck, lck->lk.poll, gtid ) );
463     return KMP_LOCK_ACQUIRED_FIRST;
464 }
465 
466 int
467 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
468 {
469    return __kmp_acquire_futex_lock_timed_template( lck, gtid );
470 }
471 
472 static int
473 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
474 {
475     char const * const func = "omp_set_lock";
476     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
477       && __kmp_is_futex_lock_nestable( lck ) ) {
478         KMP_FATAL( LockNestableUsedAsSimple, func );
479     }
480     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
481         KMP_FATAL( LockIsAlreadyOwned, func );
482     }
483     return __kmp_acquire_futex_lock( lck, gtid );
484 }
485 
486 int
487 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
488 {
489     if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
490         KMP_FSYNC_ACQUIRED( lck );
491         return TRUE;
492     }
493     return FALSE;
494 }
495 
496 static int
497 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
498 {
499     char const * const func = "omp_test_lock";
500     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
501       && __kmp_is_futex_lock_nestable( lck ) ) {
502         KMP_FATAL( LockNestableUsedAsSimple, func );
503     }
504     return __kmp_test_futex_lock( lck, gtid );
505 }
506 
507 int
508 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
509 {
510     KMP_MB();       /* Flush all pending memory write invalidates.  */
511 
512     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
513       lck, lck->lk.poll, gtid ) );
514 
515     KMP_FSYNC_RELEASING(lck);
516 
517     kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
518 
519     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
520        lck, gtid, poll_val ) );
521 
522     if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
523         KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
524            lck, gtid ) );
525         syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
526     }
527 
528     KMP_MB();       /* Flush all pending memory write invalidates.  */
529 
530     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
531       lck, lck->lk.poll, gtid ) );
532 
533     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
534       __kmp_xproc ) );
535     return KMP_LOCK_RELEASED;
536 }
537 
538 static int
539 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
540 {
541     char const * const func = "omp_unset_lock";
542     KMP_MB();  /* in case another processor initialized lock */
543     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
544       && __kmp_is_futex_lock_nestable( lck ) ) {
545         KMP_FATAL( LockNestableUsedAsSimple, func );
546     }
547     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
548         KMP_FATAL( LockUnsettingFree, func );
549     }
550     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
551       && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
552         KMP_FATAL( LockUnsettingSetByAnother, func );
553     }
554     return __kmp_release_futex_lock( lck, gtid );
555 }
556 
557 void
558 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
559 {
560     TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
561 }
562 
563 static void
564 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
565 {
566     __kmp_init_futex_lock( lck );
567 }
568 
569 void
570 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
571 {
572     lck->lk.poll = 0;
573 }
574 
575 static void
576 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
577 {
578     char const * const func = "omp_destroy_lock";
579     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
580       && __kmp_is_futex_lock_nestable( lck ) ) {
581         KMP_FATAL( LockNestableUsedAsSimple, func );
582     }
583     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
584         KMP_FATAL( LockStillOwned, func );
585     }
586     __kmp_destroy_futex_lock( lck );
587 }
588 
589 
590 //
591 // nested futex locks
592 //
593 
594 int
595 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
596 {
597     KMP_DEBUG_ASSERT( gtid >= 0 );
598 
599     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
600         lck->lk.depth_locked += 1;
601         return KMP_LOCK_ACQUIRED_NEXT;
602     }
603     else {
604         __kmp_acquire_futex_lock_timed_template( lck, gtid );
605         lck->lk.depth_locked = 1;
606         return KMP_LOCK_ACQUIRED_FIRST;
607     }
608 }
609 
610 static int
611 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
612 {
613     char const * const func = "omp_set_nest_lock";
614     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
615         KMP_FATAL( LockSimpleUsedAsNestable, func );
616     }
617     return __kmp_acquire_nested_futex_lock( lck, gtid );
618 }
619 
620 int
621 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
622 {
623     int retval;
624 
625     KMP_DEBUG_ASSERT( gtid >= 0 );
626 
627     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
628         retval = ++lck->lk.depth_locked;
629     }
630     else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
631         retval = 0;
632     }
633     else {
634         KMP_MB();
635         retval = lck->lk.depth_locked = 1;
636     }
637     return retval;
638 }
639 
640 static int
641 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
642 {
643     char const * const func = "omp_test_nest_lock";
644     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
645         KMP_FATAL( LockSimpleUsedAsNestable, func );
646     }
647     return __kmp_test_nested_futex_lock( lck, gtid );
648 }
649 
650 int
651 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
652 {
653     KMP_DEBUG_ASSERT( gtid >= 0 );
654 
655     KMP_MB();
656     if ( --(lck->lk.depth_locked) == 0 ) {
657         __kmp_release_futex_lock( lck, gtid );
658         return KMP_LOCK_RELEASED;
659     }
660     return KMP_LOCK_STILL_HELD;
661 }
662 
663 static int
664 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
665 {
666     char const * const func = "omp_unset_nest_lock";
667     KMP_MB();  /* in case another processor initialized lock */
668     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
669         KMP_FATAL( LockSimpleUsedAsNestable, func );
670     }
671     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
672         KMP_FATAL( LockUnsettingFree, func );
673     }
674     if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
675         KMP_FATAL( LockUnsettingSetByAnother, func );
676     }
677     return __kmp_release_nested_futex_lock( lck, gtid );
678 }
679 
680 void
681 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
682 {
683     __kmp_init_futex_lock( lck );
684     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
685 }
686 
687 static void
688 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
689 {
690     __kmp_init_nested_futex_lock( lck );
691 }
692 
693 void
694 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
695 {
696     __kmp_destroy_futex_lock( lck );
697     lck->lk.depth_locked = 0;
698 }
699 
700 static void
701 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
702 {
703     char const * const func = "omp_destroy_nest_lock";
704     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
705         KMP_FATAL( LockSimpleUsedAsNestable, func );
706     }
707     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
708         KMP_FATAL( LockStillOwned, func );
709     }
710     __kmp_destroy_nested_futex_lock( lck );
711 }
712 
#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
714 
715 
716 /* ------------------------------------------------------------------------ */
717 /* ticket (bakery) locks */
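/* Sketch of the algorithm used below: an acquiring thread atomically
   fetches-and-increments next_ticket to draw a ticket, then spins until
   now_serving equals that ticket; release simply advances now_serving by
   one.  Tickets are 32-bit unsigned values, so the wrap-around check in
   __kmp_validate_locks covers the subtractions performed on them here. */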
718 
719 static kmp_int32
720 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
721 {
722     return TCR_4( lck->lk.owner_id ) - 1;
723 }
724 
725 static inline bool
726 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
727 {
728     return lck->lk.depth_locked != -1;
729 }
730 
731 static kmp_uint32
732 __kmp_bakery_check(kmp_uint32 value, kmp_uint32 checker)
733 {
734     register kmp_uint32 pause;
735 
736     if (value == checker) {
737         return TRUE;
738     }
739     for (pause = checker - value; pause != 0; --pause);
740     return FALSE;
741 }
742 
743 __forceinline static int
744 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
745 {
746     kmp_uint32 my_ticket;
747     KMP_MB();
748 
749     my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
750 
751 #ifdef USE_LOCK_PROFILE
752     if ( TCR_4( lck->lk.now_serving ) != my_ticket )
753         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
754     /* else __kmp_printf( "." );*/
755 #endif /* USE_LOCK_PROFILE */
756 
757     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
758         KMP_FSYNC_ACQUIRED(lck);
759         return KMP_LOCK_ACQUIRED_FIRST;
760     }
761     KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
762     KMP_FSYNC_ACQUIRED(lck);
763     return KMP_LOCK_ACQUIRED_FIRST;
764 }
765 
766 int
767 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
768 {
769     return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
770 }
771 
772 static int
773 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
774 {
775     char const * const func = "omp_set_lock";
776     if ( lck->lk.initialized != lck ) {
777         KMP_FATAL( LockIsUninitialized, func );
778     }
779     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
780         KMP_FATAL( LockNestableUsedAsSimple, func );
781     }
782     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
783         KMP_FATAL( LockIsAlreadyOwned, func );
784     }
785 
786     __kmp_acquire_ticket_lock( lck, gtid );
787 
788     lck->lk.owner_id = gtid + 1;
789     return KMP_LOCK_ACQUIRED_FIRST;
790 }
791 
792 int
793 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
794 {
795     kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
796     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
797         kmp_uint32 next_ticket = my_ticket + 1;
798         if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
799           my_ticket, next_ticket ) ) {
800             KMP_FSYNC_ACQUIRED( lck );
801             return TRUE;
802         }
803     }
804     return FALSE;
805 }
806 
807 static int
808 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
809 {
810     char const * const func = "omp_test_lock";
811     if ( lck->lk.initialized != lck ) {
812         KMP_FATAL( LockIsUninitialized, func );
813     }
814     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
815         KMP_FATAL( LockNestableUsedAsSimple, func );
816     }
817 
818     int retval = __kmp_test_ticket_lock( lck, gtid );
819 
820     if ( retval ) {
821         lck->lk.owner_id = gtid + 1;
822     }
823     return retval;
824 }
825 
826 int
827 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
828 {
829     kmp_uint32  distance;
830 
831     KMP_MB();       /* Flush all pending memory write invalidates.  */
832 
833     KMP_FSYNC_RELEASING(lck);
834     distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
835 
836     KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
837 
838     KMP_MB();       /* Flush all pending memory write invalidates.  */
839 
840     KMP_YIELD( distance
841       > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
842     return KMP_LOCK_RELEASED;
843 }
844 
845 static int
846 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
847 {
848     char const * const func = "omp_unset_lock";
849     KMP_MB();  /* in case another processor initialized lock */
850     if ( lck->lk.initialized != lck ) {
851         KMP_FATAL( LockIsUninitialized, func );
852     }
853     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
854         KMP_FATAL( LockNestableUsedAsSimple, func );
855     }
856     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
857         KMP_FATAL( LockUnsettingFree, func );
858     }
859     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
860       && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
861         KMP_FATAL( LockUnsettingSetByAnother, func );
862     }
863     lck->lk.owner_id = 0;
864     return __kmp_release_ticket_lock( lck, gtid );
865 }
866 
867 void
868 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
869 {
870     lck->lk.location = NULL;
871     TCW_4( lck->lk.next_ticket, 0 );
872     TCW_4( lck->lk.now_serving, 0 );
873     lck->lk.owner_id = 0;      // no thread owns the lock.
874     lck->lk.depth_locked = -1; // -1 => not a nested lock.
875     lck->lk.initialized = (kmp_ticket_lock *)lck;
876 }
877 
878 static void
879 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
880 {
881     __kmp_init_ticket_lock( lck );
882 }
883 
884 void
885 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
886 {
887     lck->lk.initialized = NULL;
888     lck->lk.location    = NULL;
889     lck->lk.next_ticket = 0;
890     lck->lk.now_serving = 0;
891     lck->lk.owner_id = 0;
892     lck->lk.depth_locked = -1;
893 }
894 
895 static void
896 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
897 {
898     char const * const func = "omp_destroy_lock";
899     if ( lck->lk.initialized != lck ) {
900         KMP_FATAL( LockIsUninitialized, func );
901     }
902     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
903         KMP_FATAL( LockNestableUsedAsSimple, func );
904     }
905     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
906         KMP_FATAL( LockStillOwned, func );
907     }
908     __kmp_destroy_ticket_lock( lck );
909 }
910 
911 
912 //
913 // nested ticket locks
914 //
915 
916 int
917 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
918 {
919     KMP_DEBUG_ASSERT( gtid >= 0 );
920 
921     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
922         lck->lk.depth_locked += 1;
923         return KMP_LOCK_ACQUIRED_NEXT;
924     }
925     else {
926         __kmp_acquire_ticket_lock_timed_template( lck, gtid );
927         KMP_MB();
928         lck->lk.depth_locked = 1;
929         KMP_MB();
930         lck->lk.owner_id = gtid + 1;
931         return KMP_LOCK_ACQUIRED_FIRST;
932     }
933 }
934 
935 static int
936 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
937 {
938     char const * const func = "omp_set_nest_lock";
939     if ( lck->lk.initialized != lck ) {
940         KMP_FATAL( LockIsUninitialized, func );
941     }
942     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
943         KMP_FATAL( LockSimpleUsedAsNestable, func );
944     }
945     return __kmp_acquire_nested_ticket_lock( lck, gtid );
946 }
947 
948 int
949 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
950 {
951     int retval;
952 
953     KMP_DEBUG_ASSERT( gtid >= 0 );
954 
955     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
956         retval = ++lck->lk.depth_locked;
957     }
958     else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
959         retval = 0;
960     }
961     else {
962         KMP_MB();
963         retval = lck->lk.depth_locked = 1;
964         KMP_MB();
965         lck->lk.owner_id = gtid + 1;
966     }
967     return retval;
968 }
969 
970 static int
971 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
972   kmp_int32 gtid )
973 {
974     char const * const func = "omp_test_nest_lock";
975     if ( lck->lk.initialized != lck ) {
976         KMP_FATAL( LockIsUninitialized, func );
977     }
978     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
979         KMP_FATAL( LockSimpleUsedAsNestable, func );
980     }
981     return __kmp_test_nested_ticket_lock( lck, gtid );
982 }
983 
984 int
985 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
986 {
987     KMP_DEBUG_ASSERT( gtid >= 0 );
988 
989     KMP_MB();
990     if ( --(lck->lk.depth_locked) == 0 ) {
991         KMP_MB();
992         lck->lk.owner_id = 0;
993         __kmp_release_ticket_lock( lck, gtid );
994         return KMP_LOCK_RELEASED;
995     }
996     return KMP_LOCK_STILL_HELD;
997 }
998 
999 static int
1000 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1001 {
1002     char const * const func = "omp_unset_nest_lock";
1003     KMP_MB();  /* in case another processor initialized lock */
1004     if ( lck->lk.initialized != lck ) {
1005         KMP_FATAL( LockIsUninitialized, func );
1006     }
1007     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1008         KMP_FATAL( LockSimpleUsedAsNestable, func );
1009     }
1010     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1011         KMP_FATAL( LockUnsettingFree, func );
1012     }
1013     if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1014         KMP_FATAL( LockUnsettingSetByAnother, func );
1015     }
1016     return __kmp_release_nested_ticket_lock( lck, gtid );
1017 }
1018 
1019 void
1020 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1021 {
1022     __kmp_init_ticket_lock( lck );
1023     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1024 }
1025 
1026 static void
1027 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1028 {
1029     __kmp_init_nested_ticket_lock( lck );
1030 }
1031 
1032 void
1033 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1034 {
1035     __kmp_destroy_ticket_lock( lck );
1036     lck->lk.depth_locked = 0;
1037 }
1038 
1039 static void
1040 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1041 {
1042     char const * const func = "omp_destroy_nest_lock";
1043     if ( lck->lk.initialized != lck ) {
1044         KMP_FATAL( LockIsUninitialized, func );
1045     }
1046     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1047         KMP_FATAL( LockSimpleUsedAsNestable, func );
1048     }
1049     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1050         KMP_FATAL( LockStillOwned, func );
1051     }
1052     __kmp_destroy_nested_ticket_lock( lck );
1053 }
1054 
1055 
1056 //
1057 // access functions to fields which don't exist for all lock kinds.
1058 //
1059 
1060 static int
1061 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1062 {
1063     return lck == lck->lk.initialized;
1064 }
1065 
1066 static const ident_t *
1067 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1068 {
1069     return lck->lk.location;
1070 }
1071 
1072 static void
1073 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1074 {
1075     lck->lk.location = loc;
1076 }
1077 
1078 static kmp_lock_flags_t
1079 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1080 {
1081     return lck->lk.flags;
1082 }
1083 
1084 static void
1085 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1086 {
1087     lck->lk.flags = flags;
1088 }
1089 
1090 /* ------------------------------------------------------------------------ */
1091 /* queuing locks */
1092 
1093 /*
1094  * First the states
1095  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1096  *   UINT_MAX or -1, 0  means lock is held, nobody on queue
1097  *                h, h  means lock is held or about to transition, 1 element on queue
1098  *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1099  *
1100  * Now the transitions
1101  *    Acquire(0,0)  = -1 ,0
1102  *    Release(0,0)  = Error
1103  *    Acquire(-1,0) =  h ,h    h > 0
1104  *    Release(-1,0) =  0 ,0
1105  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1106  *    Release(h,h)  = -1 ,0    h > 0
1107  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1108  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1109  *
1110  * And pictorially
1111  *
1112  *
1113  *          +-----+
1114  *          | 0, 0|------- release -------> Error
1115  *          +-----+
1116  *            |  ^
1117  *     acquire|  |release
1118  *            |  |
1119  *            |  |
1120  *            v  |
1121  *          +-----+
1122  *          |-1, 0|
1123  *          +-----+
1124  *            |  ^
1125  *     acquire|  |release
1126  *            |  |
1127  *            |  |
1128  *            v  |
1129  *          +-----+
1130  *          | h, h|
1131  *          +-----+
1132  *            |  ^
1133  *     acquire|  |release
1134  *            |  |
1135  *            |  |
1136  *            v  |
1137  *          +-----+
1138  *          | h, t|----- acquire, release loopback ---+
1139  *          +-----+                                   |
1140  *               ^                                    |
1141  *               |                                    |
1142  *               +------------------------------------+
1143  *
1144  */
1145 
1146 #ifdef DEBUG_QUEUING_LOCKS
1147 
1148 /* Stuff for circular trace buffer */
1149 #define TRACE_BUF_ELE	1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
1151 static int tc = 0;
1152 #define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128,  "t%d at %s\n", X, Y );
1153 #define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1154 #define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1155 
1156 static void
1157 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1158   kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1159 {
1160     kmp_int32 t, i;
1161 
1162     __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1163 
1164     i = tc % TRACE_BUF_ELE;
1165     __kmp_printf_no_lock( "%s\n", traces[i] );
1166     i = (i+1) % TRACE_BUF_ELE;
1167     while ( i != (tc % TRACE_BUF_ELE) ) {
1168         __kmp_printf_no_lock( "%s", traces[i] );
1169         i = (i+1) % TRACE_BUF_ELE;
1170     }
1171     __kmp_printf_no_lock( "\n" );
1172 
1173     __kmp_printf_no_lock(
1174              "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1175              gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1176              head_id, tail_id );
1177 
1178     __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1179 
1180     if ( lck->lk.head_id >= 1 ) {
1181         t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1182         while (t > 0) {
1183             __kmp_printf_no_lock( "-> %d ", t );
1184             t = __kmp_threads[t-1]->th.th_next_waiting;
1185         }
1186     }
1187     __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
1188     __kmp_printf_no_lock( "\n\n" );
1189 }
1190 
1191 #endif /* DEBUG_QUEUING_LOCKS */
1192 
1193 static kmp_int32
1194 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1195 {
1196     return TCR_4( lck->lk.owner_id ) - 1;
1197 }
1198 
1199 static inline bool
1200 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1201 {
1202     return lck->lk.depth_locked != -1;
1203 }
1204 
/* Acquire a lock using the queuing lock implementation */
1206 template <bool takeTime>
1207 /* [TLW] The unused template above is left behind because of what BEB believes is a
1208    potential compiler problem with __forceinline. */
1209 __forceinline static int
1210 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1211   kmp_int32 gtid )
1212 {
1213     register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
1214     volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
1215     volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
1216     volatile kmp_uint32 *spin_here_p;
1217     kmp_int32 need_mf = 1;
1218 
1219 #if OMPT_SUPPORT
1220     ompt_state_t prev_state = ompt_state_undefined;
1221 #endif
1222 
1223     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1224 
1225     KMP_FSYNC_PREPARE( lck );
1226     KMP_DEBUG_ASSERT( this_thr != NULL );
1227     spin_here_p = & this_thr->th.th_spin_here;
1228 
1229 #ifdef DEBUG_QUEUING_LOCKS
1230     TRACE_LOCK( gtid+1, "acq ent" );
1231     if ( *spin_here_p )
1232         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1233     if ( this_thr->th.th_next_waiting != 0 )
1234         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1235 #endif
1236     KMP_DEBUG_ASSERT( !*spin_here_p );
1237     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1238 
1239 
1240     /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1241        that may follow, not just in execution order, but also in visibility order.  This way,
1242        when a releasing thread observes the changes to the queue by this thread, it can
1243        rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1244        spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
1245        to FALSE before this thread sets it to TRUE, this thread will hang.
1246     */
1247     *spin_here_p = TRUE;  /* before enqueuing to prevent race */
1248 
1249     while( 1 ) {
1250         kmp_int32 enqueued;
1251         kmp_int32 head;
1252         kmp_int32 tail;
1253 
1254         head = *head_id_p;
1255 
1256         switch ( head ) {
1257 
1258             case -1:
1259             {
1260 #ifdef DEBUG_QUEUING_LOCKS
1261                 tail = *tail_id_p;
1262                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1263 #endif
                tail = 0;  /* to make sure the next waiting link is not set accidentally;
                           this assignment prevents us from entering the if ( tail > 0 )
                           branch in the enqueued case below, which is not necessary for
                           this state transition */
1268 
1269                 need_mf = 0;
1270                 /* try (-1,0)->(tid,tid) */
1271                 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1272                   KMP_PACK_64( -1, 0 ),
1273                   KMP_PACK_64( gtid+1, gtid+1 ) );
1274 #ifdef DEBUG_QUEUING_LOCKS
1275                   if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1276 #endif
1277             }
1278             break;
1279 
1280             default:
1281             {
1282                 tail = *tail_id_p;
1283                 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1284 
1285 #ifdef DEBUG_QUEUING_LOCKS
1286                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1287 #endif
1288 
1289                 if ( tail == 0 ) {
1290                     enqueued = FALSE;
1291                 }
1292                 else {
1293                     need_mf = 0;
1294                     /* try (h,t) or (h,h)->(h,tid) */
1295                     enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1296 
1297 #ifdef DEBUG_QUEUING_LOCKS
1298                         if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1299 #endif
1300                 }
1301             }
1302             break;
1303 
1304             case 0: /* empty queue */
1305             {
1306                 kmp_int32 grabbed_lock;
1307 
1308 #ifdef DEBUG_QUEUING_LOCKS
1309                 tail = *tail_id_p;
1310                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1311 #endif
1312                 /* try (0,0)->(-1,0) */
1313 
1314                 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1315                 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1316 
1317                 if ( grabbed_lock ) {
1318 
1319                     *spin_here_p = FALSE;
1320 
1321                     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1322                               lck, gtid ));
1323 #ifdef DEBUG_QUEUING_LOCKS
1324                     TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1325 #endif
1326 
1327 #if OMPT_SUPPORT
1328                     if (ompt_enabled && prev_state != ompt_state_undefined) {
1329                         /* change the state before clearing wait_id */
1330                         this_thr->th.ompt_thread_info.state = prev_state;
1331                         this_thr->th.ompt_thread_info.wait_id = 0;
1332                     }
1333 #endif
1334 
1335                     KMP_FSYNC_ACQUIRED( lck );
1336                     return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1337                 }
1338                 enqueued = FALSE;
1339             }
1340             break;
1341         }
1342 
1343 #if OMPT_SUPPORT
1344         if (ompt_enabled && prev_state == ompt_state_undefined) {
1345             /* this thread will spin; set wait_id before entering wait state */
1346             prev_state = this_thr->th.ompt_thread_info.state;
1347             this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1348             this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1349         }
1350 #endif
1351 
1352         if ( enqueued ) {
1353             if ( tail > 0 ) {
1354                 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1355                 KMP_ASSERT( tail_thr != NULL );
1356                 tail_thr->th.th_next_waiting = gtid+1;
1357                 /* corresponding wait for this write in release code */
1358             }
1359             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1360 
1361 
1362             /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
1363              *       throughput only here.
1364              */
1365             KMP_MB();
1366             KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1367 
1368 #ifdef DEBUG_QUEUING_LOCKS
1369             TRACE_LOCK( gtid+1, "acq spin" );
1370 
1371             if ( this_thr->th.th_next_waiting != 0 )
1372                 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1373 #endif
1374             KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1375             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1376                       lck, gtid ));
1377 
1378 #ifdef DEBUG_QUEUING_LOCKS
1379             TRACE_LOCK( gtid+1, "acq exit 2" );
1380 #endif
1381 
1382 #if OMPT_SUPPORT
1383             /* change the state before clearing wait_id */
1384             this_thr->th.ompt_thread_info.state = prev_state;
1385             this_thr->th.ompt_thread_info.wait_id = 0;
1386 #endif
1387 
1388             /* got lock, we were dequeued by the thread that released lock */
1389             return KMP_LOCK_ACQUIRED_FIRST;
1390         }
1391 
1392         /* Yield if number of threads > number of logical processors */
1393         /* ToDo: Not sure why this should only be in oversubscription case,
1394            maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1395         KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1396           __kmp_xproc ) );
1397 #ifdef DEBUG_QUEUING_LOCKS
1398         TRACE_LOCK( gtid+1, "acq retry" );
1399 #endif
1400 
1401     }
1402     KMP_ASSERT2( 0, "should not get here" );
1403     return KMP_LOCK_ACQUIRED_FIRST;
1404 }
1405 
1406 int
1407 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1408 {
1409     KMP_DEBUG_ASSERT( gtid >= 0 );
1410 
1411     return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1412 }
1413 
1414 static int
1415 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1416   kmp_int32 gtid )
1417 {
1418     char const * const func = "omp_set_lock";
1419     if ( lck->lk.initialized != lck ) {
1420         KMP_FATAL( LockIsUninitialized, func );
1421     }
1422     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1423         KMP_FATAL( LockNestableUsedAsSimple, func );
1424     }
1425     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1426         KMP_FATAL( LockIsAlreadyOwned, func );
1427     }
1428 
1429     __kmp_acquire_queuing_lock( lck, gtid );
1430 
1431     lck->lk.owner_id = gtid + 1;
1432     return KMP_LOCK_ACQUIRED_FIRST;
1433 }
1434 
1435 int
1436 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1437 {
1438     volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
1439     kmp_int32 head;
1440 #ifdef KMP_DEBUG
1441     kmp_info_t *this_thr;
1442 #endif
1443 
1444     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1445     KMP_DEBUG_ASSERT( gtid >= 0 );
1446 #ifdef KMP_DEBUG
1447     this_thr = __kmp_thread_from_gtid( gtid );
1448     KMP_DEBUG_ASSERT( this_thr != NULL );
1449     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1450 #endif
1451 
1452     head = *head_id_p;
1453 
1454     if ( head == 0 ) { /* nobody on queue, nobody holding */
1455 
1456         /* try (0,0)->(-1,0) */
1457 
1458         if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1459             KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1460             KMP_FSYNC_ACQUIRED(lck);
1461             return TRUE;
1462         }
1463     }
1464 
1465     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1466     return FALSE;
1467 }
1468 
1469 static int
1470 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1471 {
1472     char const * const func = "omp_test_lock";
1473     if ( lck->lk.initialized != lck ) {
1474         KMP_FATAL( LockIsUninitialized, func );
1475     }
1476     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1477         KMP_FATAL( LockNestableUsedAsSimple, func );
1478     }
1479 
1480     int retval = __kmp_test_queuing_lock( lck, gtid );
1481 
1482     if ( retval ) {
1483         lck->lk.owner_id = gtid + 1;
1484     }
1485     return retval;
1486 }
1487 
1488 int
1489 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1490 {
1491     register kmp_info_t *this_thr;
1492     volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1493     volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1494 
1495     KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1496     KMP_DEBUG_ASSERT( gtid >= 0 );
1497     this_thr    = __kmp_thread_from_gtid( gtid );
1498     KMP_DEBUG_ASSERT( this_thr != NULL );
1499 #ifdef DEBUG_QUEUING_LOCKS
1500     TRACE_LOCK( gtid+1, "rel ent" );
1501 
1502     if ( this_thr->th.th_spin_here )
1503         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1504     if ( this_thr->th.th_next_waiting != 0 )
1505         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1506 #endif
1507     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1508     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1509 
1510     KMP_FSYNC_RELEASING(lck);
1511 
1512     while( 1 ) {
1513         kmp_int32 dequeued;
1514         kmp_int32 head;
1515         kmp_int32 tail;
1516 
1517         head = *head_id_p;
1518 
1519 #ifdef DEBUG_QUEUING_LOCKS
1520         tail = *tail_id_p;
1521         TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1522         if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1523 #endif
1524         KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1525 
1526         if ( head == -1 ) { /* nobody on queue */
1527 
1528             /* try (-1,0)->(0,0) */
1529             if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1530                 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1531                           lck, gtid ));
1532 #ifdef DEBUG_QUEUING_LOCKS
1533                 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1534 #endif
1535 
1536 #if OMPT_SUPPORT
1537                 /* nothing to do - no other thread is trying to shift blame */
1538 #endif
1539 
1540                 return KMP_LOCK_RELEASED;
1541             }
1542             dequeued = FALSE;
1543 
1544         }
1545         else {
1546 
1547             tail = *tail_id_p;
1548             if ( head == tail ) {  /* only one thread on the queue */
1549 
1550 #ifdef DEBUG_QUEUING_LOCKS
1551                 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1552 #endif
1553                 KMP_DEBUG_ASSERT( head > 0 );
1554 
1555                 /* try (h,h)->(-1,0) */
1556                 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1557                   KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1558 #ifdef DEBUG_QUEUING_LOCKS
1559                 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1560 #endif
1561 
1562             }
1563             else {
1564                 volatile kmp_int32 *waiting_id_p;
1565                 kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
1566                 KMP_DEBUG_ASSERT( head_thr != NULL );
1567                 waiting_id_p = & head_thr->th.th_next_waiting;
1568 
1569                 /* Does this require synchronous reads? */
1570 #ifdef DEBUG_QUEUING_LOCKS
1571                 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1572 #endif
1573                 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1574 
1575                 /* try (h,t)->(h',t) or (t,t) */
1576 
1577                 KMP_MB();
1578                 /* make sure enqueuing thread has time to update next waiting thread field */
1579                 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
1580 #ifdef DEBUG_QUEUING_LOCKS
1581                 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1582 #endif
1583                 dequeued = TRUE;
1584             }
1585         }
1586 
1587         if ( dequeued ) {
1588             kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1589             KMP_DEBUG_ASSERT( head_thr != NULL );
1590 
1591             /* Does this require synchronous reads? */
1592 #ifdef DEBUG_QUEUING_LOCKS
1593             if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1594 #endif
1595             KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1596 
1597             /* For clean code only.
1598              * Thread not released until next statement prevents race with acquire code.
1599              */
1600             head_thr->th.th_next_waiting = 0;
1601 #ifdef DEBUG_QUEUING_LOCKS
1602             TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1603 #endif
1604 
1605             KMP_MB();
1606             /* reset spin value */
1607             head_thr->th.th_spin_here = FALSE;
1608 
1609             KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1610                       lck, gtid ));
1611 #ifdef DEBUG_QUEUING_LOCKS
1612             TRACE_LOCK( gtid+1, "rel exit 2" );
1613 #endif
1614             return KMP_LOCK_RELEASED;
1615         }
1616         /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */
1617 
1618 #ifdef DEBUG_QUEUING_LOCKS
1619         TRACE_LOCK( gtid+1, "rel retry" );
1620 #endif
1621 
1622     } /* while */
1623     KMP_ASSERT2( 0, "should not get here" );
1624     return KMP_LOCK_RELEASED;
1625 }
1626 
1627 static int
1628 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1629   kmp_int32 gtid )
1630 {
1631     char const * const func = "omp_unset_lock";
1632     KMP_MB();  /* in case another processor initialized lock */
1633     if ( lck->lk.initialized != lck ) {
1634         KMP_FATAL( LockIsUninitialized, func );
1635     }
1636     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1637         KMP_FATAL( LockNestableUsedAsSimple, func );
1638     }
1639     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1640         KMP_FATAL( LockUnsettingFree, func );
1641     }
1642     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1643         KMP_FATAL( LockUnsettingSetByAnother, func );
1644     }
1645     lck->lk.owner_id = 0;
1646     return __kmp_release_queuing_lock( lck, gtid );
1647 }
1648 
1649 void
1650 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1651 {
1652     lck->lk.location = NULL;
1653     lck->lk.head_id = 0;
1654     lck->lk.tail_id = 0;
1655     lck->lk.next_ticket = 0;
1656     lck->lk.now_serving = 0;
1657     lck->lk.owner_id = 0;      // no thread owns the lock.
1658     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1659     lck->lk.initialized = lck;
1660 
1661     KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1662 }
1663 
1664 static void
1665 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1666 {
1667     __kmp_init_queuing_lock( lck );
1668 }
1669 
1670 void
1671 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1672 {
1673     lck->lk.initialized = NULL;
1674     lck->lk.location = NULL;
1675     lck->lk.head_id = 0;
1676     lck->lk.tail_id = 0;
1677     lck->lk.next_ticket = 0;
1678     lck->lk.now_serving = 0;
1679     lck->lk.owner_id = 0;
1680     lck->lk.depth_locked = -1;
1681 }
1682 
1683 static void
1684 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1685 {
1686     char const * const func = "omp_destroy_lock";
1687     if ( lck->lk.initialized != lck ) {
1688         KMP_FATAL( LockIsUninitialized, func );
1689     }
1690     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1691         KMP_FATAL( LockNestableUsedAsSimple, func );
1692     }
1693     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1694         KMP_FATAL( LockStillOwned, func );
1695     }
1696     __kmp_destroy_queuing_lock( lck );
1697 }
1698 
1699 
1700 //
1701 // nested queuing locks
1702 //
1703 
1704 int
1705 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1706 {
1707     KMP_DEBUG_ASSERT( gtid >= 0 );
1708 
1709     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1710         lck->lk.depth_locked += 1;
1711         return KMP_LOCK_ACQUIRED_NEXT;
1712     }
1713     else {
1714         __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1715         KMP_MB();
1716         lck->lk.depth_locked = 1;
1717         KMP_MB();
1718         lck->lk.owner_id = gtid + 1;
1719         return KMP_LOCK_ACQUIRED_FIRST;
1720     }
1721 }
1722 
1723 static int
1724 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1725 {
1726     char const * const func = "omp_set_nest_lock";
1727     if ( lck->lk.initialized != lck ) {
1728         KMP_FATAL( LockIsUninitialized, func );
1729     }
1730     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1731         KMP_FATAL( LockSimpleUsedAsNestable, func );
1732     }
1733     return __kmp_acquire_nested_queuing_lock( lck, gtid );
1734 }
1735 
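// Returns the new nesting depth (>= 1) on success, or 0 if the lock could not
// be acquired, matching the semantics of omp_test_nest_lock().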
1736 int
1737 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1738 {
1739     int retval;
1740 
1741     KMP_DEBUG_ASSERT( gtid >= 0 );
1742 
1743     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1744         retval = ++lck->lk.depth_locked;
1745     }
1746     else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1747         retval = 0;
1748     }
1749     else {
1750         KMP_MB();
1751         retval = lck->lk.depth_locked = 1;
1752         KMP_MB();
1753         lck->lk.owner_id = gtid + 1;
1754     }
1755     return retval;
1756 }
1757 
1758 static int
1759 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1760   kmp_int32 gtid )
1761 {
1762     char const * const func = "omp_test_nest_lock";
1763     if ( lck->lk.initialized != lck ) {
1764         KMP_FATAL( LockIsUninitialized, func );
1765     }
1766     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1767         KMP_FATAL( LockSimpleUsedAsNestable, func );
1768     }
1769     return __kmp_test_nested_queuing_lock( lck, gtid );
1770 }
1771 
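// Returns KMP_LOCK_RELEASED only when the outermost nesting level is released;
// releasing an inner nesting level returns KMP_LOCK_STILL_HELD.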
1772 int
1773 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1774 {
1775     KMP_DEBUG_ASSERT( gtid >= 0 );
1776 
1777     KMP_MB();
1778     if ( --(lck->lk.depth_locked) == 0 ) {
1779         KMP_MB();
1780         lck->lk.owner_id = 0;
1781         __kmp_release_queuing_lock( lck, gtid );
1782         return KMP_LOCK_RELEASED;
1783     }
1784     return KMP_LOCK_STILL_HELD;
1785 }
1786 
1787 static int
1788 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1789 {
1790     char const * const func = "omp_unset_nest_lock";
1791     KMP_MB();  /* in case another processor initialized lock */
1792     if ( lck->lk.initialized != lck ) {
1793         KMP_FATAL( LockIsUninitialized, func );
1794     }
1795     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1796         KMP_FATAL( LockSimpleUsedAsNestable, func );
1797     }
1798     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1799         KMP_FATAL( LockUnsettingFree, func );
1800     }
1801     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1802         KMP_FATAL( LockUnsettingSetByAnother, func );
1803     }
1804     return __kmp_release_nested_queuing_lock( lck, gtid );
1805 }
1806 
1807 void
1808 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1809 {
1810     __kmp_init_queuing_lock( lck );
1811     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1812 }
1813 
1814 static void
1815 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1816 {
1817     __kmp_init_nested_queuing_lock( lck );
1818 }
1819 
1820 void
1821 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1822 {
1823     __kmp_destroy_queuing_lock( lck );
1824     lck->lk.depth_locked = 0;
1825 }
1826 
1827 static void
1828 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1829 {
1830     char const * const func = "omp_destroy_nest_lock";
1831     if ( lck->lk.initialized != lck ) {
1832         KMP_FATAL( LockIsUninitialized, func );
1833     }
1834     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1835         KMP_FATAL( LockSimpleUsedAsNestable, func );
1836     }
1837     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1838         KMP_FATAL( LockStillOwned, func );
1839     }
1840     __kmp_destroy_nested_queuing_lock( lck );
1841 }
1842 
1843 
1844 //
1845 // access functions to fields which don't exist for all lock kinds.
1846 //
1847 
1848 static int
1849 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1850 {
1851     return lck == lck->lk.initialized;
1852 }
1853 
1854 static const ident_t *
1855 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1856 {
1857     return lck->lk.location;
1858 }
1859 
1860 static void
1861 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1862 {
1863     lck->lk.location = loc;
1864 }
1865 
1866 static kmp_lock_flags_t
1867 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1868 {
1869     return lck->lk.flags;
1870 }
1871 
1872 static void
1873 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1874 {
1875     lck->lk.flags = flags;
1876 }
1877 
1878 #if KMP_USE_ADAPTIVE_LOCKS
1879 
1880 /*
1881     RTM Adaptive locks
1882 */
1883 
1884 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1885 
1886 #include <immintrin.h>
1887 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1888 
1889 #else
1890 
1891 // Values from the status register after failed speculation.
1892 #define _XBEGIN_STARTED          (~0u)
1893 #define _XABORT_EXPLICIT         (1 << 0)
1894 #define _XABORT_RETRY            (1 << 1)
1895 #define _XABORT_CONFLICT         (1 << 2)
1896 #define _XABORT_CAPACITY         (1 << 3)
1897 #define _XABORT_DEBUG            (1 << 4)
1898 #define _XABORT_NESTED           (1 << 5)
1899 #define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))
1900 
1901 // Aborts for which it's worth trying again immediately
1902 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1903 
1904 #define STRINGIZE_INTERNAL(arg) #arg
1905 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1906 
1907 // Access to RTM instructions
1908 
1909 /*
1910   A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1911   This is the same definition as the compiler intrinsic that will be supported at some point.
1912 */
1913 static __inline int _xbegin()
1914 {
1915     int res = -1;
1916 
1917 #if KMP_OS_WINDOWS
1918 #if KMP_ARCH_X86_64
1919     _asm {
1920         _emit 0xC7
1921         _emit 0xF8
1922         _emit 2
1923         _emit 0
1924         _emit 0
1925         _emit 0
1926         jmp   L2
1927         mov   res, eax
1928     L2:
1929     }
1930 #else /* IA32 */
1931     _asm {
1932         _emit 0xC7
1933         _emit 0xF8
1934         _emit 2
1935         _emit 0
1936         _emit 0
1937         _emit 0
1938         jmp   L2
1939         mov   res, eax
1940     L2:
1941     }
1942 #endif // KMP_ARCH_X86_64
1943 #else
1944     /* Note that %eax must be noted as killed (clobbered), because
1945      * the XSR is returned in %eax(%rax) on abort.  Other register
1946      * values are restored, so don't need to be killed.
1947      *
1948      * We must also mark 'res' as an input and an output, since otherwise
1949      * 'res=-1' may be dropped as being dead, whereas we do need the
1950      * assignment on the successful (i.e., non-abort) path.
1951      */
1952     __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
1953                       "   .long  1f-1b-6\n"
1954                       "    jmp   2f\n"
1955                       "1:  movl  %%eax,%0\n"
1956                       "2:"
1957                       :"+r"(res)::"memory","%eax");
1958 #endif // KMP_OS_WINDOWS
1959     return res;
1960 }
1961 
1962 /*
1963   Transaction end
1964 */
1965 static __inline void _xend()
1966 {
1967 #if KMP_OS_WINDOWS
1968     __asm  {
1969         _emit 0x0f
1970         _emit 0x01
1971         _emit 0xd5
1972     }
1973 #else
1974     __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1975 #endif
1976 }
1977 
1978 /*
  This is a macro; the argument must be a single byte constant which
1980   can be evaluated by the inline assembler, since it is emitted as a
1981   byte into the assembly code.
1982 */
1983 #if KMP_OS_WINDOWS
1984 #define _xabort(ARG)                            \
1985     _asm _emit 0xc6                             \
1986     _asm _emit 0xf8                             \
1987     _asm _emit ARG
1988 #else
1989 #define _xabort(ARG) \
1990     __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1991 #endif
1992 
1993 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1994 
1995 //
//    Statistics are collected for testing purposes
1997 //
1998 #if KMP_DEBUG_ADAPTIVE_LOCKS
1999 
2000 // We accumulate speculative lock statistics when the lock is destroyed.
2001 // We keep locks that haven't been destroyed in the liveLocks list
2002 // so that we can grab their statistics too.
2003 static kmp_adaptive_lock_statistics_t destroyedStats;
2004 
2005 // To hold the list of live locks.
2006 static kmp_adaptive_lock_info_t liveLocks;
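// liveLocks is the sentinel node of a circular, doubly linked list; its
// next/prev fields are pointed back at itself in __kmp_init_speculative_stats().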
2007 
2008 // A lock so we can safely update the list of locks.
2009 static kmp_bootstrap_lock_t chain_lock;
2010 
2011 // Initialize the list of stats.
2012 void
2013 __kmp_init_speculative_stats()
2014 {
2015     kmp_adaptive_lock_info_t *lck = &liveLocks;
2016 
2017     memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2018     lck->stats.next = lck;
2019     lck->stats.prev = lck;
2020 
2021     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2022     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2023 
2024     __kmp_init_bootstrap_lock( &chain_lock );
2025 
2026 }
2027 
2028 // Insert the lock into the circular list
2029 static void
2030 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
2031 {
2032     __kmp_acquire_bootstrap_lock( &chain_lock );
2033 
2034     lck->stats.next = liveLocks.stats.next;
2035     lck->stats.prev = &liveLocks;
2036 
2037     liveLocks.stats.next = lck;
2038     lck->stats.next->stats.prev  = lck;
2039 
2040     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2041     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2042 
2043     __kmp_release_bootstrap_lock( &chain_lock );
2044 }
2045 
2046 static void
2047 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2048 {
2049     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2050     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2051 
2052     kmp_adaptive_lock_info_t * n = lck->stats.next;
2053     kmp_adaptive_lock_info_t * p = lck->stats.prev;
2054 
2055     n->stats.prev = p;
2056     p->stats.next = n;
2057 }
2058 
2059 static void
2060 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2061 {
2062     memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2063     __kmp_remember_lock( lck );
2064 }
2065 
2066 static void
2067 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2068 {
2069     kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2070 
2071     t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2072     t->successfulSpeculations += s->successfulSpeculations;
2073     t->hardFailedSpeculations += s->hardFailedSpeculations;
2074     t->softFailedSpeculations += s->softFailedSpeculations;
2075     t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2076     t->lemmingYields          += s->lemmingYields;
2077 }
2078 
2079 static void
2080 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2081 {
2082     kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2083 
2084     __kmp_acquire_bootstrap_lock( &chain_lock );
2085 
2086     __kmp_add_stats( &destroyedStats, lck );
2087     __kmp_forget_lock( lck );
2088 
2089     __kmp_release_bootstrap_lock( &chain_lock );
2090 }
2091 
2092 static float
2093 percent (kmp_uint32 count, kmp_uint32 total)
2094 {
2095     return (total == 0) ? 0.0: (100.0 * count)/total;
2096 }
2097 
2098 static
2099 FILE * __kmp_open_stats_file()
2100 {
2101     if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2102         return stdout;
2103 
2104     size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2105     char buffer[buffLen];
2106     KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2107       (kmp_int32)getpid());
2108     FILE * result = fopen(&buffer[0], "w");
2109 
2110     // Maybe we should issue a warning here...
2111     return result ? result : stdout;
2112 }
2113 
2114 void
2115 __kmp_print_speculative_stats()
2116 {
2117     if (__kmp_user_lock_kind != lk_adaptive)
2118         return;
2119 
2120     FILE * statsFile = __kmp_open_stats_file();
2121 
2122     kmp_adaptive_lock_statistics_t total = destroyedStats;
2123     kmp_adaptive_lock_info_t *lck;
2124 
2125     for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2126         __kmp_add_stats( &total, lck );
2127     }
2128     kmp_adaptive_lock_statistics_t *t = &total;
2129     kmp_uint32 totalSections     = t->nonSpeculativeAcquires + t->successfulSpeculations;
2130     kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2131                                    t->softFailedSpeculations;
2132 
2133     fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2134     fprintf ( statsFile, " Lock parameters: \n"
2135              "   max_soft_retries               : %10d\n"
2136              "   max_badness                    : %10d\n",
2137              __kmp_adaptive_backoff_params.max_soft_retries,
2138              __kmp_adaptive_backoff_params.max_badness);
2139     fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2140     fprintf( statsFile, " Total critical sections          : %10d\n", totalSections );
2141     fprintf( statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
2142              t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2143     fprintf( statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
2144              t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2145     fprintf( statsFile, " Lemming yields                   : %10d\n\n", t->lemmingYields );
2146 
2147     fprintf( statsFile, " Speculative acquire attempts     : %10d\n", totalSpeculations );
2148     fprintf( statsFile, " Successes                        : %10d (%5.1f%%)\n",
2149              t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2150     fprintf( statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
2151              t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2152     fprintf( statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
2153              t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2154 
2155     if (statsFile != stdout)
2156         fclose( statsFile );
2157 }
2158 
2159 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2160 #else
2161 # define KMP_INC_STAT(lck,stat)
2162 
2163 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2164 
2165 static inline bool
2166 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2167 {
2168     // It is enough to check that the head_id is zero.
2169     // We don't also need to check the tail.
2170     bool res = lck->lk.head_id == 0;
2171 
2172     // We need a fence here, since we must ensure that no memory operations
2173     // from later in this thread float above that read.
2174 #if KMP_COMPILER_ICC
2175     _mm_mfence();
2176 #else
2177     __sync_synchronize();
2178 #endif
2179 
2180     return res;
2181 }
2182 
2183 // Functions for manipulating the badness
2184 static __inline void
2185 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2186 {
2187     // Reset the badness to zero so we eagerly try to speculate again
2188     lck->lk.adaptive.badness = 0;
2189     KMP_INC_STAT(lck,successfulSpeculations);
2190 }
2191 
2192 // Create a bit mask with one more set bit.
2193 static __inline void
2194 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2195 {
2196     kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2197     if ( newBadness > lck->lk.adaptive.max_badness) {
2198         return;
2199     } else {
2200         lck->lk.adaptive.badness = newBadness;
2201     }
2202 }
2203 
2204 // Check whether speculation should be attempted.
2205 static __inline int
2206 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2207 {
2208     kmp_uint32 badness = lck->lk.adaptive.badness;
2209     kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2210     int res = (attempts & badness) == 0;
2211     return res;
2212 }
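
// Taken together, the routines above implement the adaptive throttle: each
// failed round of speculation shifts another 1 bit into badness
// (0 -> 1 -> 3 -> 7 -> ..., capped at max_badness), and speculation is only
// attempted when (acquire_attempts & badness) == 0.  With badness == 7 that
// is roughly one speculative attempt per eight acquire attempts; a single
// successful speculation resets badness to 0 again.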
2213 
2214 // Attempt to acquire only the speculative lock.
2215 // Does not back off to the non-speculative lock.
2216 //
2217 static int
2218 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2219 {
2220     int retries = lck->lk.adaptive.max_soft_retries;
2221 
2222     // We don't explicitly count the start of speculation, rather we record
2223     // the results (success, hard fail, soft fail). The sum of all of those
2224     // is the total number of times we started speculation since all
2225     // speculations must end one of those ways.
2226     do
2227     {
2228         kmp_uint32 status = _xbegin();
2229         // Switch this in to disable actual speculation but exercise
2230         // at least some of the rest of the code. Useful for debugging...
2231         // kmp_uint32 status = _XABORT_NESTED;
2232 
2233         if (status == _XBEGIN_STARTED )
2234         { /* We have successfully started speculation
2235            * Check that no-one acquired the lock for real between when we last looked
2236            * and now. This also gets the lock cache line into our read-set,
2237            * which we need so that we'll abort if anyone later claims it for real.
2238            */
2239             if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2240             {
2241                 // Lock is now visibly acquired, so someone beat us to it.
2242                 // Abort the transaction so we'll restart from _xbegin with the
2243                 // failure status.
2244                 _xabort(0x01);
2245                 KMP_ASSERT2( 0, "should not get here" );
2246             }
2247             return 1;   // Lock has been acquired (speculatively)
2248         } else {
2249             // We have aborted, update the statistics
2250             if ( status & SOFT_ABORT_MASK)
2251             {
2252                 KMP_INC_STAT(lck,softFailedSpeculations);
2253                 // and loop round to retry.
2254             }
2255             else
2256             {
2257                 KMP_INC_STAT(lck,hardFailedSpeculations);
2258                 // Give up if we had a hard failure.
2259                 break;
2260             }
2261         }
2262     }  while( retries-- ); // Loop while we have retries, and didn't fail hard.
2263 
2264     // Either we had a hard failure or we didn't succeed softly after
2265     // the full set of attempts, so back off the badness.
2266     __kmp_step_badness( lck );
2267     return 0;
2268 }
2269 
2270 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2271 // if the speculative lock cannot be acquired.
2272 // We can succeed speculatively, non-speculatively, or fail.
2273 static int
2274 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2275 {
2276     // First try to acquire the lock speculatively
2277     if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2278         return 1;
2279 
2280     // Speculative acquisition failed, so try to acquire it non-speculatively.
2281     // Count the non-speculative acquire attempt
2282     lck->lk.adaptive.acquire_attempts++;
2283 
2284     // Use base, non-speculative lock.
2285     if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2286     {
2287         KMP_INC_STAT(lck,nonSpeculativeAcquires);
2288         return 1;       // Lock is acquired (non-speculatively)
2289     }
2290     else
2291     {
2292         return 0;       // Failed to acquire the lock, it's already visibly locked.
2293     }
2294 }
2295 
2296 static int
2297 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2298 {
2299     char const * const func = "omp_test_lock";
2300     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2301         KMP_FATAL( LockIsUninitialized, func );
2302     }
2303 
2304     int retval = __kmp_test_adaptive_lock( lck, gtid );
2305 
2306     if ( retval ) {
2307         lck->lk.qlk.owner_id = gtid + 1;
2308     }
2309     return retval;
2310 }
2311 
2312 // Block until we can acquire a speculative, adaptive lock.
2313 // We check whether we should be trying to speculate.
2314 // If we should be, we check the real lock to see if it is free,
2315 // and, if not, pause without attempting to acquire it until it is.
2316 // Then we try the speculative acquire.
// This means that although we suffer from lemmings a little (because we
// can't acquire the lock speculatively until the queue of waiting threads
// has cleared), we don't get into a state where we can never acquire the
// lock speculatively (because we force the queue to clear by preventing
// new arrivals from entering the queue).
// This does mean that when we're trying to break lemmings, the lock
// is no longer fair. However, OpenMP makes no guarantee that its
// locks are fair, so this isn't a real problem.
2326 static void
2327 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2328 {
2329     if ( __kmp_should_speculate( lck, gtid ) )
2330     {
2331         if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2332         {
2333             if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2334                 return;
2335             // We tried speculation and failed, so give up.
2336         }
2337         else
2338         {
            // We can't try speculation until the lock is free, so we
            // pause here (without suspending on the queuing lock) to
            // allow it to drain, then try again.
            // All other threads will also see the same result from
            // __kmp_should_speculate, so they will be doing the same
            // if they try to claim the lock from now on.
2345             while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2346             {
2347                 KMP_INC_STAT(lck,lemmingYields);
2348                 __kmp_yield (TRUE);
2349             }
2350 
2351             if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2352                 return;
2353         }
2354     }
2355 
2356     // Speculative acquisition failed, so acquire it non-speculatively.
2357     // Count the non-speculative acquire attempt
2358     lck->lk.adaptive.acquire_attempts++;
2359 
2360     __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2361     // We have acquired the base lock, so count that.
2362     KMP_INC_STAT(lck,nonSpeculativeAcquires );
2363 }
2364 
2365 static void
2366 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2367 {
2368     char const * const func = "omp_set_lock";
2369     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2370         KMP_FATAL( LockIsUninitialized, func );
2371     }
2372     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2373         KMP_FATAL( LockIsAlreadyOwned, func );
2374     }
2375 
2376     __kmp_acquire_adaptive_lock( lck, gtid );
2377 
2378     lck->lk.qlk.owner_id = gtid + 1;
2379 }
2380 
2381 static int
2382 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2383 {
2384     if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2385     {   // If the lock doesn't look claimed we must be speculating.
2386         // (Or the user's code is buggy and they're releasing without locking;
2387         // if we had XTEST we'd be able to check that case...)
2388         _xend();        // Exit speculation
2389         __kmp_update_badness_after_success( lck );
2390     }
2391     else
2392     {   // Since the lock *is* visibly locked we're not speculating,
2393         // so should use the underlying lock's release scheme.
2394         __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2395     }
2396     return KMP_LOCK_RELEASED;
2397 }
2398 
2399 static int
2400 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2401 {
2402     char const * const func = "omp_unset_lock";
2403     KMP_MB();  /* in case another processor initialized lock */
2404     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2405         KMP_FATAL( LockIsUninitialized, func );
2406     }
2407     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2408         KMP_FATAL( LockUnsettingFree, func );
2409     }
2410     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2411         KMP_FATAL( LockUnsettingSetByAnother, func );
2412     }
2413     lck->lk.qlk.owner_id = 0;
2414     __kmp_release_adaptive_lock( lck, gtid );
2415     return KMP_LOCK_RELEASED;
2416 }
2417 
2418 static void
2419 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2420 {
2421     __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2422     lck->lk.adaptive.badness = 0;
2423     lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2424     lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2425     lck->lk.adaptive.max_badness      = __kmp_adaptive_backoff_params.max_badness;
2426 #if KMP_DEBUG_ADAPTIVE_LOCKS
2427     __kmp_zero_speculative_stats( &lck->lk.adaptive );
2428 #endif
2429     KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2430 }
2431 
2432 static void
2433 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2434 {
2435     __kmp_init_adaptive_lock( lck );
2436 }
2437 
2438 static void
2439 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2440 {
2441 #if KMP_DEBUG_ADAPTIVE_LOCKS
2442     __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2443 #endif
2444     __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2445     // Nothing needed for the speculative part.
2446 }
2447 
2448 static void
2449 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2450 {
2451     char const * const func = "omp_destroy_lock";
2452     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2453         KMP_FATAL( LockIsUninitialized, func );
2454     }
2455     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2456         KMP_FATAL( LockStillOwned, func );
2457     }
2458     __kmp_destroy_adaptive_lock( lck );
2459 }
2460 
2461 
2462 #endif // KMP_USE_ADAPTIVE_LOCKS
2463 
2464 
2465 /* ------------------------------------------------------------------------ */
2466 /* DRDPA ticket locks                                                */
2467 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
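// The basic scheme: each acquirer atomically takes a ticket and spins on its
// own slot, polls[ticket & mask], until the releasing thread publishes that
// ticket there.  Spreading waiters over distinct poll locations keeps them
// from all spinning on a single cache line, and the polling area is grown or
// shrunk ("reconfigured") as the number of waiters changes.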
2468 
2469 static kmp_int32
2470 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2471 {
2472     return TCR_4( lck->lk.owner_id ) - 1;
2473 }
2474 
2475 static inline bool
2476 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2477 {
2478     return lck->lk.depth_locked != -1;
2479 }
2480 
2481 __forceinline static int
2482 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2483 {
2484     kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2485     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2486     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2487       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2488       TCR_PTR(lck->lk.polls);                           // volatile load
2489 
2490 #ifdef USE_LOCK_PROFILE
2491     if (TCR_8(polls[ticket & mask].poll) != ticket)
2492         __kmp_printf("LOCK CONTENTION: %p\n", lck);
2493     /* else __kmp_printf( "." );*/
2494 #endif /* USE_LOCK_PROFILE */
2495 
2496     //
2497     // Now spin-wait, but reload the polls pointer and mask, in case the
2498     // polling area has been reconfigured.  Unless it is reconfigured, the
2499     // reloads stay in L1 cache and are cheap.
2500     //
2501     // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2502     //
2503     // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2504     // and poll to be re-read every spin iteration.
2505     //
2506     kmp_uint32 spins;
2507 
2508     KMP_FSYNC_PREPARE(lck);
2509     KMP_INIT_YIELD(spins);
    while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2511         // If we are oversubscribed,
2512         // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2513         // CPU Pause is in the macros for yield.
2514         //
2515         KMP_YIELD(TCR_4(__kmp_nth)
2516           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2517         KMP_YIELD_SPIN(spins);
2518 
2519         // Re-read the mask and the poll pointer from the lock structure.
2520         //
2521         // Make certain that "mask" is read before "polls" !!!
2522         //
        // If another thread reconfigures the polling area and updates
        // its values, and we get the new value of mask and the old polls
2525         // pointer, we could access memory beyond the end of the old polling
2526         // area.
2527         //
2528         mask = TCR_8(lck->lk.mask);                     // volatile load
2529         polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2530           TCR_PTR(lck->lk.polls);                       // volatile load
2531     }
2532 
2533     //
2534     // Critical section starts here
2535     //
2536     KMP_FSYNC_ACQUIRED(lck);
2537     KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2538       ticket, lck));
2539     lck->lk.now_serving = ticket;                       // non-volatile store
2540 
2541     //
2542     // Deallocate a garbage polling area if we know that we are the last
2543     // thread that could possibly access it.
2544     //
2545     // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2546     // ticket.
2547     //
2548     if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2549         __kmp_free((void *)lck->lk.old_polls);
2550         lck->lk.old_polls = NULL;
2551         lck->lk.cleanup_ticket = 0;
2552     }
2553 
2554     //
2555     // Check to see if we should reconfigure the polling area.
2556     // If there is still a garbage polling area to be deallocated from a
2557     // previous reconfiguration, let a later thread reconfigure it.
2558     //
2559     if (lck->lk.old_polls == NULL) {
2560         bool reconfigure = false;
2561         volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2562         kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2563 
2564         if (TCR_4(__kmp_nth)
2565           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2566             //
2567             // We are in oversubscription mode.  Contract the polling area
2568             // down to a single location, if that hasn't been done already.
2569             //
2570             if (num_polls > 1) {
2571                 reconfigure = true;
2572                 num_polls = TCR_4(lck->lk.num_polls);
2573                 mask = 0;
2574                 num_polls = 1;
2575                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2576                   __kmp_allocate(num_polls * sizeof(*polls));
2577                 polls[0].poll = ticket;
2578             }
2579         }
2580         else {
2581             //
2582             // We are in under/fully subscribed mode.  Check the number of
2583             // threads waiting on the lock.  The size of the polling area
2584             // should be at least the number of threads waiting.
2585             //
2586             kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2587             if (num_waiting > num_polls) {
2588                 kmp_uint32 old_num_polls = num_polls;
2589                 reconfigure = true;
2590                 do {
2591                     mask = (mask << 1) | 1;
2592                     num_polls *= 2;
2593                 } while (num_polls <= num_waiting);
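                // For example, starting from num_polls == 1 with five threads
                // waiting, the loop doubles the area 1 -> 2 -> 4 -> 8 and
                // leaves mask == 7, so each waiter gets its own poll slot.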
2594 
2595                 //
2596                 // Allocate the new polling area, and copy the relevant portion
2597                 // of the old polling area to the new area.  __kmp_allocate()
2598                 // zeroes the memory it allocates, and most of the old area is
2599                 // just zero padding, so we only copy the release counters.
2600                 //
2601                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2602                   __kmp_allocate(num_polls * sizeof(*polls));
2603                 kmp_uint32 i;
2604                 for (i = 0; i < old_num_polls; i++) {
2605                     polls[i].poll = old_polls[i].poll;
2606                 }
2607             }
2608         }
2609 
2610         if (reconfigure) {
2611             //
2612             // Now write the updated fields back to the lock structure.
2613             //
2614             // Make certain that "polls" is written before "mask" !!!
2615             //
2616             // If another thread picks up the new value of mask and the old
            // polls pointer, it could access memory beyond the end of the
2618             // old polling area.
2619             //
2620             // On x86, we need memory fences.
2621             //
2622             KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2623               ticket, lck, num_polls));
2624 
2625             lck->lk.old_polls = old_polls;              // non-volatile store
2626             lck->lk.polls = polls;                      // volatile store
2627 
2628             KMP_MB();
2629 
2630             lck->lk.num_polls = num_polls;              // non-volatile store
2631             lck->lk.mask = mask;                        // volatile store
2632 
2633             KMP_MB();
2634 
2635             //
2636             // Only after the new polling area and mask have been flushed
2637             // to main memory can we update the cleanup ticket field.
2638             //
2639             // volatile load / non-volatile store
2640             //
2641             lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2642         }
2643     }
2644     return KMP_LOCK_ACQUIRED_FIRST;
2645 }
2646 
2647 int
2648 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2649 {
2650     return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2651 }
2652 
2653 static int
2654 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2655 {
2656     char const * const func = "omp_set_lock";
2657     if ( lck->lk.initialized != lck ) {
2658         KMP_FATAL( LockIsUninitialized, func );
2659     }
2660     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2661         KMP_FATAL( LockNestableUsedAsSimple, func );
2662     }
2663     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2664         KMP_FATAL( LockIsAlreadyOwned, func );
2665     }
2666 
2667     __kmp_acquire_drdpa_lock( lck, gtid );
2668 
2669     lck->lk.owner_id = gtid + 1;
2670     return KMP_LOCK_ACQUIRED_FIRST;
2671 }
2672 
2673 int
2674 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2675 {
2676     //
2677     // First get a ticket, then read the polls pointer and the mask.
2678     // The polls pointer must be read before the mask!!! (See above)
2679     //
2680     kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
2681     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2682       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2683       TCR_PTR(lck->lk.polls);                           // volatile load
2684     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2685     if (TCR_8(polls[ticket & mask].poll) == ticket) {
2686         kmp_uint64 next_ticket = ticket + 1;
2687         if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2688           ticket, next_ticket)) {
2689             KMP_FSYNC_ACQUIRED(lck);
2690             KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2691                ticket, lck));
2692             lck->lk.now_serving = ticket;               // non-volatile store
2693 
2694             //
2695             // Since no threads are waiting, there is no possibility that
2696             // we would want to reconfigure the polling area.  We might
2697             // have the cleanup ticket value (which says that it is now
2698             // safe to deallocate old_polls), but we'll let a later thread
2699             // which calls __kmp_acquire_lock do that - this routine
2700             // isn't supposed to block, and we would risk blocks if we
2701             // called __kmp_free() to do the deallocation.
2702             //
2703             return TRUE;
2704         }
2705     }
2706     return FALSE;
2707 }
2708 
2709 static int
2710 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2711 {
2712     char const * const func = "omp_test_lock";
2713     if ( lck->lk.initialized != lck ) {
2714         KMP_FATAL( LockIsUninitialized, func );
2715     }
2716     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2717         KMP_FATAL( LockNestableUsedAsSimple, func );
2718     }
2719 
2720     int retval = __kmp_test_drdpa_lock( lck, gtid );
2721 
2722     if ( retval ) {
2723         lck->lk.owner_id = gtid + 1;
2724     }
2725     return retval;
2726 }
2727 
2728 int
2729 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2730 {
2731     //
2732     // Read the ticket value from the lock data struct, then the polls
2733     // pointer and the mask.  The polls pointer must be read before the
2734     // mask!!! (See above)
2735     //
2736     kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
2737     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2738       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2739       TCR_PTR(lck->lk.polls);                           // volatile load
2740     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2741     KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2742        ticket - 1, lck));
2743     KMP_FSYNC_RELEASING(lck);
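    // Hand the lock to the next ticket holder by publishing its ticket value
    // in the poll location that it is (or will be) spinning on.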
2744     KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2745     return KMP_LOCK_RELEASED;
2746 }
2747 
2748 static int
2749 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2750 {
2751     char const * const func = "omp_unset_lock";
2752     KMP_MB();  /* in case another processor initialized lock */
2753     if ( lck->lk.initialized != lck ) {
2754         KMP_FATAL( LockIsUninitialized, func );
2755     }
2756     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2757         KMP_FATAL( LockNestableUsedAsSimple, func );
2758     }
2759     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2760         KMP_FATAL( LockUnsettingFree, func );
2761     }
2762     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2763       && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2764         KMP_FATAL( LockUnsettingSetByAnother, func );
2765     }
2766     lck->lk.owner_id = 0;
2767     return __kmp_release_drdpa_lock( lck, gtid );
2768 }
2769 
2770 void
2771 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2772 {
2773     lck->lk.location = NULL;
2774     lck->lk.mask = 0;
2775     lck->lk.num_polls = 1;
2776     lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2777       __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2778     lck->lk.cleanup_ticket = 0;
2779     lck->lk.old_polls = NULL;
2780     lck->lk.next_ticket = 0;
2781     lck->lk.now_serving = 0;
2782     lck->lk.owner_id = 0;      // no thread owns the lock.
2783     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2784     lck->lk.initialized = lck;
2785 
2786     KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2787 }
2788 
2789 static void
2790 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2791 {
2792     __kmp_init_drdpa_lock( lck );
2793 }
2794 
2795 void
2796 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2797 {
2798     lck->lk.initialized = NULL;
2799     lck->lk.location    = NULL;
2800     if (lck->lk.polls != NULL) {
2801         __kmp_free((void *)lck->lk.polls);
2802         lck->lk.polls = NULL;
2803     }
2804     if (lck->lk.old_polls != NULL) {
2805         __kmp_free((void *)lck->lk.old_polls);
2806         lck->lk.old_polls = NULL;
2807     }
2808     lck->lk.mask = 0;
2809     lck->lk.num_polls = 0;
2810     lck->lk.cleanup_ticket = 0;
2811     lck->lk.next_ticket = 0;
2812     lck->lk.now_serving = 0;
2813     lck->lk.owner_id = 0;
2814     lck->lk.depth_locked = -1;
2815 }
2816 
2817 static void
2818 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2819 {
2820     char const * const func = "omp_destroy_lock";
2821     if ( lck->lk.initialized != lck ) {
2822         KMP_FATAL( LockIsUninitialized, func );
2823     }
2824     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2825         KMP_FATAL( LockNestableUsedAsSimple, func );
2826     }
2827     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2828         KMP_FATAL( LockStillOwned, func );
2829     }
2830     __kmp_destroy_drdpa_lock( lck );
2831 }
2832 
2833 
2834 //
2835 // nested drdpa ticket locks
2836 //
2837 
2838 int
2839 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2840 {
2841     KMP_DEBUG_ASSERT( gtid >= 0 );
2842 
2843     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2844         lck->lk.depth_locked += 1;
2845         return KMP_LOCK_ACQUIRED_NEXT;
2846     }
2847     else {
2848         __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2849         KMP_MB();
2850         lck->lk.depth_locked = 1;
2851         KMP_MB();
2852         lck->lk.owner_id = gtid + 1;
2853         return KMP_LOCK_ACQUIRED_FIRST;
2854     }
2855 }
2856 
static int
__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2859 {
2860     char const * const func = "omp_set_nest_lock";
2861     if ( lck->lk.initialized != lck ) {
2862         KMP_FATAL( LockIsUninitialized, func );
2863     }
2864     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2865         KMP_FATAL( LockSimpleUsedAsNestable, func );
2866     }
    return __kmp_acquire_nested_drdpa_lock( lck, gtid );
2868 }
2869 
2870 int
2871 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2872 {
2873     int retval;
2874 
2875     KMP_DEBUG_ASSERT( gtid >= 0 );
2876 
2877     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2878         retval = ++lck->lk.depth_locked;
2879     }
2880     else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2881         retval = 0;
2882     }
2883     else {
2884         KMP_MB();
2885         retval = lck->lk.depth_locked = 1;
2886         KMP_MB();
2887         lck->lk.owner_id = gtid + 1;
2888     }
2889     return retval;
2890 }
2891 
2892 static int
2893 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2894 {
2895     char const * const func = "omp_test_nest_lock";
2896     if ( lck->lk.initialized != lck ) {
2897         KMP_FATAL( LockIsUninitialized, func );
2898     }
2899     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2900         KMP_FATAL( LockSimpleUsedAsNestable, func );
2901     }
2902     return __kmp_test_nested_drdpa_lock( lck, gtid );
2903 }
2904 
2905 int
2906 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2907 {
2908     KMP_DEBUG_ASSERT( gtid >= 0 );
2909 
2910     KMP_MB();
2911     if ( --(lck->lk.depth_locked) == 0 ) {
2912         KMP_MB();
2913         lck->lk.owner_id = 0;
2914         __kmp_release_drdpa_lock( lck, gtid );
2915         return KMP_LOCK_RELEASED;
2916     }
2917     return KMP_LOCK_STILL_HELD;
2918 }
2919 
2920 static int
2921 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2922 {
2923     char const * const func = "omp_unset_nest_lock";
2924     KMP_MB();  /* in case another processor initialized lock */
2925     if ( lck->lk.initialized != lck ) {
2926         KMP_FATAL( LockIsUninitialized, func );
2927     }
2928     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2929         KMP_FATAL( LockSimpleUsedAsNestable, func );
2930     }
2931     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2932         KMP_FATAL( LockUnsettingFree, func );
2933     }
2934     if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2935         KMP_FATAL( LockUnsettingSetByAnother, func );
2936     }
2937     return __kmp_release_nested_drdpa_lock( lck, gtid );
2938 }
2939 
2940 void
2941 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2942 {
2943     __kmp_init_drdpa_lock( lck );
2944     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2945 }
2946 
2947 static void
2948 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2949 {
2950     __kmp_init_nested_drdpa_lock( lck );
2951 }
2952 
2953 void
2954 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2955 {
2956     __kmp_destroy_drdpa_lock( lck );
2957     lck->lk.depth_locked = 0;
2958 }
2959 
2960 static void
2961 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2962 {
2963     char const * const func = "omp_destroy_nest_lock";
2964     if ( lck->lk.initialized != lck ) {
2965         KMP_FATAL( LockIsUninitialized, func );
2966     }
2967     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2968         KMP_FATAL( LockSimpleUsedAsNestable, func );
2969     }
2970     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2971         KMP_FATAL( LockStillOwned, func );
2972     }
2973     __kmp_destroy_nested_drdpa_lock( lck );
2974 }
2975 
2976 
2977 //
2978 // access functions to fields which don't exist for all lock kinds.
2979 //
2980 
2981 static int
2982 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2983 {
2984     return lck == lck->lk.initialized;
2985 }
2986 
2987 static const ident_t *
2988 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2989 {
2990     return lck->lk.location;
2991 }
2992 
2993 static void
2994 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2995 {
2996     lck->lk.location = loc;
2997 }
2998 
2999 static kmp_lock_flags_t
3000 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3001 {
3002     return lck->lk.flags;
3003 }
3004 
3005 static void
3006 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3007 {
3008     lck->lk.flags = flags;
3009 }
3010 
3011 // Time stamp counter
3012 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3013 # define __kmp_tsc() __kmp_hardware_timestamp()
3014 // Runtime's default backoff parameters
3015 kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 };
3016 #else
3017 // Use nanoseconds for other platforms
3018 extern kmp_uint64 __kmp_now_nsec();
3019 kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 };
3020 # define __kmp_tsc() __kmp_now_nsec()
3021 #endif
3022 
3023 // A useful predicate for dealing with timestamps that may wrap.
3024 // Is a before b?
3025 // Since the timestamps may wrap, this is asking whether it's
3026 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
3027 // Times where going clockwise is less distance than going anti-clockwise
3028 // are in the future, others are in the past.
3029 // e.g.) a = MAX-1, b = MAX+1 (=0), then a > b (true) does not mean a reached b
3030 //       whereas signed(a) = -2, signed(b) = 0 captures the actual difference
3031 static inline bool before(kmp_uint64 a, kmp_uint64 b)
3032 {
3033     return ((kmp_int64)b - (kmp_int64)a) > 0;
3034 }
3035 
3036 // Truncated binary exponential backoff function
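// Each call spins for boff->step short intervals of roughly min_tick timestamp
// units, then grows the step (1, 3, 7, 15, ...) up to max_backoff - 1, so
// successive calls from the same retry loop back off exponentially.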
3037 void
3038 __kmp_spin_backoff(kmp_backoff_t *boff)
3039 {
    // We could flatten this loop, but making it a nested loop gives better results.
3041     kmp_uint32 i;
3042     for (i = boff->step; i > 0; i--) {
3043         kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
3044         do {
3045             KMP_CPU_PAUSE();
3046         } while (before(__kmp_tsc(), goal));
3047     }
3048     boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1);
3049 }
3050 
3051 #if KMP_USE_DYNAMIC_LOCK
3052 
3053 // Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word.
3054 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
3055 {
3056     TCW_4(*lck, KMP_GET_D_TAG(seq));
3057     KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
3058 }
3059 
3060 #if KMP_USE_TSX
3061 
3062 // HLE lock functions - imported from the testbed runtime.
3063 #define HLE_ACQUIRE ".byte 0xf2;"
3064 #define HLE_RELEASE ".byte 0xf3;"
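// 0xf2/0xf3 are the XACQUIRE/XRELEASE prefixes.  On hardware without TSX they
// decode as the legacy REPNE/REPE prefixes and are ignored, so the code below
// degrades gracefully to an ordinary test-and-set spin lock.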
3065 
3066 static inline kmp_uint32
3067 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3068 {
3069     __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3070                     : "+r"(v), "+m"(*p)
3071                     :
3072                     : "memory");
3073     return v;
3074 }
3075 
3076 static void
3077 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3078 {
3079     TCW_4(*lck, 0);
3080 }
3081 
3082 static void
3083 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3084 {
3085     // Use gtid for KMP_LOCK_BUSY if necessary
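    // Try an elided (HLE) exchange first; while the lock stays busy, spin with
    // a short, bounded exponential pause (delay 1, 3, 7, 7, ...) until the lock
    // word reads free, then retry the elided exchange.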
3086     if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
3087         int delay = 1;
3088         do {
3089             while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
3090                 for (int i = delay; i != 0; --i)
3091                     KMP_CPU_PAUSE();
3092                 delay = ((delay << 1) | 1) & 7;
3093             }
3094         } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
3095     }
3096 }
3097 
3098 static void
3099 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3100 {
3101     __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3102 }
3103 
3104 static int
3105 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3106 {
3107     __asm__ volatile(HLE_RELEASE "movl %1,%0"
3108                     : "=m"(*lck)
3109                     : "r"(KMP_LOCK_FREE(hle))
3110                     : "memory");
3111     return KMP_LOCK_RELEASED;
3112 }
3113 
3114 static int
3115 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3116 {
3117     return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3118 }
3119 
3120 static int
3121 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3122 {
3123     return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
3124 }
3125 
3126 static int
3127 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3128 {
3129     return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3130 }
3131 
3132 static void
3133 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
3134 {
3135     __kmp_init_queuing_lock(lck);
3136 }
3137 
3138 static void
3139 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
3140 {
3141     __kmp_destroy_queuing_lock(lck);
3142 }
3143 
3144 static void
3145 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3146 {
3147     unsigned retries=3, status;
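    // Retry speculation a few times.  An explicit abort with code 0xff means
    // the underlying queuing lock was visibly held, so wait for it to become
    // free before speculating again; any other non-retryable abort falls
    // through to the real lock acquisition below.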
3148     do {
3149         status = _xbegin();
3150         if (status == _XBEGIN_STARTED) {
3151             if (__kmp_is_unlocked_queuing_lock(lck))
3152                 return;
3153             _xabort(0xff);
3154         }
3155         if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
3156             // Wait until lock becomes free
3157             while (! __kmp_is_unlocked_queuing_lock(lck))
3158                 __kmp_yield(TRUE);
3159         }
3160         else if (!(status & _XABORT_RETRY))
3161             break;
3162     } while (retries--);
3163 
3164     // Fall-back non-speculative lock (xchg)
3165     __kmp_acquire_queuing_lock(lck, gtid);
3166 }
3167 
3168 static void
3169 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3170 {
3171     __kmp_acquire_rtm_lock(lck, gtid);
3172 }
3173 
3174 static int
3175 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3176 {
3177     if (__kmp_is_unlocked_queuing_lock(lck)) {
3178         // Releasing from speculation
3179         _xend();
3180     }
3181     else {
3182         // Releasing from a real lock
3183         __kmp_release_queuing_lock(lck, gtid);
3184     }
3185     return KMP_LOCK_RELEASED;
3186 }
3187 
3188 static int
3189 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3190 {
3191     return __kmp_release_rtm_lock(lck, gtid);
3192 }
3193 
3194 static int
3195 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3196 {
3197     unsigned retries=3, status;
3198     do {
3199         status = _xbegin();
3200         if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
3201             return 1;
3202         }
3203         if (!(status & _XABORT_RETRY))
3204             break;
3205     } while (retries--);
3206 
3207     return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
3208 }
3209 
3210 static int
3211 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3212 {
3213     return __kmp_test_rtm_lock(lck, gtid);
3214 }
3215 
3216 #endif // KMP_USE_TSX
3217 
3218 // Entry functions for indirect locks (first element of direct lock jump tables).
3219 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3220 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3221 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3222 static int  __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3223 static int  __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3224 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3225 static int  __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3226 static int  __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3227 
3228 //
// Jump tables for the direct lock functions.
// Only fill in the odd entries; that avoids the need to shift out the low bit.
3231 //
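// A direct lock keeps its type tag in the low bits of the lock word, and the
// direct lock types are given odd tags, while an indirect lock stores an even
// value (its table index shifted left by one, see __kmp_allocate_indirect_lock
// below).  The tag itself indexes these tables, so slot 0 dispatches to the
// indirect entry functions and the direct lock functions sit in the odd slots,
// with zeros padding the unused even slots.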
3232 
3233 // init functions
3234 #define expand(l, op) 0,__kmp_init_direct_lock,
3235 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3236     = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
3237 #undef expand
3238 
3239 // destroy functions
3240 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3241 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
3242     = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
3243 #undef expand
3244 
3245 // set/acquire functions
3246 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3247 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
3248     = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3249 #undef expand
3250 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3251 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
3252     = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3253 #undef expand
3254 
3255 // unset/release and test functions
3256 #define expand(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3257 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
3258     = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
3259 static int  (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
3260     = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
3261 #undef expand
3262 #define expand(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3263 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
3264     = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
3265 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
3266     = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
3267 #undef expand
3268 
3269 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3270 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
3271 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
3272 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
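// A minimal dispatch sketch, assuming the KMP_EXTRACT_D_TAG() helper from
// kmp_lock.h (the direct-lock counterpart of KMP_EXTRACT_I_INDEX used below):
//
//     kmp_dyna_lock_t *lck = ...;   // lock word supplied by the user
//     __kmp_direct_set[KMP_EXTRACT_D_TAG(lck)](lck, gtid);
//
// An indirect lock yields tag 0 and is forwarded to __kmp_set_indirect_lock;
// whether the checked or unchecked tables are exposed is decided once, in
// __kmp_init_dynamic_user_locks() below.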
3273 
3274 //
3275 // Jump tables for the indirect lock functions.
3276 //
3277 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3278 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
3279 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
3280 #undef expand
3281 
3282 // set/acquire functions
3283 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3284 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3285 #undef expand
3286 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3287 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3288 #undef expand
3289 
3290 // unset/release and test functions
3291 #define expand(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3292 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3293 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3294 #undef expand
3295 #define expand(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3296 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3297 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3298 #undef expand
3299 
3300 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3301 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3302 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3303 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
3304 
3305 // Lock index table.
3306 kmp_indirect_lock_table_t __kmp_i_lock_table;
3307 
3308 // Size of indirect locks.
3309 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
3310 
3311 // Jump tables for lock accessor/modifier.
3312 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3313 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3314 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3315 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3316 
3317 // Use different lock pools for different lock types.
3318 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
3319 
3320 // User lock allocator for dynamically dispatched indirect locks.
3321 // Every entry of the indirect lock table holds the address and type of the allocated indirect lock
3322 // (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
3323 // object is returned to the reusable pool of locks, unique to each lock type.
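// The table is an array of row pointers, each row holding KMP_I_LOCK_CHUNK
// kmp_indirect_lock_t objects, so an index is assumed to map to
// table[idx / KMP_I_LOCK_CHUNK] + idx % KMP_I_LOCK_CHUNK (what KMP_GET_I_LOCK
// presumably computes).  Growing the table only reallocates the small array of
// row pointers; the rows themselves never move, so the addresses of
// already-allocated locks stay valid.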
3324 kmp_indirect_lock_t *
3325 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3326 {
3327     kmp_indirect_lock_t *lck;
3328     kmp_lock_index_t idx;
3329 
3330     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3331 
3332     if (__kmp_indirect_lock_pool[tag] != NULL) {
3333         // Reuse the allocated and destroyed lock object
3334         lck = __kmp_indirect_lock_pool[tag];
3335         if (OMP_LOCK_T_SIZE < sizeof(void *))
3336             idx = lck->lock->pool.index;
3337         __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3338         KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
3339     } else {
3340         idx = __kmp_i_lock_table.next;
3341         // Check capacity and double the size if it is full
3342         if (idx == __kmp_i_lock_table.size) {
3343             // Double up the space for block pointers
3344             int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
3345             kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3346             __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
3347             KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
3348             __kmp_free(old_table);
3349             // Allocate new objects in the new blocks
3350             for (int i = row; i < 2*row; ++i)
3351                 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
3352                                                   __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3353             __kmp_i_lock_table.size = 2*idx;
3354         }
3355         __kmp_i_lock_table.next++;
3356         lck = KMP_GET_I_LOCK(idx);
3357         // Allocate a new base lock object
3358         lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3359         KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3360     }
3361 
3362     __kmp_release_lock(&__kmp_global_lock, gtid);
3363 
3364     lck->type = tag;
3365 
3366     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3367         *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3368     } else {
3369         *((kmp_indirect_lock_t **)user_lock) = lck;
3370     }
3371 
3372     return lck;
3373 }
3374 
3375 // User lock lookup for dynamically dispatched locks.
3376 static __forceinline
3377 kmp_indirect_lock_t *
3378 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3379 {
3380     if (__kmp_env_consistency_check) {
3381         kmp_indirect_lock_t *lck = NULL;
3382         if (user_lock == NULL) {
3383             KMP_FATAL(LockIsUninitialized, func);
3384         }
3385         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3386             kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3387             if (idx >= __kmp_i_lock_table.size) {
3388                 KMP_FATAL(LockIsUninitialized, func);
3389             }
3390             lck = KMP_GET_I_LOCK(idx);
3391         } else {
3392             lck = *((kmp_indirect_lock_t **)user_lock);
3393         }
3394         if (lck == NULL) {
3395             KMP_FATAL(LockIsUninitialized, func);
3396         }
3397         return lck;
3398     } else {
3399         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3400             return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3401         } else {
3402             return *((kmp_indirect_lock_t **)user_lock);
3403         }
3404     }
3405 }
3406 
3407 static void
3408 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3409 {
3410 #if KMP_USE_ADAPTIVE_LOCKS
3411     if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3412         KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3413         seq = lockseq_queuing;
3414     }
3415 #endif
3416 #if KMP_USE_TSX
3417     if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3418         seq = lockseq_queuing;
3419     }
3420 #endif
3421     kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3422     kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3423     KMP_I_LOCK_FUNC(l, init)(l->lock);
3424     KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
3425 }
3426 
3427 static void
3428 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3429 {
3430     kmp_uint32 gtid = __kmp_entry_gtid();
3431     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3432     KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3433     kmp_indirect_locktag_t tag = l->type;
3434 
3435     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3436 
3437     // Use the base lock's space to keep the pool chain.
3438     l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3439     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3440         l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3441     }
3442     __kmp_indirect_lock_pool[tag] = l;
3443 
3444     __kmp_release_lock(&__kmp_global_lock, gtid);
3445 }
3446 
3447 static void
3448 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3449 {
3450     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3451     KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3452 }
3453 
3454 static int
3455 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3456 {
3457     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3458     return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3459 }
3460 
3461 static int
3462 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3463 {
3464     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3465     return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3466 }
3467 
3468 static void
3469 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3470 {
3471     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3472     KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3473 }
3474 
3475 static int
3476 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3477 {
3478     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3479     return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3480 }
3481 
3482 static int
3483 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3484 {
3485     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3486     return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3487 }
3488 
3489 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3490 
3491 // This is used only in kmp_error.c when consistency checking is on.
3492 kmp_int32
3493 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3494 {
3495     switch (seq) {
3496         case lockseq_tas:
3497         case lockseq_nested_tas:
3498             return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3499 #if KMP_USE_FUTEX
3500         case lockseq_futex:
3501         case lockseq_nested_futex:
3502             return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3503 #endif
3504         case lockseq_ticket:
3505         case lockseq_nested_ticket:
3506             return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3507         case lockseq_queuing:
3508         case lockseq_nested_queuing:
3509 #if KMP_USE_ADAPTIVE_LOCKS
3510         case lockseq_adaptive:
3511 #endif
3512             return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3513         case lockseq_drdpa:
3514         case lockseq_nested_drdpa:
3515             return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3516         default:
3517             return 0;
3518     }
3519 }
3520 
3521 // Initializes data for dynamic user locks.
3522 void
3523 __kmp_init_dynamic_user_locks()
3524 {
3525     // Initialize jump table for the lock functions
3526     if (__kmp_env_consistency_check) {
3527         __kmp_direct_set     = direct_set_check;
3528         __kmp_direct_unset   = direct_unset_check;
3529         __kmp_direct_test    = direct_test_check;
3530         __kmp_indirect_set   = indirect_set_check;
3531         __kmp_indirect_unset = indirect_unset_check;
3532         __kmp_indirect_test  = indirect_test_check;
3533     }
3534     else {
3535         __kmp_direct_set     = direct_set;
3536         __kmp_direct_unset   = direct_unset;
3537         __kmp_direct_test    = direct_test;
3538         __kmp_indirect_set   = indirect_set;
3539         __kmp_indirect_unset = indirect_unset;
3540         __kmp_indirect_test  = indirect_test;
3541     }
3542 
3543     // Initialize lock index table
3544     __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3545     __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3546     *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
3547                                   __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3548     __kmp_i_lock_table.next = 0;
3549 
3550     // Indirect lock size
3551     __kmp_indirect_lock_size[locktag_ticket]         = sizeof(kmp_ticket_lock_t);
3552     __kmp_indirect_lock_size[locktag_queuing]        = sizeof(kmp_queuing_lock_t);
3553 #if KMP_USE_ADAPTIVE_LOCKS
3554     __kmp_indirect_lock_size[locktag_adaptive]       = sizeof(kmp_adaptive_lock_t);
3555 #endif
3556     __kmp_indirect_lock_size[locktag_drdpa]          = sizeof(kmp_drdpa_lock_t);
3557 #if KMP_USE_TSX
3558     __kmp_indirect_lock_size[locktag_rtm]            = sizeof(kmp_queuing_lock_t);
3559 #endif
3560     __kmp_indirect_lock_size[locktag_nested_tas]     = sizeof(kmp_tas_lock_t);
3561 #if KMP_USE_FUTEX
3562     __kmp_indirect_lock_size[locktag_nested_futex]   = sizeof(kmp_futex_lock_t);
3563 #endif
3564     __kmp_indirect_lock_size[locktag_nested_ticket]  = sizeof(kmp_ticket_lock_t);
3565     __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3566     __kmp_indirect_lock_size[locktag_nested_drdpa]   = sizeof(kmp_drdpa_lock_t);
3567 
3568     // Initialize lock accessor/modifier
3569 #define fill_jumps(table, expand, sep) {            \
3570     table[locktag##sep##ticket]  = expand(ticket);  \
3571     table[locktag##sep##queuing] = expand(queuing); \
3572     table[locktag##sep##drdpa]   = expand(drdpa);   \
3573 }
3574 
3575 #if KMP_USE_ADAPTIVE_LOCKS
3576 # define fill_table(table, expand) {           \
3577     fill_jumps(table, expand, _);              \
3578     table[locktag_adaptive] = expand(queuing); \
3579     fill_jumps(table, expand, _nested_);       \
3580 }
3581 #else
3582 # define fill_table(table, expand) {           \
3583     fill_jumps(table, expand, _);              \
3584     fill_jumps(table, expand, _nested_);       \
3585 }
3586 #endif // KMP_USE_ADAPTIVE_LOCKS
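// fill_table() expands to one assignment per indirect lock type (ticket,
// queuing, drdpa, plus adaptive and the nested variants where applicable),
// and expand() casts each type-specific accessor to the generic
// kmp_user_lock_p signature stored in the tables above.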
3587 
3588 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3589     fill_table(__kmp_indirect_set_location, expand);
3590 #undef expand
3591 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3592     fill_table(__kmp_indirect_set_flags, expand);
3593 #undef expand
3594 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3595     fill_table(__kmp_indirect_get_location, expand);
3596 #undef expand
3597 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3598     fill_table(__kmp_indirect_get_flags, expand);
3599 #undef expand
3600 
3601     __kmp_init_user_locks = TRUE;
3602 }
3603 
3604 // Clean up the lock table.
3605 void
3606 __kmp_cleanup_indirect_user_locks()
3607 {
3608     kmp_lock_index_t i;
3609     int k;
3610 
3611     // Clean up locks in the pools first (they were already destroyed before going into the pools).
3612     for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3613         kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3614         while (l != NULL) {
3615             kmp_indirect_lock_t *ll = l;
3616             l = (kmp_indirect_lock_t *)l->lock->pool.next;
3617             KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
3618             __kmp_free(ll->lock);
3619             ll->lock = NULL;
3620         }
3621         __kmp_indirect_lock_pool[k] = NULL;
3622     }
3623     // Clean up the remaining undestroyed locks.
3624     for (i = 0; i < __kmp_i_lock_table.next; i++) {
3625         kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3626         if (l->lock != NULL) {
3627             // Locks not destroyed explicitly need to be destroyed here.
3628             KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3629             KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
3630             __kmp_free(l->lock);
3631         }
3632     }
3633     // Free the table
3634     for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3635         __kmp_free(__kmp_i_lock_table.table[i]);
3636     __kmp_free(__kmp_i_lock_table.table);
3637 
3638     __kmp_init_user_locks = FALSE;
3639 }
3640 
3641 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3642 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3643 
3644 #else // KMP_USE_DYNAMIC_LOCK
3645 
3646 /* ------------------------------------------------------------------------ */
3647 /* user locks
3648  *
3649  * They are implemented as a table of function pointers which are set to the
3650  * lock functions of the appropriate kind, once that has been determined.
3651  */
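// For example, once __kmp_user_lock_kind has been chosen (say lk_ticket),
// __kmp_set_user_lock_vptrs() below binds __kmp_acquire_user_lock_with_checks_
// and the other pointers to the ticket lock routines via the
// KMP_BIND_USER_LOCK* macros, and the rest of the runtime manipulates user
// locks only through these pointers.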
3652 
3653 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3654 
3655 size_t __kmp_base_user_lock_size = 0;
3656 size_t __kmp_user_lock_size = 0;
3657 
3658 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3659 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3660 
3661 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3662 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3663 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3664 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3665 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3666 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3667 
3668 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3669 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3670 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3671 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3672 
3673 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3674 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3675 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3676 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3677 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3678 
3679 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3680 {
3681     switch ( user_lock_kind ) {
3682         case lk_default:
3683         default:
3684         KMP_ASSERT( 0 );
3685 
3686         case lk_tas: {
3687             __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3688             __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3689 
3690             __kmp_get_user_lock_owner_ =
3691               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3692               ( &__kmp_get_tas_lock_owner );
3693 
3694             if ( __kmp_env_consistency_check ) {
3695                 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3696                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3697             }
3698             else {
3699                 KMP_BIND_USER_LOCK(tas);
3700                 KMP_BIND_NESTED_USER_LOCK(tas);
3701             }
3702 
3703             __kmp_destroy_user_lock_ =
3704               ( void ( * )( kmp_user_lock_p ) )
3705               ( &__kmp_destroy_tas_lock );
3706 
3707              __kmp_is_user_lock_initialized_ =
3708                ( int ( * )( kmp_user_lock_p ) ) NULL;
3709 
3710              __kmp_get_user_lock_location_ =
3711                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3712 
3713              __kmp_set_user_lock_location_ =
3714                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3715 
3716              __kmp_get_user_lock_flags_ =
3717                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3718 
3719              __kmp_set_user_lock_flags_ =
3720                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3721         }
3722         break;
3723 
3724 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3725 
3726         case lk_futex: {
3727             __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3728             __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3729 
3730             __kmp_get_user_lock_owner_ =
3731               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3732               ( &__kmp_get_futex_lock_owner );
3733 
3734             if ( __kmp_env_consistency_check ) {
3735                 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3736                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3737             }
3738             else {
3739                 KMP_BIND_USER_LOCK(futex);
3740                 KMP_BIND_NESTED_USER_LOCK(futex);
3741             }
3742 
3743             __kmp_destroy_user_lock_ =
3744               ( void ( * )( kmp_user_lock_p ) )
3745               ( &__kmp_destroy_futex_lock );
3746 
3747              __kmp_is_user_lock_initialized_ =
3748                ( int ( * )( kmp_user_lock_p ) ) NULL;
3749 
3750              __kmp_get_user_lock_location_ =
3751                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3752 
3753              __kmp_set_user_lock_location_ =
3754                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3755 
3756              __kmp_get_user_lock_flags_ =
3757                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3758 
3759              __kmp_set_user_lock_flags_ =
3760                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3761         }
3762         break;
3763 
3764 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3765 
3766         case lk_ticket: {
3767             __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3768             __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3769 
3770             __kmp_get_user_lock_owner_ =
3771               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3772               ( &__kmp_get_ticket_lock_owner );
3773 
3774             if ( __kmp_env_consistency_check ) {
3775                 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3776                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3777             }
3778             else {
3779                 KMP_BIND_USER_LOCK(ticket);
3780                 KMP_BIND_NESTED_USER_LOCK(ticket);
3781             }
3782 
3783             __kmp_destroy_user_lock_ =
3784               ( void ( * )( kmp_user_lock_p ) )
3785               ( &__kmp_destroy_ticket_lock );
3786 
3787              __kmp_is_user_lock_initialized_ =
3788                ( int ( * )( kmp_user_lock_p ) )
3789                ( &__kmp_is_ticket_lock_initialized );
3790 
3791              __kmp_get_user_lock_location_ =
3792                ( const ident_t * ( * )( kmp_user_lock_p ) )
3793                ( &__kmp_get_ticket_lock_location );
3794 
3795              __kmp_set_user_lock_location_ =
3796                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3797                ( &__kmp_set_ticket_lock_location );
3798 
3799              __kmp_get_user_lock_flags_ =
3800                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3801                ( &__kmp_get_ticket_lock_flags );
3802 
3803              __kmp_set_user_lock_flags_ =
3804                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3805                ( &__kmp_set_ticket_lock_flags );
3806         }
3807         break;
3808 
3809         case lk_queuing: {
3810             __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3811             __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3812 
3813             __kmp_get_user_lock_owner_ =
3814               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3815               ( &__kmp_get_queuing_lock_owner );
3816 
3817             if ( __kmp_env_consistency_check ) {
3818                 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3819                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3820             }
3821             else {
3822                 KMP_BIND_USER_LOCK(queuing);
3823                 KMP_BIND_NESTED_USER_LOCK(queuing);
3824             }
3825 
3826             __kmp_destroy_user_lock_ =
3827               ( void ( * )( kmp_user_lock_p ) )
3828               ( &__kmp_destroy_queuing_lock );
3829 
3830              __kmp_is_user_lock_initialized_ =
3831                ( int ( * )( kmp_user_lock_p ) )
3832                ( &__kmp_is_queuing_lock_initialized );
3833 
3834              __kmp_get_user_lock_location_ =
3835                ( const ident_t * ( * )( kmp_user_lock_p ) )
3836                ( &__kmp_get_queuing_lock_location );
3837 
3838              __kmp_set_user_lock_location_ =
3839                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3840                ( &__kmp_set_queuing_lock_location );
3841 
3842              __kmp_get_user_lock_flags_ =
3843                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3844                ( &__kmp_get_queuing_lock_flags );
3845 
3846              __kmp_set_user_lock_flags_ =
3847                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3848                ( &__kmp_set_queuing_lock_flags );
3849         }
3850         break;
3851 
3852 #if KMP_USE_ADAPTIVE_LOCKS
3853         case lk_adaptive: {
3854             __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3855             __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3856 
3857             __kmp_get_user_lock_owner_ =
3858               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3859               ( &__kmp_get_queuing_lock_owner );
3860 
3861             if ( __kmp_env_consistency_check ) {
3862                 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3863             }
3864             else {
3865                 KMP_BIND_USER_LOCK(adaptive);
3866             }
3867 
3868             __kmp_destroy_user_lock_ =
3869               ( void ( * )( kmp_user_lock_p ) )
3870               ( &__kmp_destroy_adaptive_lock );
3871 
3872             __kmp_is_user_lock_initialized_ =
3873               ( int ( * )( kmp_user_lock_p ) )
3874               ( &__kmp_is_queuing_lock_initialized );
3875 
3876             __kmp_get_user_lock_location_ =
3877               ( const ident_t * ( * )( kmp_user_lock_p ) )
3878               ( &__kmp_get_queuing_lock_location );
3879 
3880             __kmp_set_user_lock_location_ =
3881               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3882               ( &__kmp_set_queuing_lock_location );
3883 
3884             __kmp_get_user_lock_flags_ =
3885               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3886               ( &__kmp_get_queuing_lock_flags );
3887 
3888             __kmp_set_user_lock_flags_ =
3889               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3890               ( &__kmp_set_queuing_lock_flags );
3891 
3892         }
3893         break;
3894 #endif // KMP_USE_ADAPTIVE_LOCKS
3895 
3896         case lk_drdpa: {
3897             __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3898             __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3899 
3900             __kmp_get_user_lock_owner_ =
3901               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3902               ( &__kmp_get_drdpa_lock_owner );
3903 
3904             if ( __kmp_env_consistency_check ) {
3905                 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3906                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3907             }
3908             else {
3909                 KMP_BIND_USER_LOCK(drdpa);
3910                 KMP_BIND_NESTED_USER_LOCK(drdpa);
3911             }
3912 
3913             __kmp_destroy_user_lock_ =
3914               ( void ( * )( kmp_user_lock_p ) )
3915               ( &__kmp_destroy_drdpa_lock );
3916 
3917              __kmp_is_user_lock_initialized_ =
3918                ( int ( * )( kmp_user_lock_p ) )
3919                ( &__kmp_is_drdpa_lock_initialized );
3920 
3921              __kmp_get_user_lock_location_ =
3922                ( const ident_t * ( * )( kmp_user_lock_p ) )
3923                ( &__kmp_get_drdpa_lock_location );
3924 
3925              __kmp_set_user_lock_location_ =
3926                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3927                ( &__kmp_set_drdpa_lock_location );
3928 
3929              __kmp_get_user_lock_flags_ =
3930                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3931                ( &__kmp_get_drdpa_lock_flags );
3932 
3933              __kmp_set_user_lock_flags_ =
3934                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3935                ( &__kmp_set_drdpa_lock_flags );
3936         }
3937         break;
3938     }
3939 }
3940 
3941 
3942 // ----------------------------------------------------------------------------
3943 // User lock table & lock allocation
3944 
3945 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3946 kmp_user_lock_p __kmp_lock_pool = NULL;
3947 
3948 // Lock block-allocation support.
3949 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3950 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3951 
3952 static kmp_lock_index_t
3953 __kmp_lock_table_insert( kmp_user_lock_p lck )
3954 {
3955     // Assume that kmp_global_lock is held upon entry/exit.
3956     kmp_lock_index_t index;
3957     if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3958         kmp_lock_index_t size;
3959         kmp_user_lock_p *table;
3960         // Reallocate lock table.
3961         if ( __kmp_user_lock_table.allocated == 0 ) {
3962             size = 1024;
3963         }
3964         else {
3965             size = __kmp_user_lock_table.allocated * 2;
3966         }
3967         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3968         KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3969         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3970             // We cannot free the previous table now, since it may be in use by other
3971             // threads. So save the pointer to the previous table in the first element of the
3972             // new table. All the tables are organized into a list and can be freed when the
3973             // library is shutting down.
3974         __kmp_user_lock_table.table = table;
3975         __kmp_user_lock_table.allocated = size;
3976     }
3977     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3978     index = __kmp_user_lock_table.used;
3979     __kmp_user_lock_table.table[ index ] = lck;
3980     ++ __kmp_user_lock_table.used;
3981     return index;
3982 }
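// For example, starting from the statically initialized table the allocated
// size grows 0 -> 1024 -> 2048 -> ..., and each reallocation stashes the
// previous table pointer in element [0] of the new one, so existing lock
// indices remain valid and every generation of the table can be walked and
// freed in __kmp_cleanup_user_locks().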
3983 
3984 static kmp_user_lock_p
3985 __kmp_lock_block_allocate()
3986 {
3987     // Assume that kmp_global_lock is held upon entry/exit.
3988     static int last_index = 0;
3989     if ( ( last_index >= __kmp_num_locks_in_block )
3990       || ( __kmp_lock_blocks == NULL ) ) {
3991         // Restart the index.
3992         last_index = 0;
3993         // Need to allocate a new block.
3994         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3995         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3996         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3997         // Set up the new block.
3998         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3999         new_block->next_block = __kmp_lock_blocks;
4000         new_block->locks = (void *)buffer;
4001         // Publish the new block.
4002         KMP_MB();
4003         __kmp_lock_blocks = new_block;
4004     }
4005     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
4006       [ last_index * __kmp_user_lock_size ] ) );
4007     last_index++;
4008     return ret;
4009 }
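// Each block is a single allocation laid out as
//   [ __kmp_num_locks_in_block locks | kmp_block_of_locks header ]
// with the header placed after the lock storage; new_block->locks points back
// at the start of the buffer, which is what __kmp_cleanup_user_locks() frees.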
4010 
4011 //
4012 // Get memory for a lock. It may be freshly allocated memory or reused memory
4013 // from the lock pool.
4014 //
4015 kmp_user_lock_p
4016 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
4017   kmp_lock_flags_t flags )
4018 {
4019     kmp_user_lock_p lck;
4020     kmp_lock_index_t index;
4021     KMP_DEBUG_ASSERT( user_lock );
4022 
4023     __kmp_acquire_lock( &__kmp_global_lock, gtid );
4024 
4025     if ( __kmp_lock_pool == NULL ) {
4026         // Lock pool is empty. Allocate new memory.
4027         if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
4028             lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
4029         }
4030         else {
4031             lck = __kmp_lock_block_allocate();
4032         }
4033 
4034         // Insert the lock in the table so that it can be freed in __kmp_cleanup,
4035         // and so the debugger has info on all allocated locks.
4036         index = __kmp_lock_table_insert( lck );
4037     }
4038     else {
4039         // Pick up lock from pool.
4040         lck = __kmp_lock_pool;
4041         index = __kmp_lock_pool->pool.index;
4042         __kmp_lock_pool = __kmp_lock_pool->pool.next;
4043     }
4044 
4045     //
4046     // We could potentially differentiate between nested and regular locks
4047     // here, and do the lock table lookup for regular locks only.
4048     //
4049     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4050         * ( (kmp_lock_index_t *) user_lock ) = index;
4051     }
4052     else {
4053         * ( (kmp_user_lock_p *) user_lock ) = lck;
4054     }
4055 
4056     // Mark the lock if it is a critical section lock.
4057     __kmp_set_user_lock_flags( lck, flags );
4058 
4059     __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
4060 
4061     return lck;
4062 }
4063 
4064 // Return the lock's memory to the pool for reuse.
4065 void
4066 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
4067 {
4068     KMP_DEBUG_ASSERT( user_lock != NULL );
4069     KMP_DEBUG_ASSERT( lck != NULL );
4070 
4071     __kmp_acquire_lock( & __kmp_global_lock, gtid );
4072 
4073     lck->pool.next = __kmp_lock_pool;
4074     __kmp_lock_pool = lck;
4075     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4076         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4077         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4078         lck->pool.index = index;
4079     }
4080 
4081     __kmp_release_lock( & __kmp_global_lock, gtid );
4082 }
4083 
4084 kmp_user_lock_p
4085 __kmp_lookup_user_lock( void **user_lock, char const *func )
4086 {
4087     kmp_user_lock_p lck = NULL;
4088 
4089     if ( __kmp_env_consistency_check ) {
4090         if ( user_lock == NULL ) {
4091             KMP_FATAL( LockIsUninitialized, func );
4092         }
4093     }
4094 
4095     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4096         kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4097         if ( __kmp_env_consistency_check ) {
4098             if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4099                 KMP_FATAL( LockIsUninitialized, func );
4100             }
4101         }
4102         KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4103         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4104         lck = __kmp_user_lock_table.table[index];
4105     }
4106     else {
4107         lck = *( (kmp_user_lock_p *)user_lock );
4108     }
4109 
4110     if ( __kmp_env_consistency_check ) {
4111         if ( lck == NULL ) {
4112             KMP_FATAL( LockIsUninitialized, func );
4113         }
4114     }
4115 
4116     return lck;
4117 }
4118 
4119 void
4120 __kmp_cleanup_user_locks( void )
4121 {
4122     //
4123     // Reset lock pool. Do not worry about locks in the pool -- we will free
4124     // them when iterating through the lock table (it includes all the locks,
4125     // dead or alive).
4126     //
4127     __kmp_lock_pool = NULL;
4128 
4129 #define IS_CRITICAL(lck) \
4130         ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4131         ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4132 
4133     //
4134     // Loop through lock table, free all locks.
4135     //
4136     // Do not free item [0]; it is reserved for the lock tables list.
4137     //
4138     // FIXME - we are iterating through a list of (pointers to) objects of
4139     // type union kmp_user_lock, but we have no way of knowing whether the
4140     // base type is currently "pool" or whatever the global user lock type
4141     // is.
4142     //
4143     // We are relying on the fact that for all of the user lock types
4144     // (except "tas"), the first field in the lock struct is the "initialized"
4145     // field, which is set to the address of the lock object itself when
4146     // the lock is initialized.  When the union is of type "pool", the
4147     // first field is a pointer to the next object in the free list, which
4148     // will not be the same address as the object itself.
4149     //
4150     // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4151     // will fail for "pool" objects on the free list.  This must happen as
4152     // the "location" field of real user locks overlaps the "index" field
4153     // of "pool" objects.
4154     //
4155     // It would be better to run through the free list, and remove all "pool"
4156     // objects from the lock table before executing this loop.  However,
4157     // "pool" objects do not always have their index field set (only on
4158     // lin_32e), and I don't want to search the lock table for the address
4159     // of every "pool" object on the free list.
4160     //
4161     while ( __kmp_user_lock_table.used > 1 ) {
4162         const ident *loc;
4163 
4164         //
4165         // reduce __kmp_user_lock_table.used before freeing the lock,
4166         // so that state of locks is consistent
4167         //
4168         kmp_user_lock_p lck = __kmp_user_lock_table.table[
4169           --__kmp_user_lock_table.used ];
4170 
4171         if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4172           ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4173             //
4174             // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4175             // initialized AND it is NOT a critical section (user is not
4176             // responsible for destroying criticals) AND we know source
4177             // location to report.
4178             //
4179             if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4180               ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4181               ( loc->psource != NULL ) ) {
4182                 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4183                 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4184                 __kmp_str_loc_free( &str_loc);
4185             }
4186 
4187 #ifdef KMP_DEBUG
4188             if ( IS_CRITICAL( lck ) ) {
4189                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4190             }
4191             else {
4192                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4193             }
4194 #endif // KMP_DEBUG
4195 
4196             //
4197             // Cleanup internal lock dynamic resources
4198             // (for drdpa locks particularly).
4199             //
4200             __kmp_destroy_user_lock( lck );
4201         }
4202 
4203         //
4204         // Free the lock if block allocation of locks is not used.
4205         //
4206         if ( __kmp_lock_blocks == NULL ) {
4207             __kmp_free( lck );
4208         }
4209     }
4210 
4211 #undef IS_CRITICAL
4212 
4213     //
4214     // delete lock table(s).
4215     //
4216     kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4217     __kmp_user_lock_table.table = NULL;
4218     __kmp_user_lock_table.allocated = 0;
4219 
4220     while ( table_ptr != NULL ) {
4221         //
4222         // In the first element we saved the pointer to the previous
4223         // (smaller) lock table.
4224         //
4225         kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4226         __kmp_free( table_ptr );
4227         table_ptr = next;
4228     }
4229 
4230     //
4231     // Free buffers allocated for blocks of locks.
4232     //
4233     kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4234     __kmp_lock_blocks = NULL;
4235 
4236     while ( block_ptr != NULL ) {
4237         kmp_block_of_locks_t *next = block_ptr->next_block;
4238         __kmp_free( block_ptr->locks );
4239         //
4240         // *block_ptr itself was allocated at the end of the locks vector.
4241         //
4242         block_ptr = next;
4243     }
4244 
4245     TCW_4(__kmp_init_user_locks, FALSE);
4246 }
4247 
4248 #endif // KMP_USE_DYNAMIC_LOCK
4249