1 /*
2  * kmp_lock.cpp -- lock-related functions
3  * $Revision: 42810 $
4  * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
5  */
6 
7 
8 //===----------------------------------------------------------------------===//
9 //
10 //                     The LLVM Compiler Infrastructure
11 //
12 // This file is dual licensed under the MIT and the University of Illinois Open
13 // Source Licenses. See LICENSE.txt for details.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 
18 #include <stddef.h>
19 
20 #include "kmp.h"
21 #include "kmp_itt.h"
22 #include "kmp_i18n.h"
23 #include "kmp_lock.h"
24 #include "kmp_io.h"
25 
26 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
27 # include <unistd.h>
28 # include <sys/syscall.h>
29 // We should really include <futex.h>, but doing so causes compatibility problems on different
30 // Linux* OS distributions, which either require you to include (or break when you try to include)
31 // <pci/types.h>.
32 // Since all we need is the two macros below (which are part of the kernel ABI and so cannot change),
33 // we just define the constants here and do not include <futex.h>.
34 # ifndef FUTEX_WAIT
35 #  define FUTEX_WAIT    0
36 # endif
37 # ifndef FUTEX_WAKE
38 #  define FUTEX_WAKE    1
39 # endif
40 #endif
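// For reference, the constants above are used later in this file via raw syscalls
// (illustrative excerpt of the calls made in the futex lock code below):
//
//     syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, poll_val, NULL, NULL, 0 );
//     syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );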
41 
42 
43 #ifndef KMP_DEBUG
44 # define __kmp_static_delay( arg )     /* nothing to do */
45 #else
46 
47 static void
48 __kmp_static_delay( int arg )
49 {
50 /* Work around weird code-gen bug that causes assert to trip */
51 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
52     KMP_ASSERT( arg != 0 );
53 # else
54     KMP_ASSERT( arg >= 0 );
55 # endif
56 }
57 #endif /* KMP_DEBUG */
58 
59 static void
60 __kmp_static_yield( int arg )
61 {
62     __kmp_yield( arg );
63 }
64 
65 /* Implement spin locks for internal library use.             */
66 /* The algorithm implemented is Lamport's bakery lock [1974]. */
67 
68 void
69 __kmp_validate_locks( void )
70 {
71     int i;
72     kmp_uint32  x, y;
73 
74     /* Check to make sure unsigned arithmetic wraps around properly */
75     x = ~((kmp_uint32) 0) - 2;
76     y = x - 2;
77 
78     for (i = 0; i < 8; ++i, ++x, ++y) {
79         kmp_uint32 z = (x - y);
80         KMP_ASSERT( z == 2 );
81     }
82 
83     KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
84 }
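//
// Illustrative sketch (not part of the library) of why the wrap check above matters:
// the ticket lock below computes distances with unsigned subtraction, which must stay
// correct across the 2^32 boundary.  For example, with 32-bit tickets:
//
//     kmp_uint32 next_ticket = 1;             // has already wrapped past 0xFFFFFFFF
//     kmp_uint32 now_serving = 0xFFFFFFFFu;   // has not wrapped yet
//     kmp_uint32 distance    = next_ticket - now_serving;   // == 2, as required
//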
85 
86 
87 /* ------------------------------------------------------------------------ */
88 /* test and set locks */
89 
90 //
91 // For the non-nested locks, we can only assume that the first 4 bytes were
92 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
93 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
94 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
95 //
96 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
97 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
98 //
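//
// Illustrative sketch (not part of the library): the poll word encodes the owner as
// gtid + 1, with 0 meaning "unlocked" (see __kmp_get_tas_lock_owner below).  A typical
// internal usage sequence, assuming a valid global thread id gtid:
//
//     kmp_tas_lock_t lck;
//     __kmp_init_tas_lock( &lck );            // lk.poll == 0          (free)
//     __kmp_acquire_tas_lock( &lck, gtid );   // lk.poll == gtid + 1   (held)
//     /* ... critical section ... */
//     __kmp_release_tas_lock( &lck, gtid );   // lk.poll == 0 again
//     __kmp_destroy_tas_lock( &lck );
//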
99 
100 static kmp_int32
101 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
102 {
103     return TCR_4( lck->lk.poll ) - 1;
104 }
105 
106 static inline bool
107 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
108 {
109     return lck->lk.depth_locked != -1;
110 }
111 
112 __forceinline static void
113 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
114 {
115     KMP_MB();
116 
117 #ifdef USE_LOCK_PROFILE
118     kmp_uint32 curr = TCR_4( lck->lk.poll );
119     if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
120         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
121     /* else __kmp_printf( "." );*/
122 #endif /* USE_LOCK_PROFILE */
123 
124     if ( ( lck->lk.poll == 0 )
125       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
126         KMP_FSYNC_ACQUIRED(lck);
127         return;
128     }
129 
130     kmp_uint32 spins;
131     KMP_FSYNC_PREPARE( lck );
132     KMP_INIT_YIELD( spins );
133     if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
134       __kmp_xproc ) ) {
135         KMP_YIELD( TRUE );
136     }
137     else {
138         KMP_YIELD_SPIN( spins );
139     }
140 
141     while ( ( lck->lk.poll != 0 ) ||
142       ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) {
143         //
144         // FIXME - use exponential backoff here
145         //
146         if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
147           __kmp_xproc ) ) {
148             KMP_YIELD( TRUE );
149         }
150         else {
151             KMP_YIELD_SPIN( spins );
152         }
153     }
154     KMP_FSYNC_ACQUIRED( lck );
155 }
156 
157 void
158 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
159 {
160     __kmp_acquire_tas_lock_timed_template( lck, gtid );
161 }
162 
163 static void
164 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
165 {
166     if ( __kmp_env_consistency_check ) {
167         char const * const func = "omp_set_lock";
168         if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
169           && __kmp_is_tas_lock_nestable( lck ) ) {
170             KMP_FATAL( LockNestableUsedAsSimple, func );
171         }
172         if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
173             KMP_FATAL( LockIsAlreadyOwned, func );
174         }
175     }
176     __kmp_acquire_tas_lock( lck, gtid );
177 }
178 
179 int
180 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
181 {
182     if ( ( lck->lk.poll == 0 )
183       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
184         KMP_FSYNC_ACQUIRED( lck );
185         return TRUE;
186     }
187     return FALSE;
188 }
189 
190 static int
191 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
192 {
193     if ( __kmp_env_consistency_check ) {
194         char const * const func = "omp_test_lock";
195         if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
196           && __kmp_is_tas_lock_nestable( lck ) ) {
197             KMP_FATAL( LockNestableUsedAsSimple, func );
198         }
199     }
200     return __kmp_test_tas_lock( lck, gtid );
201 }
202 
203 void
204 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
205 {
206     KMP_MB();       /* Flush all pending memory write invalidates.  */
207 
208     KMP_FSYNC_RELEASING(lck);
209     KMP_ST_REL32( &(lck->lk.poll), 0 );
210 
211     KMP_MB();       /* Flush all pending memory write invalidates.  */
212 
213     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
214       __kmp_xproc ) );
215 }
216 
217 static void
218 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
219 {
220     if ( __kmp_env_consistency_check ) {
221         char const * const func = "omp_unset_lock";
222         KMP_MB();  /* in case another processor initialized lock */
223         if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
224           && __kmp_is_tas_lock_nestable( lck ) ) {
225             KMP_FATAL( LockNestableUsedAsSimple, func );
226         }
227         if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
228             KMP_FATAL( LockUnsettingFree, func );
229         }
230         if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
231           && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
232             KMP_FATAL( LockUnsettingSetByAnother, func );
233         }
234     }
235     __kmp_release_tas_lock( lck, gtid );
236 }
237 
238 void
239 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
240 {
241     TCW_4( lck->lk.poll, 0 );
242 }
243 
244 static void
245 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
246 {
247     __kmp_init_tas_lock( lck );
248 }
249 
250 void
251 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
252 {
253     lck->lk.poll = 0;
254 }
255 
256 static void
257 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
258 {
259     if ( __kmp_env_consistency_check ) {
260         char const * const func = "omp_destroy_lock";
261         if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
262           && __kmp_is_tas_lock_nestable( lck ) ) {
263             KMP_FATAL( LockNestableUsedAsSimple, func );
264         }
265         if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
266             KMP_FATAL( LockStillOwned, func );
267         }
268     }
269     __kmp_destroy_tas_lock( lck );
270 }
271 
272 
273 //
274 // nested test and set locks
275 //
276 
277 void
278 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
279 {
280     KMP_DEBUG_ASSERT( gtid >= 0 );
281 
282     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
283         lck->lk.depth_locked += 1;
284     }
285     else {
286         __kmp_acquire_tas_lock_timed_template( lck, gtid );
287         lck->lk.depth_locked = 1;
288     }
289 }
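//
// Illustrative sketch (not part of the library) of the depth counting above: repeated
// acquires by the owning thread only bump lk.depth_locked, and the underlying TAS lock
// is released when the depth returns to zero:
//
//     __kmp_init_nested_tas_lock( &lck );           // depth_locked == 0
//     __kmp_acquire_nested_tas_lock( &lck, gtid );  // acquires lock, depth == 1
//     __kmp_acquire_nested_tas_lock( &lck, gtid );  // same owner,    depth == 2
//     __kmp_release_nested_tas_lock( &lck, gtid );  // depth == 1, still held
//     __kmp_release_nested_tas_lock( &lck, gtid );  // depth == 0, lock released
//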
290 
291 static void
292 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
293 {
294     if ( __kmp_env_consistency_check ) {
295         char const * const func = "omp_set_nest_lock";
296         if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
297             KMP_FATAL( LockSimpleUsedAsNestable, func );
298         }
299     }
300     __kmp_acquire_nested_tas_lock( lck, gtid );
301 }
302 
303 int
304 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
305 {
306     int retval;
307 
308     KMP_DEBUG_ASSERT( gtid >= 0 );
309 
310     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
311         retval = ++lck->lk.depth_locked;
312     }
313     else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
314         retval = 0;
315     }
316     else {
317         KMP_MB();
318         retval = lck->lk.depth_locked = 1;
319     }
320     return retval;
321 }
322 
323 static int
324 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
325 {
326     if ( __kmp_env_consistency_check ) {
327         char const * const func = "omp_test_nest_lock";
328         if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
329             KMP_FATAL( LockSimpleUsedAsNestable, func );
330         }
331     }
332     return __kmp_test_nested_tas_lock( lck, gtid );
333 }
334 
335 void
336 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
337 {
338     KMP_DEBUG_ASSERT( gtid >= 0 );
339 
340     KMP_MB();
341     if ( --(lck->lk.depth_locked) == 0 ) {
342         __kmp_release_tas_lock( lck, gtid );
343     }
344 }
345 
346 static void
347 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
348 {
349     if ( __kmp_env_consistency_check ) {
350         char const * const func = "omp_unset_nest_lock";
351         KMP_MB();  /* in case another processor initialized lock */
352         if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
353             KMP_FATAL( LockSimpleUsedAsNestable, func );
354         }
355         if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
356             KMP_FATAL( LockUnsettingFree, func );
357         }
358         if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
359             KMP_FATAL( LockUnsettingSetByAnother, func );
360         }
361     }
362     __kmp_release_nested_tas_lock( lck, gtid );
363 }
364 
365 void
366 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
367 {
368     __kmp_init_tas_lock( lck );
369     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
370 }
371 
372 static void
373 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
374 {
375     __kmp_init_nested_tas_lock( lck );
376 }
377 
378 void
379 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
380 {
381     __kmp_destroy_tas_lock( lck );
382     lck->lk.depth_locked = 0;
383 }
384 
385 static void
386 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
387 {
388     if ( __kmp_env_consistency_check ) {
389         char const * const func = "omp_destroy_nest_lock";
390         if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
391             KMP_FATAL( LockSimpleUsedAsNestable, func );
392         }
393         if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
394             KMP_FATAL( LockStillOwned, func );
395         }
396     }
397     __kmp_destroy_nested_tas_lock( lck );
398 }
399 
400 
401 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
402 
403 /* ------------------------------------------------------------------------ */
404 /* futex locks */
405 
406 // futex locks are really just test and set locks, with a different method
407 // of handling contention.  They take the same amount of space as test and
408 // set locks, and are allocated the same way (i.e. use the area allocated by
409 // the compiler for non-nested locks / allocate nested locks on the heap).
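//
// Illustrative sketch (not part of the library) of the poll encoding used below: a held
// futex lock stores ( gtid + 1 ) << 1 in lk.poll, and bit 0 is set once a waiter has gone
// (or is about to go) to sleep in futex_wait, telling the releaser to issue a futex_wake:
//
//     poll == 0                           // unlocked
//     poll == ( gtid + 1 ) << 1           // held by gtid, no sleeping waiters known
//     poll == ( ( gtid + 1 ) << 1 ) | 1   // held by gtid, futex_wake needed on release
//     owner == ( poll >> 1 ) - 1          // see __kmp_get_futex_lock_owner
//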
410 
411 static kmp_int32
412 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
413 {
414     return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
415 }
416 
417 static inline bool
418 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
419 {
420     return lck->lk.depth_locked != -1;
421 }
422 
423 __forceinline static void
424 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
425 {
426     kmp_int32 gtid_code = ( gtid + 1 ) << 1;
427 
428     KMP_MB();
429 
430 #ifdef USE_LOCK_PROFILE
431     kmp_uint32 curr = TCR_4( lck->lk.poll );
432     if ( ( curr != 0 ) && ( curr != gtid_code ) )
433         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
434     /* else __kmp_printf( "." );*/
435 #endif /* USE_LOCK_PROFILE */
436 
437     KMP_FSYNC_PREPARE( lck );
438     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
439       lck, lck->lk.poll, gtid ) );
440 
441     kmp_int32 poll_val;
442     while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0,
443       gtid_code ) ) != 0 ) {
444         kmp_int32 cond = poll_val & 1;
445         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
446            lck, gtid, poll_val, cond ) );
447 
448         //
449         // NOTE: if you try to use the following condition for this branch
450         //
451         // if ( poll_val & 1 == 0 )
452         //
453         // then the block below is always skipped, regardless of the value of the LSB of
454         // poll_val: == binds more tightly than &, so it parses as poll_val & ( 1 == 0 ), i.e. 0.
455         //
456         if ( ! cond ) {
457             //
458             // Try to set the lsb in the poll to indicate to the owner
459             // thread that they need to wake this thread up.
460             //
461             if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ),
462               poll_val, poll_val | 1 ) ) {
463                 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
464                   lck, lck->lk.poll, gtid ) );
465                 continue;
466             }
467             poll_val |= 1;
468 
469             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
470               lck, lck->lk.poll, gtid ) );
471         }
472 
473         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
474            lck, gtid, poll_val ) );
475 
476         kmp_int32 rc;
477         if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
478           poll_val, NULL, NULL, 0 ) ) != 0 ) {
479             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
480                lck, gtid, poll_val, rc, errno ) );
481             continue;
482         }
483 
484         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
485            lck, gtid, poll_val ) );
486         //
487         // This thread has now done a successful futex wait call and was
488         // entered on the OS futex queue.  We must now perform a futex
489         // wake call when releasing the lock, as we have no idea how many
490         // other threads are in the queue.
491         //
492         gtid_code |= 1;
493     }
494 
495     KMP_FSYNC_ACQUIRED( lck );
496     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
497       lck, lck->lk.poll, gtid ) );
498 }
499 
500 void
501 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
502 {
503     __kmp_acquire_futex_lock_timed_template( lck, gtid );
504 }
505 
506 static void
507 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
508 {
509     if ( __kmp_env_consistency_check ) {
510         char const * const func = "omp_set_lock";
511         if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
512           && __kmp_is_futex_lock_nestable( lck ) ) {
513             KMP_FATAL( LockNestableUsedAsSimple, func );
514         }
515         if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
516             KMP_FATAL( LockIsAlreadyOwned, func );
517         }
518     }
519     __kmp_acquire_futex_lock( lck, gtid );
520 }
521 
522 int
523 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
524 {
525     if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
526         KMP_FSYNC_ACQUIRED( lck );
527         return TRUE;
528     }
529     return FALSE;
530 }
531 
532 static int
533 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
534 {
535     if ( __kmp_env_consistency_check ) {
536         char const * const func = "omp_test_lock";
537         if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
538           && __kmp_is_futex_lock_nestable( lck ) ) {
539             KMP_FATAL( LockNestableUsedAsSimple, func );
540         }
541     }
542     return __kmp_test_futex_lock( lck, gtid );
543 }
544 
545 void
546 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
547 {
548     KMP_MB();       /* Flush all pending memory write invalidates.  */
549 
550     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
551       lck, lck->lk.poll, gtid ) );
552 
553     KMP_FSYNC_RELEASING(lck);
554 
555     kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 );
556 
557     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
558        lck, gtid, poll_val ) );
559 
560     if ( poll_val & 1 ) {
561         KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
562            lck, gtid ) );
563         syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
564     }
565 
566     KMP_MB();       /* Flush all pending memory write invalidates.  */
567 
568     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
569       lck, lck->lk.poll, gtid ) );
570 
571     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
572       __kmp_xproc ) );
573 }
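//
// Illustrative sketch (not part of the library) of the wait/wake handshake between the
// acquire path (__kmp_acquire_futex_lock_timed_template) and the release path above:
//
//     waiter:   CAS( poll, poll_val, poll_val | 1 )      // advertise that a wake is needed
//               futex_wait( &poll, poll_val | 1 )        // sleep while poll is unchanged
//     releaser: old = XCHG( poll, 0 )                    // unconditionally release
//               if ( old & 1 )  futex_wake( &poll, 1 )   // wake one sleeper, if any
//
// A failed futex_wait (e.g. because poll changed before the waiter slept) just sends the
// waiter back around the acquire loop rather than letting it sleep on a stale value.
//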
574 
575 static void
576 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
577 {
578     if ( __kmp_env_consistency_check ) {
579         char const * const func = "omp_unset_lock";
580         KMP_MB();  /* in case another processor initialized lock */
581         if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
582           && __kmp_is_futex_lock_nestable( lck ) ) {
583             KMP_FATAL( LockNestableUsedAsSimple, func );
584         }
585         if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
586             KMP_FATAL( LockUnsettingFree, func );
587         }
588         if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
589           && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
590             KMP_FATAL( LockUnsettingSetByAnother, func );
591         }
592     }
593     __kmp_release_futex_lock( lck, gtid );
594 }
595 
596 void
597 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
598 {
599     TCW_4( lck->lk.poll, 0 );
600 }
601 
602 static void
603 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
604 {
605     __kmp_init_futex_lock( lck );
606 }
607 
608 void
609 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
610 {
611     lck->lk.poll = 0;
612 }
613 
614 static void
615 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
616 {
617     if ( __kmp_env_consistency_check ) {
618         char const * const func = "omp_destroy_lock";
619         if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
620           && __kmp_is_futex_lock_nestable( lck ) ) {
621             KMP_FATAL( LockNestableUsedAsSimple, func );
622         }
623         if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
624             KMP_FATAL( LockStillOwned, func );
625         }
626     }
627     __kmp_destroy_futex_lock( lck );
628 }
629 
630 
631 //
632 // nested futex locks
633 //
634 
635 void
636 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
637 {
638     KMP_DEBUG_ASSERT( gtid >= 0 );
639 
640     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
641         lck->lk.depth_locked += 1;
642     }
643     else {
644         __kmp_acquire_futex_lock_timed_template( lck, gtid );
645         lck->lk.depth_locked = 1;
646     }
647 }
648 
649 static void
650 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
651 {
652     if ( __kmp_env_consistency_check ) {
653         char const * const func = "omp_set_nest_lock";
654         if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
655             KMP_FATAL( LockSimpleUsedAsNestable, func );
656         }
657     }
658     __kmp_acquire_nested_futex_lock( lck, gtid );
659 }
660 
661 int
662 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
663 {
664     int retval;
665 
666     KMP_DEBUG_ASSERT( gtid >= 0 );
667 
668     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
669         retval = ++lck->lk.depth_locked;
670     }
671     else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
672         retval = 0;
673     }
674     else {
675         KMP_MB();
676         retval = lck->lk.depth_locked = 1;
677     }
678     return retval;
679 }
680 
681 static int
682 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
683 {
684     if ( __kmp_env_consistency_check ) {
685         char const * const func = "omp_test_nest_lock";
686         if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
687             KMP_FATAL( LockSimpleUsedAsNestable, func );
688         }
689     }
690     return __kmp_test_nested_futex_lock( lck, gtid );
691 }
692 
693 void
694 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
695 {
696     KMP_DEBUG_ASSERT( gtid >= 0 );
697 
698     KMP_MB();
699     if ( --(lck->lk.depth_locked) == 0 ) {
700         __kmp_release_futex_lock( lck, gtid );
701     }
702 }
703 
704 static void
705 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
706 {
707     if ( __kmp_env_consistency_check ) {
708         char const * const func = "omp_unset_nest_lock";
709         KMP_MB();  /* in case another processor initialized lock */
710         if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
711             KMP_FATAL( LockSimpleUsedAsNestable, func );
712         }
713         if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
714             KMP_FATAL( LockUnsettingFree, func );
715         }
716         if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
717             KMP_FATAL( LockUnsettingSetByAnother, func );
718         }
719     }
720     __kmp_release_nested_futex_lock( lck, gtid );
721 }
722 
723 void
724 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
725 {
726     __kmp_init_futex_lock( lck );
727     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
728 }
729 
730 static void
731 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
732 {
733     __kmp_init_nested_futex_lock( lck );
734 }
735 
736 void
737 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
738 {
739     __kmp_destroy_futex_lock( lck );
740     lck->lk.depth_locked = 0;
741 }
742 
743 static void
744 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
745 {
746     if ( __kmp_env_consistency_check ) {
747         char const * const func = "omp_destroy_nest_lock";
748         if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
749             KMP_FATAL( LockSimpleUsedAsNestable, func );
750         }
751         if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
752             KMP_FATAL( LockStillOwned, func );
753         }
754     }
755     __kmp_destroy_nested_futex_lock( lck );
756 }
757 
758 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
759 
760 
761 /* ------------------------------------------------------------------------ */
762 /* ticket (bakery) locks */
763 
764 static kmp_int32
765 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
766 {
767     return TCR_4( lck->lk.owner_id ) - 1;
768 }
769 
770 static inline bool
771 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
772 {
773     return lck->lk.depth_locked != -1;
774 }
775 
776 static kmp_uint32
777 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
778 {
779     register kmp_uint32 pause;
780 
781     if (value == checker) {
782         return TRUE;
783     }
784     for (pause = checker - value; pause != 0; --pause) {
785         __kmp_static_delay(TRUE);
786     }
787     return FALSE;
788 }
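//
// Illustrative note (not part of the library): __kmp_bakery_check is passed to
// KMP_WAIT_YIELD in the acquire path below, roughly as
//
//     KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
//
// so "value" is the current now_serving and "checker" is the caller's ticket; the delay
// loop above therefore scales with how far the caller is from being served.
//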
789 
790 __forceinline static void
791 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
792 {
793     kmp_uint32 my_ticket;
794     KMP_MB();
795 
796     my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
797 
798 #ifdef USE_LOCK_PROFILE
799     if ( TCR_4( lck->lk.now_serving ) != my_ticket )
800         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
801     /* else __kmp_printf( "." );*/
802 #endif /* USE_LOCK_PROFILE */
803 
804     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
805         KMP_FSYNC_ACQUIRED(lck);
806         return;
807     }
808     KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
809     KMP_FSYNC_ACQUIRED(lck);
810 }
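//
// Illustrative sketch (not part of the library) of the ticket discipline above, assuming
// three threads reach a freshly initialized lock in the order T0, T1, T2:
//
//     T0: my_ticket == 0, now_serving == 0   ->  acquires immediately
//     T1: my_ticket == 1                     ->  spins until now_serving == 1
//     T2: my_ticket == 2                     ->  spins until now_serving == 2
//
// Each release (below) bumps now_serving by one, handing the lock over in FIFO order.
//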
811 
812 void
813 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
814 {
815     __kmp_acquire_ticket_lock_timed_template( lck, gtid );
816 }
817 
818 static void
819 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
820 {
821     if ( __kmp_env_consistency_check ) {
822         char const * const func = "omp_set_lock";
823         if ( lck->lk.initialized != lck ) {
824             KMP_FATAL( LockIsUninitialized, func );
825         }
826         if ( __kmp_is_ticket_lock_nestable( lck ) ) {
827             KMP_FATAL( LockNestableUsedAsSimple, func );
828         }
829         if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
830             KMP_FATAL( LockIsAlreadyOwned, func );
831         }
832     }
833 
834     __kmp_acquire_ticket_lock( lck, gtid );
835 
836     if ( __kmp_env_consistency_check ) {
837         lck->lk.owner_id = gtid + 1;
838     }
839 }
840 
841 int
842 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
843 {
844     kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
845     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
846         kmp_uint32 next_ticket = my_ticket + 1;
847         if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
848           my_ticket, next_ticket ) ) {
849             KMP_FSYNC_ACQUIRED( lck );
850             return TRUE;
851         }
852     }
853     return FALSE;
854 }
855 
856 static int
857 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
858 {
859     if ( __kmp_env_consistency_check ) {
860         char const * const func = "omp_test_lock";
861         if ( lck->lk.initialized != lck ) {
862             KMP_FATAL( LockIsUninitialized, func );
863         }
864         if ( __kmp_is_ticket_lock_nestable( lck ) ) {
865             KMP_FATAL( LockNestableUsedAsSimple, func );
866         }
867     }
868 
869     int retval = __kmp_test_ticket_lock( lck, gtid );
870 
871     if ( __kmp_env_consistency_check && retval ) {
872         lck->lk.owner_id = gtid + 1;
873     }
874     return retval;
875 }
876 
877 void
878 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
879 {
880     kmp_uint32  distance;
881 
882     KMP_MB();       /* Flush all pending memory write invalidates.  */
883 
884     KMP_FSYNC_RELEASING(lck);
885     distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
886 
887     KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
888 
889     KMP_MB();       /* Flush all pending memory write invalidates.  */
890 
891     KMP_YIELD( distance
892       > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
893 }
894 
895 static void
896 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
897 {
898     if ( __kmp_env_consistency_check ) {
899         char const * const func = "omp_unset_lock";
900         KMP_MB();  /* in case another processor initialized lock */
901         if ( lck->lk.initialized != lck ) {
902             KMP_FATAL( LockIsUninitialized, func );
903         }
904         if ( __kmp_is_ticket_lock_nestable( lck ) ) {
905             KMP_FATAL( LockNestableUsedAsSimple, func );
906         }
907         if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
908             KMP_FATAL( LockUnsettingFree, func );
909         }
910         if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
911           && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
912             KMP_FATAL( LockUnsettingSetByAnother, func );
913         }
914         lck->lk.owner_id = 0;
915     }
916     __kmp_release_ticket_lock( lck, gtid );
917 }
918 
919 void
920 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
921 {
922     lck->lk.location = NULL;
923     TCW_4( lck->lk.next_ticket, 0 );
924     TCW_4( lck->lk.now_serving, 0 );
925     lck->lk.owner_id = 0;      // no thread owns the lock.
926     lck->lk.depth_locked = -1; // -1 => not a nested lock.
927     lck->lk.initialized = (kmp_ticket_lock *)lck;
928 }
929 
930 static void
931 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
932 {
933     __kmp_init_ticket_lock( lck );
934 }
935 
936 void
937 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
938 {
939     lck->lk.initialized = NULL;
940     lck->lk.location    = NULL;
941     lck->lk.next_ticket = 0;
942     lck->lk.now_serving = 0;
943     lck->lk.owner_id = 0;
944     lck->lk.depth_locked = -1;
945 }
946 
947 static void
948 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
949 {
950     if ( __kmp_env_consistency_check ) {
951         char const * const func = "omp_destroy_lock";
952         if ( lck->lk.initialized != lck ) {
953             KMP_FATAL( LockIsUninitialized, func );
954         }
955         if ( __kmp_is_ticket_lock_nestable( lck ) ) {
956             KMP_FATAL( LockNestableUsedAsSimple, func );
957         }
958         if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
959             KMP_FATAL( LockStillOwned, func );
960         }
961     }
962     __kmp_destroy_ticket_lock( lck );
963 }
964 
965 
966 //
967 // nested ticket locks
968 //
969 
970 void
971 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
972 {
973     KMP_DEBUG_ASSERT( gtid >= 0 );
974 
975     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
976         lck->lk.depth_locked += 1;
977     }
978     else {
979         __kmp_acquire_ticket_lock_timed_template( lck, gtid );
980         KMP_MB();
981         lck->lk.depth_locked = 1;
982         KMP_MB();
983         lck->lk.owner_id = gtid + 1;
984     }
985 }
986 
987 static void
988 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
989 {
990     if ( __kmp_env_consistency_check ) {
991         char const * const func = "omp_set_nest_lock";
992         if ( lck->lk.initialized != lck ) {
993             KMP_FATAL( LockIsUninitialized, func );
994         }
995         if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
996             KMP_FATAL( LockSimpleUsedAsNestable, func );
997         }
998     }
999     __kmp_acquire_nested_ticket_lock( lck, gtid );
1000 }
1001 
1002 int
1003 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1004 {
1005     int retval;
1006 
1007     KMP_DEBUG_ASSERT( gtid >= 0 );
1008 
1009     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
1010         retval = ++lck->lk.depth_locked;
1011     }
1012     else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
1013         retval = 0;
1014     }
1015     else {
1016         KMP_MB();
1017         retval = lck->lk.depth_locked = 1;
1018         KMP_MB();
1019         lck->lk.owner_id = gtid + 1;
1020     }
1021     return retval;
1022 }
1023 
1024 static int
1025 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
1026   kmp_int32 gtid )
1027 {
1028     if ( __kmp_env_consistency_check ) {
1029         char const * const func = "omp_test_nest_lock";
1030         if ( lck->lk.initialized != lck ) {
1031             KMP_FATAL( LockIsUninitialized, func );
1032         }
1033         if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1034             KMP_FATAL( LockSimpleUsedAsNestable, func );
1035         }
1036     }
1037     return __kmp_test_nested_ticket_lock( lck, gtid );
1038 }
1039 
1040 void
1041 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1042 {
1043     KMP_DEBUG_ASSERT( gtid >= 0 );
1044 
1045     KMP_MB();
1046     if ( --(lck->lk.depth_locked) == 0 ) {
1047         KMP_MB();
1048         lck->lk.owner_id = 0;
1049         __kmp_release_ticket_lock( lck, gtid );
1050     }
1051 }
1052 
1053 static void
1054 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1055 {
1056     if ( __kmp_env_consistency_check ) {
1057         char const * const func = "omp_unset_nest_lock";
1058         KMP_MB();  /* in case another processor initialized lock */
1059         if ( lck->lk.initialized != lck ) {
1060             KMP_FATAL( LockIsUninitialized, func );
1061         }
1062         if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1063             KMP_FATAL( LockSimpleUsedAsNestable, func );
1064         }
1065         if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1066             KMP_FATAL( LockUnsettingFree, func );
1067         }
1068         if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1069             KMP_FATAL( LockUnsettingSetByAnother, func );
1070         }
1071     }
1072     __kmp_release_nested_ticket_lock( lck, gtid );
1073 }
1074 
1075 void
1076 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1077 {
1078     __kmp_init_ticket_lock( lck );
1079     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1080 }
1081 
1082 static void
1083 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1084 {
1085     __kmp_init_nested_ticket_lock( lck );
1086 }
1087 
1088 void
1089 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1090 {
1091     __kmp_destroy_ticket_lock( lck );
1092     lck->lk.depth_locked = 0;
1093 }
1094 
1095 static void
1096 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1097 {
1098     if ( __kmp_env_consistency_check ) {
1099         char const * const func = "omp_destroy_nest_lock";
1100         if ( lck->lk.initialized != lck ) {
1101             KMP_FATAL( LockIsUninitialized, func );
1102         }
1103         if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1104             KMP_FATAL( LockSimpleUsedAsNestable, func );
1105         }
1106         if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1107             KMP_FATAL( LockStillOwned, func );
1108         }
1109     }
1110     __kmp_destroy_nested_ticket_lock( lck );
1111 }
1112 
1113 
1114 //
1115 // access functions to fields which don't exist for all lock kinds.
1116 //
1117 
1118 static int
1119 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1120 {
1121     return lck == lck->lk.initialized;
1122 }
1123 
1124 static const ident_t *
1125 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1126 {
1127     return lck->lk.location;
1128 }
1129 
1130 static void
1131 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1132 {
1133     lck->lk.location = loc;
1134 }
1135 
1136 static kmp_lock_flags_t
1137 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1138 {
1139     return lck->lk.flags;
1140 }
1141 
1142 static void
1143 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1144 {
1145     lck->lk.flags = flags;
1146 }
1147 
1148 /* ------------------------------------------------------------------------ */
1149 /* queuing locks */
1150 
1151 /*
1152  * First the states
1153  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1154  *   UINT_MAX or -1, 0  means lock is held, nobody on queue
1155  *                h, h  means lock is held or about to transition, 1 element on queue
1156  *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1157  *
1158  * Now the transitions
1159  *    Acquire(0,0)  = -1 ,0
1160  *    Release(0,0)  = Error
1161  *    Acquire(-1,0) =  h ,h    h > 0
1162  *    Release(-1,0) =  0 ,0
1163  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1164  *    Release(h,h)  = -1 ,0    h > 0
1165  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1166  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1167  *
1168  * And pictorially
1169  *
1170  *
1171  *          +-----+
1172  *          | 0, 0|------- release -------> Error
1173  *          +-----+
1174  *            |  ^
1175  *     acquire|  |release
1176  *            |  |
1177  *            |  |
1178  *            v  |
1179  *          +-----+
1180  *          |-1, 0|
1181  *          +-----+
1182  *            |  ^
1183  *     acquire|  |release
1184  *            |  |
1185  *            |  |
1186  *            v  |
1187  *          +-----+
1188  *          | h, h|
1189  *          +-----+
1190  *            |  ^
1191  *     acquire|  |release
1192  *            |  |
1193  *            |  |
1194  *            v  |
1195  *          +-----+
1196  *          | h, t|----- acquire, release loopback ---+
1197  *          +-----+                                   |
1198  *               ^                                    |
1199  *               |                                    |
1200  *               +------------------------------------+
1201  *
1202  */
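/*
 * Illustrative trace (not part of the library), using gtids 0, 1, 2 (queue ids 1, 2, 3):
 *
 *    (0,0)   T0 acquires:  CAS head 0 -> -1            =>  (-1,0)  T0 holds, queue empty
 *   (-1,0)   T1 acquires:  CAS64 (-1,0) -> (2,2)       =>  ( 2,2)  T0 holds, T1 queued
 *   ( 2,2)   T2 acquires:  CAS tail 2 -> 3             =>  ( 2,3)  T0 holds, T1 and T2 queued
 *   ( 2,3)   T0 releases:  head := T1's next link (3)  =>  ( 3,3)  T1 holds, T2 queued
 *   ( 3,3)   T1 releases:  CAS64 (3,3) -> (-1,0)       =>  (-1,0)  T2 holds, queue empty
 *   (-1,0)   T2 releases:  CAS head -1 -> 0            =>  ( 0,0)  free
 */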
1203 
1204 #ifdef DEBUG_QUEUING_LOCKS
1205 
1206 /* Stuff for circular trace buffer */
1207 #define TRACE_BUF_ELE	1024
1208 static char traces[TRACE_BUF_ELE][128] = { 0 };
1209 static int tc = 0;
1210 #define TRACE_LOCK(X,Y)          sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
1211 #define TRACE_LOCK_T(X,Y,Z)      sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
1212 #define TRACE_LOCK_HT(X,Y,Z,Q)   sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );
1213 
1214 static void
1215 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1216   kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1217 {
1218     kmp_int32 t, i;
1219 
1220     __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1221 
1222     i = tc % TRACE_BUF_ELE;
1223     __kmp_printf_no_lock( "%s\n", traces[i] );
1224     i = (i+1) % TRACE_BUF_ELE;
1225     while ( i != (tc % TRACE_BUF_ELE) ) {
1226         __kmp_printf_no_lock( "%s", traces[i] );
1227         i = (i+1) % TRACE_BUF_ELE;
1228     }
1229     __kmp_printf_no_lock( "\n" );
1230 
1231     __kmp_printf_no_lock(
1232              "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1233              gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1234              head_id, tail_id );
1235 
1236     __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1237 
1238     if ( lck->lk.head_id >= 1 ) {
1239         t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1240         while (t > 0) {
1241             __kmp_printf_no_lock( "-> %d ", t );
1242             t = __kmp_threads[t-1]->th.th_next_waiting;
1243         }
1244     }
1245     __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
1246     __kmp_printf_no_lock( "\n\n" );
1247 }
1248 
1249 #endif /* DEBUG_QUEUING_LOCKS */
1250 
1251 static kmp_int32
1252 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1253 {
1254     return TCR_4( lck->lk.owner_id ) - 1;
1255 }
1256 
1257 static inline bool
1258 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1259 {
1260     return lck->lk.depth_locked != -1;
1261 }
1262 
1263 /* Acquire a lock using the queuing lock implementation */
1264 template <bool takeTime>
1265 /* [TLW] The unused template above is left behind because of what BEB believes is a
1266    potential compiler problem with __forceinline. */
1267 __forceinline static void
1268 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1269   kmp_int32 gtid )
1270 {
1271     register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
1272     volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
1273     volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
1274     volatile kmp_uint32 *spin_here_p;
1275     kmp_int32 need_mf = 1;
1276 
1277     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1278 
1279     KMP_FSYNC_PREPARE( lck );
1280     KMP_DEBUG_ASSERT( this_thr != NULL );
1281     spin_here_p = & this_thr->th.th_spin_here;
1282 
1283 #ifdef DEBUG_QUEUING_LOCKS
1284     TRACE_LOCK( gtid+1, "acq ent" );
1285     if ( *spin_here_p )
1286         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1287     if ( this_thr->th.th_next_waiting != 0 )
1288         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1289 #endif
1290     KMP_DEBUG_ASSERT( !*spin_here_p );
1291     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1292 
1293 
1294     /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1295        that may follow, not just in execution order, but also in visibility order.  This way,
1296        when a releasing thread observes the changes to the queue by this thread, it can
1297        rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1298        spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
1299        to FALSE before this thread sets it to TRUE, this thread will hang.
1300     */
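    /* Illustrative interleaving (not part of the library) of the hang the ordering above
       prevents, if the store to spin_here_p could become visible only after the enqueue:

           acquirer:  enqueues itself on lck             (spin_here_p store not yet visible)
           releaser:  sees the acquirer on the queue, sets spin_here_p = FALSE   (lost)
           acquirer:  spin_here_p = TRUE;  then spins forever

       Making the st.rel to spin_here_p visible before the cmpxchg.acq rules this out. */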
1301     *spin_here_p = TRUE;  /* before enqueuing to prevent race */
1302 
1303     while( 1 ) {
1304         kmp_int32 enqueued;
1305         kmp_int32 head;
1306         kmp_int32 tail;
1307 
1308         head = *head_id_p;
1309 
1310         switch ( head ) {
1311 
1312             case -1:
1313             {
1314 #ifdef DEBUG_QUEUING_LOCKS
1315                 tail = *tail_id_p;
1316                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1317 #endif
1318                 tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
1319                            this assignment prevents us from entering the if ( tail > 0 )
1320                            condition in the enqueued case below, which is not necessary for
1321                            this state transition */
1322 
1323                 need_mf = 0;
1324                 /* try (-1,0)->(tid,tid) */
1325                 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1326                   KMP_PACK_64( -1, 0 ),
1327                   KMP_PACK_64( gtid+1, gtid+1 ) );
1328 #ifdef DEBUG_QUEUING_LOCKS
1329                   if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1330 #endif
1331             }
1332             break;
1333 
1334             default:
1335             {
1336                 tail = *tail_id_p;
1337                 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1338 
1339 #ifdef DEBUG_QUEUING_LOCKS
1340                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1341 #endif
1342 
1343                 if ( tail == 0 ) {
1344                     enqueued = FALSE;
1345                 }
1346                 else {
1347                     need_mf = 0;
1348                     /* try (h,t) or (h,h)->(h,tid) */
1349                     enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1350 
1351 #ifdef DEBUG_QUEUING_LOCKS
1352                         if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1353 #endif
1354                 }
1355             }
1356             break;
1357 
1358             case 0: /* empty queue */
1359             {
1360                 kmp_int32 grabbed_lock;
1361 
1362 #ifdef DEBUG_QUEUING_LOCKS
1363                 tail = *tail_id_p;
1364                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1365 #endif
1366                 /* try (0,0)->(-1,0) */
1367 
1368                 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1369                 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1370 
1371                 if ( grabbed_lock ) {
1372 
1373                     *spin_here_p = FALSE;
1374 
1375                     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1376                               lck, gtid ));
1377 #ifdef DEBUG_QUEUING_LOCKS
1378                     TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1379 #endif
1380                     KMP_FSYNC_ACQUIRED( lck );
1381                     return; /* lock holder cannot be on queue */
1382                 }
1383                 enqueued = FALSE;
1384             }
1385             break;
1386         }
1387 
1388         if ( enqueued ) {
1389             if ( tail > 0 ) {
1390                 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1391                 KMP_ASSERT( tail_thr != NULL );
1392                 tail_thr->th.th_next_waiting = gtid+1;
1393                 /* corresponding wait for this write in release code */
1394             }
1395             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1396 
1397 
1398             /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
1399              *       throughput only here.
1400              */
1401             KMP_MB();
1402             KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1403 
1404 #ifdef DEBUG_QUEUING_LOCKS
1405             TRACE_LOCK( gtid+1, "acq spin" );
1406 
1407             if ( this_thr->th.th_next_waiting != 0 )
1408                 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1409 #endif
1410             KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1411             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1412                       lck, gtid ));
1413 
1414 #ifdef DEBUG_QUEUING_LOCKS
1415             TRACE_LOCK( gtid+1, "acq exit 2" );
1416 #endif
1417             /* got lock, we were dequeued by the thread that released lock */
1418             return;
1419         }
1420 
1421         /* Yield if number of threads > number of logical processors */
1422         /* ToDo: Not sure why this should only be in oversubscription case,
1423            maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1424         KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1425           __kmp_xproc ) );
1426 #ifdef DEBUG_QUEUING_LOCKS
1427         TRACE_LOCK( gtid+1, "acq retry" );
1428 #endif
1429 
1430     }
1431     KMP_ASSERT2( 0, "should not get here" );
1432 }
1433 
1434 void
1435 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1436 {
1437     KMP_DEBUG_ASSERT( gtid >= 0 );
1438 
1439     __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1440 }
1441 
1442 static void
1443 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1444   kmp_int32 gtid )
1445 {
1446     if ( __kmp_env_consistency_check ) {
1447         char const * const func = "omp_set_lock";
1448         if ( lck->lk.initialized != lck ) {
1449             KMP_FATAL( LockIsUninitialized, func );
1450         }
1451         if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1452             KMP_FATAL( LockNestableUsedAsSimple, func );
1453         }
1454         if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1455             KMP_FATAL( LockIsAlreadyOwned, func );
1456         }
1457     }
1458 
1459     __kmp_acquire_queuing_lock( lck, gtid );
1460 
1461     if ( __kmp_env_consistency_check ) {
1462         lck->lk.owner_id = gtid + 1;
1463     }
1464 }
1465 
1466 int
1467 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1468 {
1469     volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
1470     kmp_int32 head;
1471 #ifdef KMP_DEBUG
1472     kmp_info_t *this_thr;
1473 #endif
1474 
1475     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1476     KMP_DEBUG_ASSERT( gtid >= 0 );
1477 #ifdef KMP_DEBUG
1478     this_thr = __kmp_thread_from_gtid( gtid );
1479     KMP_DEBUG_ASSERT( this_thr != NULL );
1480     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1481 #endif
1482 
1483     head = *head_id_p;
1484 
1485     if ( head == 0 ) { /* nobody on queue, nobody holding */
1486 
1487         /* try (0,0)->(-1,0) */
1488 
1489         if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1490             KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1491             KMP_FSYNC_ACQUIRED(lck);
1492             return TRUE;
1493         }
1494     }
1495 
1496     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1497     return FALSE;
1498 }
1499 
1500 static int
1501 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1502 {
1503     if ( __kmp_env_consistency_check ) {
1504         char const * const func = "omp_test_lock";
1505         if ( lck->lk.initialized != lck ) {
1506             KMP_FATAL( LockIsUninitialized, func );
1507         }
1508         if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1509             KMP_FATAL( LockNestableUsedAsSimple, func );
1510         }
1511     }
1512 
1513     int retval = __kmp_test_queuing_lock( lck, gtid );
1514 
1515     if ( __kmp_env_consistency_check && retval ) {
1516         lck->lk.owner_id = gtid + 1;
1517     }
1518     return retval;
1519 }
1520 
1521 void
1522 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1523 {
1524     register kmp_info_t *this_thr;
1525     volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1526     volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1527 
1528     KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1529     KMP_DEBUG_ASSERT( gtid >= 0 );
1530     this_thr    = __kmp_thread_from_gtid( gtid );
1531     KMP_DEBUG_ASSERT( this_thr != NULL );
1532 #ifdef DEBUG_QUEUING_LOCKS
1533     TRACE_LOCK( gtid+1, "rel ent" );
1534 
1535     if ( this_thr->th.th_spin_here )
1536         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1537     if ( this_thr->th.th_next_waiting != 0 )
1538         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1539 #endif
1540     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1541     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1542 
1543     KMP_FSYNC_RELEASING(lck);
1544 
1545     while( 1 ) {
1546         kmp_int32 dequeued;
1547         kmp_int32 head;
1548         kmp_int32 tail;
1549 
1550         head = *head_id_p;
1551 
1552 #ifdef DEBUG_QUEUING_LOCKS
1553         tail = *tail_id_p;
1554         TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1555         if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1556 #endif
1557         KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1558 
1559         if ( head == -1 ) { /* nobody on queue */
1560 
1561             /* try (-1,0)->(0,0) */
1562             if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1563                 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1564                           lck, gtid ));
1565 #ifdef DEBUG_QUEUING_LOCKS
1566                 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1567 #endif
1568                 return;
1569             }
1570             dequeued = FALSE;
1571 
1572         }
1573         else {
1574 
1575             tail = *tail_id_p;
1576             if ( head == tail ) {  /* only one thread on the queue */
1577 
1578 #ifdef DEBUG_QUEUING_LOCKS
1579                 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1580 #endif
1581                 KMP_DEBUG_ASSERT( head > 0 );
1582 
1583                 /* try (h,h)->(-1,0) */
1584                 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1585                   KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1586 #ifdef DEBUG_QUEUING_LOCKS
1587                 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1588 #endif
1589 
1590             }
1591             else {
1592                 volatile kmp_int32 *waiting_id_p;
1593                 kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
1594                 KMP_DEBUG_ASSERT( head_thr != NULL );
1595                 waiting_id_p = & head_thr->th.th_next_waiting;
1596 
1597                 /* Does this require synchronous reads? */
1598 #ifdef DEBUG_QUEUING_LOCKS
1599                 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1600 #endif
1601                 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1602 
1603                 /* try (h,t)->(h',t) or (t,t) */
1604 
1605                 KMP_MB();
1606                 /* make sure enqueuing thread has time to update next waiting thread field */
1607                 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1608 #ifdef DEBUG_QUEUING_LOCKS
1609                 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1610 #endif
1611                 dequeued = TRUE;
1612             }
1613         }
1614 
1615         if ( dequeued ) {
1616             kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1617             KMP_DEBUG_ASSERT( head_thr != NULL );
1618 
1619             /* Does this require synchronous reads? */
1620 #ifdef DEBUG_QUEUING_LOCKS
1621             if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1622 #endif
1623             KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1624 
1625             /* For clean code only.
1626              * Thread not released until next statement prevents race with acquire code.
1627              */
1628             head_thr->th.th_next_waiting = 0;
1629 #ifdef DEBUG_QUEUING_LOCKS
1630             TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1631 #endif
1632 
1633             KMP_MB();
1634             /* reset spin value */
1635             head_thr->th.th_spin_here = FALSE;
1636 
1637             KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1638                       lck, gtid ));
1639 #ifdef DEBUG_QUEUING_LOCKS
1640             TRACE_LOCK( gtid+1, "rel exit 2" );
1641 #endif
1642             return;
1643         }
1644         /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */
1645 
1646 #ifdef DEBUG_QUEUING_LOCKS
1647         TRACE_LOCK( gtid+1, "rel retry" );
1648 #endif
1649 
1650     } /* while */
1651     KMP_ASSERT2( 0, "should not get here" );
1652 }
1653 
1654 static void
1655 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1656   kmp_int32 gtid )
1657 {
1658     if ( __kmp_env_consistency_check ) {
1659         char const * const func = "omp_unset_lock";
1660         KMP_MB();  /* in case another processor initialized lock */
1661         if ( lck->lk.initialized != lck ) {
1662             KMP_FATAL( LockIsUninitialized, func );
1663         }
1664         if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1665             KMP_FATAL( LockNestableUsedAsSimple, func );
1666         }
1667         if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1668             KMP_FATAL( LockUnsettingFree, func );
1669         }
1670         if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1671             KMP_FATAL( LockUnsettingSetByAnother, func );
1672         }
1673         lck->lk.owner_id = 0;
1674     }
1675     __kmp_release_queuing_lock( lck, gtid );
1676 }
1677 
1678 void
1679 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1680 {
1681     lck->lk.location = NULL;
1682     lck->lk.head_id = 0;
1683     lck->lk.tail_id = 0;
1684     lck->lk.next_ticket = 0;
1685     lck->lk.now_serving = 0;
1686     lck->lk.owner_id = 0;      // no thread owns the lock.
1687     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1688     lck->lk.initialized = lck;
1689 
1690     KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1691 }
1692 
1693 static void
1694 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1695 {
1696     __kmp_init_queuing_lock( lck );
1697 }
1698 
1699 void
1700 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1701 {
1702     lck->lk.initialized = NULL;
1703     lck->lk.location = NULL;
1704     lck->lk.head_id = 0;
1705     lck->lk.tail_id = 0;
1706     lck->lk.next_ticket = 0;
1707     lck->lk.now_serving = 0;
1708     lck->lk.owner_id = 0;
1709     lck->lk.depth_locked = -1;
1710 }
1711 
1712 static void
1713 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1714 {
1715     if ( __kmp_env_consistency_check ) {
1716         char const * const func = "omp_destroy_lock";
1717         if ( lck->lk.initialized != lck ) {
1718             KMP_FATAL( LockIsUninitialized, func );
1719         }
1720         if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1721             KMP_FATAL( LockNestableUsedAsSimple, func );
1722         }
1723         if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1724             KMP_FATAL( LockStillOwned, func );
1725         }
1726     }
1727     __kmp_destroy_queuing_lock( lck );
1728 }
1729 
1730 
1731 //
1732 // nested queuing locks
1733 //
1734 
1735 void
1736 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1737 {
1738     KMP_DEBUG_ASSERT( gtid >= 0 );
1739 
1740     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1741         lck->lk.depth_locked += 1;
1742     }
1743     else {
1744         __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1745         KMP_MB();
1746         lck->lk.depth_locked = 1;
1747         KMP_MB();
1748         lck->lk.owner_id = gtid + 1;
1749     }
1750 }
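
//
// Illustrative trace (comments only, not compiled): how depth_locked behaves
// for a nestable lock driven through the OpenMP API by a single thread,
// assuming the checked entry points below are used:
//
//     omp_set_nest_lock( &l );    // acquires base lock, depth_locked = 1
//     omp_set_nest_lock( &l );    // owner re-enters,    depth_locked = 2
//     omp_unset_nest_lock( &l );  //                     depth_locked = 1
//     omp_unset_nest_lock( &l );  // depth_locked = 0, base lock released
//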
1751 
1752 static void
1753 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1754 {
1755     if ( __kmp_env_consistency_check ) {
1756         char const * const func = "omp_set_nest_lock";
1757         if ( lck->lk.initialized != lck ) {
1758             KMP_FATAL( LockIsUninitialized, func );
1759         }
1760         if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1761             KMP_FATAL( LockSimpleUsedAsNestable, func );
1762         }
1763     }
1764     __kmp_acquire_nested_queuing_lock( lck, gtid );
1765 }
1766 
1767 int
1768 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1769 {
1770     int retval;
1771 
1772     KMP_DEBUG_ASSERT( gtid >= 0 );
1773 
1774     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1775         retval = ++lck->lk.depth_locked;
1776     }
1777     else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1778         retval = 0;
1779     }
1780     else {
1781         KMP_MB();
1782         retval = lck->lk.depth_locked = 1;
1783         KMP_MB();
1784         lck->lk.owner_id = gtid + 1;
1785     }
1786     return retval;
1787 }
1788 
1789 static int
1790 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1791   kmp_int32 gtid )
1792 {
1793     if ( __kmp_env_consistency_check ) {
1794         char const * const func = "omp_test_nest_lock";
1795         if ( lck->lk.initialized != lck ) {
1796             KMP_FATAL( LockIsUninitialized, func );
1797         }
1798         if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1799             KMP_FATAL( LockSimpleUsedAsNestable, func );
1800         }
1801     }
1802     return __kmp_test_nested_queuing_lock( lck, gtid );
1803 }
1804 
1805 void
1806 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1807 {
1808     KMP_DEBUG_ASSERT( gtid >= 0 );
1809 
1810     KMP_MB();
1811     if ( --(lck->lk.depth_locked) == 0 ) {
1812         KMP_MB();
1813         lck->lk.owner_id = 0;
1814         __kmp_release_queuing_lock( lck, gtid );
1815     }
1816 }
1817 
1818 static void
1819 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1820 {
1821     if ( __kmp_env_consistency_check ) {
1822         char const * const func = "omp_unset_nest_lock";
1823         KMP_MB();  /* in case another processor initialized lock */
1824         if ( lck->lk.initialized != lck ) {
1825             KMP_FATAL( LockIsUninitialized, func );
1826         }
1827         if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1828             KMP_FATAL( LockSimpleUsedAsNestable, func );
1829         }
1830         if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1831             KMP_FATAL( LockUnsettingFree, func );
1832         }
1833         if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1834             KMP_FATAL( LockUnsettingSetByAnother, func );
1835         }
1836     }
1837     __kmp_release_nested_queuing_lock( lck, gtid );
1838 }
1839 
1840 void
1841 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1842 {
1843     __kmp_init_queuing_lock( lck );
1844     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1845 }
1846 
1847 static void
1848 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1849 {
1850     __kmp_init_nested_queuing_lock( lck );
1851 }
1852 
1853 void
1854 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1855 {
1856     __kmp_destroy_queuing_lock( lck );
1857     lck->lk.depth_locked = 0;
1858 }
1859 
1860 static void
1861 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1862 {
1863     if ( __kmp_env_consistency_check ) {
1864         char const * const func = "omp_destroy_nest_lock";
1865         if ( lck->lk.initialized != lck ) {
1866             KMP_FATAL( LockIsUninitialized, func );
1867         }
1868         if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1869             KMP_FATAL( LockSimpleUsedAsNestable, func );
1870         }
1871         if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1872             KMP_FATAL( LockStillOwned, func );
1873         }
1874     }
1875     __kmp_destroy_nested_queuing_lock( lck );
1876 }
1877 
1878 
1879 //
1880 // access functions to fields which don't exist for all lock kinds.
1881 //
1882 
1883 static int
1884 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1885 {
1886     return lck == lck->lk.initialized;
1887 }
1888 
1889 static const ident_t *
1890 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1891 {
1892     return lck->lk.location;
1893 }
1894 
1895 static void
1896 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1897 {
1898     lck->lk.location = loc;
1899 }
1900 
1901 static kmp_lock_flags_t
1902 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1903 {
1904     return lck->lk.flags;
1905 }
1906 
1907 static void
1908 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1909 {
1910     lck->lk.flags = flags;
1911 }
1912 
1913 #if KMP_USE_ADAPTIVE_LOCKS
1914 
1915 /*
1916     RTM Adaptive locks
1917 */
1918 
1919 // TODO: Use the header for intrinsics below with the compiler 13.0
1920 //#include <immintrin.h>
1921 
1922 // Values from the status register after failed speculation.
1923 #define _XBEGIN_STARTED          (~0u)
1924 #define _XABORT_EXPLICIT         (1 << 0)
1925 #define _XABORT_RETRY            (1 << 1)
1926 #define _XABORT_CONFLICT         (1 << 2)
1927 #define _XABORT_CAPACITY         (1 << 3)
1928 #define _XABORT_DEBUG            (1 << 4)
1929 #define _XABORT_NESTED           (1 << 5)
1930 #define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))
1931 
1932 // Aborts for which it's worth trying again immediately
1933 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1934 
1935 #define STRINGIZE_INTERNAL(arg) #arg
1936 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1937 
1938 // Access to RTM instructions
1939 
1940 /*
1941   A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1942   This is the same definition as the compiler intrinsic that will be supported at some point.
1943 */
1944 static __inline int _xbegin()
1945 {
1946     int res = -1;
1947 
1948 #if KMP_OS_WINDOWS
1949 #if KMP_ARCH_X86_64
1950     _asm {
1951         _emit 0xC7
1952         _emit 0xF8
1953         _emit 2
1954         _emit 0
1955         _emit 0
1956         _emit 0
1957         jmp   L2
1958         mov   res, eax
1959     L2:
1960     }
1961 #else /* IA32 */
1962     _asm {
1963         _emit 0xC7
1964         _emit 0xF8
1965         _emit 2
1966         _emit 0
1967         _emit 0
1968         _emit 0
1969         jmp   L2
1970         mov   res, eax
1971     L2:
1972     }
1973 #endif // KMP_ARCH_X86_64
1974 #else
1975     /* Note that %eax must be noted as killed (clobbered), because
1976      * the XSR is returned in %eax(%rax) on abort.  Other register
1977      * values are restored, so don't need to be killed.
1978      *
1979      * We must also mark 'res' as an input and an output, since otherwise
1980      * 'res=-1' may be dropped as being dead, whereas we do need the
1981      * assignment on the successful (i.e., non-abort) path.
1982      */
1983     __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
1984                       "   .long  1f-1b-6\n"
1985                       "    jmp   2f\n"
1986                       "1:  movl  %%eax,%0\n"
1987                       "2:"
1988                       :"+r"(res)::"memory","%eax");
1989 #endif // KMP_OS_WINDOWS
1990     return res;
1991 }
1992 
1993 /*
1994   Transaction end
1995 */
1996 static __inline void _xend()
1997 {
1998 #if KMP_OS_WINDOWS
1999     __asm  {
2000         _emit 0x0f
2001         _emit 0x01
2002         _emit 0xd5
2003     }
2004 #else
2005     __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
2006 #endif
2007 }
2008 
2009 /*
2010   This is a macro, the argument must be a single byte constant which
2011   can be evaluated by the inline assembler, since it is emitted as a
2012   byte into the assembly code.
2013 */
2014 #if KMP_OS_WINDOWS
2015 #define _xabort(ARG)                            \
2016     _asm _emit 0xc6                             \
2017     _asm _emit 0xf8                             \
2018     _asm _emit ARG
2019 #else
2020 #define _xabort(ARG) \
2021     __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2022 #endif
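
//
// Illustrative sketch (comments only, not compiled): the usage pattern the
// three primitives above support, and which __kmp_test_adaptive_lock_only()
// below follows.  fallback() stands for a hypothetical non-speculative path.
//
//     kmp_uint32 status = _xbegin();
//     if ( status == _XBEGIN_STARTED ) {
//         // ... speculative critical section; _xabort(tag) to bail out ...
//         _xend();                         // commit the transaction
//     }
//     else if ( status & SOFT_ABORT_MASK ) {
//         // transient abort (retry/conflict/explicit): worth retrying
//     }
//     else {
//         fallback();                      // hard abort: don't speculate
//     }
//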
2023 
2024 //
//    Statistics are collected for testing purposes.
2026 //
2027 #if KMP_DEBUG_ADAPTIVE_LOCKS
2028 
2029 // We accumulate speculative lock statistics when the lock is destroyed.
2030 // We keep locks that haven't been destroyed in the liveLocks list
2031 // so that we can grab their statistics too.
2032 static kmp_adaptive_lock_statistics_t destroyedStats;
2033 
2034 // To hold the list of live locks.
2035 static kmp_adaptive_lock_t liveLocks;
2036 
2037 // A lock so we can safely update the list of locks.
2038 static kmp_bootstrap_lock_t chain_lock;
2039 
2040 // Initialize the list of stats.
2041 void
2042 __kmp_init_speculative_stats()
2043 {
2044     kmp_adaptive_lock *lck = &liveLocks;
2045 
2046     memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2047     lck->stats.next = lck;
2048     lck->stats.prev = lck;
2049 
2050     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2051     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2052 
2053     __kmp_init_bootstrap_lock( &chain_lock );
2054 
2055 }
2056 
2057 // Insert the lock into the circular list
2058 static void
2059 __kmp_remember_lock( kmp_adaptive_lock * lck )
2060 {
2061     __kmp_acquire_bootstrap_lock( &chain_lock );
2062 
2063     lck->stats.next = liveLocks.stats.next;
2064     lck->stats.prev = &liveLocks;
2065 
2066     liveLocks.stats.next = lck;
2067     lck->stats.next->stats.prev  = lck;
2068 
2069     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2070     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2071 
2072     __kmp_release_bootstrap_lock( &chain_lock );
2073 }
2074 
2075 static void
2076 __kmp_forget_lock( kmp_adaptive_lock * lck )
2077 {
2078     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2079     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2080 
2081     kmp_adaptive_lock * n = lck->stats.next;
2082     kmp_adaptive_lock * p = lck->stats.prev;
2083 
2084     n->stats.prev = p;
2085     p->stats.next = n;
2086 }
2087 
2088 static void
2089 __kmp_zero_speculative_stats( kmp_adaptive_lock * lck )
2090 {
2091     memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2092     __kmp_remember_lock( lck );
2093 }
2094 
2095 static void
2096 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_t * lck )
2097 {
2098     kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2099 
2100     t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2101     t->successfulSpeculations += s->successfulSpeculations;
2102     t->hardFailedSpeculations += s->hardFailedSpeculations;
2103     t->softFailedSpeculations += s->softFailedSpeculations;
2104     t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2105     t->lemmingYields          += s->lemmingYields;
2106 }
2107 
2108 static void
2109 __kmp_accumulate_speculative_stats( kmp_adaptive_lock * lck)
2110 {
2111     kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2112 
2113     __kmp_acquire_bootstrap_lock( &chain_lock );
2114 
    __kmp_add_stats( t, lck );
2116     __kmp_forget_lock( lck );
2117 
2118     __kmp_release_bootstrap_lock( &chain_lock );
2119 }
2120 
2121 static float
2122 percent (kmp_uint32 count, kmp_uint32 total)
2123 {
2124     return (total == 0) ? 0.0: (100.0 * count)/total;
2125 }
2126 
2127 static
2128 FILE * __kmp_open_stats_file()
2129 {
2130     if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2131         return stdout;
2132 
2133     size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2134     char buffer[buffLen];
2135     snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile, getpid());
2136     FILE * result = fopen(&buffer[0], "w");
2137 
2138     // Maybe we should issue a warning here...
2139     return result ? result : stdout;
2140 }
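
//
// For example (hypothetical setting): a stats file name of "adaptive_%d.log"
// is expanded by the snprintf() above to "adaptive_12345.log" for pid 12345,
// while "-" sends the report to stdout.
//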
2141 
2142 void
2143 __kmp_print_speculative_stats()
2144 {
2145     if (__kmp_user_lock_kind != lk_adaptive)
2146         return;
2147 
2148     FILE * statsFile = __kmp_open_stats_file();
2149 
2150     kmp_adaptive_lock_statistics_t total = destroyedStats;
2151     kmp_adaptive_lock *lck;
2152 
2153     for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2154         __kmp_add_stats( &total, lck );
2155     }
2156     kmp_adaptive_lock_statistics_t *t = &total;
2157     kmp_uint32 totalSections     = t->nonSpeculativeAcquires + t->successfulSpeculations;
2158     kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2159                                    t->softFailedSpeculations;
2160 
2161     fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2162     fprintf ( statsFile, " Lock parameters: \n"
2163              "   max_soft_retries               : %10d\n"
2164              "   max_badness                    : %10d\n",
2165              __kmp_adaptive_backoff_params.max_soft_retries,
2166              __kmp_adaptive_backoff_params.max_badness);
2167     fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2168     fprintf( statsFile, " Total critical sections          : %10d\n", totalSections );
2169     fprintf( statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
2170              t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2171     fprintf( statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
2172              t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2173     fprintf( statsFile, " Lemming yields                   : %10d\n\n", t->lemmingYields );
2174 
2175     fprintf( statsFile, " Speculative acquire attempts     : %10d\n", totalSpeculations );
2176     fprintf( statsFile, " Successes                        : %10d (%5.1f%%)\n",
2177              t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2178     fprintf( statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
2179              t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2180     fprintf( statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
2181              t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2182 
2183     if (statsFile != stdout)
2184         fclose( statsFile );
2185 }
2186 
2187 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2188 #else
2189 # define KMP_INC_STAT(lck,stat)
2190 
2191 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2192 
2193 static inline bool
2194 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2195 {
2196     // It is enough to check that the head_id is zero.
2197     // We don't also need to check the tail.
2198     bool res = lck->lk.head_id == 0;
2199 
2200     // We need a fence here, since we must ensure that no memory operations
2201     // from later in this thread float above that read.
2202 #if KMP_COMPILER_ICC
2203     _mm_mfence();
2204 #else
2205     __sync_synchronize();
2206 #endif
2207 
2208     return res;
2209 }
2210 
2211 // Functions for manipulating the badness
2212 static __inline void
2213 __kmp_update_badness_after_success( kmp_queuing_lock_t *lck )
2214 {
2215     // Reset the badness to zero so we eagerly try to speculate again
2216     lck->lk.adaptive.badness = 0;
2217     KMP_INC_STAT(lck,successfulSpeculations);
2218 }
2219 
2220 // Create a bit mask with one more set bit.
2221 static __inline void
2222 __kmp_step_badness( kmp_queuing_lock_t *lck )
2223 {
2224     kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2225     if ( newBadness > lck->lk.adaptive.max_badness) {
2226         return;
2227     } else {
2228         lck->lk.adaptive.badness = newBadness;
2229     }
2230 }
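
//
// Illustrative progression (comments only): starting from 0, each failed
// round of speculation shifts in another low bit, so badness walks through
//
//     0 -> 1 -> 3 -> 7 -> 15 -> ...
//
// and stops growing once the next value would exceed max_badness.
//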
2231 
2232 // Check whether speculation should be attempted.
2233 static __inline int
2234 __kmp_should_speculate( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2235 {
2236     kmp_uint32 badness = lck->lk.adaptive.badness;
    kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2238     int res = (attempts & badness) == 0;
2239     return res;
2240 }
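
//
// Worked example (comments only): with badness == 3, (attempts & badness) == 0
// only when the two low bits of acquire_attempts are clear, so speculation is
// attempted on roughly every 4th acquire attempt; with badness == 0 every
// attempt speculates.
//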
2241 
2242 // Attempt to acquire only the speculative lock.
2243 // Does not back off to the non-speculative lock.
2244 //
2245 static int
2246 __kmp_test_adaptive_lock_only( kmp_queuing_lock_t * lck, kmp_int32 gtid )
2247 {
2248     int retries = lck->lk.adaptive.max_soft_retries;
2249 
2250     // We don't explicitly count the start of speculation, rather we record
2251     // the results (success, hard fail, soft fail). The sum of all of those
2252     // is the total number of times we started speculation since all
2253     // speculations must end one of those ways.
2254     do
2255     {
2256         kmp_uint32 status = _xbegin();
2257         // Switch this in to disable actual speculation but exercise
2258         // at least some of the rest of the code. Useful for debugging...
2259         // kmp_uint32 status = _XABORT_NESTED;
2260 
2261         if (status == _XBEGIN_STARTED )
2262         { /* We have successfully started speculation
2263            * Check that no-one acquired the lock for real between when we last looked
2264            * and now. This also gets the lock cache line into our read-set,
2265            * which we need so that we'll abort if anyone later claims it for real.
2266            */
2267             if (! __kmp_is_unlocked_queuing_lock( lck ) )
2268             {
2269                 // Lock is now visibly acquired, so someone beat us to it.
2270                 // Abort the transaction so we'll restart from _xbegin with the
2271                 // failure status.
2272                 _xabort(0x01)
2273                 KMP_ASSERT2( 0, "should not get here" );
2274             }
2275             return 1;   // Lock has been acquired (speculatively)
2276         } else {
2277             // We have aborted, update the statistics
2278             if ( status & SOFT_ABORT_MASK)
2279             {
2280                 KMP_INC_STAT(lck,softFailedSpeculations);
2281                 // and loop round to retry.
2282             }
2283             else
2284             {
2285                 KMP_INC_STAT(lck,hardFailedSpeculations);
2286                 // Give up if we had a hard failure.
2287                 break;
2288             }
2289         }
2290     }  while( retries-- ); // Loop while we have retries, and didn't fail hard.
2291 
2292     // Either we had a hard failure or we didn't succeed softly after
2293     // the full set of attempts, so back off the badness.
2294     __kmp_step_badness( lck );
2295     return 0;
2296 }
2297 
2298 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2299 // if the speculative lock cannot be acquired.
2300 // We can succeed speculatively, non-speculatively, or fail.
2301 static int
2302 __kmp_test_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2303 {
2304     // First try to acquire the lock speculatively
2305     if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2306         return 1;
2307 
2308     // Speculative acquisition failed, so try to acquire it non-speculatively.
2309     // Count the non-speculative acquire attempt
2310     lck->lk.adaptive.acquire_attempts++;
2311 
2312     // Use base, non-speculative lock.
2313     if ( __kmp_test_queuing_lock( lck, gtid ) )
2314     {
2315         KMP_INC_STAT(lck,nonSpeculativeAcquires);
2316         return 1;       // Lock is acquired (non-speculatively)
2317     }
2318     else
2319     {
2320         return 0;       // Failed to acquire the lock, it's already visibly locked.
2321     }
2322 }
2323 
2324 static int
2325 __kmp_test_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2326 {
2327     if ( __kmp_env_consistency_check ) {
2328         char const * const func = "omp_test_lock";
2329         if ( lck->lk.initialized != lck ) {
2330             KMP_FATAL( LockIsUninitialized, func );
2331         }
2332     }
2333 
2334     int retval = __kmp_test_adaptive_lock( lck, gtid );
2335 
2336     if ( __kmp_env_consistency_check && retval ) {
2337         lck->lk.owner_id = gtid + 1;
2338     }
2339     return retval;
2340 }
2341 
2342 // Block until we can acquire a speculative, adaptive lock.
2343 // We check whether we should be trying to speculate.
2344 // If we should be, we check the real lock to see if it is free,
2345 // and, if not, pause without attempting to acquire it until it is.
2346 // Then we try the speculative acquire.
// This means that although we suffer from lemmings a little (because we
// can't acquire the lock speculatively until the queue of waiting threads
// has cleared), we don't get into a state where we can never acquire the
// lock speculatively (because we force the queue to clear by preventing
// new arrivals from entering it).
// This does mean that when we're trying to break lemmings, the lock
// is no longer fair. However, OpenMP makes no guarantee that its
// locks are fair, so this isn't a real problem.
2356 static void
2357 __kmp_acquire_adaptive_lock( kmp_queuing_lock_t * lck, kmp_int32 gtid )
2358 {
2359     if ( __kmp_should_speculate( lck, gtid ) )
2360     {
2361         if ( __kmp_is_unlocked_queuing_lock( lck ) )
2362         {
2363             if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2364                 return;
2365             // We tried speculation and failed, so give up.
2366         }
2367         else
2368         {
            // We can't try speculation until the lock is free, so we
            // pause here (without suspending on the queuing lock) to
            // allow it to drain, then try again.
2372             // All other threads will also see the same result for
2373             // shouldSpeculate, so will be doing the same if they
2374             // try to claim the lock from now on.
2375             while ( ! __kmp_is_unlocked_queuing_lock( lck ) )
2376             {
2377                 KMP_INC_STAT(lck,lemmingYields);
2378                 __kmp_yield (TRUE);
2379             }
2380 
2381             if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2382                 return;
2383         }
2384     }
2385 
2386     // Speculative acquisition failed, so acquire it non-speculatively.
2387     // Count the non-speculative acquire attempt
2388     lck->lk.adaptive.acquire_attempts++;
2389 
2390     __kmp_acquire_queuing_lock_timed_template<FALSE>( lck, gtid );
2391     // We have acquired the base lock, so count that.
2392     KMP_INC_STAT(lck,nonSpeculativeAcquires );
2393 }
2394 
2395 static void
2396 __kmp_acquire_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2397 {
2398     if ( __kmp_env_consistency_check ) {
2399         char const * const func = "omp_set_lock";
2400         if ( lck->lk.initialized != lck ) {
2401             KMP_FATAL( LockIsUninitialized, func );
2402         }
2403         if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
2404             KMP_FATAL( LockIsAlreadyOwned, func );
2405         }
2406     }
2407 
2408     __kmp_acquire_adaptive_lock( lck, gtid );
2409 
2410     if ( __kmp_env_consistency_check ) {
2411         lck->lk.owner_id = gtid + 1;
2412     }
2413 }
2414 
2415 static void
2416 __kmp_release_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2417 {
2418     if ( __kmp_is_unlocked_queuing_lock( lck ) )
2419     {   // If the lock doesn't look claimed we must be speculating.
2420         // (Or the user's code is buggy and they're releasing without locking;
2421         // if we had XTEST we'd be able to check that case...)
2422         _xend();        // Exit speculation
2423         __kmp_update_badness_after_success( lck );
2424     }
2425     else
2426     {   // Since the lock *is* visibly locked we're not speculating,
2427         // so should use the underlying lock's release scheme.
2428         __kmp_release_queuing_lock( lck, gtid );
2429     }
2430 }
2431 
2432 static void
2433 __kmp_release_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2434 {
2435     if ( __kmp_env_consistency_check ) {
2436         char const * const func = "omp_unset_lock";
2437         KMP_MB();  /* in case another processor initialized lock */
2438         if ( lck->lk.initialized != lck ) {
2439             KMP_FATAL( LockIsUninitialized, func );
2440         }
2441         if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
2442             KMP_FATAL( LockUnsettingFree, func );
2443         }
2444         if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
2445             KMP_FATAL( LockUnsettingSetByAnother, func );
2446         }
2447         lck->lk.owner_id = 0;
2448     }
2449     __kmp_release_adaptive_lock( lck, gtid );
2450 }
2451 
2452 static void
2453 __kmp_init_adaptive_lock( kmp_queuing_lock_t *lck )
2454 {
2455     __kmp_init_queuing_lock( lck );
2456     lck->lk.adaptive.badness = 0;
2457     lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2458     lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2459     lck->lk.adaptive.max_badness      = __kmp_adaptive_backoff_params.max_badness;
2460 #if KMP_DEBUG_ADAPTIVE_LOCKS
2461     __kmp_zero_speculative_stats( &lck->lk.adaptive );
2462 #endif
2463     KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2464 }
2465 
2466 static void
2467 __kmp_init_adaptive_lock_with_checks( kmp_queuing_lock_t * lck )
2468 {
2469     __kmp_init_adaptive_lock( lck );
2470 }
2471 
2472 static void
2473 __kmp_destroy_adaptive_lock( kmp_queuing_lock_t *lck )
2474 {
2475 #if KMP_DEBUG_ADAPTIVE_LOCKS
2476     __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2477 #endif
2478     __kmp_destroy_queuing_lock (lck);
2479     // Nothing needed for the speculative part.
2480 }
2481 
2482 static void
2483 __kmp_destroy_adaptive_lock_with_checks( kmp_queuing_lock_t *lck )
2484 {
2485     if ( __kmp_env_consistency_check ) {
2486         char const * const func = "omp_destroy_lock";
2487         if ( lck->lk.initialized != lck ) {
2488             KMP_FATAL( LockIsUninitialized, func );
2489         }
2490         if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
2491             KMP_FATAL( LockStillOwned, func );
2492         }
2493     }
2494     __kmp_destroy_adaptive_lock( lck );
2495 }
2496 
2497 
2498 #endif // KMP_USE_ADAPTIVE_LOCKS
2499 
2500 
2501 /* ------------------------------------------------------------------------ */
2502 /* DRDPA ticket locks                                                */
2503 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
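
//
// A minimal pseudocode sketch (comments only) of the scheme implemented
// below: each acquirer atomically takes a ticket and spins on its own slot
// of a power-of-two polling array, and the releaser writes the next ticket
// into the slot that the next waiter is watching.
//
//     ticket = fetch_and_inc( next_ticket );            // KMP_TEST_THEN_INC64
//     while ( polls[ticket & mask].poll < ticket )
//         ;                                             // spin on own slot
//     /* ... critical section ... */
//     polls[(ticket + 1) & mask].poll = ticket + 1;     // release
//
// The polling area can also be grown or shrunk ("reconfigured") while the
// lock is held, which is what the mask / polls re-reads below guard against.
//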
2504 
2505 static kmp_int32
2506 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2507 {
2508     return TCR_4( lck->lk.owner_id ) - 1;
2509 }
2510 
2511 static inline bool
2512 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2513 {
2514     return lck->lk.depth_locked != -1;
2515 }
2516 
2517 __forceinline static void
2518 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2519 {
2520     kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2521     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2522     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2523       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2524       TCR_PTR(lck->lk.polls);                           // volatile load
2525 
2526 #ifdef USE_LOCK_PROFILE
2527     if (TCR_8(polls[ticket & mask].poll) != ticket)
2528         __kmp_printf("LOCK CONTENTION: %p\n", lck);
2529     /* else __kmp_printf( "." );*/
2530 #endif /* USE_LOCK_PROFILE */
2531 
2532     //
2533     // Now spin-wait, but reload the polls pointer and mask, in case the
2534     // polling area has been reconfigured.  Unless it is reconfigured, the
2535     // reloads stay in L1 cache and are cheap.
2536     //
2537     // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2538     //
2539     // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2540     // and poll to be re-read every spin iteration.
2541     //
2542     kmp_uint32 spins;
2543 
2544     KMP_FSYNC_PREPARE(lck);
2545     KMP_INIT_YIELD(spins);
    while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2547         __kmp_static_delay(TRUE);
2548 
2549         //
2550         // If we are oversubscribed,
2551         // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2552         // CPU Pause is in the macros for yield.
2553         //
2554         KMP_YIELD(TCR_4(__kmp_nth)
2555           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2556         KMP_YIELD_SPIN(spins);
2557 
2558         // Re-read the mask and the poll pointer from the lock structure.
2559         //
2560         // Make certain that "mask" is read before "polls" !!!
2561         //
        // If another thread reconfigures the polling area and updates these
        // values, and we then read the new value of mask but the old polls
        // pointer, we could access memory beyond the end of the old polling
        // area.
2566         //
2567         mask = TCR_8(lck->lk.mask);                     // volatile load
2568         polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2569           TCR_PTR(lck->lk.polls);                       // volatile load
2570     }
2571 
2572     //
2573     // Critical section starts here
2574     //
2575     KMP_FSYNC_ACQUIRED(lck);
2576     KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2577       ticket, lck));
2578     lck->lk.now_serving = ticket;                       // non-volatile store
2579 
2580     //
2581     // Deallocate a garbage polling area if we know that we are the last
2582     // thread that could possibly access it.
2583     //
2584     // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2585     // ticket.
2586     //
2587     if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2588         __kmp_free((void *)lck->lk.old_polls);
2589         lck->lk.old_polls = NULL;
2590         lck->lk.cleanup_ticket = 0;
2591     }
2592 
2593     //
2594     // Check to see if we should reconfigure the polling area.
2595     // If there is still a garbage polling area to be deallocated from a
2596     // previous reconfiguration, let a later thread reconfigure it.
2597     //
2598     if (lck->lk.old_polls == NULL) {
2599         bool reconfigure = false;
2600         volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2601         kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2602 
2603         if (TCR_4(__kmp_nth)
2604           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2605             //
2606             // We are in oversubscription mode.  Contract the polling area
2607             // down to a single location, if that hasn't been done already.
2608             //
2609             if (num_polls > 1) {
2610                 reconfigure = true;
2611                 num_polls = TCR_4(lck->lk.num_polls);
2612                 mask = 0;
2613                 num_polls = 1;
2614                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2615                   __kmp_allocate(num_polls * sizeof(*polls));
2616                 polls[0].poll = ticket;
2617             }
2618         }
2619         else {
2620             //
2621             // We are in under/fully subscribed mode.  Check the number of
2622             // threads waiting on the lock.  The size of the polling area
2623             // should be at least the number of threads waiting.
2624             //
2625             kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2626             if (num_waiting > num_polls) {
2627                 kmp_uint32 old_num_polls = num_polls;
2628                 reconfigure = true;
2629                 do {
2630                     mask = (mask << 1) | 1;
2631                     num_polls *= 2;
2632                 } while (num_polls <= num_waiting);
2633 
2634                 //
2635                 // Allocate the new polling area, and copy the relevant portion
2636                 // of the old polling area to the new area.  __kmp_allocate()
2637                 // zeroes the memory it allocates, and most of the old area is
2638                 // just zero padding, so we only copy the release counters.
2639                 //
2640                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2641                   __kmp_allocate(num_polls * sizeof(*polls));
2642                 kmp_uint32 i;
2643                 for (i = 0; i < old_num_polls; i++) {
2644                     polls[i].poll = old_polls[i].poll;
2645                 }
2646             }
2647         }
2648 
2649         if (reconfigure) {
2650             //
2651             // Now write the updated fields back to the lock structure.
2652             //
2653             // Make certain that "polls" is written before "mask" !!!
2654             //
2655             // If another thread picks up the new value of mask and the old
            // polls pointer, it could access memory beyond the end of the
2657             // old polling area.
2658             //
2659             // On x86, we need memory fences.
2660             //
2661             KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2662               ticket, lck, num_polls));
2663 
2664             lck->lk.old_polls = old_polls;              // non-volatile store
2665             lck->lk.polls = polls;                      // volatile store
2666 
2667             KMP_MB();
2668 
2669             lck->lk.num_polls = num_polls;              // non-volatile store
2670             lck->lk.mask = mask;                        // volatile store
2671 
2672             KMP_MB();
2673 
2674             //
2675             // Only after the new polling area and mask have been flushed
2676             // to main memory can we update the cleanup ticket field.
2677             //
2678             // volatile load / non-volatile store
2679             //
2680             lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2681         }
2682     }
2683 }
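
//
// Worked example (comments only): with 5 threads waiting, the sizing loop
// above grows the polling area from num_polls = 1, mask = 0 through
//
//     num_polls = 2, mask = 1  ->  num_polls = 4, mask = 3
//     ->  num_polls = 8, mask = 7
//
// so num_polls stays a power of two and mask == num_polls - 1, and
// "ticket & mask" always indexes a valid slot.
//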
2684 
2685 void
2686 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2687 {
2688     __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2689 }
2690 
2691 static void
2692 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2693 {
2694     if ( __kmp_env_consistency_check ) {
2695         char const * const func = "omp_set_lock";
2696         if ( lck->lk.initialized != lck ) {
2697             KMP_FATAL( LockIsUninitialized, func );
2698         }
2699         if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2700             KMP_FATAL( LockNestableUsedAsSimple, func );
2701         }
2702         if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2703             KMP_FATAL( LockIsAlreadyOwned, func );
2704         }
2705     }
2706 
2707     __kmp_acquire_drdpa_lock( lck, gtid );
2708 
2709     if ( __kmp_env_consistency_check ) {
2710         lck->lk.owner_id = gtid + 1;
2711     }
2712 }
2713 
2714 int
2715 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2716 {
2717     //
2718     // First get a ticket, then read the polls pointer and the mask.
2719     // The polls pointer must be read before the mask!!! (See above)
2720     //
2721     kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
2722     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2723       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2724       TCR_PTR(lck->lk.polls);                           // volatile load
2725     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2726     if (TCR_8(polls[ticket & mask].poll) == ticket) {
2727         kmp_uint64 next_ticket = ticket + 1;
2728         if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2729           ticket, next_ticket)) {
2730             KMP_FSYNC_ACQUIRED(lck);
2731             KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2732                ticket, lck));
2733             lck->lk.now_serving = ticket;               // non-volatile store
2734 
2735             //
2736             // Since no threads are waiting, there is no possibility that
2737             // we would want to reconfigure the polling area.  We might
2738             // have the cleanup ticket value (which says that it is now
2739             // safe to deallocate old_polls), but we'll let a later thread
2740             // which calls __kmp_acquire_lock do that - this routine
            // isn't supposed to block, and we would risk blocking if we
2742             // called __kmp_free() to do the deallocation.
2743             //
2744             return TRUE;
2745         }
2746     }
2747     return FALSE;
2748 }
2749 
2750 static int
2751 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2752 {
2753     if ( __kmp_env_consistency_check ) {
2754         char const * const func = "omp_test_lock";
2755         if ( lck->lk.initialized != lck ) {
2756             KMP_FATAL( LockIsUninitialized, func );
2757         }
2758         if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2759             KMP_FATAL( LockNestableUsedAsSimple, func );
2760         }
2761     }
2762 
2763     int retval = __kmp_test_drdpa_lock( lck, gtid );
2764 
2765     if ( __kmp_env_consistency_check && retval ) {
2766         lck->lk.owner_id = gtid + 1;
2767     }
2768     return retval;
2769 }
2770 
2771 void
2772 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2773 {
2774     //
2775     // Read the ticket value from the lock data struct, then the polls
2776     // pointer and the mask.  The polls pointer must be read before the
2777     // mask!!! (See above)
2778     //
2779     kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
2780     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2781       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2782       TCR_PTR(lck->lk.polls);                           // volatile load
2783     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2784     KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2785        ticket - 1, lck));
2786     KMP_FSYNC_RELEASING(lck);
2787     KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2788 }
2789 
2790 static void
2791 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2792 {
2793     if ( __kmp_env_consistency_check ) {
2794         char const * const func = "omp_unset_lock";
2795         KMP_MB();  /* in case another processor initialized lock */
2796         if ( lck->lk.initialized != lck ) {
2797             KMP_FATAL( LockIsUninitialized, func );
2798         }
2799         if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2800             KMP_FATAL( LockNestableUsedAsSimple, func );
2801         }
2802         if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2803             KMP_FATAL( LockUnsettingFree, func );
2804         }
2805         if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2806           && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2807             KMP_FATAL( LockUnsettingSetByAnother, func );
2808         }
2809         lck->lk.owner_id = 0;
2810     }
2811     __kmp_release_drdpa_lock( lck, gtid );
2812 }
2813 
2814 void
2815 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2816 {
2817     lck->lk.location = NULL;
2818     lck->lk.mask = 0;
2819     lck->lk.num_polls = 1;
2820     lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2821       __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2822     lck->lk.cleanup_ticket = 0;
2823     lck->lk.old_polls = NULL;
2824     lck->lk.next_ticket = 0;
2825     lck->lk.now_serving = 0;
2826     lck->lk.owner_id = 0;      // no thread owns the lock.
2827     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2828     lck->lk.initialized = lck;
2829 
2830     KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2831 }
2832 
2833 static void
2834 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2835 {
2836     __kmp_init_drdpa_lock( lck );
2837 }
2838 
2839 void
2840 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2841 {
2842     lck->lk.initialized = NULL;
2843     lck->lk.location    = NULL;
2844     if (lck->lk.polls != NULL) {
2845         __kmp_free((void *)lck->lk.polls);
2846         lck->lk.polls = NULL;
2847     }
2848     if (lck->lk.old_polls != NULL) {
2849         __kmp_free((void *)lck->lk.old_polls);
2850         lck->lk.old_polls = NULL;
2851     }
2852     lck->lk.mask = 0;
2853     lck->lk.num_polls = 0;
2854     lck->lk.cleanup_ticket = 0;
2855     lck->lk.next_ticket = 0;
2856     lck->lk.now_serving = 0;
2857     lck->lk.owner_id = 0;
2858     lck->lk.depth_locked = -1;
2859 }
2860 
2861 static void
2862 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2863 {
2864     if ( __kmp_env_consistency_check ) {
2865         char const * const func = "omp_destroy_lock";
2866         if ( lck->lk.initialized != lck ) {
2867             KMP_FATAL( LockIsUninitialized, func );
2868         }
2869         if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2870             KMP_FATAL( LockNestableUsedAsSimple, func );
2871         }
2872         if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2873             KMP_FATAL( LockStillOwned, func );
2874         }
2875     }
2876     __kmp_destroy_drdpa_lock( lck );
2877 }
2878 
2879 
2880 //
2881 // nested drdpa ticket locks
2882 //
2883 
2884 void
2885 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2886 {
2887     KMP_DEBUG_ASSERT( gtid >= 0 );
2888 
2889     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2890         lck->lk.depth_locked += 1;
2891     }
2892     else {
2893         __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2894         KMP_MB();
2895         lck->lk.depth_locked = 1;
2896         KMP_MB();
2897         lck->lk.owner_id = gtid + 1;
2898     }
2899 }
2900 
2901 static void
2902 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2903 {
2904     if ( __kmp_env_consistency_check ) {
2905         char const * const func = "omp_set_nest_lock";
2906         if ( lck->lk.initialized != lck ) {
2907             KMP_FATAL( LockIsUninitialized, func );
2908         }
2909         if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2910             KMP_FATAL( LockSimpleUsedAsNestable, func );
2911         }
2912     }
2913     __kmp_acquire_nested_drdpa_lock( lck, gtid );
2914 }
2915 
2916 int
2917 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2918 {
2919     int retval;
2920 
2921     KMP_DEBUG_ASSERT( gtid >= 0 );
2922 
2923     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2924         retval = ++lck->lk.depth_locked;
2925     }
2926     else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2927         retval = 0;
2928     }
2929     else {
2930         KMP_MB();
2931         retval = lck->lk.depth_locked = 1;
2932         KMP_MB();
2933         lck->lk.owner_id = gtid + 1;
2934     }
2935     return retval;
2936 }
2937 
2938 static int
2939 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2940 {
2941     if ( __kmp_env_consistency_check ) {
2942         char const * const func = "omp_test_nest_lock";
2943         if ( lck->lk.initialized != lck ) {
2944             KMP_FATAL( LockIsUninitialized, func );
2945         }
2946         if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2947             KMP_FATAL( LockSimpleUsedAsNestable, func );
2948         }
2949     }
2950     return __kmp_test_nested_drdpa_lock( lck, gtid );
2951 }
2952 
2953 void
2954 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2955 {
2956     KMP_DEBUG_ASSERT( gtid >= 0 );
2957 
2958     KMP_MB();
2959     if ( --(lck->lk.depth_locked) == 0 ) {
2960         KMP_MB();
2961         lck->lk.owner_id = 0;
2962         __kmp_release_drdpa_lock( lck, gtid );
2963     }
2964 }
2965 
2966 static void
2967 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2968 {
2969     if ( __kmp_env_consistency_check ) {
2970         char const * const func = "omp_unset_nest_lock";
2971         KMP_MB();  /* in case another processor initialized lock */
2972         if ( lck->lk.initialized != lck ) {
2973             KMP_FATAL( LockIsUninitialized, func );
2974         }
2975         if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2976             KMP_FATAL( LockSimpleUsedAsNestable, func );
2977         }
2978         if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2979             KMP_FATAL( LockUnsettingFree, func );
2980         }
2981         if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2982             KMP_FATAL( LockUnsettingSetByAnother, func );
2983         }
2984     }
2985     __kmp_release_nested_drdpa_lock( lck, gtid );
2986 }
2987 
2988 void
2989 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2990 {
2991     __kmp_init_drdpa_lock( lck );
2992     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2993 }
2994 
2995 static void
2996 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2997 {
2998     __kmp_init_nested_drdpa_lock( lck );
2999 }
3000 
3001 void
3002 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
3003 {
3004     __kmp_destroy_drdpa_lock( lck );
3005     lck->lk.depth_locked = 0;
3006 }
3007 
3008 static void
3009 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
3010 {
3011     if ( __kmp_env_consistency_check ) {
3012         char const * const func = "omp_destroy_nest_lock";
3013         if ( lck->lk.initialized != lck ) {
3014             KMP_FATAL( LockIsUninitialized, func );
3015         }
3016         if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
3017             KMP_FATAL( LockSimpleUsedAsNestable, func );
3018         }
3019         if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
3020             KMP_FATAL( LockStillOwned, func );
3021         }
3022     }
3023     __kmp_destroy_nested_drdpa_lock( lck );
3024 }
3025 
3026 
3027 //
3028 // access functions to fields which don't exist for all lock kinds.
3029 //
3030 
3031 static int
3032 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
3033 {
3034     return lck == lck->lk.initialized;
3035 }
3036 
3037 static const ident_t *
3038 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
3039 {
3040     return lck->lk.location;
3041 }
3042 
3043 static void
3044 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3045 {
3046     lck->lk.location = loc;
3047 }
3048 
3049 static kmp_lock_flags_t
3050 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3051 {
3052     return lck->lk.flags;
3053 }
3054 
3055 static void
3056 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3057 {
3058     lck->lk.flags = flags;
3059 }
3060 
3061 /* ------------------------------------------------------------------------ */
3062 /* user locks
3063  *
3064  * They are implemented as a table of function pointers which are set to the
3065  * lock functions of the appropriate kind, once that has been determined.
3066  */
3067 
3068 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3069 
3070 size_t __kmp_base_user_lock_size = 0;
3071 size_t __kmp_user_lock_size = 0;
3072 
3073 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3074 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3075 
3076 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3077 void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3078 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3079 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3080 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3081 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3082 
3083 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3084 void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3085 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3086 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3087 
3088 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3089 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3090 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3091 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3092 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3093 
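//
// Illustrative sketch (comments only): once __kmp_set_user_lock_vptrs() below
// has filled in this table, generic lock code can dispatch through it without
// knowing the concrete lock kind, e.g.
//
//     kmp_user_lock_p lck = /* ... */;
//     ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );
//     ( *__kmp_release_user_lock_with_checks_ )( lck, gtid );
//
// lck and gtid stand for whatever the caller already has in hand.
//
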
3094 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3095 {
3096     switch ( user_lock_kind ) {
3097         case lk_default:
3098         default:
3099         KMP_ASSERT( 0 );
3100 
3101         case lk_tas: {
3102             __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3103             __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3104 
3105             __kmp_get_user_lock_owner_ =
3106               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3107               ( &__kmp_get_tas_lock_owner );
3108 
3109             __kmp_acquire_user_lock_with_checks_ =
3110               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3111               ( &__kmp_acquire_tas_lock_with_checks );
3112 
3113             __kmp_test_user_lock_with_checks_ =
3114               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3115               ( &__kmp_test_tas_lock_with_checks );
3116 
3117             __kmp_release_user_lock_with_checks_ =
3118               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3119               ( &__kmp_release_tas_lock_with_checks );
3120 
3121             __kmp_init_user_lock_with_checks_ =
3122               ( void ( * )( kmp_user_lock_p ) )
3123               ( &__kmp_init_tas_lock_with_checks );
3124 
3125             __kmp_destroy_user_lock_ =
3126               ( void ( * )( kmp_user_lock_p ) )
3127               ( &__kmp_destroy_tas_lock );
3128 
3129             __kmp_destroy_user_lock_with_checks_ =
3130               ( void ( * )( kmp_user_lock_p ) )
3131               ( &__kmp_destroy_tas_lock_with_checks );
3132 
3133             __kmp_acquire_nested_user_lock_with_checks_ =
3134               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3135               ( &__kmp_acquire_nested_tas_lock_with_checks );
3136 
3137             __kmp_test_nested_user_lock_with_checks_ =
3138               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3139               ( &__kmp_test_nested_tas_lock_with_checks );
3140 
3141             __kmp_release_nested_user_lock_with_checks_ =
3142               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3143               ( &__kmp_release_nested_tas_lock_with_checks );
3144 
3145             __kmp_init_nested_user_lock_with_checks_ =
3146               ( void ( * )( kmp_user_lock_p ) )
3147               ( &__kmp_init_nested_tas_lock_with_checks );
3148 
3149             __kmp_destroy_nested_user_lock_with_checks_ =
3150               ( void ( * )( kmp_user_lock_p ) )
3151               ( &__kmp_destroy_nested_tas_lock_with_checks );
3152 
3153              __kmp_is_user_lock_initialized_ =
3154                ( int ( * )( kmp_user_lock_p ) ) NULL;
3155 
3156              __kmp_get_user_lock_location_ =
3157                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3158 
3159              __kmp_set_user_lock_location_ =
3160                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3161 
3162              __kmp_get_user_lock_flags_ =
3163                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3164 
3165              __kmp_set_user_lock_flags_ =
3166                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3167         }
3168         break;
3169 
3170 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3171 
3172         case lk_futex: {
3173             __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3174             __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3175 
3176             __kmp_get_user_lock_owner_ =
3177               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3178               ( &__kmp_get_futex_lock_owner );
3179 
3180             __kmp_acquire_user_lock_with_checks_ =
3181               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3182               ( &__kmp_acquire_futex_lock_with_checks );
3183 
3184             __kmp_test_user_lock_with_checks_ =
3185               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3186               ( &__kmp_test_futex_lock_with_checks );
3187 
3188             __kmp_release_user_lock_with_checks_ =
3189               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3190               ( &__kmp_release_futex_lock_with_checks );
3191 
3192             __kmp_init_user_lock_with_checks_ =
3193               ( void ( * )( kmp_user_lock_p ) )
3194               ( &__kmp_init_futex_lock_with_checks );
3195 
3196             __kmp_destroy_user_lock_ =
3197               ( void ( * )( kmp_user_lock_p ) )
3198               ( &__kmp_destroy_futex_lock );
3199 
3200             __kmp_destroy_user_lock_with_checks_ =
3201               ( void ( * )( kmp_user_lock_p ) )
3202               ( &__kmp_destroy_futex_lock_with_checks );
3203 
3204             __kmp_acquire_nested_user_lock_with_checks_ =
3205               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3206               ( &__kmp_acquire_nested_futex_lock_with_checks );
3207 
3208             __kmp_test_nested_user_lock_with_checks_ =
3209               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3210               ( &__kmp_test_nested_futex_lock_with_checks );
3211 
3212             __kmp_release_nested_user_lock_with_checks_ =
3213               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3214               ( &__kmp_release_nested_futex_lock_with_checks );
3215 
3216             __kmp_init_nested_user_lock_with_checks_ =
3217               ( void ( * )( kmp_user_lock_p ) )
3218               ( &__kmp_init_nested_futex_lock_with_checks );
3219 
3220             __kmp_destroy_nested_user_lock_with_checks_ =
3221               ( void ( * )( kmp_user_lock_p ) )
3222               ( &__kmp_destroy_nested_futex_lock_with_checks );
3223 
3224              __kmp_is_user_lock_initialized_ =
3225                ( int ( * )( kmp_user_lock_p ) ) NULL;
3226 
3227              __kmp_get_user_lock_location_ =
3228                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3229 
3230              __kmp_set_user_lock_location_ =
3231                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3232 
3233              __kmp_get_user_lock_flags_ =
3234                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3235 
3236              __kmp_set_user_lock_flags_ =
3237                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3238         }
3239         break;
3240 
3241 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3242 
3243         case lk_ticket: {
3244             __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3245             __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3246 
3247             __kmp_get_user_lock_owner_ =
3248               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3249               ( &__kmp_get_ticket_lock_owner );
3250 
3251             __kmp_acquire_user_lock_with_checks_ =
3252               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3253               ( &__kmp_acquire_ticket_lock_with_checks );
3254 
3255             __kmp_test_user_lock_with_checks_ =
3256               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3257               ( &__kmp_test_ticket_lock_with_checks );
3258 
3259             __kmp_release_user_lock_with_checks_ =
3260               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3261               ( &__kmp_release_ticket_lock_with_checks );
3262 
3263             __kmp_init_user_lock_with_checks_ =
3264               ( void ( * )( kmp_user_lock_p ) )
3265               ( &__kmp_init_ticket_lock_with_checks );
3266 
3267             __kmp_destroy_user_lock_ =
3268               ( void ( * )( kmp_user_lock_p ) )
3269               ( &__kmp_destroy_ticket_lock );
3270 
3271             __kmp_destroy_user_lock_with_checks_ =
3272               ( void ( * )( kmp_user_lock_p ) )
3273               ( &__kmp_destroy_ticket_lock_with_checks );
3274 
3275             __kmp_acquire_nested_user_lock_with_checks_ =
3276               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3277               ( &__kmp_acquire_nested_ticket_lock_with_checks );
3278 
3279             __kmp_test_nested_user_lock_with_checks_ =
3280               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3281               ( &__kmp_test_nested_ticket_lock_with_checks );
3282 
3283             __kmp_release_nested_user_lock_with_checks_ =
3284               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3285               ( &__kmp_release_nested_ticket_lock_with_checks );
3286 
3287             __kmp_init_nested_user_lock_with_checks_ =
3288               ( void ( * )( kmp_user_lock_p ) )
3289               ( &__kmp_init_nested_ticket_lock_with_checks );
3290 
3291             __kmp_destroy_nested_user_lock_with_checks_ =
3292               ( void ( * )( kmp_user_lock_p ) )
3293               ( &__kmp_destroy_nested_ticket_lock_with_checks );
3294 
3295              __kmp_is_user_lock_initialized_ =
3296                ( int ( * )( kmp_user_lock_p ) )
3297                ( &__kmp_is_ticket_lock_initialized );
3298 
3299              __kmp_get_user_lock_location_ =
3300                ( const ident_t * ( * )( kmp_user_lock_p ) )
3301                ( &__kmp_get_ticket_lock_location );
3302 
3303              __kmp_set_user_lock_location_ =
3304                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3305                ( &__kmp_set_ticket_lock_location );
3306 
3307              __kmp_get_user_lock_flags_ =
3308                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3309                ( &__kmp_get_ticket_lock_flags );
3310 
3311              __kmp_set_user_lock_flags_ =
3312                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3313                ( &__kmp_set_ticket_lock_flags );
3314         }
3315         break;
3316 
3317         case lk_queuing: {
3318             __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3319             __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3320 
3321             __kmp_get_user_lock_owner_ =
3322               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3323               ( &__kmp_get_queuing_lock_owner );
3324 
3325             __kmp_acquire_user_lock_with_checks_ =
3326               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3327               ( &__kmp_acquire_queuing_lock_with_checks );
3328 
3329             __kmp_test_user_lock_with_checks_ =
3330               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3331               ( &__kmp_test_queuing_lock_with_checks );
3332 
3333             __kmp_release_user_lock_with_checks_ =
3334               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3335               ( &__kmp_release_queuing_lock_with_checks );
3336 
3337             __kmp_init_user_lock_with_checks_ =
3338               ( void ( * )( kmp_user_lock_p ) )
3339               ( &__kmp_init_queuing_lock_with_checks );
3340 
3341             __kmp_destroy_user_lock_ =
3342               ( void ( * )( kmp_user_lock_p ) )
3343               ( &__kmp_destroy_queuing_lock );
3344 
3345             __kmp_destroy_user_lock_with_checks_ =
3346               ( void ( * )( kmp_user_lock_p ) )
3347               ( &__kmp_destroy_queuing_lock_with_checks );
3348 
3349             __kmp_acquire_nested_user_lock_with_checks_ =
3350               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3351               ( &__kmp_acquire_nested_queuing_lock_with_checks );
3352 
3353             __kmp_test_nested_user_lock_with_checks_ =
3354               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3355               ( &__kmp_test_nested_queuing_lock_with_checks );
3356 
3357             __kmp_release_nested_user_lock_with_checks_ =
3358               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3359               ( &__kmp_release_nested_queuing_lock_with_checks );
3360 
3361             __kmp_init_nested_user_lock_with_checks_ =
3362               ( void ( * )( kmp_user_lock_p ) )
3363               ( &__kmp_init_nested_queuing_lock_with_checks );
3364 
3365             __kmp_destroy_nested_user_lock_with_checks_ =
3366               ( void ( * )( kmp_user_lock_p ) )
3367               ( &__kmp_destroy_nested_queuing_lock_with_checks );
3368 
3369              __kmp_is_user_lock_initialized_ =
3370                ( int ( * )( kmp_user_lock_p ) )
3371                ( &__kmp_is_queuing_lock_initialized );
3372 
3373              __kmp_get_user_lock_location_ =
3374                ( const ident_t * ( * )( kmp_user_lock_p ) )
3375                ( &__kmp_get_queuing_lock_location );
3376 
3377              __kmp_set_user_lock_location_ =
3378                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3379                ( &__kmp_set_queuing_lock_location );
3380 
3381              __kmp_get_user_lock_flags_ =
3382                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3383                ( &__kmp_get_queuing_lock_flags );
3384 
3385              __kmp_set_user_lock_flags_ =
3386                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3387                ( &__kmp_set_queuing_lock_flags );
3388         }
3389         break;
3390 
3391 #if KMP_USE_ADAPTIVE_LOCKS
3392         case lk_adaptive: {
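            // Adaptive locks reuse the queuing-lock layout, so the sizes below and
            // the owner/location/flags accessors are taken from the queuing lock
            // implementation; only the acquire/test/release/init/destroy entry
            // points are adaptive-specific.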
3393             __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3394             __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3395 
3396             __kmp_get_user_lock_owner_ =
3397               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3398               ( &__kmp_get_queuing_lock_owner );
3399 
3400             __kmp_acquire_user_lock_with_checks_ =
3401               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3402               ( &__kmp_acquire_adaptive_lock_with_checks );
3403 
3404             __kmp_test_user_lock_with_checks_ =
3405               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3406               ( &__kmp_test_adaptive_lock_with_checks );
3407 
3408             __kmp_release_user_lock_with_checks_ =
3409               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3410               ( &__kmp_release_adaptive_lock_with_checks );
3411 
3412             __kmp_init_user_lock_with_checks_ =
3413               ( void ( * )( kmp_user_lock_p ) )
3414               ( &__kmp_init_adaptive_lock_with_checks );
3415 
3416             __kmp_destroy_user_lock_with_checks_ =
3417               ( void ( * )( kmp_user_lock_p ) )
3418               ( &__kmp_destroy_adaptive_lock_with_checks );
3419 
3420             __kmp_destroy_user_lock_ =
3421               ( void ( * )( kmp_user_lock_p ) )
3422               ( &__kmp_destroy_adaptive_lock );
3423 
3424             __kmp_is_user_lock_initialized_ =
3425               ( int ( * )( kmp_user_lock_p ) )
3426               ( &__kmp_is_queuing_lock_initialized );
3427 
3428             __kmp_get_user_lock_location_ =
3429               ( const ident_t * ( * )( kmp_user_lock_p ) )
3430               ( &__kmp_get_queuing_lock_location );
3431 
3432             __kmp_set_user_lock_location_ =
3433               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3434               ( &__kmp_set_queuing_lock_location );
3435 
3436             __kmp_get_user_lock_flags_ =
3437               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3438               ( &__kmp_get_queuing_lock_flags );
3439 
3440             __kmp_set_user_lock_flags_ =
3441               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3442               ( &__kmp_set_queuing_lock_flags );
3443 
3444         }
3445         break;
3446 #endif // KMP_USE_ADAPTIVE_LOCKS
3447 
3448         case lk_drdpa: {
3449             __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3450             __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3451 
3452             __kmp_get_user_lock_owner_ =
3453               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3454               ( &__kmp_get_drdpa_lock_owner );
3455 
3456             __kmp_acquire_user_lock_with_checks_ =
3457               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3458               ( &__kmp_acquire_drdpa_lock_with_checks );
3459 
3460             __kmp_test_user_lock_with_checks_ =
3461               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3462               ( &__kmp_test_drdpa_lock_with_checks );
3463 
3464             __kmp_release_user_lock_with_checks_ =
3465               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3466               ( &__kmp_release_drdpa_lock_with_checks );
3467 
3468             __kmp_init_user_lock_with_checks_ =
3469               ( void ( * )( kmp_user_lock_p ) )
3470               ( &__kmp_init_drdpa_lock_with_checks );
3471 
3472             __kmp_destroy_user_lock_ =
3473               ( void ( * )( kmp_user_lock_p ) )
3474               ( &__kmp_destroy_drdpa_lock );
3475 
3476             __kmp_destroy_user_lock_with_checks_ =
3477               ( void ( * )( kmp_user_lock_p ) )
3478               ( &__kmp_destroy_drdpa_lock_with_checks );
3479 
3480             __kmp_acquire_nested_user_lock_with_checks_ =
3481               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3482               ( &__kmp_acquire_nested_drdpa_lock_with_checks );
3483 
3484             __kmp_test_nested_user_lock_with_checks_ =
3485               ( int  ( * )( kmp_user_lock_p, kmp_int32 ) )
3486               ( &__kmp_test_nested_drdpa_lock_with_checks );
3487 
3488             __kmp_release_nested_user_lock_with_checks_ =
3489               ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3490               ( &__kmp_release_nested_drdpa_lock_with_checks );
3491 
3492             __kmp_init_nested_user_lock_with_checks_ =
3493               ( void ( * )( kmp_user_lock_p ) )
3494               ( &__kmp_init_nested_drdpa_lock_with_checks );
3495 
3496             __kmp_destroy_nested_user_lock_with_checks_ =
3497               ( void ( * )( kmp_user_lock_p ) )
3498               ( &__kmp_destroy_nested_drdpa_lock_with_checks );
3499 
3500              __kmp_is_user_lock_initialized_ =
3501                ( int ( * )( kmp_user_lock_p ) )
3502                ( &__kmp_is_drdpa_lock_initialized );
3503 
3504              __kmp_get_user_lock_location_ =
3505                ( const ident_t * ( * )( kmp_user_lock_p ) )
3506                ( &__kmp_get_drdpa_lock_location );
3507 
3508              __kmp_set_user_lock_location_ =
3509                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3510                ( &__kmp_set_drdpa_lock_location );
3511 
3512              __kmp_get_user_lock_flags_ =
3513                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3514                ( &__kmp_get_drdpa_lock_flags );
3515 
3516              __kmp_set_user_lock_flags_ =
3517                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3518                ( &__kmp_set_drdpa_lock_flags );
3519         }
3520         break;
3521     }
3522 }
3523 
3524 
3525 // ----------------------------------------------------------------------------
3526 // User lock table & lock allocation
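//
// Entry 0 of each lock table is reserved: when the table grows, the pointer to
// the previous (smaller) table is stashed there so the old tables can be freed
// at shutdown (see __kmp_lock_table_insert and __kmp_cleanup_user_locks below).
// The initializer of __kmp_user_lock_table below is assumed to correspond to
// used = 1, allocated = 0, table = NULL, so the first insert allocates a table.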
3527 
3528 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3529 kmp_user_lock_p __kmp_lock_pool = NULL;
3530 
3531 // Lock block-allocation support.
3532 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3533 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3534 
3535 static kmp_lock_index_t
3536 __kmp_lock_table_insert( kmp_user_lock_p lck )
3537 {
    // Assume that __kmp_global_lock is held upon entry/exit.
3539     kmp_lock_index_t index;
3540     if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3541         kmp_lock_index_t size;
3542         kmp_user_lock_p *table;
3543         kmp_lock_index_t i;
3544         // Reallocate lock table.
3545         if ( __kmp_user_lock_table.allocated == 0 ) {
3546             size = 1024;
3547         }
3548         else {
3549             size = __kmp_user_lock_table.allocated * 2;
3550         }
3551         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3552         memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3553         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
            // We cannot free the previous table now, since it may be in use by other
            // threads. So save the pointer to the previous table in the first element
            // of the new table. All the tables are organized into a list, and can be
            // freed when the library is shutting down.
3558         __kmp_user_lock_table.table = table;
3559         __kmp_user_lock_table.allocated = size;
3560     }
3561     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3562     index = __kmp_user_lock_table.used;
3563     __kmp_user_lock_table.table[ index ] = lck;
3564     ++ __kmp_user_lock_table.used;
3565     return index;
3566 }
3567 
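// A block is a single allocation: __kmp_num_locks_in_block lock objects laid
// out back to back, followed by the kmp_block_of_locks descriptor itself. The
// descriptor's 'locks' field points back at the start of the buffer.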
3568 static kmp_user_lock_p
3569 __kmp_lock_block_allocate()
3570 {
    // Assume that __kmp_global_lock is held upon entry/exit.
3572     static int last_index = 0;
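    // last_index is the next unused slot in the current block; keeping it as a
    // function-static is safe only because callers are serialized by the global
    // lock (see the assumption above).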
3573     if ( ( last_index >= __kmp_num_locks_in_block )
3574       || ( __kmp_lock_blocks == NULL ) ) {
3575         // Restart the index.
3576         last_index = 0;
3577         // Need to allocate a new block.
3578         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3579         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3580         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3581         // Set up the new block.
3582         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3583         new_block->next_block = __kmp_lock_blocks;
3584         new_block->locks = (void *)buffer;
3585         // Publish the new block.
3586         KMP_MB();
3587         __kmp_lock_blocks = new_block;
3588     }
3589     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3590       [ last_index * __kmp_user_lock_size ] ) );
3591     last_index++;
3592     return ret;
3593 }
3594 
3595 //
// Get memory for a lock. It may be freshly allocated memory or memory reused
// from the lock pool.
3598 //
3599 kmp_user_lock_p
3600 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3601   kmp_lock_flags_t flags )
3602 {
3603     kmp_user_lock_p lck;
3604     kmp_lock_index_t index;
3605     KMP_DEBUG_ASSERT( user_lock );
3606 
3607     __kmp_acquire_lock( &__kmp_global_lock, gtid );
3608 
3609     if ( __kmp_lock_pool == NULL ) {
3610         // Lock pool is empty. Allocate new memory.
3611         if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3612             lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3613         }
3614         else {
3615             lck = __kmp_lock_block_allocate();
3616         }
3617 
        // Insert the lock into the table so that it can be freed in __kmp_cleanup,
        // and so the debugger has info on all allocated locks.
3620         index = __kmp_lock_table_insert( lck );
3621     }
3622     else {
3623         // Pick up lock from pool.
3624         lck = __kmp_lock_pool;
3625         index = __kmp_lock_pool->pool.index;
3626         __kmp_lock_pool = __kmp_lock_pool->pool.next;
3627     }
3628 
3629     //
3630     // We could potentially differentiate between nested and regular locks
3631     // here, and do the lock table lookup for regular locks only.
3632     //
3633     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3634         * ( (kmp_lock_index_t *) user_lock ) = index;
3635     }
3636     else {
3637         * ( (kmp_user_lock_p *) user_lock ) = lck;
3638     }
3639 
    // Mark the lock if it is a critical section lock.
3641     __kmp_set_user_lock_flags( lck, flags );
3642 
3643     __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper
3644 
3645     return lck;
3646 }
3647 
// Return the lock's memory to the pool for reuse.
3649 void
3650 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3651 {
3652     kmp_lock_pool_t * lock_pool;
3653 
3654     KMP_DEBUG_ASSERT( user_lock != NULL );
3655     KMP_DEBUG_ASSERT( lck != NULL );
3656 
3657     __kmp_acquire_lock( & __kmp_global_lock, gtid );
3658 
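    // Push the lock onto the head of the free list. When indices are used as the
    // user-visible handle, remember the lock's table index so the pooled entry
    // can be handed out again by __kmp_user_lock_allocate.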
3659     lck->pool.next = __kmp_lock_pool;
3660     __kmp_lock_pool = lck;
3661     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3662         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3663         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3664         lck->pool.index = index;
3665     }
3666 
3667     __kmp_release_lock( & __kmp_global_lock, gtid );
3668 }
3669 
3670 kmp_user_lock_p
3671 __kmp_lookup_user_lock( void **user_lock, char const *func )
3672 {
3673     kmp_user_lock_p lck = NULL;
3674 
3675     if ( __kmp_env_consistency_check ) {
3676         if ( user_lock == NULL ) {
3677             KMP_FATAL( LockIsUninitialized, func );
3678         }
3679     }
3680 
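    // Decode the user's handle: it holds either a lock table index (when the
    // lock object is smaller than a pointer) or the lock pointer itself.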
3681     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3682         kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3683         if ( __kmp_env_consistency_check ) {
3684             if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3685                 KMP_FATAL( LockIsUninitialized, func );
3686             }
3687         }
3688         KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3689         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3690         lck = __kmp_user_lock_table.table[index];
3691     }
3692     else {
3693         lck = *( (kmp_user_lock_p *)user_lock );
3694     }
3695 
3696     if ( __kmp_env_consistency_check ) {
3697         if ( lck == NULL ) {
3698             KMP_FATAL( LockIsUninitialized, func );
3699         }
3700     }
3701 
3702     return lck;
3703 }
3704 
3705 void
3706 __kmp_cleanup_user_locks( void )
3707 {
3708     //
    // Reset the lock pool. Do not worry about the locks in the pool -- we will
    // free them when iterating through the lock table (it includes all the
    // locks, dead or alive).
3712     //
3713     __kmp_lock_pool = NULL;
3714 
3715 #define IS_CRITICAL(lck) \
3716         ( ( __kmp_get_user_lock_flags_ != NULL ) && \
3717         ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
3718 
3719     //
3720     // Loop through lock table, free all locks.
3721     //
    // Do not free item [0]; it is reserved for the lock tables list.
3723     //
3724     // FIXME - we are iterating through a list of (pointers to) objects of
3725     // type union kmp_user_lock, but we have no way of knowing whether the
3726     // base type is currently "pool" or whatever the global user lock type
3727     // is.
3728     //
3729     // We are relying on the fact that for all of the user lock types
3730     // (except "tas"), the first field in the lock struct is the "initialized"
3731     // field, which is set to the address of the lock object itself when
3732     // the lock is initialized.  When the union is of type "pool", the
3733     // first field is a pointer to the next object in the free list, which
3734     // will not be the same address as the object itself.
3735     //
3736     // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
3737     // will fail for "pool" objects on the free list.  This must happen as
3738     // the "location" field of real user locks overlaps the "index" field
3739     // of "pool" objects.
3740     //
3741     // It would be better to run through the free list, and remove all "pool"
3742     // objects from the lock table before executing this loop.  However,
3743     // "pool" objects do not always have their index field set (only on
3744     // lin_32e), and I don't want to search the lock table for the address
3745     // of every "pool" object on the free list.
3746     //
3747     while ( __kmp_user_lock_table.used > 1 ) {
        const ident_t *loc;
3749 
3750         //
        // Reduce __kmp_user_lock_table.used before freeing the lock,
        // so that the state of the lock table stays consistent.
3753         //
3754         kmp_user_lock_p lck = __kmp_user_lock_table.table[
3755           --__kmp_user_lock_table.used ];
3756 
3757         if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
3758           ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
3759             //
            // Issue a warning if: KMP_CONSISTENCY_CHECK AND the lock is
            // initialized AND it is NOT a critical section (the user is not
            // responsible for destroying criticals) AND we know the source
            // location to report.
3764             //
3765             if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
3766               ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
3767               ( loc->psource != NULL ) ) {
3768                 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
3769                 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
3770                   str_loc.line, str_loc.col );
3771                 __kmp_str_loc_free( &str_loc);
3772             }
3773 
3774 #ifdef KMP_DEBUG
3775             if ( IS_CRITICAL( lck ) ) {
3776                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
3777             }
3778             else {
3779                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
3780             }
3781 #endif // KMP_DEBUG
3782 
3783             //
            // Clean up the lock's internal dynamic resources
            // (particularly for drdpa locks).
3786             //
3787             __kmp_destroy_user_lock( lck );
3788         }
3789 
3790         //
3791         // Free the lock if block allocation of locks is not used.
3792         //
3793         if ( __kmp_lock_blocks == NULL ) {
3794             __kmp_free( lck );
3795         }
3796     }
3797 
3798 #undef IS_CRITICAL
3799 
3800     //
3801     // delete lock table(s).
3802     //
3803     kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3804     __kmp_user_lock_table.table = NULL;
3805     __kmp_user_lock_table.allocated = 0;
3806 
3807     while ( table_ptr != NULL ) {
3808         //
3809         // In the first element we saved the pointer to the previous
3810         // (smaller) lock table.
3811         //
3812         kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
3813         __kmp_free( table_ptr );
3814         table_ptr = next;
3815     }
3816 
3817     //
3818     // Free buffers allocated for blocks of locks.
3819     //
3820     kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
3821     __kmp_lock_blocks = NULL;
3822 
3823     while ( block_ptr != NULL ) {
3824         kmp_block_of_locks_t *next = block_ptr->next_block;
3825         __kmp_free( block_ptr->locks );
3826         //
3827         // *block_ptr itself was allocated at the end of the locks vector.
3828         //
        block_ptr = next;
3830     }
3831 
3832     TCW_4(__kmp_init_user_locks, FALSE);
3833 }
3834 
3835