1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 
18 #include "kmp.h"
19 #include "kmp_itt.h"
20 #include "kmp_i18n.h"
21 #include "kmp_lock.h"
22 #include "kmp_io.h"
23 
24 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
25 # include <unistd.h>
26 # include <sys/syscall.h>
27 // We should really include <futex.h>, but that causes compatibility problems on different
28 // Linux* OS distributions that either require that you include (or break when you try to include)
29 // <pci/types.h>.
30 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
31 // we just define the constants here and don't include <futex.h>
32 # ifndef FUTEX_WAIT
33 #  define FUTEX_WAIT    0
34 # endif
35 # ifndef FUTEX_WAKE
36 #  define FUTEX_WAKE    1
37 # endif
38 #endif
39 
40 /* Implement spin locks for internal library use.             */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
42 
43 void
44 __kmp_validate_locks( void )
45 {
46     int i;
47     kmp_uint32  x, y;
48 
49     /* Check to make sure unsigned arithmetic does wraps properly */
50     x = ~((kmp_uint32) 0) - 2;
51     y = x - 2;
52 
53     for (i = 0; i < 8; ++i, ++x, ++y) {
54         kmp_uint32 z = (x - y);
55         KMP_ASSERT( z == 2 );
56     }
57 
58     KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
59 }
60 
61 
62 /* ------------------------------------------------------------------------ */
63 /* test and set locks */
64 
65 //
66 // For the non-nested locks, we can only assume that the first 4 bytes were
67 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
68 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
69 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
70 //
71 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
72 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
73 //
74 
//
// Return the gtid of the thread currently holding the TAS lock, or -1 if
// the lock is free.  lk.poll stores (gtid + 1) so that 0 can mean "unlocked".
//
static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return TCR_4( lck->lk.poll ) - 1;
}
80 
//
// A TAS lock is nestable iff depth_locked != -1; simple locks are
// initialized with depth_locked == -1 (see __kmp_init_ticket_lock for
// the same convention on ticket locks).
//
static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}
86 
//
// Acquire a test-and-set lock, spinning (with yields) until it is obtained.
// Fast path: a single relaxed read of poll followed by a CAS(0 -> gtid+1)
// with acquire semantics.  Slow path: yield/spin and retry the CAS.
//
__forceinline static void
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    // Fast path: uncontended acquire.  The plain read filters out the
    // obviously-held case cheaply before attempting the atomic CAS.
    if ( ( lck->lk.poll == 0 )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    // If the machine is oversubscribed (more threads than procs), always
    // yield to the OS; otherwise spin a while before yielding.
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    while ( ( lck->lk.poll != 0 ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) {
        //
        // FIXME - use exponential backoff here
        //
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
}
131 
// Public (non-checking) entry point for acquiring a simple TAS lock.
void
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_tas_lock_timed_template( lck, gtid );
}
137 
//
// Checked variant of acquire: report fatal consistency errors if a nested
// lock is used as a simple one, or if the calling thread already owns the
// lock (self-deadlock).  The sizeof guard skips the nestable check when the
// nested lock would not fit in omp_lock_t (depth_locked may not exist).
//
static void
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_tas_lock( lck, gtid );
}
151 
//
// Try to acquire the TAS lock without blocking.
// Returns TRUE if the lock was obtained, FALSE otherwise.
//
int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == 0 )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}
162 
// Checked variant of test: fatal error if a nested lock is used as simple.
static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}
173 
//
// Release a TAS lock: a release-store of 0 to poll frees it.  Afterwards,
// yield if the system is oversubscribed so a waiter can run sooner.
//
void
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL32( &(lck->lk.poll), 0 );

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
}
187 
//
// Checked variant of release: fatal errors for nested-as-simple use,
// unsetting a free lock, or unsetting a lock owned by another thread.
//
static void
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_tas_lock( lck, gtid );
}
206 
// Initialize a simple TAS lock to the unlocked state (poll == 0).
void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, 0 );
}
212 
// Checked init: nothing extra to validate for a TAS lock.
static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}
218 
// Destroy a simple TAS lock; clearing poll is sufficient.
void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}
224 
//
// Checked destroy: fatal errors for nested-as-simple use or destroying a
// lock that is still held.
//
static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}
238 
239 
240 //
241 // nested test and set locks
242 //
243 
//
// Acquire a nested TAS lock: if the caller already owns it, just bump the
// nesting depth; otherwise acquire it and set the depth to 1.
//
void
__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_tas_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
    }
}
257 
// Checked nested acquire: fatal error if a simple lock is used as nestable.
static void
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_tas_lock( lck, gtid );
}
267 
//
// Non-blocking acquire of a nested TAS lock.
// Returns the new nesting depth on success, 0 on failure.
//
int
__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        // Re-entrant acquire by the current owner.
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}
287 
// Checked nested test: fatal error if a simple lock is used as nestable.
static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}
297 
//
// Release one level of a nested TAS lock; the lock itself is released
// only when the nesting depth drops to zero.
//
void
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
    }
}
308 
//
// Checked nested release: fatal errors for simple-as-nestable use,
// unsetting a free lock, or unsetting another thread's lock.
//
static void
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_tas_lock( lck, gtid );
}
325 
// Initialize a nested TAS lock: unlocked, with nesting depth 0.
void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
332 
// Checked nested init: nothing extra to validate for a TAS lock.
static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}
338 
// Destroy a nested TAS lock and reset its nesting depth.
void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}
345 
//
// Checked nested destroy: fatal errors for simple-as-nestable use or
// destroying a lock that is still held.
//
static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}
358 
359 
360 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
361 
362 /* ------------------------------------------------------------------------ */
363 /* futex locks */
364 
365 // futex locks are really just test and set locks, with a different method
366 // of handling contention.  They take the same amount of space as test and
367 // set locks, and are allocated the same way (i.e. use the area allocated by
368 // the compiler for non-nested locks / allocate nested locks on the heap).
369 
//
// Return the gtid of the thread holding the futex lock, or -1 if free.
// Bit 0 of lk.poll is the "waiters present" flag, so the owner is encoded
// as (gtid + 1) << 1; shift it out before decoding.
//
static kmp_int32
__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
{
    return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
}
375 
// A futex lock is nestable iff depth_locked != -1 (same convention as TAS).
static inline bool
__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}
381 
//
// Acquire a futex-based lock.  lk.poll == 0 means free; otherwise it holds
// (owner_gtid + 1) << 1, with bit 0 set when at least one thread is (or may
// be) blocked in futex_wait and the releaser must issue a futex_wake.
//
__forceinline static void
__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    // Owner tag: gtid shifted left to keep bit 0 free for the waiter flag.
    kmp_int32 gtid_code = ( gtid + 1 ) << 1;

    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid_code ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    KMP_FSYNC_PREPARE( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    kmp_int32 poll_val;
    // CAS(0 -> gtid_code); on failure poll_val is the observed value.
    while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0,
      gtid_code ) ) != 0 ) {
        kmp_int32 cond = poll_val & 1;
        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
           lck, gtid, poll_val, cond ) );

        //
        // NOTE: if you try to use the following condition for this branch
        //
        // if ( poll_val & 1 == 0 )
        //
        // Then the 12.0 compiler has a bug where the following block will
        // always be skipped, regardless of the value of the LSB of poll_val.
        //
        if ( ! cond ) {
            //
            // Try to set the lsb in the poll to indicate to the owner
            // thread that they need to wake this thread up.
            //
            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ),
              poll_val, poll_val | 1 ) ) {
                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
                  lck, lck->lk.poll, gtid ) );
                continue;
            }
            poll_val |= 1;

            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
              lck, lck->lk.poll, gtid ) );
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );

        // Block until the word changes from poll_val; the kernel re-checks
        // the value atomically, so a release between our CAS and the wait
        // makes the syscall return immediately (rc != 0) and we retry.
        kmp_int32 rc;
        if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
          poll_val, NULL, NULL, 0 ) ) != 0 ) {
            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
               lck, gtid, poll_val, rc, errno ) );
            continue;
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );
        //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
        // wake call when releasing the lock, as we have no idea how many
        // other threads are in the queue.
        //
        gtid_code |= 1;
    }

    KMP_FSYNC_ACQUIRED( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );
}
458 
// Public (non-checking) entry point for acquiring a simple futex lock.
void
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_futex_lock_timed_template( lck, gtid );
}
464 
//
// Checked acquire: fatal errors for nested-as-simple use or a re-acquire
// by the current owner (self-deadlock).
//
static void
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_futex_lock( lck, gtid );
}
478 
//
// Try to acquire the futex lock without blocking.
// Returns TRUE if the lock was obtained, FALSE otherwise.
//
int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}
488 
// Checked test: fatal error if a nested lock is used as simple.
static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}
499 
//
// Release a futex lock: atomically swap 0 into poll.  If the old value had
// bit 0 set, at least one thread may be blocked in futex_wait, so wake one.
//
void
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);

    // Atomic exchange both frees the lock and tells us whether any waiter
    // managed to set the LSB before we released.
    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
       lck, gtid, poll_val ) );

    if ( poll_val & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
           lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
    }

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    // Yield if oversubscribed so a newly-woken waiter can run sooner.
    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
}
529 
//
// Checked release: fatal errors for nested-as-simple use, unsetting a free
// lock, or unsetting a lock owned by another thread.
//
static void
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_futex_lock( lck, gtid );
}
548 
// Initialize a simple futex lock to the unlocked state (poll == 0).
void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, 0 );
}
554 
// Checked init: nothing extra to validate for a futex lock.
static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}
560 
// Destroy a simple futex lock; clearing poll is sufficient.
void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}
566 
//
// Checked destroy: fatal errors for nested-as-simple use or destroying a
// lock that is still held.
//
static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}
580 
581 
582 //
583 // nested futex locks
584 //
585 
//
// Acquire a nested futex lock: bump the depth on re-entrant acquire,
// otherwise acquire the lock and set the depth to 1.
//
void
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
    }
}
599 
// Checked nested acquire: fatal error if a simple lock is used as nestable.
static void
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_futex_lock( lck, gtid );
}
609 
//
// Non-blocking acquire of a nested futex lock.
// Returns the new nesting depth on success, 0 on failure.
//
int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        // Re-entrant acquire by the current owner.
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}
629 
// Checked nested test: fatal error if a simple lock is used as nestable.
static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}
639 
//
// Release one level of a nested futex lock; the lock itself is released
// only when the nesting depth drops to zero.
//
void
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
    }
}
650 
//
// Checked nested release: fatal errors for simple-as-nestable use,
// unsetting a free lock, or unsetting another thread's lock.
//
static void
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_futex_lock( lck, gtid );
}
667 
// Initialize a nested futex lock: unlocked, with nesting depth 0.
void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
674 
// Checked nested init: nothing extra to validate for a futex lock.
static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}
680 
// Destroy a nested futex lock and reset its nesting depth.
void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}
687 
//
// Checked nested destroy: fatal errors for simple-as-nestable use or
// destroying a lock that is still held.
//
static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}
700 
#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
702 
703 
704 /* ------------------------------------------------------------------------ */
705 /* ticket (bakery) locks */
706 
//
// Return the gtid of the thread holding the ticket lock, or -1 if free.
// lk.owner_id stores (gtid + 1) so that 0 can mean "unlocked".
//
static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}
712 
// A ticket lock is nestable iff depth_locked != -1 (same convention as TAS).
static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}
718 
719 static kmp_uint32
720 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
721 {
722     register kmp_uint32 pause;
723 
724     if (value == checker) {
725         return TRUE;
726     }
727     for (pause = checker - value; pause != 0; --pause);
728     return FALSE;
729 }
730 
//
// Acquire a ticket (bakery) lock: atomically take the next ticket, then
// wait until now_serving reaches it.  FIFO-fair by construction.
//
__forceinline static void
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket;
    KMP_MB();

    // Draw a unique ticket number.
    my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );

#ifdef USE_LOCK_PROFILE
    if ( TCR_4( lck->lk.now_serving ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    // Fast path: we are already being served.
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }
    // Slow path: spin/yield until our ticket comes up.
    KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    KMP_FSYNC_ACQUIRED(lck);
}
752 
// Public (non-checking) entry point for acquiring a simple ticket lock.
void
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_ticket_lock_timed_template( lck, gtid );
}
758 
//
// Checked acquire: fatal errors for an uninitialized lock, nested-as-simple
// use, or a re-acquire by the current owner.  Records the owner on success.
//
static void
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}
777 
//
// Try to acquire the ticket lock without blocking: only succeed if our
// prospective ticket is the one currently being served, claiming it with a
// CAS on next_ticket.  Returns TRUE on success, FALSE otherwise.
//
int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
          my_ticket, next_ticket ) ) {
            KMP_FSYNC_ACQUIRED( lck );
            return TRUE;
        }
    }
    return FALSE;
}
792 
//
// Checked test: fatal errors for an uninitialized lock or nested-as-simple
// use.  Records the owner if the lock was obtained.
//
static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}
811 
//
// Release a ticket lock by advancing now_serving.  The queue length
// (next_ticket - now_serving) is sampled first to decide whether to yield.
//
void
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32  distance;

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    // Number of threads waiting behind us (relies on unsigned wraparound,
    // validated in __kmp_validate_locks).
    distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );

    KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    // Yield when the wait queue exceeds the number of available processors.
    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
}
829 
//
// Checked release: fatal errors for an uninitialized lock, nested-as-simple
// use, unsetting a free lock, or unsetting another thread's lock.  Clears
// the recorded owner before releasing.
//
static void
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    __kmp_release_ticket_lock( lck, gtid );
}
851 
//
// Initialize a simple ticket lock.  The self-pointer in lk.initialized is
// how the checked variants detect an uninitialized lock.
//
void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    TCW_4( lck->lk.next_ticket, 0 );
    TCW_4( lck->lk.now_serving, 0 );
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // -1 => not a nested lock.
    lck->lk.initialized = (kmp_ticket_lock *)lck;
}
862 
// Checked init: nothing extra to validate for a ticket lock.
static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}
868 
// Destroy a ticket lock: clear all fields, including the initialized
// self-pointer so later checked calls report LockIsUninitialized.
void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location    = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}
879 
//
// Checked destroy: fatal errors for an uninitialized lock, nested-as-simple
// use, or destroying a lock that is still held.
//
static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}
895 
896 
897 //
898 // nested ticket locks
899 //
900 
//
// Acquire a nested ticket lock: bump the depth on re-entrant acquire,
// otherwise acquire the lock, then set depth and owner (fenced so the
// depth is visible before the ownership claim).
//
void
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
}
917 
//
// Checked nested acquire: fatal errors for an uninitialized lock or a
// simple lock used as nestable.
//
static void
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_ticket_lock( lck, gtid );
}
930 
//
// Non-blocking acquire of a nested ticket lock.
// Returns the new nesting depth on success, 0 on failure.
//
int
__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        // Re-entrant acquire by the current owner.
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}
952 
//
// Checked nested test: fatal errors for an uninitialized lock or a simple
// lock used as nestable.
//
static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}
966 
//
// Release one level of a nested ticket lock; when the depth reaches zero,
// clear the owner before releasing the underlying ticket lock.
//
void
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_ticket_lock( lck, gtid );
    }
}
979 
//
// Checked nested release: fatal errors for an uninitialized lock,
// simple-as-nestable use, unsetting a free lock, or unsetting another
// thread's lock.
//
static void
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_ticket_lock( lck, gtid );
}
999 
// Initialize a nested ticket lock: unlocked, with nesting depth 0.
void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
1006 
1007 static void
1008 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1009 {
1010     __kmp_init_nested_ticket_lock( lck );
1011 }
1012 
1013 void
1014 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1015 {
1016     __kmp_destroy_ticket_lock( lck );
1017     lck->lk.depth_locked = 0;
1018 }
1019 
1020 static void
1021 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1022 {
1023     char const * const func = "omp_destroy_nest_lock";
1024     if ( lck->lk.initialized != lck ) {
1025         KMP_FATAL( LockIsUninitialized, func );
1026     }
1027     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1028         KMP_FATAL( LockSimpleUsedAsNestable, func );
1029     }
1030     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1031         KMP_FATAL( LockStillOwned, func );
1032     }
1033     __kmp_destroy_nested_ticket_lock( lck );
1034 }
1035 
1036 
1037 //
1038 // access functions to fields which don't exist for all lock kinds.
1039 //
1040 
1041 static int
1042 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1043 {
1044     return lck == lck->lk.initialized;
1045 }
1046 
1047 static const ident_t *
1048 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1049 {
1050     return lck->lk.location;
1051 }
1052 
1053 static void
1054 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1055 {
1056     lck->lk.location = loc;
1057 }
1058 
1059 static kmp_lock_flags_t
1060 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1061 {
1062     return lck->lk.flags;
1063 }
1064 
1065 static void
1066 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1067 {
1068     lck->lk.flags = flags;
1069 }
1070 
1071 /* ------------------------------------------------------------------------ */
1072 /* queuing locks */
1073 
1074 /*
1075  * First the states
1076  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1077  *   UINT_MAX or -1, 0  means lock is held, nobody on queue
1078  *                h, h  means lock is held or about to transition, 1 element on queue
1079  *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1080  *
1081  * Now the transitions
1082  *    Acquire(0,0)  = -1 ,0
1083  *    Release(0,0)  = Error
1084  *    Acquire(-1,0) =  h ,h    h > 0
1085  *    Release(-1,0) =  0 ,0
1086  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1087  *    Release(h,h)  = -1 ,0    h > 0
1088  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1089  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1090  *
1091  * And pictorially
1092  *
1093  *
1094  *          +-----+
1095  *          | 0, 0|------- release -------> Error
1096  *          +-----+
1097  *            |  ^
1098  *     acquire|  |release
1099  *            |  |
1100  *            |  |
1101  *            v  |
1102  *          +-----+
1103  *          |-1, 0|
1104  *          +-----+
1105  *            |  ^
1106  *     acquire|  |release
1107  *            |  |
1108  *            |  |
1109  *            v  |
1110  *          +-----+
1111  *          | h, h|
1112  *          +-----+
1113  *            |  ^
1114  *     acquire|  |release
1115  *            |  |
1116  *            |  |
1117  *            v  |
1118  *          +-----+
1119  *          | h, t|----- acquire, release loopback ---+
1120  *          +-----+                                   |
1121  *               ^                                    |
1122  *               |                                    |
1123  *               +------------------------------------+
1124  *
1125  */
1126 
#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE	1024
// Fixed: the array definition below was missing its terminating ';',
// which broke the build whenever DEBUG_QUEUING_LOCKS was defined.
static char traces[TRACE_BUF_ELE][128] = { 0 };
static int tc = 0;
#define TRACE_LOCK(X,Y)          sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z)      sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
#define TRACE_LOCK_HT(X,Y,Z,Q)   sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );

/* Print the whole circular trace buffer (oldest entry first), then the
   current spin/queue state of the calling thread and the lock, and finally
   walk the waiter chain from head_id via th_next_waiting links. */
static void
__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
{
    kmp_int32 t, i;

    __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );

    i = tc % TRACE_BUF_ELE;
    __kmp_printf_no_lock( "%s\n", traces[i] );
    i = (i+1) % TRACE_BUF_ELE;
    while ( i != (tc % TRACE_BUF_ELE) ) {
        __kmp_printf_no_lock( "%s", traces[i] );
        i = (i+1) % TRACE_BUF_ELE;
    }
    __kmp_printf_no_lock( "\n" );

    __kmp_printf_no_lock(
             "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
             gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
             head_id, tail_id );

    __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );

    if ( lck->lk.head_id >= 1 ) {
        // Queue entries store gtid+1; follow the chain until a 0 link.
        t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
        while (t > 0) {
            __kmp_printf_no_lock( "-> %d ", t );
            t = __kmp_threads[t-1]->th.th_next_waiting;
        }
    }
    __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
    __kmp_printf_no_lock( "\n\n" );
}

#endif /* DEBUG_QUEUING_LOCKS */
1173 
1174 static kmp_int32
1175 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1176 {
1177     return TCR_4( lck->lk.owner_id ) - 1;
1178 }
1179 
1180 static inline bool
1181 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1182 {
1183     return lck->lk.depth_locked != -1;
1184 }
1185 
/* Acquire a lock using the queuing lock implementation.
   Waiters enqueue themselves on (head_id, tail_id) — entries store gtid+1 —
   and then spin locally on their own th_spin_here flag until the releasing
   thread clears it.  See the state-transition diagram above. */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
__forceinline static void
__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
    volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
    volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
    volatile kmp_uint32 *spin_here_p;
    kmp_int32 need_mf = 1;

    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));

    KMP_FSYNC_PREPARE( lck );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    spin_here_p = & this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "acq ent" );
    if ( *spin_here_p )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !*spin_here_p );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );


    /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
       that may follow, not just in execution order, but also in visibility order.  This way,
       when a releasing thread observes the changes to the queue by this thread, it can
       rightly assume that spin_here_p has already been set to TRUE, so that when it sets
       spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
       to FALSE before this thread sets it to TRUE, this thread will hang.
    */
    *spin_here_p = TRUE;  /* before enqueuing to prevent race */

    /* Retry loop: each iteration reads head and attempts exactly one of the
       three legal transitions; a failed CAS falls through to a retry. */
    while( 1 ) {
        kmp_int32 enqueued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

        switch ( head ) {

            case -1:  /* lock held, queue empty: try to become the sole waiter */
            {
#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                           this assignment prevents us from entering the if ( t > 0 )
                           condition in the enqueued case below, which is not necessary for
                           this state transition */

                need_mf = 0;
                /* try (-1,0)->(tid,tid) */
                enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
                  KMP_PACK_64( -1, 0 ),
                  KMP_PACK_64( gtid+1, gtid+1 ) );
#ifdef DEBUG_QUEUING_LOCKS
                  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
#endif
            }
            break;

            default:  /* waiters already queued: try to append to the tail */
            {
                tail = *tail_id_p;
                KMP_DEBUG_ASSERT( tail != gtid + 1 );

#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif

                if ( tail == 0 ) {
                    /* tail not yet published by a concurrent transition; retry */
                    enqueued = FALSE;
                }
                else {
                    need_mf = 0;
                    /* try (h,t) or (h,h)->(h,tid) */
                    enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );

#ifdef DEBUG_QUEUING_LOCKS
                        if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
#endif
                }
            }
            break;

            case 0: /* empty queue */
            {
                kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                /* try (0,0)->(-1,0) */

                /* only legal transition out of head = 0 is head = -1 with no change to tail */
                grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );

                if ( grabbed_lock ) {

                    *spin_here_p = FALSE;

                    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
                              lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                    TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif
                    KMP_FSYNC_ACQUIRED( lck );
                    return; /* lock holder cannot be on queue */
                }
                enqueued = FALSE;
            }
            break;
        }

        if ( enqueued ) {
            /* Link behind the previous tail (if any), then spin on our own
               flag until the releasing thread dequeues us. */
            if ( tail > 0 ) {
                kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
                KMP_ASSERT( tail_thr != NULL );
                tail_thr->th.th_next_waiting = gtid+1;
                /* corresponding wait for this write in release code */
            }
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));


            /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
             *       throughput only here.
             */
            KMP_MB();
            KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq spin" );

            if ( this_thr->th.th_next_waiting != 0 )
                __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
            KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
                      lck, gtid ));

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq exit 2" );
#endif
            /* got lock, we were dequeued by the thread that released lock */
            return;
        }

        /* Yield if number of threads > number of logical processors */
        /* ToDo: Not sure why this should only be in oversubscription case,
           maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
        KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) );
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "acq retry" );
#endif

    }
    KMP_ASSERT2( 0, "should not get here" );
}
1356 
1357 void
1358 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1359 {
1360     KMP_DEBUG_ASSERT( gtid >= 0 );
1361 
1362     __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1363 }
1364 
1365 static void
1366 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1367   kmp_int32 gtid )
1368 {
1369     char const * const func = "omp_set_lock";
1370     if ( lck->lk.initialized != lck ) {
1371         KMP_FATAL( LockIsUninitialized, func );
1372     }
1373     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1374         KMP_FATAL( LockNestableUsedAsSimple, func );
1375     }
1376     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1377         KMP_FATAL( LockIsAlreadyOwned, func );
1378     }
1379 
1380     __kmp_acquire_queuing_lock( lck, gtid );
1381 
1382     lck->lk.owner_id = gtid + 1;
1383 }
1384 
1385 int
1386 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1387 {
1388     volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
1389     kmp_int32 head;
1390 #ifdef KMP_DEBUG
1391     kmp_info_t *this_thr;
1392 #endif
1393 
1394     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1395     KMP_DEBUG_ASSERT( gtid >= 0 );
1396 #ifdef KMP_DEBUG
1397     this_thr = __kmp_thread_from_gtid( gtid );
1398     KMP_DEBUG_ASSERT( this_thr != NULL );
1399     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1400 #endif
1401 
1402     head = *head_id_p;
1403 
1404     if ( head == 0 ) { /* nobody on queue, nobody holding */
1405 
1406         /* try (0,0)->(-1,0) */
1407 
1408         if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1409             KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1410             KMP_FSYNC_ACQUIRED(lck);
1411             return TRUE;
1412         }
1413     }
1414 
1415     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1416     return FALSE;
1417 }
1418 
1419 static int
1420 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1421 {
1422     char const * const func = "omp_test_lock";
1423     if ( lck->lk.initialized != lck ) {
1424         KMP_FATAL( LockIsUninitialized, func );
1425     }
1426     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1427         KMP_FATAL( LockNestableUsedAsSimple, func );
1428     }
1429 
1430     int retval = __kmp_test_queuing_lock( lck, gtid );
1431 
1432     if ( retval ) {
1433         lck->lk.owner_id = gtid + 1;
1434     }
1435     return retval;
1436 }
1437 
/* Release the queuing lock.  If nobody is queued ((-1,0)) the lock returns
   to free (0,0); otherwise the head waiter is dequeued and woken by clearing
   its th_spin_here flag.  A failed CAS means the queue changed under us and
   we retry. */
void
__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    register kmp_info_t *this_thr;
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;

    KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
    this_thr    = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "rel ent" );

    if ( this_thr->th.th_spin_here )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );

    KMP_FSYNC_RELEASING(lck);

    while( 1 ) {
        kmp_int32 dequeued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
        tail = *tail_id_p;
        TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
        if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
        KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */

        if ( head == -1 ) { /* nobody on queue */

            /* try (-1,0)->(0,0) */
            if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
                KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
                          lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
#endif
                return;
            }
            dequeued = FALSE;

        }
        else {

            tail = *tail_id_p;
            if ( head == tail ) {  /* only one thread on the queue */

#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 );

                /* try (h,h)->(-1,0) */
                /* NOTE(review): unlike the acquire side's 64-bit CAS, volatile is
                   cast away here — presumably benign since only the address is
                   taken; confirm against the CAS macro definitions. */
                dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
                  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
#endif

            }
            else {
                volatile kmp_int32 *waiting_id_p;
                kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
                KMP_DEBUG_ASSERT( head_thr != NULL );
                waiting_id_p = & head_thr->th.th_next_waiting;

                /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

                /* try (h,t)->(h',t) or (t,t) */

                KMP_MB();
                /* make sure enqueuing thread has time to update next waiting thread field */
                /* Wait for the head waiter's next-link to be published, then
                   advance head to that successor. */
                *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
#endif
                dequeued = TRUE;
            }
        }

        if ( dequeued ) {
            kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
            KMP_DEBUG_ASSERT( head_thr != NULL );

            /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
            if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
            KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

            /* For clean code only.
             * Thread not released until next statement prevents race with acquire code.
             */
            head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
#endif

            KMP_MB();
            /* reset spin value */
            head_thr->th.th_spin_here = FALSE;

            KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
                      lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "rel exit 2" );
#endif
            return;
        }
        /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */

#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "rel retry" );
#endif

    } /* while */
    KMP_ASSERT2( 0, "should not get here" );
}
1570 
1571 static void
1572 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1573   kmp_int32 gtid )
1574 {
1575     char const * const func = "omp_unset_lock";
1576     KMP_MB();  /* in case another processor initialized lock */
1577     if ( lck->lk.initialized != lck ) {
1578         KMP_FATAL( LockIsUninitialized, func );
1579     }
1580     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1581         KMP_FATAL( LockNestableUsedAsSimple, func );
1582     }
1583     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1584         KMP_FATAL( LockUnsettingFree, func );
1585     }
1586     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1587         KMP_FATAL( LockUnsettingSetByAnother, func );
1588     }
1589     lck->lk.owner_id = 0;
1590     __kmp_release_queuing_lock( lck, gtid );
1591 }
1592 
1593 void
1594 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1595 {
1596     lck->lk.location = NULL;
1597     lck->lk.head_id = 0;
1598     lck->lk.tail_id = 0;
1599     lck->lk.next_ticket = 0;
1600     lck->lk.now_serving = 0;
1601     lck->lk.owner_id = 0;      // no thread owns the lock.
1602     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1603     lck->lk.initialized = lck;
1604 
1605     KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1606 }
1607 
1608 static void
1609 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1610 {
1611     __kmp_init_queuing_lock( lck );
1612 }
1613 
1614 void
1615 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1616 {
1617     lck->lk.initialized = NULL;
1618     lck->lk.location = NULL;
1619     lck->lk.head_id = 0;
1620     lck->lk.tail_id = 0;
1621     lck->lk.next_ticket = 0;
1622     lck->lk.now_serving = 0;
1623     lck->lk.owner_id = 0;
1624     lck->lk.depth_locked = -1;
1625 }
1626 
1627 static void
1628 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1629 {
1630     char const * const func = "omp_destroy_lock";
1631     if ( lck->lk.initialized != lck ) {
1632         KMP_FATAL( LockIsUninitialized, func );
1633     }
1634     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1635         KMP_FATAL( LockNestableUsedAsSimple, func );
1636     }
1637     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1638         KMP_FATAL( LockStillOwned, func );
1639     }
1640     __kmp_destroy_queuing_lock( lck );
1641 }
1642 
1643 
1644 //
1645 // nested queuing locks
1646 //
1647 
1648 void
1649 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1650 {
1651     KMP_DEBUG_ASSERT( gtid >= 0 );
1652 
1653     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1654         lck->lk.depth_locked += 1;
1655     }
1656     else {
1657         __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1658         KMP_MB();
1659         lck->lk.depth_locked = 1;
1660         KMP_MB();
1661         lck->lk.owner_id = gtid + 1;
1662     }
1663 }
1664 
1665 static void
1666 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1667 {
1668     char const * const func = "omp_set_nest_lock";
1669     if ( lck->lk.initialized != lck ) {
1670         KMP_FATAL( LockIsUninitialized, func );
1671     }
1672     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1673         KMP_FATAL( LockSimpleUsedAsNestable, func );
1674     }
1675     __kmp_acquire_nested_queuing_lock( lck, gtid );
1676 }
1677 
1678 int
1679 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1680 {
1681     int retval;
1682 
1683     KMP_DEBUG_ASSERT( gtid >= 0 );
1684 
1685     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1686         retval = ++lck->lk.depth_locked;
1687     }
1688     else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1689         retval = 0;
1690     }
1691     else {
1692         KMP_MB();
1693         retval = lck->lk.depth_locked = 1;
1694         KMP_MB();
1695         lck->lk.owner_id = gtid + 1;
1696     }
1697     return retval;
1698 }
1699 
1700 static int
1701 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1702   kmp_int32 gtid )
1703 {
1704     char const * const func = "omp_test_nest_lock";
1705     if ( lck->lk.initialized != lck ) {
1706         KMP_FATAL( LockIsUninitialized, func );
1707     }
1708     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1709         KMP_FATAL( LockSimpleUsedAsNestable, func );
1710     }
1711     return __kmp_test_nested_queuing_lock( lck, gtid );
1712 }
1713 
1714 void
1715 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1716 {
1717     KMP_DEBUG_ASSERT( gtid >= 0 );
1718 
1719     KMP_MB();
1720     if ( --(lck->lk.depth_locked) == 0 ) {
1721         KMP_MB();
1722         lck->lk.owner_id = 0;
1723         __kmp_release_queuing_lock( lck, gtid );
1724     }
1725 }
1726 
1727 static void
1728 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1729 {
1730     char const * const func = "omp_unset_nest_lock";
1731     KMP_MB();  /* in case another processor initialized lock */
1732     if ( lck->lk.initialized != lck ) {
1733         KMP_FATAL( LockIsUninitialized, func );
1734     }
1735     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1736         KMP_FATAL( LockSimpleUsedAsNestable, func );
1737     }
1738     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1739         KMP_FATAL( LockUnsettingFree, func );
1740     }
1741     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1742         KMP_FATAL( LockUnsettingSetByAnother, func );
1743     }
1744     __kmp_release_nested_queuing_lock( lck, gtid );
1745 }
1746 
1747 void
1748 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1749 {
1750     __kmp_init_queuing_lock( lck );
1751     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1752 }
1753 
1754 static void
1755 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1756 {
1757     __kmp_init_nested_queuing_lock( lck );
1758 }
1759 
1760 void
1761 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1762 {
1763     __kmp_destroy_queuing_lock( lck );
1764     lck->lk.depth_locked = 0;
1765 }
1766 
1767 static void
1768 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1769 {
1770     char const * const func = "omp_destroy_nest_lock";
1771     if ( lck->lk.initialized != lck ) {
1772         KMP_FATAL( LockIsUninitialized, func );
1773     }
1774     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1775         KMP_FATAL( LockSimpleUsedAsNestable, func );
1776     }
1777     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1778         KMP_FATAL( LockStillOwned, func );
1779     }
1780     __kmp_destroy_nested_queuing_lock( lck );
1781 }
1782 
1783 
1784 //
1785 // access functions to fields which don't exist for all lock kinds.
1786 //
1787 
1788 static int
1789 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1790 {
1791     return lck == lck->lk.initialized;
1792 }
1793 
1794 static const ident_t *
1795 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1796 {
1797     return lck->lk.location;
1798 }
1799 
1800 static void
1801 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1802 {
1803     lck->lk.location = loc;
1804 }
1805 
1806 static kmp_lock_flags_t
1807 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1808 {
1809     return lck->lk.flags;
1810 }
1811 
1812 static void
1813 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1814 {
1815     lck->lk.flags = flags;
1816 }
1817 
1818 #if KMP_USE_ADAPTIVE_LOCKS
1819 
1820 /*
1821     RTM Adaptive locks
1822 */
1823 
1824 // TODO: Use the header for intrinsics below with the compiler 13.0
1825 //#include <immintrin.h>
1826 
// Values from the status register after failed speculation.
// NOTE(review): these mirror the _XBEGIN_*/_XABORT_* constants from the
// compiler's <immintrin.h> (see the TODO above) — confirm they stay in sync
// when switching to the intrinsics header.
#define _XBEGIN_STARTED          (~0u)
#define _XABORT_EXPLICIT         (1 << 0)
#define _XABORT_RETRY            (1 << 1)
#define _XABORT_CONFLICT         (1 << 2)
#define _XABORT_CAPACITY         (1 << 3)
#define _XABORT_DEBUG            (1 << 4)
#define _XABORT_NESTED           (1 << 5)
#define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))

// Aborts for which it's worth trying again immediately
#define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

// Two-step stringification so that macro arguments are expanded first.
#define STRINGIZE_INTERNAL(arg) #arg
#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1842 
1843 // Access to RTM instructions
1844 
1845 /*
1846   A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1847   This is the same definition as the compiler intrinsic that will be supported at some point.
1848 */
1849 static __inline int _xbegin()
1850 {
1851     int res = -1;
1852 
1853 #if KMP_OS_WINDOWS
1854 #if KMP_ARCH_X86_64
1855     _asm {
1856         _emit 0xC7
1857         _emit 0xF8
1858         _emit 2
1859         _emit 0
1860         _emit 0
1861         _emit 0
1862         jmp   L2
1863         mov   res, eax
1864     L2:
1865     }
1866 #else /* IA32 */
1867     _asm {
1868         _emit 0xC7
1869         _emit 0xF8
1870         _emit 2
1871         _emit 0
1872         _emit 0
1873         _emit 0
1874         jmp   L2
1875         mov   res, eax
1876     L2:
1877     }
1878 #endif // KMP_ARCH_X86_64
1879 #else
1880     /* Note that %eax must be noted as killed (clobbered), because
1881      * the XSR is returned in %eax(%rax) on abort.  Other register
1882      * values are restored, so don't need to be killed.
1883      *
1884      * We must also mark 'res' as an input and an output, since otherwise
1885      * 'res=-1' may be dropped as being dead, whereas we do need the
1886      * assignment on the successful (i.e., non-abort) path.
1887      */
1888     __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
1889                       "   .long  1f-1b-6\n"
1890                       "    jmp   2f\n"
1891                       "1:  movl  %%eax,%0\n"
1892                       "2:"
1893                       :"+r"(res)::"memory","%eax");
1894 #endif // KMP_OS_WINDOWS
1895     return res;
1896 }
1897 
1898 /*
1899   Transaction end
1900 */
1901 static __inline void _xend()
1902 {
1903 #if KMP_OS_WINDOWS
1904     __asm  {
1905         _emit 0x0f
1906         _emit 0x01
1907         _emit 0xd5
1908     }
1909 #else
1910     __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1911 #endif
1912 }
1913 
1914 /*
1915   This is a macro, the argument must be a single byte constant which
1916   can be evaluated by the inline assembler, since it is emitted as a
1917   byte into the assembly code.
1918 */
1919 #if KMP_OS_WINDOWS
1920 #define _xabort(ARG)                            \
1921     _asm _emit 0xc6                             \
1922     _asm _emit 0xf8                             \
1923     _asm _emit ARG
1924 #else
1925 #define _xabort(ARG) \
1926     __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1927 #endif
1928 
1929 //
1930 //    Statistics is collected for testing purpose
1931 //
1932 #if KMP_DEBUG_ADAPTIVE_LOCKS
1933 
1934 // We accumulate speculative lock statistics when the lock is destroyed.
1935 // We keep locks that haven't been destroyed in the liveLocks list
1936 // so that we can grab their statistics too.
1937 static kmp_adaptive_lock_statistics_t destroyedStats;
1938 
1939 // To hold the list of live locks.
1940 static kmp_adaptive_lock_info_t liveLocks;
1941 
1942 // A lock so we can safely update the list of locks.
1943 static kmp_bootstrap_lock_t chain_lock;
1944 
1945 // Initialize the list of stats.
1946 void
1947 __kmp_init_speculative_stats()
1948 {
1949     kmp_adaptive_lock_info_t *lck = &liveLocks;
1950 
1951     memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
1952     lck->stats.next = lck;
1953     lck->stats.prev = lck;
1954 
1955     KMP_ASSERT( lck->stats.next->stats.prev == lck );
1956     KMP_ASSERT( lck->stats.prev->stats.next == lck );
1957 
1958     __kmp_init_bootstrap_lock( &chain_lock );
1959 
1960 }
1961 
1962 // Insert the lock into the circular list
1963 static void
1964 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
1965 {
1966     __kmp_acquire_bootstrap_lock( &chain_lock );
1967 
1968     lck->stats.next = liveLocks.stats.next;
1969     lck->stats.prev = &liveLocks;
1970 
1971     liveLocks.stats.next = lck;
1972     lck->stats.next->stats.prev  = lck;
1973 
1974     KMP_ASSERT( lck->stats.next->stats.prev == lck );
1975     KMP_ASSERT( lck->stats.prev->stats.next == lck );
1976 
1977     __kmp_release_bootstrap_lock( &chain_lock );
1978 }
1979 
1980 static void
1981 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
1982 {
1983     KMP_ASSERT( lck->stats.next->stats.prev == lck );
1984     KMP_ASSERT( lck->stats.prev->stats.next == lck );
1985 
1986     kmp_adaptive_lock_info_t * n = lck->stats.next;
1987     kmp_adaptive_lock_info_t * p = lck->stats.prev;
1988 
1989     n->stats.prev = p;
1990     p->stats.next = n;
1991 }
1992 
// Clear a lock's speculation counters and register it on the list of
// live locks so its statistics are included in the final report.
static void
__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
{
    memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
    __kmp_remember_lock( lck );
}
1999 
2000 static void
2001 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2002 {
2003     kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2004 
2005     t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2006     t->successfulSpeculations += s->successfulSpeculations;
2007     t->hardFailedSpeculations += s->hardFailedSpeculations;
2008     t->softFailedSpeculations += s->softFailedSpeculations;
2009     t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2010     t->lemmingYields          += s->lemmingYields;
2011 }
2012 
2013 static void
2014 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2015 {
2016     kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2017 
2018     __kmp_acquire_bootstrap_lock( &chain_lock );
2019 
2020     __kmp_add_stats( &destroyedStats, lck );
2021     __kmp_forget_lock( lck );
2022 
2023     __kmp_release_bootstrap_lock( &chain_lock );
2024 }
2025 
2026 static float
2027 percent (kmp_uint32 count, kmp_uint32 total)
2028 {
2029     return (total == 0) ? 0.0: (100.0 * count)/total;
2030 }
2031 
2032 static
2033 FILE * __kmp_open_stats_file()
2034 {
2035     if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2036         return stdout;
2037 
2038     size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2039     char buffer[buffLen];
2040     snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile,
2041       (kmp_int32)getpid());
2042     FILE * result = fopen(&buffer[0], "w");
2043 
2044     // Maybe we should issue a warning here...
2045     return result ? result : stdout;
2046 }
2047 
// Print the accumulated speculative-lock statistics: the counters from
// already-destroyed locks (destroyedStats) plus those of all still-live
// locks. Only meaningful when adaptive locks are in use.
// NOTE(review): the live-locks list is walked here without taking
// chain_lock — presumably called at shutdown when no mutation can race;
// confirm against callers.
void
__kmp_print_speculative_stats()
{
    if (__kmp_user_lock_kind != lk_adaptive)
        return;

    FILE * statsFile = __kmp_open_stats_file();

    // Start from the destroyed locks' totals, then add each live lock.
    kmp_adaptive_lock_statistics_t total = destroyedStats;
    kmp_adaptive_lock_info_t *lck;

    for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
        __kmp_add_stats( &total, lck );
    }
    kmp_adaptive_lock_statistics_t *t = &total;
    // Every critical section was entered either speculatively or not.
    kmp_uint32 totalSections     = t->nonSpeculativeAcquires + t->successfulSpeculations;
    // Every started speculation ends in exactly one of these outcomes.
    kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
                                   t->softFailedSpeculations;

    fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
    fprintf ( statsFile, " Lock parameters: \n"
             "   max_soft_retries               : %10d\n"
             "   max_badness                    : %10d\n",
             __kmp_adaptive_backoff_params.max_soft_retries,
             __kmp_adaptive_backoff_params.max_badness);
    fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
    fprintf( statsFile, " Total critical sections          : %10d\n", totalSections );
    fprintf( statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
             t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
    fprintf( statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
             t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
    fprintf( statsFile, " Lemming yields                   : %10d\n\n", t->lemmingYields );

    fprintf( statsFile, " Speculative acquire attempts     : %10d\n", totalSpeculations );
    fprintf( statsFile, " Successes                        : %10d (%5.1f%%)\n",
             t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
    fprintf( statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
             t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
    fprintf( statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
             t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );

    // __kmp_open_stats_file may have fallen back to stdout; don't close that.
    if (statsFile != stdout)
        fclose( statsFile );
}
2092 
2093 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2094 #else
2095 # define KMP_INC_STAT(lck,stat)
2096 
2097 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2098 
2099 static inline bool
2100 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2101 {
2102     // It is enough to check that the head_id is zero.
2103     // We don't also need to check the tail.
2104     bool res = lck->lk.head_id == 0;
2105 
2106     // We need a fence here, since we must ensure that no memory operations
2107     // from later in this thread float above that read.
2108 #if KMP_COMPILER_ICC
2109     _mm_mfence();
2110 #else
2111     __sync_synchronize();
2112 #endif
2113 
2114     return res;
2115 }
2116 
// Functions for manipulating the badness
static __inline void
__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
{
    // Reset the badness to zero so we eagerly try to speculate again
    lck->lk.adaptive.badness = 0;
    KMP_INC_STAT(lck,successfulSpeculations);
}
2125 
2126 // Create a bit mask with one more set bit.
2127 static __inline void
2128 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2129 {
2130     kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2131     if ( newBadness > lck->lk.adaptive.max_badness) {
2132         return;
2133     } else {
2134         lck->lk.adaptive.badness = newBadness;
2135     }
2136 }
2137 
2138 // Check whether speculation should be attempted.
2139 static __inline int
2140 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2141 {
2142     kmp_uint32 badness = lck->lk.adaptive.badness;
2143     kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2144     int res = (attempts & badness) == 0;
2145     return res;
2146 }
2147 
// Attempt to acquire only the speculative lock.
// Does not back off to the non-speculative lock.
// Returns 1 if speculation was successfully started (the "acquire"),
// 0 after exhausting soft retries or hitting a hard abort; in the
// failure case the badness mask is widened before returning.
static int
__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
{
    int retries = lck->lk.adaptive.max_soft_retries;

    // We don't explicitly count the start of speculation, rather we record
    // the results (success, hard fail, soft fail). The sum of all of those
    // is the total number of times we started speculation since all
    // speculations must end one of those ways.
    do
    {
        kmp_uint32 status = _xbegin();
        // Switch this in to disable actual speculation but exercise
        // at least some of the rest of the code. Useful for debugging...
        // kmp_uint32 status = _XABORT_NESTED;

        if (status == _XBEGIN_STARTED )
        { /* We have successfully started speculation
           * Check that no-one acquired the lock for real between when we last looked
           * and now. This also gets the lock cache line into our read-set,
           * which we need so that we'll abort if anyone later claims it for real.
           */
            if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
            {
                // Lock is now visibly acquired, so someone beat us to it.
                // Abort the transaction so we'll restart from _xbegin with the
                // failure status.
                _xabort(0x01)
                KMP_ASSERT2( 0, "should not get here" );
            }
            return 1;   // Lock has been acquired (speculatively)
        } else {
            // We have aborted, update the statistics
            if ( status & SOFT_ABORT_MASK)
            {
                KMP_INC_STAT(lck,softFailedSpeculations);
                // and loop round to retry.
            }
            else
            {
                KMP_INC_STAT(lck,hardFailedSpeculations);
                // Give up if we had a hard failure.
                break;
            }
        }
    }  while( retries-- ); // Loop while we have retries, and didn't fail hard.

    // Either we had a hard failure or we didn't succeed softly after
    // the full set of attempts, so back off the badness.
    __kmp_step_badness( lck );
    return 0;
}
2203 
2204 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2205 // if the speculative lock cannot be acquired.
2206 // We can succeed speculatively, non-speculatively, or fail.
2207 static int
2208 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2209 {
2210     // First try to acquire the lock speculatively
2211     if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2212         return 1;
2213 
2214     // Speculative acquisition failed, so try to acquire it non-speculatively.
2215     // Count the non-speculative acquire attempt
2216     lck->lk.adaptive.acquire_attempts++;
2217 
2218     // Use base, non-speculative lock.
2219     if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2220     {
2221         KMP_INC_STAT(lck,nonSpeculativeAcquires);
2222         return 1;       // Lock is acquired (non-speculatively)
2223     }
2224     else
2225     {
2226         return 0;       // Failed to acquire the lock, it's already visibly locked.
2227     }
2228 }
2229 
2230 static int
2231 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2232 {
2233     char const * const func = "omp_test_lock";
2234     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2235         KMP_FATAL( LockIsUninitialized, func );
2236     }
2237 
2238     int retval = __kmp_test_adaptive_lock( lck, gtid );
2239 
2240     if ( retval ) {
2241         lck->lk.qlk.owner_id = gtid + 1;
2242     }
2243     return retval;
2244 }
2245 
// Block until we can acquire a speculative, adaptive lock.
// We check whether we should be trying to speculate.
// If we should be, we check the real lock to see if it is free,
// and, if not, pause without attempting to acquire it until it is.
// Then we try the speculative acquire.
// This means that although we suffer from lemmings a little
// (because we can't acquire the lock speculatively until
// the queue of threads waiting has cleared), we don't get into a
// state where we can never acquire the lock speculatively (because we
// force the queue to clear by preventing new arrivals from entering the
// queue).
// This does mean that when we're trying to break lemmings, the lock
// is no longer fair. However OpenMP makes no guarantee that its
// locks are fair, so this isn't a real problem.
static void
__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
{
    if ( __kmp_should_speculate( lck, gtid ) )
    {
        if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
        {
            if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
                return;
            // We tried speculation and failed, so give up.
        }
        else
        {
            // We can't try speculation until the lock is free, so we
            // pause here (without suspending on the queueing lock,
            // to allow it to drain, then try again.
            // All other threads will also see the same result for
            // shouldSpeculate, so will be doing the same if they
            // try to claim the lock from now on.
            while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
            {
                KMP_INC_STAT(lck,lemmingYields);
                __kmp_yield (TRUE);
            }

            if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
                return;
        }
    }

    // Speculative acquisition failed, so acquire it non-speculatively.
    // Count the non-speculative acquire attempt
    lck->lk.adaptive.acquire_attempts++;

    __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
    // We have acquired the base lock, so count that.
    KMP_INC_STAT(lck,nonSpeculativeAcquires );
}
2298 
2299 static void
2300 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2301 {
2302     char const * const func = "omp_set_lock";
2303     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2304         KMP_FATAL( LockIsUninitialized, func );
2305     }
2306     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2307         KMP_FATAL( LockIsAlreadyOwned, func );
2308     }
2309 
2310     __kmp_acquire_adaptive_lock( lck, gtid );
2311 
2312     lck->lk.qlk.owner_id = gtid + 1;
2313 }
2314 
2315 static void
2316 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2317 {
2318     if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2319     {   // If the lock doesn't look claimed we must be speculating.
2320         // (Or the user's code is buggy and they're releasing without locking;
2321         // if we had XTEST we'd be able to check that case...)
2322         _xend();        // Exit speculation
2323         __kmp_update_badness_after_success( lck );
2324     }
2325     else
2326     {   // Since the lock *is* visibly locked we're not speculating,
2327         // so should use the underlying lock's release scheme.
2328         __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2329     }
2330 }
2331 
2332 static void
2333 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2334 {
2335     char const * const func = "omp_unset_lock";
2336     KMP_MB();  /* in case another processor initialized lock */
2337     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2338         KMP_FATAL( LockIsUninitialized, func );
2339     }
2340     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2341         KMP_FATAL( LockUnsettingFree, func );
2342     }
2343     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2344         KMP_FATAL( LockUnsettingSetByAnother, func );
2345     }
2346     lck->lk.qlk.owner_id = 0;
2347     __kmp_release_adaptive_lock( lck, gtid );
2348 }
2349 
// Initialize an adaptive lock: set up the embedded queuing lock, then
// reset the speculation-control fields from the global backoff params.
static void
__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
{
    __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
    lck->lk.adaptive.badness = 0;       // eager speculation to start with
    lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
    lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
    lck->lk.adaptive.max_badness      = __kmp_adaptive_backoff_params.max_badness;
#if KMP_DEBUG_ADAPTIVE_LOCKS
    // Register the lock so its statistics appear in the final report.
    __kmp_zero_speculative_stats( &lck->lk.adaptive );
#endif
    KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
}
2363 
// Checked init is identical to the unchecked one; there is nothing to
// validate before first initialization.
static void
__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
{
    __kmp_init_adaptive_lock( lck );
}
2369 
// Destroy an adaptive lock; only the embedded queuing lock needs
// teardown — the speculative part holds no resources.
static void
__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
{
#if KMP_DEBUG_ADAPTIVE_LOCKS
    // Fold this lock's statistics into the global totals before it goes away.
    __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
#endif
    __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
    // Nothing needed for the speculative part.
}
2379 
2380 static void
2381 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2382 {
2383     char const * const func = "omp_destroy_lock";
2384     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2385         KMP_FATAL( LockIsUninitialized, func );
2386     }
2387     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2388         KMP_FATAL( LockStillOwned, func );
2389     }
2390     __kmp_destroy_adaptive_lock( lck );
2391 }
2392 
2393 
2394 #endif // KMP_USE_ADAPTIVE_LOCKS
2395 
2396 
2397 /* ------------------------------------------------------------------------ */
2398 /* DRDPA ticket locks                                                */
2399 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2400 
// Return the gtid of the thread owning the lock, or -1 if unowned
// (owner_id stores gtid+1, with 0 meaning "no owner").
static kmp_int32
__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}
2406 
// A depth_locked of -1 marks a simple lock; >= 0 marks a nestable one.
static inline bool
__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}
2412 
2413 __forceinline static void
2414 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2415 {
2416     kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2417     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2418     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2419       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2420       TCR_PTR(lck->lk.polls);                           // volatile load
2421 
2422 #ifdef USE_LOCK_PROFILE
2423     if (TCR_8(polls[ticket & mask].poll) != ticket)
2424         __kmp_printf("LOCK CONTENTION: %p\n", lck);
2425     /* else __kmp_printf( "." );*/
2426 #endif /* USE_LOCK_PROFILE */
2427 
2428     //
2429     // Now spin-wait, but reload the polls pointer and mask, in case the
2430     // polling area has been reconfigured.  Unless it is reconfigured, the
2431     // reloads stay in L1 cache and are cheap.
2432     //
2433     // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2434     //
2435     // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2436     // and poll to be re-read every spin iteration.
2437     //
2438     kmp_uint32 spins;
2439 
2440     KMP_FSYNC_PREPARE(lck);
2441     KMP_INIT_YIELD(spins);
2442     while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
2443         // If we are oversubscribed,
2444         // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2445         // CPU Pause is in the macros for yield.
2446         //
2447         KMP_YIELD(TCR_4(__kmp_nth)
2448           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2449         KMP_YIELD_SPIN(spins);
2450 
2451         // Re-read the mask and the poll pointer from the lock structure.
2452         //
2453         // Make certain that "mask" is read before "polls" !!!
2454         //
2455         // If another thread picks reconfigures the polling area and updates
2456         // their values, and we get the new value of mask and the old polls
2457         // pointer, we could access memory beyond the end of the old polling
2458         // area.
2459         //
2460         mask = TCR_8(lck->lk.mask);                     // volatile load
2461         polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2462           TCR_PTR(lck->lk.polls);                       // volatile load
2463     }
2464 
2465     //
2466     // Critical section starts here
2467     //
2468     KMP_FSYNC_ACQUIRED(lck);
2469     KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2470       ticket, lck));
2471     lck->lk.now_serving = ticket;                       // non-volatile store
2472 
2473     //
2474     // Deallocate a garbage polling area if we know that we are the last
2475     // thread that could possibly access it.
2476     //
2477     // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2478     // ticket.
2479     //
2480     if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2481         __kmp_free((void *)lck->lk.old_polls);
2482         lck->lk.old_polls = NULL;
2483         lck->lk.cleanup_ticket = 0;
2484     }
2485 
2486     //
2487     // Check to see if we should reconfigure the polling area.
2488     // If there is still a garbage polling area to be deallocated from a
2489     // previous reconfiguration, let a later thread reconfigure it.
2490     //
2491     if (lck->lk.old_polls == NULL) {
2492         bool reconfigure = false;
2493         volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2494         kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2495 
2496         if (TCR_4(__kmp_nth)
2497           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2498             //
2499             // We are in oversubscription mode.  Contract the polling area
2500             // down to a single location, if that hasn't been done already.
2501             //
2502             if (num_polls > 1) {
2503                 reconfigure = true;
2504                 num_polls = TCR_4(lck->lk.num_polls);
2505                 mask = 0;
2506                 num_polls = 1;
2507                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2508                   __kmp_allocate(num_polls * sizeof(*polls));
2509                 polls[0].poll = ticket;
2510             }
2511         }
2512         else {
2513             //
2514             // We are in under/fully subscribed mode.  Check the number of
2515             // threads waiting on the lock.  The size of the polling area
2516             // should be at least the number of threads waiting.
2517             //
2518             kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2519             if (num_waiting > num_polls) {
2520                 kmp_uint32 old_num_polls = num_polls;
2521                 reconfigure = true;
2522                 do {
2523                     mask = (mask << 1) | 1;
2524                     num_polls *= 2;
2525                 } while (num_polls <= num_waiting);
2526 
2527                 //
2528                 // Allocate the new polling area, and copy the relevant portion
2529                 // of the old polling area to the new area.  __kmp_allocate()
2530                 // zeroes the memory it allocates, and most of the old area is
2531                 // just zero padding, so we only copy the release counters.
2532                 //
2533                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2534                   __kmp_allocate(num_polls * sizeof(*polls));
2535                 kmp_uint32 i;
2536                 for (i = 0; i < old_num_polls; i++) {
2537                     polls[i].poll = old_polls[i].poll;
2538                 }
2539             }
2540         }
2541 
2542         if (reconfigure) {
2543             //
2544             // Now write the updated fields back to the lock structure.
2545             //
2546             // Make certain that "polls" is written before "mask" !!!
2547             //
2548             // If another thread picks up the new value of mask and the old
2549             // polls pointer , it could access memory beyond the end of the
2550             // old polling area.
2551             //
2552             // On x86, we need memory fences.
2553             //
2554             KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2555               ticket, lck, num_polls));
2556 
2557             lck->lk.old_polls = old_polls;              // non-volatile store
2558             lck->lk.polls = polls;                      // volatile store
2559 
2560             KMP_MB();
2561 
2562             lck->lk.num_polls = num_polls;              // non-volatile store
2563             lck->lk.mask = mask;                        // volatile store
2564 
2565             KMP_MB();
2566 
2567             //
2568             // Only after the new polling area and mask have been flushed
2569             // to main memory can we update the cleanup ticket field.
2570             //
2571             // volatile load / non-volatile store
2572             //
2573             lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2574         }
2575     }
2576 }
2577 
// Public (unchecked) blocking acquire for a DRDPA lock; all the work is
// in the forceinlined template above.
void
__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
}
2583 
2584 static void
2585 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2586 {
2587     char const * const func = "omp_set_lock";
2588     if ( lck->lk.initialized != lck ) {
2589         KMP_FATAL( LockIsUninitialized, func );
2590     }
2591     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2592         KMP_FATAL( LockNestableUsedAsSimple, func );
2593     }
2594     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2595         KMP_FATAL( LockIsAlreadyOwned, func );
2596     }
2597 
2598     __kmp_acquire_drdpa_lock( lck, gtid );
2599 
2600     lck->lk.owner_id = gtid + 1;
2601 }
2602 
// Non-blocking acquire: succeed only if our prospective ticket is
// already being served AND we win the CAS that claims it.
int
__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    //
    // First get a ticket, then read the polls pointer and the mask.
    // The polls pointer must be read before the mask!!! (See above)
    //
    kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
    volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
      = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      TCR_PTR(lck->lk.polls);                           // volatile load
    kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
    if (TCR_8(polls[ticket & mask].poll) == ticket) {
        kmp_uint64 next_ticket = ticket + 1;
        // The CAS can fail if another thread took this ticket first;
        // in that case we simply report failure rather than retrying.
        if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
          ticket, next_ticket)) {
            KMP_FSYNC_ACQUIRED(lck);
            KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
               ticket, lck));
            lck->lk.now_serving = ticket;               // non-volatile store

            //
            // Since no threads are waiting, there is no possibility that
            // we would want to reconfigure the polling area.  We might
            // have the cleanup ticket value (which says that it is now
            // safe to deallocate old_polls), but we'll let a later thread
            // which calls __kmp_acquire_lock do that - this routine
            // isn't supposed to block, and we would risk blocks if we
            // called __kmp_free() to do the deallocation.
            //
            return TRUE;
        }
    }
    return FALSE;
}
2638 
2639 static int
2640 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2641 {
2642     char const * const func = "omp_test_lock";
2643     if ( lck->lk.initialized != lck ) {
2644         KMP_FATAL( LockIsUninitialized, func );
2645     }
2646     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2647         KMP_FATAL( LockNestableUsedAsSimple, func );
2648     }
2649 
2650     int retval = __kmp_test_drdpa_lock( lck, gtid );
2651 
2652     if ( retval ) {
2653         lck->lk.owner_id = gtid + 1;
2654     }
2655     return retval;
2656 }
2657 
// Release the DRDPA lock by publishing the next ticket into its poll
// slot, waking whichever waiter spins on that location.
void
__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    //
    // Read the ticket value from the lock data struct, then the polls
    // pointer and the mask.  The polls pointer must be read before the
    // mask!!! (See above)
    //
    kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
    volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
      = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      TCR_PTR(lck->lk.polls);                           // volatile load
    kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
    KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
       ticket - 1, lck));
    KMP_FSYNC_RELEASING(lck);
    // Publish the next ticket; the waiter holding it will see its poll
    // slot reach its ticket value and enter the critical section.
    KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
}
2676 
2677 static void
2678 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2679 {
2680     char const * const func = "omp_unset_lock";
2681     KMP_MB();  /* in case another processor initialized lock */
2682     if ( lck->lk.initialized != lck ) {
2683         KMP_FATAL( LockIsUninitialized, func );
2684     }
2685     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2686         KMP_FATAL( LockNestableUsedAsSimple, func );
2687     }
2688     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2689         KMP_FATAL( LockUnsettingFree, func );
2690     }
2691     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2692       && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2693         KMP_FATAL( LockUnsettingSetByAnother, func );
2694     }
2695     lck->lk.owner_id = 0;
2696     __kmp_release_drdpa_lock( lck, gtid );
2697 }
2698 
// Initialize a DRDPA lock: single-slot polling area, tickets at zero,
// unowned, simple (non-nestable).  initialized is set to the lock's own
// address last; the checked entry points test that field.
void
__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
{
    lck->lk.location = NULL;
    lck->lk.mask = 0;
    lck->lk.num_polls = 1;
    // __kmp_allocate zeroes the polling area (see comment in the
    // acquire template), so poll[0] starts at 0 == first ticket.
    lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
    lck->lk.cleanup_ticket = 0;
    lck->lk.old_polls = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
    lck->lk.initialized = lck;

    KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
}
2717 
static void
__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
{
    // Checked variant of init: no extra validation is possible on an
    // uninitialized lock, so this is a plain pass-through.
    __kmp_init_drdpa_lock( lck );
}
2723 
void
__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
{
    // Tear down a drdpa lock and free its dynamically allocated polling
    // areas.  'initialized' is cleared first so the *_with_checks paths see
    // the lock as dead as early as possible.
    lck->lk.initialized = NULL;
    lck->lk.location    = NULL;
    if (lck->lk.polls != NULL) {
        __kmp_free((void *)lck->lk.polls);
        lck->lk.polls = NULL;
    }
    // old_polls holds a previous polling area whose deferred cleanup never
    // ran; release it here as well.
    if (lck->lk.old_polls != NULL) {
        __kmp_free((void *)lck->lk.old_polls);
        lck->lk.old_polls = NULL;
    }
    lck->lk.mask = 0;
    lck->lk.num_polls = 0;
    lck->lk.cleanup_ticket = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}
2745 
static void
__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
{
    // Error-checking wrapper for omp_destroy_lock(): the lock must be
    // initialized, simple (non-nested), and not currently held.
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_drdpa_lock( lck );
}
2761 
2762 
2763 //
2764 // nested drdpa ticket locks
2765 //
2766 
void
__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    // Acquire a nestable drdpa lock.  If the calling thread already owns it,
    // just bump the recursion depth; otherwise do a real acquire and record
    // ownership.
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;   // recursive re-acquire by the owner
    }
    else {
        __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
        // Barriers order the depth/owner stores after the acquire, so other
        // threads never observe ownership before the lock is really held.
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;  // owner_id is gtid + 1; 0 means unowned
    }
}
2783 
static void
__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    // Error-checking wrapper for omp_set_nest_lock(): the lock must be
    // initialized and must actually be a nestable lock.
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_drdpa_lock( lck, gtid );
}
2796 
int
__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    // Try-lock for a nestable drdpa lock.  Returns the new nesting depth on
    // success (>= 1), or 0 if the lock is held by another thread.
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;   // owner re-entering: always succeeds
    }
    else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
        retval = 0;                        // contended: do not block
    }
    else {
        // First acquisition: publish depth and owner after the lock is held
        // (barriers keep the stores ordered for observers).
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}
2818 
static int
__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    // Error-checking wrapper for omp_test_nest_lock(): validate the lock is
    // initialized and nestable before attempting the try-lock.
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_drdpa_lock( lck, gtid );
}
2831 
void
__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    // Release one level of a nestable drdpa lock; the underlying lock is
    // only released when the recursion depth drops to zero.
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;     // clear ownership before the real release
        __kmp_release_drdpa_lock( lck, gtid );
    }
}
2844 
static void
__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    // Error-checking wrapper for omp_unset_nest_lock(): the lock must be
    // initialized, nestable, currently held, and held by this thread.
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_drdpa_lock( lck, gtid );
}
2864 
void
__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
{
    // Initialize a drdpa lock for nested use: same setup as the simple lock,
    // then mark it nestable by giving it a non-negative depth.
    __kmp_init_drdpa_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
2871 
static void
__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
{
    // Checked variant of nested init: nothing extra to validate, pass through.
    __kmp_init_nested_drdpa_lock( lck );
}
2877 
void
__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
{
    // Destroy a nestable drdpa lock; depth is reset to 0 afterwards (the
    // simple destroy sets it to -1, the simple-lock marker).
    __kmp_destroy_drdpa_lock( lck );
    lck->lk.depth_locked = 0;
}
2884 
static void
__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
{
    // Error-checking wrapper for omp_destroy_nest_lock(): the lock must be
    // initialized, nestable, and not currently held.
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_drdpa_lock( lck );
}
2900 
2901 
2902 //
2903 // access functions to fields which don't exist for all lock kinds.
2904 //
2905 
2906 static int
2907 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2908 {
2909     return lck == lck->lk.initialized;
2910 }
2911 
static const ident_t *
__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
{
    // Source-location accessor used for consistency-check diagnostics.
    return lck->lk.location;
}
2917 
static void
__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
{
    // Record the source location where the lock was set, for diagnostics.
    lck->lk.location = loc;
}
2923 
static kmp_lock_flags_t
__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
{
    // Flags accessor (e.g. the critical-section flag used at cleanup time).
    return lck->lk.flags;
}
2929 
static void
__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
{
    // Flags mutator; see __kmp_user_lock_allocate for the caller.
    lck->lk.flags = flags;
}
2935 
2936 /* ------------------------------------------------------------------------ */
2937 /* user locks
2938  *
2939  * They are implemented as a table of function pointers which are set to the
2940  * lock functions of the appropriate kind, once that has been determined.
2941  */
2942 
// Lock kind selected for OMP user locks; chosen once at runtime startup.
enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

// Sizes for the selected kind: the base struct vs. the full (padded) union.
size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

// Function-pointer "vtable" for the selected user lock kind; populated by
// __kmp_set_user_lock_vptrs().  Entries may remain NULL for kinds that do
// not implement the corresponding operation (e.g. tas/futex accessors).
kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;

// Optional accessors (source location, flags, initialized state); NULL when
// the selected lock kind does not support them.
int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
2968 
// Populate the user-lock function-pointer table (and the two size globals)
// for the selected lock kind.  The KMP_BIND_* macros fill in the acquire/
// test/release/init/destroy entries; the optional accessors are assigned
// explicitly per kind (NULL for tas/futex, which carry no extra fields).
void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
{
    switch ( user_lock_kind ) {
        case lk_default:
        default:
        KMP_ASSERT( 0 );
        // NOTE(review): no break above -- when KMP_ASSERT compiles to nothing
        // (release builds) an unrecognized kind falls through and is treated
        // as a TAS lock.  This looks deliberate; confirm before changing.

        case lk_tas: {
            __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
            __kmp_user_lock_size = sizeof( kmp_tas_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_tas_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
            }
            else {
                KMP_BIND_USER_LOCK(tas);
                KMP_BIND_NESTED_USER_LOCK(tas);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_tas_lock );

             // TAS locks have no location/flags/initialized fields.
             __kmp_is_user_lock_initialized_ =
               ( int ( * )( kmp_user_lock_p ) ) NULL;

             __kmp_get_user_lock_location_ =
               ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

             __kmp_set_user_lock_location_ =
               ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

             __kmp_get_user_lock_flags_ =
               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

             __kmp_set_user_lock_flags_ =
               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
        // NOTE(review): the futex #include guard at the top of this file also
        // lists KMP_ARCH_AARCH64, but this case does not -- confirm whether
        // lk_futex should be available on AArch64 as well.

        case lk_futex: {
            __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
            __kmp_user_lock_size = sizeof( kmp_futex_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_futex_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
            }
            else {
                KMP_BIND_USER_LOCK(futex);
                KMP_BIND_NESTED_USER_LOCK(futex);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_futex_lock );

             // Futex locks, like TAS, carry no location/flags/initialized fields.
             __kmp_is_user_lock_initialized_ =
               ( int ( * )( kmp_user_lock_p ) ) NULL;

             __kmp_get_user_lock_location_ =
               ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

             __kmp_set_user_lock_location_ =
               ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

             __kmp_get_user_lock_flags_ =
               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

             __kmp_set_user_lock_flags_ =
               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)

        case lk_ticket: {
            __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
            __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
            }
            else {
                KMP_BIND_USER_LOCK(ticket);
                KMP_BIND_NESTED_USER_LOCK(ticket);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_ticket_lock );

             __kmp_is_user_lock_initialized_ =
               ( int ( * )( kmp_user_lock_p ) )
               ( &__kmp_is_ticket_lock_initialized );

             __kmp_get_user_lock_location_ =
               ( const ident_t * ( * )( kmp_user_lock_p ) )
               ( &__kmp_get_ticket_lock_location );

             __kmp_set_user_lock_location_ =
               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
               ( &__kmp_set_ticket_lock_location );

             __kmp_get_user_lock_flags_ =
               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
               ( &__kmp_get_ticket_lock_flags );

             __kmp_set_user_lock_flags_ =
               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
               ( &__kmp_set_ticket_lock_flags );
        }
        break;

        case lk_queuing: {
            __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
            __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
            }
            else {
                KMP_BIND_USER_LOCK(queuing);
                KMP_BIND_NESTED_USER_LOCK(queuing);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_queuing_lock );

             __kmp_is_user_lock_initialized_ =
               ( int ( * )( kmp_user_lock_p ) )
               ( &__kmp_is_queuing_lock_initialized );

             __kmp_get_user_lock_location_ =
               ( const ident_t * ( * )( kmp_user_lock_p ) )
               ( &__kmp_get_queuing_lock_location );

             __kmp_set_user_lock_location_ =
               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
               ( &__kmp_set_queuing_lock_location );

             __kmp_get_user_lock_flags_ =
               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
               ( &__kmp_get_queuing_lock_flags );

             __kmp_set_user_lock_flags_ =
               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
               ( &__kmp_set_queuing_lock_flags );
        }
        break;

#if KMP_USE_ADAPTIVE_LOCKS
        // Adaptive locks reuse the queuing-lock accessors; only acquire/
        // release/destroy differ, and there is no nested variant.
        case lk_adaptive: {
            __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
            __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
            }
            else {
                KMP_BIND_USER_LOCK(adaptive);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_adaptive_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_queuing_lock_flags );

        }
        break;
#endif // KMP_USE_ADAPTIVE_LOCKS

        case lk_drdpa: {
            __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
            __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
            }
            else {
                KMP_BIND_USER_LOCK(drdpa);
                KMP_BIND_NESTED_USER_LOCK(drdpa);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_drdpa_lock );

             __kmp_is_user_lock_initialized_ =
               ( int ( * )( kmp_user_lock_p ) )
               ( &__kmp_is_drdpa_lock_initialized );

             __kmp_get_user_lock_location_ =
               ( const ident_t * ( * )( kmp_user_lock_p ) )
               ( &__kmp_get_drdpa_lock_location );

             __kmp_set_user_lock_location_ =
               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
               ( &__kmp_set_drdpa_lock_location );

             __kmp_get_user_lock_flags_ =
               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
               ( &__kmp_get_drdpa_lock_flags );

             __kmp_set_user_lock_flags_ =
               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
               ( &__kmp_set_drdpa_lock_flags );
        }
        break;
    }
}
3230 
3231 
3232 // ----------------------------------------------------------------------------
3233 // User lock table & lock allocation
3234 
// Central table of all allocated user locks.  'used' starts at 1 because
// slot 0 of each table is reserved to chain to the previous (smaller) table
// after a grow; see __kmp_lock_table_insert().
kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
// Free list of recycled user locks (see __kmp_user_lock_free).
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks* __kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3241 
3242 static kmp_lock_index_t
3243 __kmp_lock_table_insert( kmp_user_lock_p lck )
3244 {
3245     // Assume that kmp_global_lock is held upon entry/exit.
3246     kmp_lock_index_t index;
3247     if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3248         kmp_lock_index_t size;
3249         kmp_user_lock_p *table;
3250         kmp_lock_index_t i;
3251         // Reallocate lock table.
3252         if ( __kmp_user_lock_table.allocated == 0 ) {
3253             size = 1024;
3254         }
3255         else {
3256             size = __kmp_user_lock_table.allocated * 2;
3257         }
3258         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3259         memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3260         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3261             // We cannot free the previos table now, sinse it may be in use by other
3262             // threads. So save the pointer to the previous table in in the first element of the
3263             // new table. All the tables will be organized into a list, and could be freed when
3264             // library shutting down.
3265         __kmp_user_lock_table.table = table;
3266         __kmp_user_lock_table.allocated = size;
3267     }
3268     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3269     index = __kmp_user_lock_table.used;
3270     __kmp_user_lock_table.table[ index ] = lck;
3271     ++ __kmp_user_lock_table.used;
3272     return index;
3273 }
3274 
3275 static kmp_user_lock_p
3276 __kmp_lock_block_allocate()
3277 {
3278     // Assume that kmp_global_lock is held upon entry/exit.
3279     static int last_index = 0;
3280     if ( ( last_index >= __kmp_num_locks_in_block )
3281       || ( __kmp_lock_blocks == NULL ) ) {
3282         // Restart the index.
3283         last_index = 0;
3284         // Need to allocate a new block.
3285         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3286         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3287         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3288         // Set up the new block.
3289         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3290         new_block->next_block = __kmp_lock_blocks;
3291         new_block->locks = (void *)buffer;
3292         // Publish the new block.
3293         KMP_MB();
3294         __kmp_lock_blocks = new_block;
3295     }
3296     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3297       [ last_index * __kmp_user_lock_size ] ) );
3298     last_index++;
3299     return ret;
3300 }
3301 
3302 //
3303 // Get memory for a lock. It may be freshly allocated memory or reused memory
3304 // from lock pool.
3305 //
kmp_user_lock_p
__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
  kmp_lock_flags_t flags )
{
    // Allocate (or recycle) a user lock, record it in the lock table, and
    // store either its table index or its pointer into *user_lock, depending
    // on whether the OMP lock variable is large enough to hold a pointer.
    // Runs entirely under __kmp_global_lock.
    kmp_user_lock_p lck;
    kmp_lock_index_t index;
    KMP_DEBUG_ASSERT( user_lock );

    __kmp_acquire_lock( &__kmp_global_lock, gtid );

    if ( __kmp_lock_pool == NULL ) {
        // Lock pool is empty. Allocate new memory.
        if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
            lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
        }
        else {
            lck = __kmp_lock_block_allocate();
        }

        // Insert lock in the table so that it can be freed in __kmp_cleanup,
        // and debugger has info on all allocated locks.
        index = __kmp_lock_table_insert( lck );
    }
    else {
        // Pick up lock from pool; its table index was saved at free time.
        lck = __kmp_lock_pool;
        index = __kmp_lock_pool->pool.index;
        __kmp_lock_pool = __kmp_lock_pool->pool.next;
    }

    //
    // We could potentially differentiate between nested and regular locks
    // here, and do the lock table lookup for regular locks only.
    //
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        // User lock variable is too small for a pointer: hand out the index.
        * ( (kmp_lock_index_t *) user_lock ) = index;
    }
    else {
        * ( (kmp_user_lock_p *) user_lock ) = lck;
    }

    // mark the lock if it is critical section lock.
    __kmp_set_user_lock_flags( lck, flags );

    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper

    return lck;
}
3354 
3355 // Put lock's memory to pool for reusing.
3356 void
3357 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3358 {
3359     kmp_lock_pool_t * lock_pool;
3360 
3361     KMP_DEBUG_ASSERT( user_lock != NULL );
3362     KMP_DEBUG_ASSERT( lck != NULL );
3363 
3364     __kmp_acquire_lock( & __kmp_global_lock, gtid );
3365 
3366     lck->pool.next = __kmp_lock_pool;
3367     __kmp_lock_pool = lck;
3368     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3369         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3370         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3371         lck->pool.index = index;
3372     }
3373 
3374     __kmp_release_lock( & __kmp_global_lock, gtid );
3375 }
3376 
kmp_user_lock_p
__kmp_lookup_user_lock( void **user_lock, char const *func )
{
    // Translate an OMP lock variable back to the internal lock object:
    // either by indexing the global lock table (small lock variables) or by
    // reading the pointer directly.  With consistency checks enabled, a bad
    // variable produces a fatal "uninitialized lock" diagnostic attributed
    // to 'func'.
    kmp_user_lock_p lck = NULL;

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
        // Valid entries occupy slots 1 .. used-1; slot 0 is reserved.
        if ( __kmp_env_consistency_check ) {
            if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
                KMP_FATAL( LockIsUninitialized, func );
            }
        }
        KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        lck = __kmp_user_lock_table.table[index];
    }
    else {
        lck = *( (kmp_user_lock_p *)user_lock );
    }

    if ( __kmp_env_consistency_check ) {
        if ( lck == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    return lck;
}
3411 
void
__kmp_cleanup_user_locks( void )
{
    // Library-shutdown cleanup: destroy and free every user lock still in
    // the table, then reclaim the chained lock tables and block-allocation
    // buffers.  Called once, single-threaded, at shutdown.
    //
    // Reset lock pool. Do not worry about lock in the pool -- we will free
    // them when iterating through lock table (it includes all the locks,
    // dead or alive).
    //
    __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck) \
        ( ( __kmp_get_user_lock_flags_ != NULL ) && \
        ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )

    //
    // Loop through lock table, free all locks.
    //
    // Do not free item [0], it is reserved for lock tables list.
    //
    // FIXME - we are iterating through a list of (pointers to) objects of
    // type union kmp_user_lock, but we have no way of knowing whether the
    // base type is currently "pool" or whatever the global user lock type
    // is.
    //
    // We are relying on the fact that for all of the user lock types
    // (except "tas"), the first field in the lock struct is the "initialized"
    // field, which is set to the address of the lock object itself when
    // the lock is initialized.  When the union is of type "pool", the
    // first field is a pointer to the next object in the free list, which
    // will not be the same address as the object itself.
    //
    // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
    // will fail for "pool" objects on the free list.  This must happen as
    // the "location" field of real user locks overlaps the "index" field
    // of "pool" objects.
    //
    // It would be better to run through the free list, and remove all "pool"
    // objects from the lock table before executing this loop.  However,
    // "pool" objects do not always have their index field set (only on
    // lin_32e), and I don't want to search the lock table for the address
    // of every "pool" object on the free list.
    //
    while ( __kmp_user_lock_table.used > 1 ) {
        const ident *loc;

        //
        // reduce __kmp_user_lock_table.used before freeing the lock,
        // so that state of locks is consistent
        //
        kmp_user_lock_p lck = __kmp_user_lock_table.table[
          --__kmp_user_lock_table.used ];

        if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
          ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
            //
            // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
            // initialized AND it is NOT a critical section (user is not
            // responsible for destroying criticals) AND we know source
            // location to report.
            //
            if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
              ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
              ( loc->psource != NULL ) ) {
                kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
                KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
                  str_loc.line, str_loc.col );
                __kmp_str_loc_free( &str_loc);
            }

#ifdef KMP_DEBUG
            if ( IS_CRITICAL( lck ) ) {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
            }
            else {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
            }
#endif // KMP_DEBUG

            //
            // Cleanup internal lock dynamic resources
            // (for drdpa locks particularly).
            //
            __kmp_destroy_user_lock( lck );
        }

        //
        // Free the lock if block allocation of locks is not used.
        // (Block-allocated locks are reclaimed wholesale below.)
        //
        if ( __kmp_lock_blocks == NULL ) {
            __kmp_free( lck );
        }
    }

#undef IS_CRITICAL

    //
    // delete lock table(s).
    //
    kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
    __kmp_user_lock_table.table = NULL;
    __kmp_user_lock_table.allocated = 0;

    while ( table_ptr != NULL ) {
        //
        // In the first element we saved the pointer to the previous
        // (smaller) lock table.
        //
        kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
        __kmp_free( table_ptr );
        table_ptr = next;
    }

    //
    // Free buffers allocated for blocks of locks.
    //
    kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
    __kmp_lock_blocks = NULL;

    while ( block_ptr != NULL ) {
        kmp_block_of_locks_t *next = block_ptr->next_block;
        __kmp_free( block_ptr->locks );
        //
        // *block_ptr itself was allocated at the end of the locks vector,
        // inside the same buffer as ->locks, so freeing ->locks frees both.
        //
        block_ptr = next;
    }

    TCW_4(__kmp_init_user_locks, FALSE);
}
3541 
3542