1 /*
2  * kmp_lock.cpp -- lock-related functions
3  * $Revision: 43389 $
4  * $Date: 2014-08-11 10:54:01 -0500 (Mon, 11 Aug 2014) $
5  */
6 
7 
8 //===----------------------------------------------------------------------===//
9 //
10 //                     The LLVM Compiler Infrastructure
11 //
12 // This file is dual licensed under the MIT and the University of Illinois Open
13 // Source Licenses. See LICENSE.txt for details.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 
18 #include <stddef.h>
19 
20 #include "kmp.h"
21 #include "kmp_itt.h"
22 #include "kmp_i18n.h"
23 #include "kmp_lock.h"
24 #include "kmp_io.h"
25 
26 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
27 # include <unistd.h>
28 # include <sys/syscall.h>
29 // We should really include <futex.h>, but that causes compatibility problems on different
30 // Linux* OS distributions that either require that you include (or break when you try to include)
31 // <pci/types.h>.
32 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
33 // we just define the constants here and don't include <futex.h>
34 # ifndef FUTEX_WAIT
35 #  define FUTEX_WAIT    0
36 # endif
37 # ifndef FUTEX_WAKE
38 #  define FUTEX_WAKE    1
39 # endif
40 #endif
41 
42 /* Implement spin locks for internal library use.             */
43 /* The algorithm implemented is Lamport's bakery lock [1974]. */
44 
45 void
46 __kmp_validate_locks( void )
47 {
48     int i;
49     kmp_uint32  x, y;
50 
51     /* Check to make sure unsigned arithmetic does wraps properly */
52     x = ~((kmp_uint32) 0) - 2;
53     y = x - 2;
54 
55     for (i = 0; i < 8; ++i, ++x, ++y) {
56         kmp_uint32 z = (x - y);
57         KMP_ASSERT( z == 2 );
58     }
59 
60     KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
61 }
62 
63 
64 /* ------------------------------------------------------------------------ */
65 /* test and set locks */
66 
67 //
68 // For the non-nested locks, we can only assume that the first 4 bytes were
69 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
70 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
71 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
72 //
73 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
74 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
75 //
76 
77 static kmp_int32
78 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
79 {
80     return TCR_4( lck->lk.poll ) - 1;
81 }
82 
//
// Returns true iff the lock was initialized as nestable: nestable locks
// keep depth_locked >= 0, simple locks are initialized with -1.
//
static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}
88 
//
// Blocking acquire of a test-and-set lock for thread `gtid`.  The poll
// word is 0 when free, otherwise owner gtid + 1.  Yields to the OS when
// the machine is oversubscribed (more threads than processors), spins
// with the bounded yield heuristic otherwise.
//
__forceinline static void
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    // Fast path: uncontended lock taken with a single CAS.
    if ( ( lck->lk.poll == 0 )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    // Slow path: cheap read of the poll word first, CAS only when it
    // looks free, yielding between attempts.
    while ( ( lck->lk.poll != 0 ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) {
        //
        // FIXME - use exponential backoff here
        //
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
}
133 
// Public blocking acquire of a test-and-set lock (no consistency checks).
void
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_tas_lock_timed_template( lck, gtid );
}
139 
140 static void
141 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
142 {
143     char const * const func = "omp_set_lock";
144     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
145       && __kmp_is_tas_lock_nestable( lck ) ) {
146         KMP_FATAL( LockNestableUsedAsSimple, func );
147     }
148     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
149         KMP_FATAL( LockIsAlreadyOwned, func );
150     }
151     __kmp_acquire_tas_lock( lck, gtid );
152 }
153 
154 int
155 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
156 {
157     if ( ( lck->lk.poll == 0 )
158       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
159         KMP_FSYNC_ACQUIRED( lck );
160         return TRUE;
161     }
162     return FALSE;
163 }
164 
//
// omp_test_lock() entry with consistency checking: reject a nestable
// lock used through the simple-lock API, then try the lock.
//
static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}
175 
//
// Release a test-and-set lock: store 0 into the poll word with release
// semantics, then yield if the machine is oversubscribed so a waiter
// can be scheduled.
//
void
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL32( &(lck->lk.poll), 0 );

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
}
189 
//
// omp_unset_lock() entry with consistency checking: reject unlocking a
// nestable lock via the simple API, unlocking a free lock, or unlocking
// a lock owned by another thread.  The owner is deliberately re-read for
// each check; do not cache it.
//
static void
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_tas_lock( lck, gtid );
}
208 
// Initialize a simple test-and-set lock to the unlocked state (poll == 0).
void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, 0 );
}
214 
// Checked-API initializer; no extra checks are possible before init.
static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}
220 
// Destroy a simple TAS lock by resetting the poll word; no resources to free.
void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}
226 
//
// omp_destroy_lock() entry with consistency checking: reject destroying
// a nestable lock via the simple API or a lock that is still held.
//
static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}
240 
241 
242 //
243 // nested test and set locks
244 //
245 
246 void
247 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
248 {
249     KMP_DEBUG_ASSERT( gtid >= 0 );
250 
251     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
252         lck->lk.depth_locked += 1;
253     }
254     else {
255         __kmp_acquire_tas_lock_timed_template( lck, gtid );
256         lck->lk.depth_locked = 1;
257     }
258 }
259 
//
// omp_set_nest_lock() entry with consistency checking: reject a simple
// lock used through the nestable API.
//
static void
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_tas_lock( lck, gtid );
}
269 
270 int
271 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
272 {
273     int retval;
274 
275     KMP_DEBUG_ASSERT( gtid >= 0 );
276 
277     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
278         retval = ++lck->lk.depth_locked;
279     }
280     else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
281         retval = 0;
282     }
283     else {
284         KMP_MB();
285         retval = lck->lk.depth_locked = 1;
286     }
287     return retval;
288 }
289 
//
// omp_test_nest_lock() entry with consistency checking: reject a simple
// lock used through the nestable API.
//
static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}
299 
//
// Release one level of a nested TAS lock; the underlying lock is released
// only when the nesting depth drops to zero.
//
void
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
    }
}
310 
//
// omp_unset_nest_lock() entry with consistency checking: reject a simple
// lock used through the nestable API, unlocking a free lock, or
// unlocking a lock owned by another thread.
//
static void
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_tas_lock( lck, gtid );
}
327 
// Initialize a nestable TAS lock: unlocked, with zero nesting depth.
void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
334 
// Checked-API initializer; no extra checks are possible before init.
static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}
340 
// Destroy a nestable TAS lock and reset the nesting depth.
void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}
347 
//
// omp_destroy_nest_lock() entry with consistency checking: reject a
// simple lock used through the nestable API or a lock still held.
//
static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}
360 
361 
362 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
363 
364 /* ------------------------------------------------------------------------ */
365 /* futex locks */
366 
367 // futex locks are really just test and set locks, with a different method
368 // of handling contention.  They take the same amount of space as test and
369 // set locks, and are allocated the same way (i.e. use the area allocated by
370 // the compiler for non-nested locks / allocate nested locks on the heap).
371 
372 static kmp_int32
373 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
374 {
375     return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
376 }
377 
//
// Returns true iff the lock was initialized as nestable: nestable locks
// keep depth_locked >= 0, simple locks are initialized with -1.
//
static inline bool
__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}
383 
//
// Blocking acquire of a futex-based lock.  The poll word is 0 when free,
// otherwise (owner gtid + 1) << 1 with bit 0 set when at least one
// waiter is sleeping in the kernel.  Contenders set bit 0 and then sleep
// via FUTEX_WAIT; the releasing thread issues FUTEX_WAKE when it sees
// the bit set.
//
__forceinline static void
__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    kmp_int32 gtid_code = ( gtid + 1 ) << 1;

    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid_code ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    KMP_FSYNC_PREPARE( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    kmp_int32 poll_val;
    while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0,
      gtid_code ) ) != 0 ) {
        kmp_int32 cond = poll_val & 1;
        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
           lck, gtid, poll_val, cond ) );

        //
        // NOTE: if you try to use the following condition for this branch
        //
        // if ( poll_val & 1 == 0 )
        //
        // Then the 12.0 compiler has a bug where the following block will
        // always be skipped, regardless of the value of the LSB of poll_val.
        //
        if ( ! cond ) {
            //
            // Try to set the lsb in the poll to indicate to the owner
            // thread that they need to wake this thread up.
            //
            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ),
              poll_val, poll_val | 1 ) ) {
                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
                  lck, lck->lk.poll, gtid ) );
                continue;
            }
            poll_val |= 1;

            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
              lck, lck->lk.poll, gtid ) );
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );

        // Sleep only if the poll word still equals poll_val; a non-zero
        // return (e.g. EWOULDBLOCK) means the word changed, so retry.
        kmp_int32 rc;
        if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
          poll_val, NULL, NULL, 0 ) ) != 0 ) {
            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
               lck, gtid, poll_val, rc, errno ) );
            continue;
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );
        //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
        // wake call when releasing the lock, as we have no idea how many
        // other threads are in the queue.
        //
        gtid_code |= 1;
    }

    KMP_FSYNC_ACQUIRED( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );
}
460 
// Public blocking acquire of a futex lock (no consistency checks).
void
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_futex_lock_timed_template( lck, gtid );
}
466 
//
// omp_set_lock() entry with consistency checking: reject a nestable lock
// used through the simple-lock API and re-acquire by the current owner.
//
static void
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_futex_lock( lck, gtid );
}
480 
481 int
482 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
483 {
484     if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
485         KMP_FSYNC_ACQUIRED( lck );
486         return TRUE;
487     }
488     return FALSE;
489 }
490 
//
// omp_test_lock() entry with consistency checking: reject a nestable
// lock used through the simple-lock API, then try the lock.
//
static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}
501 
//
// Release a futex lock: atomically exchange the poll word with 0; if the
// old value had bit 0 set, at least one thread is sleeping in the kernel,
// so issue a FUTEX_WAKE for one waiter.
//
void
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);

    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
       lck, gtid, poll_val ) );

    if ( poll_val & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
           lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
    }

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
}
531 
//
// omp_unset_lock() entry with consistency checking: reject unlocking a
// nestable lock via the simple API, unlocking a free lock, or unlocking
// a lock owned by another thread.
//
static void
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_futex_lock( lck, gtid );
}
550 
// Initialize a simple futex lock to the unlocked state (poll == 0).
void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, 0 );
}
556 
// Checked-API initializer; no extra checks are possible before init.
static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}
562 
// Destroy a simple futex lock by resetting the poll word; no kernel state to free.
void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}
568 
//
// omp_destroy_lock() entry with consistency checking: reject destroying
// a nestable lock via the simple API or a lock that is still held.
//
static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}
582 
583 
584 //
585 // nested futex locks
586 //
587 
//
// Acquire a nestable futex lock: a recursive acquire by the current
// owner only bumps the depth counter; otherwise take the underlying
// lock and start at depth 1.
//
void
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
    }
}
601 
//
// omp_set_nest_lock() entry with consistency checking: reject a simple
// lock used through the nestable API.
//
static void
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_futex_lock( lck, gtid );
}
611 
//
// Non-blocking acquire of a nestable futex lock.  Returns the new
// nesting depth on success and 0 on failure.
//
int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}
631 
//
// omp_test_nest_lock() entry with consistency checking: reject a simple
// lock used through the nestable API.
//
static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}
641 
//
// Release one level of a nested futex lock; the underlying lock is
// released only when the nesting depth drops to zero.
//
void
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
    }
}
652 
//
// omp_unset_nest_lock() entry with consistency checking: reject a simple
// lock used through the nestable API, unlocking a free lock, or
// unlocking a lock owned by another thread.
//
static void
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_futex_lock( lck, gtid );
}
669 
// Initialize a nestable futex lock: unlocked, with zero nesting depth.
void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
676 
// Checked-API initializer; no extra checks are possible before init.
static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}
682 
// Destroy a nestable futex lock and reset the nesting depth.
void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}
689 
//
// omp_destroy_nest_lock() entry with consistency checking: reject a
// simple lock used through the nestable API or a lock still held.
//
static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}
702 
#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
704 
705 
706 /* ------------------------------------------------------------------------ */
707 /* ticket (bakery) locks */
708 
//
// owner_id stores (owner gtid + 1), 0 meaning unowned, so subtracting
// one yields the owner; returns -1 when the lock is free.
//
static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}
714 
//
// Returns true iff the lock was initialized as nestable: nestable locks
// keep depth_locked >= 0, simple locks are initialized with -1.
//
static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}
720 
721 static kmp_uint32
722 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
723 {
724     register kmp_uint32 pause;
725 
726     if (value == checker) {
727         return TRUE;
728     }
729     for (pause = checker - value; pause != 0; --pause);
730     return FALSE;
731 }
732 
//
// Blocking acquire of a ticket (bakery) lock: atomically take the next
// ticket number, then wait until now_serving reaches it.  FIFO fair by
// construction.
//
__forceinline static void
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket;
    KMP_MB();

    // Atomic fetch-and-increment hands out a unique ticket.
    my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );

#ifdef USE_LOCK_PROFILE
    if ( TCR_4( lck->lk.now_serving ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    // Fast path: our ticket is already being served.
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }
    KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    KMP_FSYNC_ACQUIRED(lck);
}
754 
// Public blocking acquire of a ticket lock (no consistency checks).
void
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_ticket_lock_timed_template( lck, gtid );
}
760 
//
// omp_set_lock() entry with consistency checking: reject an
// uninitialized lock, a nestable lock used via the simple API, and a
// re-acquire by the current owner; record the owner after acquiring.
//
static void
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}
779 
//
// Non-blocking acquire attempt: succeed only when the next ticket is the
// one currently being served AND we can atomically claim it by advancing
// next_ticket (the CAS fails if another thread took the ticket first).
//
int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
          my_ticket, next_ticket ) ) {
            KMP_FSYNC_ACQUIRED( lck );
            return TRUE;
        }
    }
    return FALSE;
}
794 
//
// omp_test_lock() entry with consistency checking: reject an
// uninitialized lock and a nestable lock used via the simple API;
// record the owner on success.
//
static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}
813 
//
// Release a ticket lock: advance now_serving with a release store.  The
// distance between next_ticket and now_serving (i.e. the queue length)
// decides whether to yield so waiting threads can run.
//
void
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32  distance;

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );

    KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
}
831 
//
// omp_unset_lock() entry with consistency checking: reject an
// uninitialized lock, a nestable lock used via the simple API, unlocking
// a free lock, and unlocking a lock owned by another thread.  The owner
// record is cleared before the actual release.
//
static void
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    __kmp_release_ticket_lock( lck, gtid );
}
853 
//
// Initialize a simple ticket lock.  Storing the lock's own address in
// lk.initialized is how the checked API later detects initialization.
//
void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    TCW_4( lck->lk.next_ticket, 0 );
    TCW_4( lck->lk.now_serving, 0 );
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // -1 => not a nested lock.
    lck->lk.initialized = (kmp_ticket_lock *)lck;
}
864 
// Checked-API initializer; no extra checks are possible before init.
static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}
870 
//
// Destroy a ticket lock: clearing lk.initialized marks the lock as
// uninitialized for the checked API; remaining fields are reset too.
//
void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location    = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}
881 
//
// omp_destroy_lock() entry with consistency checking: reject an
// uninitialized lock, a nestable lock used via the simple API, and a
// lock that is still held.
//
static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}
897 
898 
899 //
900 // nested ticket locks
901 //
902 
//
// Acquire a nestable ticket lock: a recursive acquire by the current
// owner only bumps the depth counter; otherwise take the underlying
// lock, set the depth, then record the owner (in that order).
//
void
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
}
919 
//
// omp_set_nest_lock() entry with consistency checking: reject an
// uninitialized lock and a simple lock used via the nestable API.
//
static void
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_ticket_lock( lck, gtid );
}
932 
933 int
934 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
935 {
936     int retval;
937 
938     KMP_DEBUG_ASSERT( gtid >= 0 );
939 
940     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
941         retval = ++lck->lk.depth_locked;
942     }
943     else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
944         retval = 0;
945     }
946     else {
947         KMP_MB();
948         retval = lck->lk.depth_locked = 1;
949         KMP_MB();
950         lck->lk.owner_id = gtid + 1;
951     }
952     return retval;
953 }
954 
//
// omp_test_nest_lock() entry with consistency checking: reject an
// uninitialized lock and a simple lock used via the nestable API.
//
static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}
968 
//
// Release one level of a nested ticket lock; when the depth reaches
// zero, clear the owner record before releasing the underlying lock.
//
void
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_ticket_lock( lck, gtid );
    }
}
981 
//
// omp_unset_nest_lock() entry with consistency checking: reject an
// uninitialized lock, a simple lock used via the nestable API, unlocking
// a free lock, and unlocking a lock owned by another thread.
//
static void
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_ticket_lock( lck, gtid );
}
1001 
// Initialize a nestable ticket lock: unlocked, with zero nesting depth.
void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
1008 
static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    // No extra consistency checks are needed at init time.
    __kmp_init_nested_ticket_lock( lck );
}
1014 
void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    // Tear down the underlying ticket lock and reset the nesting depth.
    __kmp_destroy_ticket_lock( lck );
    lck->lk.depth_locked = 0;
}
1021 
1022 static void
1023 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1024 {
1025     char const * const func = "omp_destroy_nest_lock";
1026     if ( lck->lk.initialized != lck ) {
1027         KMP_FATAL( LockIsUninitialized, func );
1028     }
1029     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1030         KMP_FATAL( LockSimpleUsedAsNestable, func );
1031     }
1032     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1033         KMP_FATAL( LockStillOwned, func );
1034     }
1035     __kmp_destroy_nested_ticket_lock( lck );
1036 }
1037 
1038 
1039 //
1040 // access functions to fields which don't exist for all lock kinds.
1041 //
1042 
1043 static int
1044 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1045 {
1046     return lck == lck->lk.initialized;
1047 }
1048 
1049 static const ident_t *
1050 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1051 {
1052     return lck->lk.location;
1053 }
1054 
1055 static void
1056 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1057 {
1058     lck->lk.location = loc;
1059 }
1060 
1061 static kmp_lock_flags_t
1062 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1063 {
1064     return lck->lk.flags;
1065 }
1066 
1067 static void
1068 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1069 {
1070     lck->lk.flags = flags;
1071 }
1072 
1073 /* ------------------------------------------------------------------------ */
1074 /* queuing locks */
1075 
1076 /*
1077  * First the states
1078  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1079  *   UINT_MAX or -1, 0  means lock is held, nobody on queue
1080  *                h, h  means lock is held or about to transition, 1 element on queue
1081  *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1082  *
1083  * Now the transitions
1084  *    Acquire(0,0)  = -1 ,0
1085  *    Release(0,0)  = Error
1086  *    Acquire(-1,0) =  h ,h    h > 0
1087  *    Release(-1,0) =  0 ,0
1088  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1089  *    Release(h,h)  = -1 ,0    h > 0
1090  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1091  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1092  *
1093  * And pictorially
1094  *
1095  *
1096  *          +-----+
1097  *          | 0, 0|------- release -------> Error
1098  *          +-----+
1099  *            |  ^
1100  *     acquire|  |release
1101  *            |  |
1102  *            |  |
1103  *            v  |
1104  *          +-----+
1105  *          |-1, 0|
1106  *          +-----+
1107  *            |  ^
1108  *     acquire|  |release
1109  *            |  |
1110  *            |  |
1111  *            v  |
1112  *          +-----+
1113  *          | h, h|
1114  *          +-----+
1115  *            |  ^
1116  *     acquire|  |release
1117  *            |  |
1118  *            |  |
1119  *            v  |
1120  *          +-----+
1121  *          | h, t|----- acquire, release loopback ---+
1122  *          +-----+                                   |
1123  *               ^                                    |
1124  *               |                                    |
1125  *               +------------------------------------+
1126  *
1127  */
1128 
1129 #ifdef DEBUG_QUEUING_LOCKS
1130 
/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE	1024
// Fixed: the declaration below was missing its terminating ';', which broke
// the build whenever DEBUG_QUEUING_LOCKS was defined.
static char traces[TRACE_BUF_ELE][128] = { 0 };
static int tc = 0;
#define TRACE_LOCK(X,Y)          sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z)      sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
#define TRACE_LOCK_HT(X,Y,Z,Q)   sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );
1138 
1139 static void
1140 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1141   kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1142 {
1143     kmp_int32 t, i;
1144 
1145     __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1146 
1147     i = tc % TRACE_BUF_ELE;
1148     __kmp_printf_no_lock( "%s\n", traces[i] );
1149     i = (i+1) % TRACE_BUF_ELE;
1150     while ( i != (tc % TRACE_BUF_ELE) ) {
1151         __kmp_printf_no_lock( "%s", traces[i] );
1152         i = (i+1) % TRACE_BUF_ELE;
1153     }
1154     __kmp_printf_no_lock( "\n" );
1155 
1156     __kmp_printf_no_lock(
1157              "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1158              gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1159              head_id, tail_id );
1160 
1161     __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1162 
1163     if ( lck->lk.head_id >= 1 ) {
1164         t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1165         while (t > 0) {
1166             __kmp_printf_no_lock( "-> %d ", t );
1167             t = __kmp_threads[t-1]->th.th_next_waiting;
1168         }
1169     }
1170     __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
1171     __kmp_printf_no_lock( "\n\n" );
1172 }
1173 
1174 #endif /* DEBUG_QUEUING_LOCKS */
1175 
1176 static kmp_int32
1177 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1178 {
1179     return TCR_4( lck->lk.owner_id ) - 1;
1180 }
1181 
1182 static inline bool
1183 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1184 {
1185     return lck->lk.depth_locked != -1;
1186 }
1187 
/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
__forceinline static void
__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    /* Enqueue this thread on the lock's wait queue (head_id/tail_id store
       gtid+1 values -- see the state diagram above) and spin on our own
       th_spin_here flag until the releasing thread clears it. */
    register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
    volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
    volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
    volatile kmp_uint32 *spin_here_p;
    kmp_int32 need_mf = 1;

    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));

    KMP_FSYNC_PREPARE( lck );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    spin_here_p = & this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "acq ent" );
    if ( *spin_here_p )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !*spin_here_p );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );


    /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
       that may follow, not just in execution order, but also in visibility order.  This way,
       when a releasing thread observes the changes to the queue by this thread, it can
       rightly assume that spin_here_p has already been set to TRUE, so that when it sets
       spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
       to FALSE before this thread sets it to TRUE, this thread will hang.
    */
    *spin_here_p = TRUE;  /* before enqueuing to prevent race */

    while( 1 ) {
        kmp_int32 enqueued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

        switch ( head ) {

            case -1:
            {
                /* Lock held with an empty queue: try to become the only waiter. */
#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                           this assignment prevents us from entering the if ( t > 0 )
                           condition in the enqueued case below, which is not necessary for
                           this state transition */

                need_mf = 0;
                /* try (-1,0)->(tid,tid); the 64-bit CAS updates head and tail as one pair */
                enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
                  KMP_PACK_64( -1, 0 ),
                  KMP_PACK_64( gtid+1, gtid+1 ) );
#ifdef DEBUG_QUEUING_LOCKS
                  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
#endif
            }
            break;

            default:
            {
                /* Lock held with waiters (head > 0): try to append after the tail. */
                tail = *tail_id_p;
                KMP_DEBUG_ASSERT( tail != gtid + 1 );

#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif

                if ( tail == 0 ) {
                    enqueued = FALSE;  /* (h,0) is a transient state; retry */
                }
                else {
                    need_mf = 0;
                    /* try (h,t) or (h,h)->(h,tid) */
                    enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );

#ifdef DEBUG_QUEUING_LOCKS
                        if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
#endif
                }
            }
            break;

            case 0: /* empty queue */
            {
                kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                /* try (0,0)->(-1,0) */

                /* only legal transition out of head = 0 is head = -1 with no change to tail */
                grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );

                if ( grabbed_lock ) {

                    *spin_here_p = FALSE;

                    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
                              lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                    TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif
                    KMP_FSYNC_ACQUIRED( lck );
                    return; /* lock holder cannot be on queue */
                }
                enqueued = FALSE;
            }
            break;
        }

        if ( enqueued ) {
            if ( tail > 0 ) {
                /* Link the previous tail's next_waiting field to us; the
                   releasing thread spins waiting for this write. */
                kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
                KMP_ASSERT( tail_thr != NULL );
                tail_thr->th.th_next_waiting = gtid+1;
                /* corresponding wait for this write in release code */
            }
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));


            /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
             *       throughput only here.
             */
            KMP_MB();
            /* Spin until the releasing thread dequeues us and clears our spin flag. */
            KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq spin" );

            if ( this_thr->th.th_next_waiting != 0 )
                __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
            KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
                      lck, gtid ));

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq exit 2" );
#endif
            /* got lock, we were dequeued by the thread that released lock */
            return;
        }

        /* Yield if number of threads > number of logical processors */
        /* ToDo: Not sure why this should only be in oversubscription case,
           maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
        KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) );
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "acq retry" );
#endif

    }
    KMP_ASSERT2( 0, "should not get here" );
}
1358 
void
__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    // Public entry point: acquire via the template with takeTime=false.
    KMP_DEBUG_ASSERT( gtid >= 0 );

    __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
}
1366 
1367 static void
1368 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1369   kmp_int32 gtid )
1370 {
1371     char const * const func = "omp_set_lock";
1372     if ( lck->lk.initialized != lck ) {
1373         KMP_FATAL( LockIsUninitialized, func );
1374     }
1375     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1376         KMP_FATAL( LockNestableUsedAsSimple, func );
1377     }
1378     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1379         KMP_FATAL( LockIsAlreadyOwned, func );
1380     }
1381 
1382     __kmp_acquire_queuing_lock( lck, gtid );
1383 
1384     lck->lk.owner_id = gtid + 1;
1385 }
1386 
1387 int
1388 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1389 {
1390     volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
1391     kmp_int32 head;
1392 #ifdef KMP_DEBUG
1393     kmp_info_t *this_thr;
1394 #endif
1395 
1396     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1397     KMP_DEBUG_ASSERT( gtid >= 0 );
1398 #ifdef KMP_DEBUG
1399     this_thr = __kmp_thread_from_gtid( gtid );
1400     KMP_DEBUG_ASSERT( this_thr != NULL );
1401     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1402 #endif
1403 
1404     head = *head_id_p;
1405 
1406     if ( head == 0 ) { /* nobody on queue, nobody holding */
1407 
1408         /* try (0,0)->(-1,0) */
1409 
1410         if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1411             KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1412             KMP_FSYNC_ACQUIRED(lck);
1413             return TRUE;
1414         }
1415     }
1416 
1417     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1418     return FALSE;
1419 }
1420 
1421 static int
1422 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1423 {
1424     char const * const func = "omp_test_lock";
1425     if ( lck->lk.initialized != lck ) {
1426         KMP_FATAL( LockIsUninitialized, func );
1427     }
1428     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1429         KMP_FATAL( LockNestableUsedAsSimple, func );
1430     }
1431 
1432     int retval = __kmp_test_queuing_lock( lck, gtid );
1433 
1434     if ( retval ) {
1435         lck->lk.owner_id = gtid + 1;
1436     }
1437     return retval;
1438 }
1439 
void
__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    /* Release the lock.  If the wait queue is empty, reset to (0,0);
       otherwise dequeue the head waiter and wake it by clearing its
       th_spin_here flag (see the acquire code above). */
    register kmp_info_t *this_thr;
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;

    KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
    this_thr    = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "rel ent" );

    if ( this_thr->th.th_spin_here )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );

    KMP_FSYNC_RELEASING(lck);

    while( 1 ) {
        kmp_int32 dequeued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
        tail = *tail_id_p;
        TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
        if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
        KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */

        if ( head == -1 ) { /* nobody on queue */

            /* try (-1,0)->(0,0) */
            if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
                KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
                          lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
#endif
                return;
            }
            dequeued = FALSE;  /* CAS failed: a waiter enqueued concurrently; retry */

        }
        else {

            tail = *tail_id_p;
            if ( head == tail ) {  /* only one thread on the queue */

#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 );

                /* try (h,h)->(-1,0); the 64-bit CAS updates head and tail as one pair */
                dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
                  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
#endif

            }
            else {
                volatile kmp_int32 *waiting_id_p;
                kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
                KMP_DEBUG_ASSERT( head_thr != NULL );
                waiting_id_p = & head_thr->th.th_next_waiting;

                /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

                /* try (h,t)->(h',t) or (t,t) */

                KMP_MB();
                /* make sure enqueuing thread has time to update next waiting thread field */
                *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
#endif
                dequeued = TRUE;
            }
        }

        if ( dequeued ) {
            /* Wake the dequeued head thread: clear its queue link first,
               then its spin flag. */
            kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
            KMP_DEBUG_ASSERT( head_thr != NULL );

            /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
            if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
            KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

            /* For clean code only.
             * Thread not released until next statement prevents race with acquire code.
             */
            head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
#endif

            KMP_MB();
            /* reset spin value */
            head_thr->th.th_spin_here = FALSE;

            KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
                      lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "rel exit 2" );
#endif
            return;
        }
        /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */

#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "rel retry" );
#endif

    } /* while */
    KMP_ASSERT2( 0, "should not get here" );
}
1572 
1573 static void
1574 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1575   kmp_int32 gtid )
1576 {
1577     char const * const func = "omp_unset_lock";
1578     KMP_MB();  /* in case another processor initialized lock */
1579     if ( lck->lk.initialized != lck ) {
1580         KMP_FATAL( LockIsUninitialized, func );
1581     }
1582     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1583         KMP_FATAL( LockNestableUsedAsSimple, func );
1584     }
1585     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1586         KMP_FATAL( LockUnsettingFree, func );
1587     }
1588     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1589         KMP_FATAL( LockUnsettingSetByAnother, func );
1590     }
1591     lck->lk.owner_id = 0;
1592     __kmp_release_queuing_lock( lck, gtid );
1593 }
1594 
void
__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
{
    // Reset all fields to the unlocked, empty-queue (0,0) state.
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
    lck->lk.initialized = lck; // self-pointer marks the lock as initialized

    KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}
1609 
static void
__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    // No extra consistency checks are needed at init time.
    __kmp_init_queuing_lock( lck );
}
1615 
void
__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
{
    // Invalidate the self-pointer (so checked entry points report
    // LockIsUninitialized) and reset the remaining fields.
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}
1628 
1629 static void
1630 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1631 {
1632     char const * const func = "omp_destroy_lock";
1633     if ( lck->lk.initialized != lck ) {
1634         KMP_FATAL( LockIsUninitialized, func );
1635     }
1636     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1637         KMP_FATAL( LockNestableUsedAsSimple, func );
1638     }
1639     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1640         KMP_FATAL( LockStillOwned, func );
1641     }
1642     __kmp_destroy_queuing_lock( lck );
1643 }
1644 
1645 
1646 //
1647 // nested queuing locks
1648 //
1649 
void
__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    // Recursive acquire: if this thread already owns the lock, just bump
    // the nesting depth; otherwise take the lock and start a fresh count.
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;  // publish ownership only after depth is set
    }
}
1666 
1667 static void
1668 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1669 {
1670     char const * const func = "omp_set_nest_lock";
1671     if ( lck->lk.initialized != lck ) {
1672         KMP_FATAL( LockIsUninitialized, func );
1673     }
1674     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1675         KMP_FATAL( LockSimpleUsedAsNestable, func );
1676     }
1677     __kmp_acquire_nested_queuing_lock( lck, gtid );
1678 }
1679 
int
__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    // Non-blocking recursive acquire.  Returns the new nesting depth on
    // success, or 0 if the lock is held by another thread.
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;  // already ours: one level deeper
    }
    else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
        retval = 0;  // contended: do not block
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;  // publish ownership only after depth is set
    }
    return retval;
}
1701 
1702 static int
1703 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1704   kmp_int32 gtid )
1705 {
1706     char const * const func = "omp_test_nest_lock";
1707     if ( lck->lk.initialized != lck ) {
1708         KMP_FATAL( LockIsUninitialized, func );
1709     }
1710     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1711         KMP_FATAL( LockSimpleUsedAsNestable, func );
1712     }
1713     return __kmp_test_nested_queuing_lock( lck, gtid );
1714 }
1715 
void
__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    // Release one nesting level; the underlying queuing lock is released
    // only when the outermost level (depth 0) is reached.
    // KMP_MB placement is deliberate -- do not reorder these statements.
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;  // clear the recorded owner before releasing
        __kmp_release_queuing_lock( lck, gtid );
    }
}
1728 
1729 static void
1730 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1731 {
1732     char const * const func = "omp_unset_nest_lock";
1733     KMP_MB();  /* in case another processor initialized lock */
1734     if ( lck->lk.initialized != lck ) {
1735         KMP_FATAL( LockIsUninitialized, func );
1736     }
1737     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1738         KMP_FATAL( LockSimpleUsedAsNestable, func );
1739     }
1740     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1741         KMP_FATAL( LockUnsettingFree, func );
1742     }
1743     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1744         KMP_FATAL( LockUnsettingSetByAnother, func );
1745     }
1746     __kmp_release_nested_queuing_lock( lck, gtid );
1747 }
1748 
void
__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
{
    // Initialize as a plain queuing lock, then mark it nestable by giving
    // depth_locked a non-negative value.
    __kmp_init_queuing_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
1755 
static void
__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    // No extra consistency checks are needed at init time.
    __kmp_init_nested_queuing_lock( lck );
}
1761 
void
__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
{
    // Tear down the underlying queuing lock and reset the nesting depth.
    __kmp_destroy_queuing_lock( lck );
    lck->lk.depth_locked = 0;
}
1768 
1769 static void
1770 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1771 {
1772     char const * const func = "omp_destroy_nest_lock";
1773     if ( lck->lk.initialized != lck ) {
1774         KMP_FATAL( LockIsUninitialized, func );
1775     }
1776     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1777         KMP_FATAL( LockSimpleUsedAsNestable, func );
1778     }
1779     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1780         KMP_FATAL( LockStillOwned, func );
1781     }
1782     __kmp_destroy_nested_queuing_lock( lck );
1783 }
1784 
1785 
1786 //
1787 // access functions to fields which don't exist for all lock kinds.
1788 //
1789 
1790 static int
1791 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1792 {
1793     return lck == lck->lk.initialized;
1794 }
1795 
1796 static const ident_t *
1797 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1798 {
1799     return lck->lk.location;
1800 }
1801 
1802 static void
1803 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1804 {
1805     lck->lk.location = loc;
1806 }
1807 
1808 static kmp_lock_flags_t
1809 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1810 {
1811     return lck->lk.flags;
1812 }
1813 
1814 static void
1815 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1816 {
1817     lck->lk.flags = flags;
1818 }
1819 
1820 #if KMP_USE_ADAPTIVE_LOCKS
1821 
1822 /*
1823     RTM Adaptive locks
1824 */
1825 
1826 // TODO: Use the header for intrinsics below with the compiler 13.0
1827 //#include <immintrin.h>
1828 
// Values from the status register after failed speculation.
// These mirror Intel's RTM definitions (see <immintrin.h> / the SDM).
#define _XBEGIN_STARTED          (~0u)     // _xbegin() result while speculating
#define _XABORT_EXPLICIT         (1 << 0)  // aborted by an explicit _xabort()
#define _XABORT_RETRY            (1 << 1)  // hardware suggests a retry may succeed
#define _XABORT_CONFLICT         (1 << 2)  // memory conflict with another thread
#define _XABORT_CAPACITY         (1 << 3)  // transactional buffer capacity exceeded
#define _XABORT_DEBUG            (1 << 4)  // aborted due to a debug trap
#define _XABORT_NESTED           (1 << 5)  // abort occurred in a nested transaction
#define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))  // code given to _xabort()

// Aborts for which it's worth trying again immediately
#define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

// Two-level expansion so macro arguments are expanded before stringizing.
#define STRINGIZE_INTERNAL(arg) #arg
#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1844 
1845 // Access to RTM instructions
1846 
1847 /*
1848   A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1849   This is the same definition as the compiler intrinsic that will be supported at some point.
1850 */
static __inline int _xbegin()
{
    // Returns -1 when speculation starts successfully; on an abort, execution
    // resumes at the fallback point and the abort status from EAX is returned.
    int res = -1;

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86_64
    /* 0xC7 0xF8 <rel32> encodes XBEGIN; the rel32 of 2 skips the 2-byte
       'jmp L2', so an abort lands on 'mov res, eax'. */
    _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
    }
#else /* IA32 */
    /* NOTE(review): identical byte sequence to the X86_64 branch above. */
    _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
    }
#endif // KMP_ARCH_X86_64
#else
    /* Note that %eax must be noted as killed (clobbered), because
     * the XSR is returned in %eax(%rax) on abort.  Other register
     * values are restored, so don't need to be killed.
     *
     * We must also mark 'res' as an input and an output, since otherwise
     * 'res=-1' may be dropped as being dead, whereas we do need the
     * assignment on the successful (i.e., non-abort) path.
     */
    __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
                      "   .long  1f-1b-6\n"
                      "    jmp   2f\n"
                      "1:  movl  %%eax,%0\n"
                      "2:"
                      :"+r"(res)::"memory","%eax");
#endif // KMP_OS_WINDOWS
    return res;
}
1899 
1900 /*
1901   Transaction end
1902 */
1903 static __inline void _xend()
1904 {
1905 #if KMP_OS_WINDOWS
1906     __asm  {
1907         _emit 0x0f
1908         _emit 0x01
1909         _emit 0xd5
1910     }
1911 #else
1912     __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1913 #endif
1914 }
1915 
1916 /*
1917   This is a macro, the argument must be a single byte constant which
1918   can be evaluated by the inline assembler, since it is emitted as a
1919   byte into the assembly code.
1920 */
1921 #if KMP_OS_WINDOWS
1922 #define _xabort(ARG)                            \
1923     _asm _emit 0xc6                             \
1924     _asm _emit 0xf8                             \
1925     _asm _emit ARG
1926 #else
1927 #define _xabort(ARG) \
1928     __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1929 #endif
1930 
1931 //
1932 //    Statistics is collected for testing purpose
1933 //
1934 #if KMP_DEBUG_ADAPTIVE_LOCKS
1935 
1936 // We accumulate speculative lock statistics when the lock is destroyed.
1937 // We keep locks that haven't been destroyed in the liveLocks list
1938 // so that we can grab their statistics too.
1939 static kmp_adaptive_lock_statistics_t destroyedStats;
1940 
1941 // To hold the list of live locks.
1942 static kmp_adaptive_lock_info_t liveLocks;
1943 
1944 // A lock so we can safely update the list of locks.
1945 static kmp_bootstrap_lock_t chain_lock;
1946 
1947 // Initialize the list of stats.
1948 void
1949 __kmp_init_speculative_stats()
1950 {
1951     kmp_adaptive_lock_info_t *lck = &liveLocks;
1952 
1953     memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
1954     lck->stats.next = lck;
1955     lck->stats.prev = lck;
1956 
1957     KMP_ASSERT( lck->stats.next->stats.prev == lck );
1958     KMP_ASSERT( lck->stats.prev->stats.next == lck );
1959 
1960     __kmp_init_bootstrap_lock( &chain_lock );
1961 
1962 }
1963 
1964 // Insert the lock into the circular list
1965 static void
1966 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
1967 {
1968     __kmp_acquire_bootstrap_lock( &chain_lock );
1969 
1970     lck->stats.next = liveLocks.stats.next;
1971     lck->stats.prev = &liveLocks;
1972 
1973     liveLocks.stats.next = lck;
1974     lck->stats.next->stats.prev  = lck;
1975 
1976     KMP_ASSERT( lck->stats.next->stats.prev == lck );
1977     KMP_ASSERT( lck->stats.prev->stats.next == lck );
1978 
1979     __kmp_release_bootstrap_lock( &chain_lock );
1980 }
1981 
1982 static void
1983 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
1984 {
1985     KMP_ASSERT( lck->stats.next->stats.prev == lck );
1986     KMP_ASSERT( lck->stats.prev->stats.next == lck );
1987 
1988     kmp_adaptive_lock_info_t * n = lck->stats.next;
1989     kmp_adaptive_lock_info_t * p = lck->stats.prev;
1990 
1991     n->stats.prev = p;
1992     p->stats.next = n;
1993 }
1994 
1995 static void
1996 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
1997 {
1998     memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
1999     __kmp_remember_lock( lck );
2000 }
2001 
2002 static void
2003 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2004 {
2005     kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2006 
2007     t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2008     t->successfulSpeculations += s->successfulSpeculations;
2009     t->hardFailedSpeculations += s->hardFailedSpeculations;
2010     t->softFailedSpeculations += s->softFailedSpeculations;
2011     t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2012     t->lemmingYields          += s->lemmingYields;
2013 }
2014 
2015 static void
2016 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2017 {
2018     kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2019 
2020     __kmp_acquire_bootstrap_lock( &chain_lock );
2021 
2022     __kmp_add_stats( &destroyedStats, lck );
2023     __kmp_forget_lock( lck );
2024 
2025     __kmp_release_bootstrap_lock( &chain_lock );
2026 }
2027 
2028 static float
2029 percent (kmp_uint32 count, kmp_uint32 total)
2030 {
2031     return (total == 0) ? 0.0: (100.0 * count)/total;
2032 }
2033 
2034 static
2035 FILE * __kmp_open_stats_file()
2036 {
2037     if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2038         return stdout;
2039 
2040     size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2041     char buffer[buffLen];
2042     snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile,
2043       (kmp_int32)getpid());
2044     FILE * result = fopen(&buffer[0], "w");
2045 
2046     // Maybe we should issue a warning here...
2047     return result ? result : stdout;
2048 }
2049 
2050 void
2051 __kmp_print_speculative_stats()
2052 {
2053     if (__kmp_user_lock_kind != lk_adaptive)
2054         return;
2055 
2056     FILE * statsFile = __kmp_open_stats_file();
2057 
2058     kmp_adaptive_lock_statistics_t total = destroyedStats;
2059     kmp_adaptive_lock_info_t *lck;
2060 
2061     for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2062         __kmp_add_stats( &total, lck );
2063     }
2064     kmp_adaptive_lock_statistics_t *t = &total;
2065     kmp_uint32 totalSections     = t->nonSpeculativeAcquires + t->successfulSpeculations;
2066     kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2067                                    t->softFailedSpeculations;
2068 
2069     fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2070     fprintf ( statsFile, " Lock parameters: \n"
2071              "   max_soft_retries               : %10d\n"
2072              "   max_badness                    : %10d\n",
2073              __kmp_adaptive_backoff_params.max_soft_retries,
2074              __kmp_adaptive_backoff_params.max_badness);
2075     fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2076     fprintf( statsFile, " Total critical sections          : %10d\n", totalSections );
2077     fprintf( statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
2078              t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2079     fprintf( statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
2080              t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2081     fprintf( statsFile, " Lemming yields                   : %10d\n\n", t->lemmingYields );
2082 
2083     fprintf( statsFile, " Speculative acquire attempts     : %10d\n", totalSpeculations );
2084     fprintf( statsFile, " Successes                        : %10d (%5.1f%%)\n",
2085              t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2086     fprintf( statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
2087              t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2088     fprintf( statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
2089              t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2090 
2091     if (statsFile != stdout)
2092         fclose( statsFile );
2093 }
2094 
// Bump one per-lock statistic; expands to nothing when adaptive-lock
// statistics gathering is disabled.
# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
#else
# define KMP_INC_STAT(lck,stat)

#endif // KMP_DEBUG_ADAPTIVE_LOCKS
2100 
2101 static inline bool
2102 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2103 {
2104     // It is enough to check that the head_id is zero.
2105     // We don't also need to check the tail.
2106     bool res = lck->lk.head_id == 0;
2107 
2108     // We need a fence here, since we must ensure that no memory operations
2109     // from later in this thread float above that read.
2110 #if KMP_COMPILER_ICC
2111     _mm_mfence();
2112 #else
2113     __sync_synchronize();
2114 #endif
2115 
2116     return res;
2117 }
2118 
2119 // Functions for manipulating the badness
2120 static __inline void
2121 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2122 {
2123     // Reset the badness to zero so we eagerly try to speculate again
2124     lck->lk.adaptive.badness = 0;
2125     KMP_INC_STAT(lck,successfulSpeculations);
2126 }
2127 
2128 // Create a bit mask with one more set bit.
2129 static __inline void
2130 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2131 {
2132     kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2133     if ( newBadness > lck->lk.adaptive.max_badness) {
2134         return;
2135     } else {
2136         lck->lk.adaptive.badness = newBadness;
2137     }
2138 }
2139 
2140 // Check whether speculation should be attempted.
2141 static __inline int
2142 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2143 {
2144     kmp_uint32 badness = lck->lk.adaptive.badness;
2145     kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2146     int res = (attempts & badness) == 0;
2147     return res;
2148 }
2149 
// Attempt to acquire only the speculative lock.
// Does not back off to the non-speculative lock.
//
// Returns 1 when we are now executing transactionally with the lock's
// cache line in our read set (so any real acquisition by another
// thread aborts us); returns 0 when speculation failed after the
// configured soft retries or a hard abort, in which case the badness
// mask is widened before returning.
static int
__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
{
    int retries = lck->lk.adaptive.max_soft_retries;

    // We don't explicitly count the start of speculation, rather we record
    // the results (success, hard fail, soft fail). The sum of all of those
    // is the total number of times we started speculation since all
    // speculations must end one of those ways.
    do
    {
        kmp_uint32 status = _xbegin();
        // Switch this in to disable actual speculation but exercise
        // at least some of the rest of the code. Useful for debugging...
        // kmp_uint32 status = _XABORT_NESTED;

        if (status == _XBEGIN_STARTED )
        { /* We have successfully started speculation
           * Check that no-one acquired the lock for real between when we last looked
           * and now. This also gets the lock cache line into our read-set,
           * which we need so that we'll abort if anyone later claims it for real.
           */
            if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
            {
                // Lock is now visibly acquired, so someone beat us to it.
                // Abort the transaction so we'll restart from _xbegin with the
                // failure status.
                _xabort(0x01)       // no ';' needed: the macro expansion supplies it
                KMP_ASSERT2( 0, "should not get here" );
            }
            return 1;   // Lock has been acquired (speculatively)
        } else {
            // We have aborted, update the statistics
            if ( status & SOFT_ABORT_MASK)
            {
                KMP_INC_STAT(lck,softFailedSpeculations);
                // and loop round to retry.
            }
            else
            {
                KMP_INC_STAT(lck,hardFailedSpeculations);
                // Give up if we had a hard failure.
                break;
            }
        }
    }  while( retries-- ); // Loop while we have retries, and didn't fail hard.

    // Either we had a hard failure or we didn't succeed softly after
    // the full set of attempts, so back off the badness.
    __kmp_step_badness( lck );
    return 0;
}
2205 
2206 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2207 // if the speculative lock cannot be acquired.
2208 // We can succeed speculatively, non-speculatively, or fail.
2209 static int
2210 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2211 {
2212     // First try to acquire the lock speculatively
2213     if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2214         return 1;
2215 
2216     // Speculative acquisition failed, so try to acquire it non-speculatively.
2217     // Count the non-speculative acquire attempt
2218     lck->lk.adaptive.acquire_attempts++;
2219 
2220     // Use base, non-speculative lock.
2221     if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2222     {
2223         KMP_INC_STAT(lck,nonSpeculativeAcquires);
2224         return 1;       // Lock is acquired (non-speculatively)
2225     }
2226     else
2227     {
2228         return 0;       // Failed to acquire the lock, it's already visibly locked.
2229     }
2230 }
2231 
2232 static int
2233 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2234 {
2235     char const * const func = "omp_test_lock";
2236     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2237         KMP_FATAL( LockIsUninitialized, func );
2238     }
2239 
2240     int retval = __kmp_test_adaptive_lock( lck, gtid );
2241 
2242     if ( retval ) {
2243         lck->lk.qlk.owner_id = gtid + 1;
2244     }
2245     return retval;
2246 }
2247 
// Block until we can acquire a speculative, adaptive lock.
// We check whether we should be trying to speculate.
// If we should be, we check the real lock to see if it is free,
// and, if not, pause without attempting to acquire it until it is.
// Then we try the speculative acquire.
// This means that although we suffer from lemmings a little
// (because we can't acquire the lock speculatively until
// the queue of threads waiting has cleared), we don't get into a
// state where we can never acquire the lock speculatively (because we
// force the queue to clear by preventing new arrivals from entering the
// queue).
// This does mean that when we're trying to break lemmings, the lock
// is no longer fair. However OpenMP makes no guarantee that its
// locks are fair, so this isn't a real problem.
static void
__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
{
    if ( __kmp_should_speculate( lck, gtid ) )
    {
        if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
        {
            if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
                return;
            // We tried speculation and failed, so give up.
        }
        else
        {
            // We can't try speculation until the lock is free, so we
            // pause here (without suspending on the queuing lock,
            // to allow it to drain), then try again.
            // All other threads will also see the same result for
            // shouldSpeculate, so will be doing the same if they
            // try to claim the lock from now on.
            while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
            {
                KMP_INC_STAT(lck,lemmingYields);
                __kmp_yield (TRUE);
            }

            if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
                return;
        }
    }

    // Speculative acquisition failed, so acquire it non-speculatively.
    // Count the non-speculative acquire attempt
    lck->lk.adaptive.acquire_attempts++;

    // Block on the underlying queuing lock; speculation is off the table.
    __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
    // We have acquired the base lock, so count that.
    KMP_INC_STAT(lck,nonSpeculativeAcquires );
}
2300 
2301 static void
2302 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2303 {
2304     char const * const func = "omp_set_lock";
2305     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2306         KMP_FATAL( LockIsUninitialized, func );
2307     }
2308     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2309         KMP_FATAL( LockIsAlreadyOwned, func );
2310     }
2311 
2312     __kmp_acquire_adaptive_lock( lck, gtid );
2313 
2314     lck->lk.qlk.owner_id = gtid + 1;
2315 }
2316 
2317 static void
2318 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2319 {
2320     if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2321     {   // If the lock doesn't look claimed we must be speculating.
2322         // (Or the user's code is buggy and they're releasing without locking;
2323         // if we had XTEST we'd be able to check that case...)
2324         _xend();        // Exit speculation
2325         __kmp_update_badness_after_success( lck );
2326     }
2327     else
2328     {   // Since the lock *is* visibly locked we're not speculating,
2329         // so should use the underlying lock's release scheme.
2330         __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2331     }
2332 }
2333 
2334 static void
2335 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2336 {
2337     char const * const func = "omp_unset_lock";
2338     KMP_MB();  /* in case another processor initialized lock */
2339     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2340         KMP_FATAL( LockIsUninitialized, func );
2341     }
2342     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2343         KMP_FATAL( LockUnsettingFree, func );
2344     }
2345     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2346         KMP_FATAL( LockUnsettingSetByAnother, func );
2347     }
2348     lck->lk.qlk.owner_id = 0;
2349     __kmp_release_adaptive_lock( lck, gtid );
2350 }
2351 
2352 static void
2353 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2354 {
2355     __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2356     lck->lk.adaptive.badness = 0;
2357     lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2358     lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2359     lck->lk.adaptive.max_badness      = __kmp_adaptive_backoff_params.max_badness;
2360 #if KMP_DEBUG_ADAPTIVE_LOCKS
2361     __kmp_zero_speculative_stats( &lck->lk.adaptive );
2362 #endif
2363     KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2364 }
2365 
2366 static void
2367 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2368 {
2369     __kmp_init_adaptive_lock( lck );
2370 }
2371 
2372 static void
2373 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2374 {
2375 #if KMP_DEBUG_ADAPTIVE_LOCKS
2376     __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2377 #endif
2378     __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2379     // Nothing needed for the speculative part.
2380 }
2381 
2382 static void
2383 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2384 {
2385     char const * const func = "omp_destroy_lock";
2386     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2387         KMP_FATAL( LockIsUninitialized, func );
2388     }
2389     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2390         KMP_FATAL( LockStillOwned, func );
2391     }
2392     __kmp_destroy_adaptive_lock( lck );
2393 }
2394 
2395 
2396 #endif // KMP_USE_ADAPTIVE_LOCKS
2397 
2398 
2399 /* ------------------------------------------------------------------------ */
2400 /* DRDPA ticket locks                                                */
2401 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2402 
2403 static kmp_int32
2404 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2405 {
2406     return TCR_4( lck->lk.owner_id ) - 1;
2407 }
2408 
2409 static inline bool
2410 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2411 {
2412     return lck->lk.depth_locked != -1;
2413 }
2414 
// Acquire a DRDPA (distributed-polling ticket) lock.  Each waiter
// takes a ticket and spins on its own slot of the polling array
// (index ticket & mask), so different waiters poll different cache
// lines.  After acquiring, the winner may reconfigure the polling
// area: shrink it to one slot when oversubscribed, or grow it when
// more threads are waiting than there are slots.  The new polls
// pointer is always published before the new mask (with fences), so
// readers can never combine a new mask with the old, smaller area.
__forceinline static void
__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    // Atomically take the next ticket, then snapshot mask and polls.
    kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
    kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
    volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
      = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      TCR_PTR(lck->lk.polls);                           // volatile load

#ifdef USE_LOCK_PROFILE
    if (TCR_8(polls[ticket & mask].poll) != ticket)
        __kmp_printf("LOCK CONTENTION: %p\n", lck);
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    //
    // Now spin-wait, but reload the polls pointer and mask, in case the
    // polling area has been reconfigured.  Unless it is reconfigured, the
    // reloads stay in L1 cache and are cheap.
    //
    // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
    //
    // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
    // and poll to be re-read every spin iteration.
    //
    kmp_uint32 spins;

    KMP_FSYNC_PREPARE(lck);
    KMP_INIT_YIELD(spins);
    // NOTE(review): TCR_8 wraps the struct element here but wraps the
    // .poll member in the profile check above — confirm both forms
    // compile to the same volatile 64-bit load.
    while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
        // If we are oversubscribed,
        // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
        // CPU Pause is in the macros for yield.
        //
        KMP_YIELD(TCR_4(__kmp_nth)
          > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
        KMP_YIELD_SPIN(spins);

        // Re-read the mask and the poll pointer from the lock structure.
        //
        // Make certain that "mask" is read before "polls" !!!
        //
        // If another thread reconfigures the polling area and updates
        // their values, and we get the new value of mask and the old polls
        // pointer, we could access memory beyond the end of the old polling
        // area.
        //
        mask = TCR_8(lck->lk.mask);                     // volatile load
        polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
          TCR_PTR(lck->lk.polls);                       // volatile load
    }

    //
    // Critical section starts here
    //
    KMP_FSYNC_ACQUIRED(lck);
    KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
      ticket, lck));
    lck->lk.now_serving = ticket;                       // non-volatile store

    //
    // Deallocate a garbage polling area if we know that we are the last
    // thread that could possibly access it.
    //
    // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
    // ticket.
    //
    if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
        __kmp_free((void *)lck->lk.old_polls);
        lck->lk.old_polls = NULL;
        lck->lk.cleanup_ticket = 0;
    }

    //
    // Check to see if we should reconfigure the polling area.
    // If there is still a garbage polling area to be deallocated from a
    // previous reconfiguration, let a later thread reconfigure it.
    //
    if (lck->lk.old_polls == NULL) {
        bool reconfigure = false;
        volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
        kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);

        if (TCR_4(__kmp_nth)
          > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
            //
            // We are in oversubscription mode.  Contract the polling area
            // down to a single location, if that hasn't been done already.
            //
            if (num_polls > 1) {
                reconfigure = true;
                num_polls = TCR_4(lck->lk.num_polls);   // NOTE(review): dead store — overwritten by "num_polls = 1" two lines below
                mask = 0;
                num_polls = 1;
                polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
                  __kmp_allocate(num_polls * sizeof(*polls));
                polls[0].poll = ticket;
            }
        }
        else {
            //
            // We are in under/fully subscribed mode.  Check the number of
            // threads waiting on the lock.  The size of the polling area
            // should be at least the number of threads waiting.
            //
            kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
            if (num_waiting > num_polls) {
                kmp_uint32 old_num_polls = num_polls;
                reconfigure = true;
                // Grow to the next power of two that exceeds num_waiting,
                // keeping mask == num_polls - 1.
                do {
                    mask = (mask << 1) | 1;
                    num_polls *= 2;
                } while (num_polls <= num_waiting);

                //
                // Allocate the new polling area, and copy the relevant portion
                // of the old polling area to the new area.  __kmp_allocate()
                // zeroes the memory it allocates, and most of the old area is
                // just zero padding, so we only copy the release counters.
                //
                polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
                  __kmp_allocate(num_polls * sizeof(*polls));
                kmp_uint32 i;
                for (i = 0; i < old_num_polls; i++) {
                    polls[i].poll = old_polls[i].poll;
                }
            }
        }

        if (reconfigure) {
            //
            // Now write the updated fields back to the lock structure.
            //
            // Make certain that "polls" is written before "mask" !!!
            //
            // If another thread picks up the new value of mask and the old
            // polls pointer , it could access memory beyond the end of the
            // old polling area.
            //
            // On x86, we need memory fences.
            //
            KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
              ticket, lck, num_polls));

            lck->lk.old_polls = old_polls;              // non-volatile store
            lck->lk.polls = polls;                      // volatile store

            KMP_MB();

            lck->lk.num_polls = num_polls;              // non-volatile store
            lck->lk.mask = mask;                        // volatile store

            KMP_MB();

            //
            // Only after the new polling area and mask have been flushed
            // to main memory can we update the cleanup ticket field.
            //
            // volatile load / non-volatile store
            //
            lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
        }
    }
}
2579 
2580 void
2581 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2582 {
2583     __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2584 }
2585 
2586 static void
2587 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2588 {
2589     char const * const func = "omp_set_lock";
2590     if ( lck->lk.initialized != lck ) {
2591         KMP_FATAL( LockIsUninitialized, func );
2592     }
2593     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2594         KMP_FATAL( LockNestableUsedAsSimple, func );
2595     }
2596     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2597         KMP_FATAL( LockIsAlreadyOwned, func );
2598     }
2599 
2600     __kmp_acquire_drdpa_lock( lck, gtid );
2601 
2602     lck->lk.owner_id = gtid + 1;
2603 }
2604 
// Non-blocking try-acquire for a DRDPA lock.  Succeeds only when the
// next ticket is immediately serviceable and we win the CAS to claim
// it.  Returns TRUE on success, FALSE otherwise.
int
__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    //
    // First get a ticket, then read the polls pointer and the mask.
    // The polls pointer must be read before the mask!!! (See above)
    //
    kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
    volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
      = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      TCR_PTR(lck->lk.polls);                           // volatile load
    kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
    if (TCR_8(polls[ticket & mask].poll) == ticket) {
        kmp_uint64 next_ticket = ticket + 1;
        // CAS on next_ticket: only one thread can claim this ticket.
        if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
          ticket, next_ticket)) {
            KMP_FSYNC_ACQUIRED(lck);
            KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
               ticket, lck));
            lck->lk.now_serving = ticket;               // non-volatile store

            //
            // Since no threads are waiting, there is no possibility that
            // we would want to reconfigure the polling area.  We might
            // have the cleanup ticket value (which says that it is now
            // safe to deallocate old_polls), but we'll let a later thread
            // which calls __kmp_acquire_lock do that - this routine
            // isn't supposed to block, and we would risk blocks if we
            // called __kmp_free() to do the deallocation.
            //
            return TRUE;
        }
    }
    return FALSE;
}
2640 
2641 static int
2642 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2643 {
2644     char const * const func = "omp_test_lock";
2645     if ( lck->lk.initialized != lck ) {
2646         KMP_FATAL( LockIsUninitialized, func );
2647     }
2648     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2649         KMP_FATAL( LockNestableUsedAsSimple, func );
2650     }
2651 
2652     int retval = __kmp_test_drdpa_lock( lck, gtid );
2653 
2654     if ( retval ) {
2655         lck->lk.owner_id = gtid + 1;
2656     }
2657     return retval;
2658 }
2659 
// Release a DRDPA lock by publishing the next ticket into the poll
// slot the next waiter is spinning on.
void
__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    //
    // Read the ticket value from the lock data struct, then the polls
    // pointer and the mask.  The polls pointer must be read before the
    // mask!!! (See above)
    //
    kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
    volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
      = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      TCR_PTR(lck->lk.polls);                           // volatile load
    kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
    KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
       ticket - 1, lck));
    KMP_FSYNC_RELEASING(lck);
    // Release store: hands the lock to the owner of `ticket`.
    KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
}
2678 
2679 static void
2680 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2681 {
2682     char const * const func = "omp_unset_lock";
2683     KMP_MB();  /* in case another processor initialized lock */
2684     if ( lck->lk.initialized != lck ) {
2685         KMP_FATAL( LockIsUninitialized, func );
2686     }
2687     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2688         KMP_FATAL( LockNestableUsedAsSimple, func );
2689     }
2690     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2691         KMP_FATAL( LockUnsettingFree, func );
2692     }
2693     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2694       && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2695         KMP_FATAL( LockUnsettingSetByAnother, func );
2696     }
2697     lck->lk.owner_id = 0;
2698     __kmp_release_drdpa_lock( lck, gtid );
2699 }
2700 
2701 void
2702 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2703 {
2704     lck->lk.location = NULL;
2705     lck->lk.mask = 0;
2706     lck->lk.num_polls = 1;
2707     lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2708       __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2709     lck->lk.cleanup_ticket = 0;
2710     lck->lk.old_polls = NULL;
2711     lck->lk.next_ticket = 0;
2712     lck->lk.now_serving = 0;
2713     lck->lk.owner_id = 0;      // no thread owns the lock.
2714     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2715     lck->lk.initialized = lck;
2716 
2717     KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2718 }
2719 
2720 static void
2721 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2722 {
2723     __kmp_init_drdpa_lock( lck );
2724 }
2725 
2726 void
2727 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2728 {
2729     lck->lk.initialized = NULL;
2730     lck->lk.location    = NULL;
2731     if (lck->lk.polls != NULL) {
2732         __kmp_free((void *)lck->lk.polls);
2733         lck->lk.polls = NULL;
2734     }
2735     if (lck->lk.old_polls != NULL) {
2736         __kmp_free((void *)lck->lk.old_polls);
2737         lck->lk.old_polls = NULL;
2738     }
2739     lck->lk.mask = 0;
2740     lck->lk.num_polls = 0;
2741     lck->lk.cleanup_ticket = 0;
2742     lck->lk.next_ticket = 0;
2743     lck->lk.now_serving = 0;
2744     lck->lk.owner_id = 0;
2745     lck->lk.depth_locked = -1;
2746 }
2747 
2748 static void
2749 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2750 {
2751     char const * const func = "omp_destroy_lock";
2752     if ( lck->lk.initialized != lck ) {
2753         KMP_FATAL( LockIsUninitialized, func );
2754     }
2755     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2756         KMP_FATAL( LockNestableUsedAsSimple, func );
2757     }
2758     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2759         KMP_FATAL( LockStillOwned, func );
2760     }
2761     __kmp_destroy_drdpa_lock( lck );
2762 }
2763 
2764 
2765 //
2766 // nested drdpa ticket locks
2767 //
2768 
2769 void
2770 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2771 {
2772     KMP_DEBUG_ASSERT( gtid >= 0 );
2773 
2774     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2775         lck->lk.depth_locked += 1;
2776     }
2777     else {
2778         __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2779         KMP_MB();
2780         lck->lk.depth_locked = 1;
2781         KMP_MB();
2782         lck->lk.owner_id = gtid + 1;
2783     }
2784 }
2785 
2786 static void
2787 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2788 {
2789     char const * const func = "omp_set_nest_lock";
2790     if ( lck->lk.initialized != lck ) {
2791         KMP_FATAL( LockIsUninitialized, func );
2792     }
2793     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2794         KMP_FATAL( LockSimpleUsedAsNestable, func );
2795     }
2796     __kmp_acquire_nested_drdpa_lock( lck, gtid );
2797 }
2798 
2799 int
2800 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2801 {
2802     int retval;
2803 
2804     KMP_DEBUG_ASSERT( gtid >= 0 );
2805 
2806     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2807         retval = ++lck->lk.depth_locked;
2808     }
2809     else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2810         retval = 0;
2811     }
2812     else {
2813         KMP_MB();
2814         retval = lck->lk.depth_locked = 1;
2815         KMP_MB();
2816         lck->lk.owner_id = gtid + 1;
2817     }
2818     return retval;
2819 }
2820 
2821 static int
2822 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2823 {
2824     char const * const func = "omp_test_nest_lock";
2825     if ( lck->lk.initialized != lck ) {
2826         KMP_FATAL( LockIsUninitialized, func );
2827     }
2828     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2829         KMP_FATAL( LockSimpleUsedAsNestable, func );
2830     }
2831     return __kmp_test_nested_drdpa_lock( lck, gtid );
2832 }
2833 
2834 void
2835 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2836 {
2837     KMP_DEBUG_ASSERT( gtid >= 0 );
2838 
2839     KMP_MB();
2840     if ( --(lck->lk.depth_locked) == 0 ) {
2841         KMP_MB();
2842         lck->lk.owner_id = 0;
2843         __kmp_release_drdpa_lock( lck, gtid );
2844     }
2845 }
2846 
2847 static void
2848 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2849 {
2850     char const * const func = "omp_unset_nest_lock";
2851     KMP_MB();  /* in case another processor initialized lock */
2852     if ( lck->lk.initialized != lck ) {
2853         KMP_FATAL( LockIsUninitialized, func );
2854     }
2855     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2856         KMP_FATAL( LockSimpleUsedAsNestable, func );
2857     }
2858     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2859         KMP_FATAL( LockUnsettingFree, func );
2860     }
2861     if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2862         KMP_FATAL( LockUnsettingSetByAnother, func );
2863     }
2864     __kmp_release_nested_drdpa_lock( lck, gtid );
2865 }
2866 
2867 void
2868 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2869 {
2870     __kmp_init_drdpa_lock( lck );
2871     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2872 }
2873 
2874 static void
2875 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2876 {
2877     __kmp_init_nested_drdpa_lock( lck );
2878 }
2879 
2880 void
2881 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2882 {
2883     __kmp_destroy_drdpa_lock( lck );
2884     lck->lk.depth_locked = 0;
2885 }
2886 
2887 static void
2888 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2889 {
2890     char const * const func = "omp_destroy_nest_lock";
2891     if ( lck->lk.initialized != lck ) {
2892         KMP_FATAL( LockIsUninitialized, func );
2893     }
2894     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2895         KMP_FATAL( LockSimpleUsedAsNestable, func );
2896     }
2897     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2898         KMP_FATAL( LockStillOwned, func );
2899     }
2900     __kmp_destroy_nested_drdpa_lock( lck );
2901 }
2902 
2903 
2904 //
2905 // access functions to fields which don't exist for all lock kinds.
2906 //
2907 
2908 static int
2909 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2910 {
2911     return lck == lck->lk.initialized;
2912 }
2913 
2914 static const ident_t *
2915 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2916 {
2917     return lck->lk.location;
2918 }
2919 
2920 static void
2921 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2922 {
2923     lck->lk.location = loc;
2924 }
2925 
2926 static kmp_lock_flags_t
2927 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2928 {
2929     return lck->lk.flags;
2930 }
2931 
2932 static void
2933 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
2934 {
2935     lck->lk.flags = flags;
2936 }
2937 
2938 /* ------------------------------------------------------------------------ */
2939 /* user locks
2940  *
2941  * They are implemented as a table of function pointers which are set to the
2942  * lock functions of the appropriate kind, once that has been determined.
2943  */
2944 
// Which lock implementation backs user (omp_lock_t) locks; chosen once at
// startup and used by __kmp_set_user_lock_vptrs below to fill in the
// function-pointer table.
enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

// Sizes of the selected lock kind's base struct and full (union) object.
size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

// Dispatch table for the selected lock kind.  Pointers are bound in
// __kmp_set_user_lock_vptrs; entries a lock kind does not support are
// left NULL, so callers must check before invoking them.
kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;

// Optional accessors; NULL for lock kinds (e.g. tas, futex) that do not
// carry initialized/location/flags fields.
int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
2970 
2971 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
2972 {
2973     switch ( user_lock_kind ) {
2974         case lk_default:
2975         default:
2976         KMP_ASSERT( 0 );
2977 
2978         case lk_tas: {
2979             __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
2980             __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
2981 
2982             __kmp_get_user_lock_owner_ =
2983               ( kmp_int32 ( * )( kmp_user_lock_p ) )
2984               ( &__kmp_get_tas_lock_owner );
2985 
2986             if ( __kmp_env_consistency_check ) {
2987                 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
2988                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
2989             }
2990             else {
2991                 KMP_BIND_USER_LOCK(tas);
2992                 KMP_BIND_NESTED_USER_LOCK(tas);
2993             }
2994 
2995             __kmp_destroy_user_lock_ =
2996               ( void ( * )( kmp_user_lock_p ) )
2997               ( &__kmp_destroy_tas_lock );
2998 
2999              __kmp_is_user_lock_initialized_ =
3000                ( int ( * )( kmp_user_lock_p ) ) NULL;
3001 
3002              __kmp_get_user_lock_location_ =
3003                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3004 
3005              __kmp_set_user_lock_location_ =
3006                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3007 
3008              __kmp_get_user_lock_flags_ =
3009                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3010 
3011              __kmp_set_user_lock_flags_ =
3012                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3013         }
3014         break;
3015 
3016 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3017 
3018         case lk_futex: {
3019             __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3020             __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3021 
3022             __kmp_get_user_lock_owner_ =
3023               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3024               ( &__kmp_get_futex_lock_owner );
3025 
3026             if ( __kmp_env_consistency_check ) {
3027                 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3028                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3029             }
3030             else {
3031                 KMP_BIND_USER_LOCK(futex);
3032                 KMP_BIND_NESTED_USER_LOCK(futex);
3033             }
3034 
3035             __kmp_destroy_user_lock_ =
3036               ( void ( * )( kmp_user_lock_p ) )
3037               ( &__kmp_destroy_futex_lock );
3038 
3039              __kmp_is_user_lock_initialized_ =
3040                ( int ( * )( kmp_user_lock_p ) ) NULL;
3041 
3042              __kmp_get_user_lock_location_ =
3043                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3044 
3045              __kmp_set_user_lock_location_ =
3046                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3047 
3048              __kmp_get_user_lock_flags_ =
3049                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3050 
3051              __kmp_set_user_lock_flags_ =
3052                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3053         }
3054         break;
3055 
3056 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3057 
3058         case lk_ticket: {
3059             __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3060             __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3061 
3062             __kmp_get_user_lock_owner_ =
3063               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3064               ( &__kmp_get_ticket_lock_owner );
3065 
3066             if ( __kmp_env_consistency_check ) {
3067                 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3068                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3069             }
3070             else {
3071                 KMP_BIND_USER_LOCK(ticket);
3072                 KMP_BIND_NESTED_USER_LOCK(ticket);
3073             }
3074 
3075             __kmp_destroy_user_lock_ =
3076               ( void ( * )( kmp_user_lock_p ) )
3077               ( &__kmp_destroy_ticket_lock );
3078 
3079              __kmp_is_user_lock_initialized_ =
3080                ( int ( * )( kmp_user_lock_p ) )
3081                ( &__kmp_is_ticket_lock_initialized );
3082 
3083              __kmp_get_user_lock_location_ =
3084                ( const ident_t * ( * )( kmp_user_lock_p ) )
3085                ( &__kmp_get_ticket_lock_location );
3086 
3087              __kmp_set_user_lock_location_ =
3088                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3089                ( &__kmp_set_ticket_lock_location );
3090 
3091              __kmp_get_user_lock_flags_ =
3092                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3093                ( &__kmp_get_ticket_lock_flags );
3094 
3095              __kmp_set_user_lock_flags_ =
3096                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3097                ( &__kmp_set_ticket_lock_flags );
3098         }
3099         break;
3100 
3101         case lk_queuing: {
3102             __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3103             __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3104 
3105             __kmp_get_user_lock_owner_ =
3106               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3107               ( &__kmp_get_queuing_lock_owner );
3108 
3109             if ( __kmp_env_consistency_check ) {
3110                 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3111                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3112             }
3113             else {
3114                 KMP_BIND_USER_LOCK(queuing);
3115                 KMP_BIND_NESTED_USER_LOCK(queuing);
3116             }
3117 
3118             __kmp_destroy_user_lock_ =
3119               ( void ( * )( kmp_user_lock_p ) )
3120               ( &__kmp_destroy_queuing_lock );
3121 
3122              __kmp_is_user_lock_initialized_ =
3123                ( int ( * )( kmp_user_lock_p ) )
3124                ( &__kmp_is_queuing_lock_initialized );
3125 
3126              __kmp_get_user_lock_location_ =
3127                ( const ident_t * ( * )( kmp_user_lock_p ) )
3128                ( &__kmp_get_queuing_lock_location );
3129 
3130              __kmp_set_user_lock_location_ =
3131                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3132                ( &__kmp_set_queuing_lock_location );
3133 
3134              __kmp_get_user_lock_flags_ =
3135                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3136                ( &__kmp_get_queuing_lock_flags );
3137 
3138              __kmp_set_user_lock_flags_ =
3139                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3140                ( &__kmp_set_queuing_lock_flags );
3141         }
3142         break;
3143 
3144 #if KMP_USE_ADAPTIVE_LOCKS
3145         case lk_adaptive: {
3146             __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3147             __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3148 
3149             __kmp_get_user_lock_owner_ =
3150               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3151               ( &__kmp_get_queuing_lock_owner );
3152 
3153             if ( __kmp_env_consistency_check ) {
3154                 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3155             }
3156             else {
3157                 KMP_BIND_USER_LOCK(adaptive);
3158             }
3159 
3160             __kmp_destroy_user_lock_ =
3161               ( void ( * )( kmp_user_lock_p ) )
3162               ( &__kmp_destroy_adaptive_lock );
3163 
3164             __kmp_is_user_lock_initialized_ =
3165               ( int ( * )( kmp_user_lock_p ) )
3166               ( &__kmp_is_queuing_lock_initialized );
3167 
3168             __kmp_get_user_lock_location_ =
3169               ( const ident_t * ( * )( kmp_user_lock_p ) )
3170               ( &__kmp_get_queuing_lock_location );
3171 
3172             __kmp_set_user_lock_location_ =
3173               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3174               ( &__kmp_set_queuing_lock_location );
3175 
3176             __kmp_get_user_lock_flags_ =
3177               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3178               ( &__kmp_get_queuing_lock_flags );
3179 
3180             __kmp_set_user_lock_flags_ =
3181               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3182               ( &__kmp_set_queuing_lock_flags );
3183 
3184         }
3185         break;
3186 #endif // KMP_USE_ADAPTIVE_LOCKS
3187 
3188         case lk_drdpa: {
3189             __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3190             __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3191 
3192             __kmp_get_user_lock_owner_ =
3193               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3194               ( &__kmp_get_drdpa_lock_owner );
3195 
3196             if ( __kmp_env_consistency_check ) {
3197                 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3198                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3199             }
3200             else {
3201                 KMP_BIND_USER_LOCK(drdpa);
3202                 KMP_BIND_NESTED_USER_LOCK(drdpa);
3203             }
3204 
3205             __kmp_destroy_user_lock_ =
3206               ( void ( * )( kmp_user_lock_p ) )
3207               ( &__kmp_destroy_drdpa_lock );
3208 
3209              __kmp_is_user_lock_initialized_ =
3210                ( int ( * )( kmp_user_lock_p ) )
3211                ( &__kmp_is_drdpa_lock_initialized );
3212 
3213              __kmp_get_user_lock_location_ =
3214                ( const ident_t * ( * )( kmp_user_lock_p ) )
3215                ( &__kmp_get_drdpa_lock_location );
3216 
3217              __kmp_set_user_lock_location_ =
3218                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3219                ( &__kmp_set_drdpa_lock_location );
3220 
3221              __kmp_get_user_lock_flags_ =
3222                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3223                ( &__kmp_get_drdpa_lock_flags );
3224 
3225              __kmp_set_user_lock_flags_ =
3226                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3227                ( &__kmp_set_drdpa_lock_flags );
3228         }
3229         break;
3230     }
3231 }
3232 
3233 
3234 // ----------------------------------------------------------------------------
3235 // User lock table & lock allocation
3236 
3237 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3238 kmp_user_lock_p __kmp_lock_pool = NULL;
3239 
3240 // Lock block-allocation support.
3241 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3242 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3243 
3244 static kmp_lock_index_t
3245 __kmp_lock_table_insert( kmp_user_lock_p lck )
3246 {
3247     // Assume that kmp_global_lock is held upon entry/exit.
3248     kmp_lock_index_t index;
3249     if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3250         kmp_lock_index_t size;
3251         kmp_user_lock_p *table;
3252         kmp_lock_index_t i;
3253         // Reallocate lock table.
3254         if ( __kmp_user_lock_table.allocated == 0 ) {
3255             size = 1024;
3256         }
3257         else {
3258             size = __kmp_user_lock_table.allocated * 2;
3259         }
3260         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3261         memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3262         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3263             // We cannot free the previos table now, sinse it may be in use by other
3264             // threads. So save the pointer to the previous table in in the first element of the
3265             // new table. All the tables will be organized into a list, and could be freed when
3266             // library shutting down.
3267         __kmp_user_lock_table.table = table;
3268         __kmp_user_lock_table.allocated = size;
3269     }
3270     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3271     index = __kmp_user_lock_table.used;
3272     __kmp_user_lock_table.table[ index ] = lck;
3273     ++ __kmp_user_lock_table.used;
3274     return index;
3275 }
3276 
3277 static kmp_user_lock_p
3278 __kmp_lock_block_allocate()
3279 {
3280     // Assume that kmp_global_lock is held upon entry/exit.
3281     static int last_index = 0;
3282     if ( ( last_index >= __kmp_num_locks_in_block )
3283       || ( __kmp_lock_blocks == NULL ) ) {
3284         // Restart the index.
3285         last_index = 0;
3286         // Need to allocate a new block.
3287         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3288         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3289         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3290         // Set up the new block.
3291         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3292         new_block->next_block = __kmp_lock_blocks;
3293         new_block->locks = (void *)buffer;
3294         // Publish the new block.
3295         KMP_MB();
3296         __kmp_lock_blocks = new_block;
3297     }
3298     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3299       [ last_index * __kmp_user_lock_size ] ) );
3300     last_index++;
3301     return ret;
3302 }
3303 
3304 //
3305 // Get memory for a lock. It may be freshly allocated memory or reused memory
3306 // from lock pool.
3307 //
3308 kmp_user_lock_p
3309 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3310   kmp_lock_flags_t flags )
3311 {
3312     kmp_user_lock_p lck;
3313     kmp_lock_index_t index;
3314     KMP_DEBUG_ASSERT( user_lock );
3315 
3316     __kmp_acquire_lock( &__kmp_global_lock, gtid );
3317 
3318     if ( __kmp_lock_pool == NULL ) {
3319         // Lock pool is empty. Allocate new memory.
3320         if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3321             lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3322         }
3323         else {
3324             lck = __kmp_lock_block_allocate();
3325         }
3326 
3327         // Insert lock in the table so that it can be freed in __kmp_cleanup,
3328         // and debugger has info on all allocated locks.
3329         index = __kmp_lock_table_insert( lck );
3330     }
3331     else {
3332         // Pick up lock from pool.
3333         lck = __kmp_lock_pool;
3334         index = __kmp_lock_pool->pool.index;
3335         __kmp_lock_pool = __kmp_lock_pool->pool.next;
3336     }
3337 
3338     //
3339     // We could potentially differentiate between nested and regular locks
3340     // here, and do the lock table lookup for regular locks only.
3341     //
3342     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3343         * ( (kmp_lock_index_t *) user_lock ) = index;
3344     }
3345     else {
3346         * ( (kmp_user_lock_p *) user_lock ) = lck;
3347     }
3348 
3349     // mark the lock if it is critical section lock.
3350     __kmp_set_user_lock_flags( lck, flags );
3351 
3352     __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper
3353 
3354     return lck;
3355 }
3356 
3357 // Put lock's memory to pool for reusing.
3358 void
3359 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3360 {
3361     kmp_lock_pool_t * lock_pool;
3362 
3363     KMP_DEBUG_ASSERT( user_lock != NULL );
3364     KMP_DEBUG_ASSERT( lck != NULL );
3365 
3366     __kmp_acquire_lock( & __kmp_global_lock, gtid );
3367 
3368     lck->pool.next = __kmp_lock_pool;
3369     __kmp_lock_pool = lck;
3370     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3371         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3372         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3373         lck->pool.index = index;
3374     }
3375 
3376     __kmp_release_lock( & __kmp_global_lock, gtid );
3377 }
3378 
3379 kmp_user_lock_p
3380 __kmp_lookup_user_lock( void **user_lock, char const *func )
3381 {
3382     kmp_user_lock_p lck = NULL;
3383 
3384     if ( __kmp_env_consistency_check ) {
3385         if ( user_lock == NULL ) {
3386             KMP_FATAL( LockIsUninitialized, func );
3387         }
3388     }
3389 
3390     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3391         kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3392         if ( __kmp_env_consistency_check ) {
3393             if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3394                 KMP_FATAL( LockIsUninitialized, func );
3395             }
3396         }
3397         KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3398         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3399         lck = __kmp_user_lock_table.table[index];
3400     }
3401     else {
3402         lck = *( (kmp_user_lock_p *)user_lock );
3403     }
3404 
3405     if ( __kmp_env_consistency_check ) {
3406         if ( lck == NULL ) {
3407             KMP_FATAL( LockIsUninitialized, func );
3408         }
3409     }
3410 
3411     return lck;
3412 }
3413 
3414 void
3415 __kmp_cleanup_user_locks( void )
3416 {
3417     //
3418     // Reset lock pool. Do not worry about lock in the pool -- we will free
3419     // them when iterating through lock table (it includes all the locks,
3420     // dead or alive).
3421     //
3422     __kmp_lock_pool = NULL;
3423 
3424 #define IS_CRITICAL(lck) \
3425         ( ( __kmp_get_user_lock_flags_ != NULL ) && \
3426         ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
3427 
3428     //
3429     // Loop through lock table, free all locks.
3430     //
3431     // Do not free item [0], it is reserved for lock tables list.
3432     //
3433     // FIXME - we are iterating through a list of (pointers to) objects of
3434     // type union kmp_user_lock, but we have no way of knowing whether the
3435     // base type is currently "pool" or whatever the global user lock type
3436     // is.
3437     //
3438     // We are relying on the fact that for all of the user lock types
3439     // (except "tas"), the first field in the lock struct is the "initialized"
3440     // field, which is set to the address of the lock object itself when
3441     // the lock is initialized.  When the union is of type "pool", the
3442     // first field is a pointer to the next object in the free list, which
3443     // will not be the same address as the object itself.
3444     //
3445     // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
3446     // will fail for "pool" objects on the free list.  This must happen as
3447     // the "location" field of real user locks overlaps the "index" field
3448     // of "pool" objects.
3449     //
3450     // It would be better to run through the free list, and remove all "pool"
3451     // objects from the lock table before executing this loop.  However,
3452     // "pool" objects do not always have their index field set (only on
3453     // lin_32e), and I don't want to search the lock table for the address
3454     // of every "pool" object on the free list.
3455     //
3456     while ( __kmp_user_lock_table.used > 1 ) {
3457         const ident *loc;
3458 
3459         //
3460         // reduce __kmp_user_lock_table.used before freeing the lock,
3461         // so that state of locks is consistent
3462         //
3463         kmp_user_lock_p lck = __kmp_user_lock_table.table[
3464           --__kmp_user_lock_table.used ];
3465 
3466         if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
3467           ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
3468             //
3469             // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
3470             // initialized AND it is NOT a critical section (user is not
3471             // responsible for destroying criticals) AND we know source
3472             // location to report.
3473             //
3474             if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
3475               ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
3476               ( loc->psource != NULL ) ) {
3477                 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
3478                 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
3479                   str_loc.line, str_loc.col );
3480                 __kmp_str_loc_free( &str_loc);
3481             }
3482 
3483 #ifdef KMP_DEBUG
3484             if ( IS_CRITICAL( lck ) ) {
3485                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
3486             }
3487             else {
3488                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
3489             }
3490 #endif // KMP_DEBUG
3491 
3492             //
3493             // Cleanup internal lock dynamic resources
3494             // (for drdpa locks particularly).
3495             //
3496             __kmp_destroy_user_lock( lck );
3497         }
3498 
3499         //
3500         // Free the lock if block allocation of locks is not used.
3501         //
3502         if ( __kmp_lock_blocks == NULL ) {
3503             __kmp_free( lck );
3504         }
3505     }
3506 
3507 #undef IS_CRITICAL
3508 
3509     //
3510     // delete lock table(s).
3511     //
3512     kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3513     __kmp_user_lock_table.table = NULL;
3514     __kmp_user_lock_table.allocated = 0;
3515 
3516     while ( table_ptr != NULL ) {
3517         //
3518         // In the first element we saved the pointer to the previous
3519         // (smaller) lock table.
3520         //
3521         kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
3522         __kmp_free( table_ptr );
3523         table_ptr = next;
3524     }
3525 
3526     //
3527     // Free buffers allocated for blocks of locks.
3528     //
3529     kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
3530     __kmp_lock_blocks = NULL;
3531 
3532     while ( block_ptr != NULL ) {
3533         kmp_block_of_locks_t *next = block_ptr->next_block;
3534         __kmp_free( block_ptr->locks );
3535         //
3536         // *block_ptr itself was allocated at the end of the locks vector.
3537         //
3538 	block_ptr = next;
3539     }
3540 
3541     TCW_4(__kmp_init_user_locks, FALSE);
3542 }
3543 
3544