1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 
18 #include "kmp.h"
19 #include "kmp_itt.h"
20 #include "kmp_i18n.h"
21 #include "kmp_lock.h"
22 #include "kmp_io.h"
23 
24 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
25 # include <unistd.h>
26 # include <sys/syscall.h>
27 // We should really include <futex.h>, but that causes compatibility problems on different
28 // Linux* OS distributions that either require that you include (or break when you try to include)
29 // <pci/types.h>.
30 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change),
31 // we just define the constants here and don't include <futex.h>.
32 # ifndef FUTEX_WAIT
33 #  define FUTEX_WAIT    0
34 # endif
35 # ifndef FUTEX_WAKE
36 #  define FUTEX_WAKE    1
37 # endif
38 #endif
39 
40 /* Implement spin locks for internal library use.             */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
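/* Illustrative note on the check below (inferred from the ticket-lock code
   later in this file, not a normative statement): the ticket ("bakery") locks
   keep free-running kmp_uint32 counters (next_ticket / now_serving), so
   correctness relies on unsigned subtraction behaving properly across
   wraparound.  For example:

       now_serving = 0xFFFFFFFE, next_ticket = 0x00000000  (wrapped)
       next_ticket - now_serving == 2

   __kmp_validate_locks() asserts exactly this kind of invariant.            */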
42 
43 void
44 __kmp_validate_locks( void )
45 {
46     int i;
47     kmp_uint32  x, y;
48 
49     /* Check to make sure unsigned arithmetic wraps properly */
50     x = ~((kmp_uint32) 0) - 2;
51     y = x - 2;
52 
53     for (i = 0; i < 8; ++i, ++x, ++y) {
54         kmp_uint32 z = (x - y);
55         KMP_ASSERT( z == 2 );
56     }
57 
58     KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
59 }
60 
61 
62 /* ------------------------------------------------------------------------ */
63 /* test and set locks */
64 
65 //
66 // For the non-nested locks, we can only assume that the first 4 bytes were
67 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
68 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
69 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
70 //
71 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
72 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
73 //
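//
// Informal sketch of the TAS lock state encoding used by the routines below
// (a summary of the code, not a separate specification):
//
//     lk.poll == DYNA_LOCK_FREE(tas)           => lock is free
//     lk.poll == DYNA_LOCK_BUSY(gtid+1, tas)   => lock is held by global thread id gtid
//
// __kmp_get_tas_lock_owner() strips the tag and subtracts 1, so it returns
// -1 when nobody owns the lock.
//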
74 
75 static kmp_int32
76 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
77 {
78     return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
79 }
80 
81 static inline bool
82 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
83 {
84     return lck->lk.depth_locked != -1;
85 }
86 
87 __forceinline static void
88 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
89 {
90     KMP_MB();
91 
92 #ifdef USE_LOCK_PROFILE
93     kmp_uint32 curr = TCR_4( lck->lk.poll );
94     if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
95         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
96     /* else __kmp_printf( "." );*/
97 #endif /* USE_LOCK_PROFILE */
98 
99     if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
100       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
101         KMP_FSYNC_ACQUIRED(lck);
102         return;
103     }
104 
105     kmp_uint32 spins;
106     KMP_FSYNC_PREPARE( lck );
107     KMP_INIT_YIELD( spins );
108     if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
109       __kmp_xproc ) ) {
110         KMP_YIELD( TRUE );
111     }
112     else {
113         KMP_YIELD_SPIN( spins );
114     }
115 
116     while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
117       ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
118         //
119         // FIXME - use exponential backoff here
120         //
121         if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
122           __kmp_xproc ) ) {
123             KMP_YIELD( TRUE );
124         }
125         else {
126             KMP_YIELD_SPIN( spins );
127         }
128     }
129     KMP_FSYNC_ACQUIRED( lck );
130 }
131 
132 void
133 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
134 {
135     __kmp_acquire_tas_lock_timed_template( lck, gtid );
136 }
137 
138 static void
139 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
140 {
141     char const * const func = "omp_set_lock";
142     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
143       && __kmp_is_tas_lock_nestable( lck ) ) {
144         KMP_FATAL( LockNestableUsedAsSimple, func );
145     }
146     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
147         KMP_FATAL( LockIsAlreadyOwned, func );
148     }
149     __kmp_acquire_tas_lock( lck, gtid );
150 }
151 
152 int
153 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
154 {
155     if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
156       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
157         KMP_FSYNC_ACQUIRED( lck );
158         return TRUE;
159     }
160     return FALSE;
161 }
162 
163 static int
164 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
165 {
166     char const * const func = "omp_test_lock";
167     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
168       && __kmp_is_tas_lock_nestable( lck ) ) {
169         KMP_FATAL( LockNestableUsedAsSimple, func );
170     }
171     return __kmp_test_tas_lock( lck, gtid );
172 }
173 
174 void
175 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
176 {
177     KMP_MB();       /* Flush all pending memory write invalidates.  */
178 
179     KMP_FSYNC_RELEASING(lck);
180     KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
181     KMP_MB();       /* Flush all pending memory write invalidates.  */
182 
183     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
184       __kmp_xproc ) );
185 }
186 
187 static void
188 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
189 {
190     char const * const func = "omp_unset_lock";
191     KMP_MB();  /* in case another processor initialized lock */
192     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
193       && __kmp_is_tas_lock_nestable( lck ) ) {
194         KMP_FATAL( LockNestableUsedAsSimple, func );
195     }
196     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
197         KMP_FATAL( LockUnsettingFree, func );
198     }
199     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
200       && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
201         KMP_FATAL( LockUnsettingSetByAnother, func );
202     }
203     __kmp_release_tas_lock( lck, gtid );
204 }
205 
206 void
207 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
208 {
209     TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) );
210 }
211 
212 static void
213 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
214 {
215     __kmp_init_tas_lock( lck );
216 }
217 
218 void
219 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
220 {
221     lck->lk.poll = 0;
222 }
223 
224 static void
225 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
226 {
227     char const * const func = "omp_destroy_lock";
228     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
229       && __kmp_is_tas_lock_nestable( lck ) ) {
230         KMP_FATAL( LockNestableUsedAsSimple, func );
231     }
232     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
233         KMP_FATAL( LockStillOwned, func );
234     }
235     __kmp_destroy_tas_lock( lck );
236 }
237 
238 
239 //
240 // nested test and set locks
241 //
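//
// Informal summary of the nestable variants below: a nestable TAS lock adds a
// depth_locked counter.  Re-acquisition by the current owner just increments
// the depth, and the underlying TAS lock is released only when the depth
// drops back to zero.  depth_locked is -1 for simple (non-nestable) locks,
// which is what __kmp_is_tas_lock_nestable() checks.
//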
242 
243 void
244 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
245 {
246     KMP_DEBUG_ASSERT( gtid >= 0 );
247 
248     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
249         lck->lk.depth_locked += 1;
250     }
251     else {
252         __kmp_acquire_tas_lock_timed_template( lck, gtid );
253         lck->lk.depth_locked = 1;
254     }
255 }
256 
257 static void
258 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
259 {
260     char const * const func = "omp_set_nest_lock";
261     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
262         KMP_FATAL( LockSimpleUsedAsNestable, func );
263     }
264     __kmp_acquire_nested_tas_lock( lck, gtid );
265 }
266 
267 int
268 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
269 {
270     int retval;
271 
272     KMP_DEBUG_ASSERT( gtid >= 0 );
273 
274     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
275         retval = ++lck->lk.depth_locked;
276     }
277     else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
278         retval = 0;
279     }
280     else {
281         KMP_MB();
282         retval = lck->lk.depth_locked = 1;
283     }
284     return retval;
285 }
286 
287 static int
288 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
289 {
290     char const * const func = "omp_test_nest_lock";
291     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
292         KMP_FATAL( LockSimpleUsedAsNestable, func );
293     }
294     return __kmp_test_nested_tas_lock( lck, gtid );
295 }
296 
297 void
298 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
299 {
300     KMP_DEBUG_ASSERT( gtid >= 0 );
301 
302     KMP_MB();
303     if ( --(lck->lk.depth_locked) == 0 ) {
304         __kmp_release_tas_lock( lck, gtid );
305     }
306 }
307 
308 static void
309 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
310 {
311     char const * const func = "omp_unset_nest_lock";
312     KMP_MB();  /* in case another processor initialized lock */
313     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
314         KMP_FATAL( LockSimpleUsedAsNestable, func );
315     }
316     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
317         KMP_FATAL( LockUnsettingFree, func );
318     }
319     if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
320         KMP_FATAL( LockUnsettingSetByAnother, func );
321     }
322     __kmp_release_nested_tas_lock( lck, gtid );
323 }
324 
325 void
326 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
327 {
328     __kmp_init_tas_lock( lck );
329     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
330 }
331 
332 static void
333 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
334 {
335     __kmp_init_nested_tas_lock( lck );
336 }
337 
338 void
339 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
340 {
341     __kmp_destroy_tas_lock( lck );
342     lck->lk.depth_locked = 0;
343 }
344 
345 static void
346 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
347 {
348     char const * const func = "omp_destroy_nest_lock";
349     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
350         KMP_FATAL( LockSimpleUsedAsNestable, func );
351     }
352     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
353         KMP_FATAL( LockStillOwned, func );
354     }
355     __kmp_destroy_nested_tas_lock( lck );
356 }
357 
358 
359 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
360 
361 /* ------------------------------------------------------------------------ */
362 /* futex locks */
363 
364 // futex locks are really just test and set locks, with a different method
365 // of handling contention.  They take the same amount of space as test and
366 // set locks, and are allocated the same way (i.e. use the area allocated by
367 // the compiler for non-nested locks / allocate nested locks on the heap).
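//
// Informal sketch of the futex lock state (inferred from the code below): the
// acquiring thread stores (gtid+1) << 1 in lk.poll, and a contending thread
// sets the low bit of the (stripped) poll value before sleeping in futex_wait,
// telling the owner that a futex_wake is required on release.
// __kmp_get_futex_lock_owner() therefore shifts right by one before
// subtracting 1.
//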
368 
369 static kmp_int32
370 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
371 {
372     return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
373 }
374 
375 static inline bool
376 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
377 {
378     return lck->lk.depth_locked != -1;
379 }
380 
381 __forceinline static void
382 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
383 {
384     kmp_int32 gtid_code = ( gtid + 1 ) << 1;
385 
386     KMP_MB();
387 
388 #ifdef USE_LOCK_PROFILE
389     kmp_uint32 curr = TCR_4( lck->lk.poll );
390     if ( ( curr != 0 ) && ( curr != gtid_code ) )
391         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
392     /* else __kmp_printf( "." );*/
393 #endif /* USE_LOCK_PROFILE */
394 
395     KMP_FSYNC_PREPARE( lck );
396     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
397       lck, lck->lk.poll, gtid ) );
398 
399     kmp_int32 poll_val;
400 
401     while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex),
402              DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) {
403 
404         kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;
405         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
406            lck, gtid, poll_val, cond ) );
407 
408         //
409         // NOTE: don't rewrite the condition for this branch as
410         //
411         //     if ( poll_val & 1 == 0 )
412         //
413         // Since == binds more tightly than &, that parses as poll_val & ( 1 == 0 ),
414         // so the block below would always be skipped regardless of the LSB of
415         // poll_val (historically attributed to a 12.0 compiler bug).
416         if ( ! cond ) {
417             //
418             // Try to set the lsb in the poll to indicate to the owner
419             // thread that they need to wake this thread up.
420             //
421             if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) {
422                 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
423                   lck, lck->lk.poll, gtid ) );
424                 continue;
425             }
426             poll_val |= DYNA_LOCK_BUSY(1, futex);
427 
428             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
429               lck, lck->lk.poll, gtid ) );
430         }
431 
432         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
433            lck, gtid, poll_val ) );
434 
435         kmp_int32 rc;
436         if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
437           poll_val, NULL, NULL, 0 ) ) != 0 ) {
438             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
439                lck, gtid, poll_val, rc, errno ) );
440             continue;
441         }
442 
443         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
444            lck, gtid, poll_val ) );
445         //
446         // This thread has now done a successful futex wait call and was
447         // entered on the OS futex queue.  We must now perform a futex
448         // wake call when releasing the lock, as we have no idea how many
449         // other threads are in the queue.
450         //
451         gtid_code |= 1;
452     }
453 
454     KMP_FSYNC_ACQUIRED( lck );
455     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
456       lck, lck->lk.poll, gtid ) );
457 }
458 
459 void
460 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
461 {
462     __kmp_acquire_futex_lock_timed_template( lck, gtid );
463 }
464 
465 static void
466 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
467 {
468     char const * const func = "omp_set_lock";
469     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
470       && __kmp_is_futex_lock_nestable( lck ) ) {
471         KMP_FATAL( LockNestableUsedAsSimple, func );
472     }
473     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
474         KMP_FATAL( LockIsAlreadyOwned, func );
475     }
476     __kmp_acquire_futex_lock( lck, gtid );
477 }
478 
479 int
480 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
481 {
482     if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
483         KMP_FSYNC_ACQUIRED( lck );
484         return TRUE;
485     }
486     return FALSE;
487 }
488 
489 static int
490 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
491 {
492     char const * const func = "omp_test_lock";
493     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
494       && __kmp_is_futex_lock_nestable( lck ) ) {
495         KMP_FATAL( LockNestableUsedAsSimple, func );
496     }
497     return __kmp_test_futex_lock( lck, gtid );
498 }
499 
500 void
501 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
502 {
503     KMP_MB();       /* Flush all pending memory write invalidates.  */
504 
505     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
506       lck, lck->lk.poll, gtid ) );
507 
508     KMP_FSYNC_RELEASING(lck);
509 
510     kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) );
511 
512     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
513        lck, gtid, poll_val ) );
514 
515     if ( DYNA_LOCK_STRIP(poll_val) & 1 ) {
516         KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
517            lck, gtid ) );
518         syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 );
519     }
520 
521     KMP_MB();       /* Flush all pending memory write invalidates.  */
522 
523     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
524       lck, lck->lk.poll, gtid ) );
525 
526     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
527       __kmp_xproc ) );
528 }
529 
530 static void
531 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
532 {
533     char const * const func = "omp_unset_lock";
534     KMP_MB();  /* in case another processor initialized lock */
535     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
536       && __kmp_is_futex_lock_nestable( lck ) ) {
537         KMP_FATAL( LockNestableUsedAsSimple, func );
538     }
539     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
540         KMP_FATAL( LockUnsettingFree, func );
541     }
542     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
543       && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
544         KMP_FATAL( LockUnsettingSetByAnother, func );
545     }
546     __kmp_release_futex_lock( lck, gtid );
547 }
548 
549 void
550 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
551 {
552     TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) );
553 }
554 
555 static void
556 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
557 {
558     __kmp_init_futex_lock( lck );
559 }
560 
561 void
562 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
563 {
564     lck->lk.poll = 0;
565 }
566 
567 static void
568 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
569 {
570     char const * const func = "omp_destroy_lock";
571     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
572       && __kmp_is_futex_lock_nestable( lck ) ) {
573         KMP_FATAL( LockNestableUsedAsSimple, func );
574     }
575     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
576         KMP_FATAL( LockStillOwned, func );
577     }
578     __kmp_destroy_futex_lock( lck );
579 }
580 
581 
582 //
583 // nested futex locks
584 //
585 
586 void
587 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
588 {
589     KMP_DEBUG_ASSERT( gtid >= 0 );
590 
591     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
592         lck->lk.depth_locked += 1;
593     }
594     else {
595         __kmp_acquire_futex_lock_timed_template( lck, gtid );
596         lck->lk.depth_locked = 1;
597     }
598 }
599 
600 static void
601 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
602 {
603     char const * const func = "omp_set_nest_lock";
604     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
605         KMP_FATAL( LockSimpleUsedAsNestable, func );
606     }
607     __kmp_acquire_nested_futex_lock( lck, gtid );
608 }
609 
610 int
611 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
612 {
613     int retval;
614 
615     KMP_DEBUG_ASSERT( gtid >= 0 );
616 
617     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
618         retval = ++lck->lk.depth_locked;
619     }
620     else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
621         retval = 0;
622     }
623     else {
624         KMP_MB();
625         retval = lck->lk.depth_locked = 1;
626     }
627     return retval;
628 }
629 
630 static int
631 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
632 {
633     char const * const func = "omp_test_nest_lock";
634     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
635         KMP_FATAL( LockSimpleUsedAsNestable, func );
636     }
637     return __kmp_test_nested_futex_lock( lck, gtid );
638 }
639 
640 void
641 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
642 {
643     KMP_DEBUG_ASSERT( gtid >= 0 );
644 
645     KMP_MB();
646     if ( --(lck->lk.depth_locked) == 0 ) {
647         __kmp_release_futex_lock( lck, gtid );
648     }
649 }
650 
651 static void
652 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
653 {
654     char const * const func = "omp_unset_nest_lock";
655     KMP_MB();  /* in case another processor initialized lock */
656     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
657         KMP_FATAL( LockSimpleUsedAsNestable, func );
658     }
659     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
660         KMP_FATAL( LockUnsettingFree, func );
661     }
662     if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
663         KMP_FATAL( LockUnsettingSetByAnother, func );
664     }
665     __kmp_release_nested_futex_lock( lck, gtid );
666 }
667 
668 void
669 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
670 {
671     __kmp_init_futex_lock( lck );
672     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
673 }
674 
675 static void
676 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
677 {
678     __kmp_init_nested_futex_lock( lck );
679 }
680 
681 void
682 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
683 {
684     __kmp_destroy_futex_lock( lck );
685     lck->lk.depth_locked = 0;
686 }
687 
688 static void
689 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
690 {
691     char const * const func = "omp_destroy_nest_lock";
692     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
693         KMP_FATAL( LockSimpleUsedAsNestable, func );
694     }
695     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
696         KMP_FATAL( LockStillOwned, func );
697     }
698     __kmp_destroy_nested_futex_lock( lck );
699 }
700 
701 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
702 
703 
704 /* ------------------------------------------------------------------------ */
705 /* ticket (bakery) locks */
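/* Informal summary of the protocol implemented below: each acquirer atomically
   fetches-and-increments lk.next_ticket to take a ticket, then spins until
   lk.now_serving equals that ticket; the releaser bumps lk.now_serving by one,
   handing the lock to the next ticket holder in FIFO order.
   __kmp_bakery_check() is the spin predicate passed to KMP_WAIT_YIELD.       */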
706 
707 static kmp_int32
708 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
709 {
710     return TCR_4( lck->lk.owner_id ) - 1;
711 }
712 
713 static inline bool
714 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
715 {
716     return lck->lk.depth_locked != -1;
717 }
718 
719 static kmp_uint32
720 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
721 {
722     register kmp_uint32 pause;
723 
724     if (value == checker) {
725         return TRUE;
726     }
727     for (pause = checker - value; pause != 0; --pause);
728     return FALSE;
729 }
730 
731 __forceinline static void
732 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
733 {
734     kmp_uint32 my_ticket;
735     KMP_MB();
736 
737     my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
738 
739 #ifdef USE_LOCK_PROFILE
740     if ( TCR_4( lck->lk.now_serving ) != my_ticket )
741         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
742     /* else __kmp_printf( "." );*/
743 #endif /* USE_LOCK_PROFILE */
744 
745     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
746         KMP_FSYNC_ACQUIRED(lck);
747         return;
748     }
749     KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
750     KMP_FSYNC_ACQUIRED(lck);
751 }
752 
753 void
754 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
755 {
756     __kmp_acquire_ticket_lock_timed_template( lck, gtid );
757 }
758 
759 static void
760 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
761 {
762     char const * const func = "omp_set_lock";
763     if ( lck->lk.initialized != lck ) {
764         KMP_FATAL( LockIsUninitialized, func );
765     }
766     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
767         KMP_FATAL( LockNestableUsedAsSimple, func );
768     }
769     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
770         KMP_FATAL( LockIsAlreadyOwned, func );
771     }
772 
773     __kmp_acquire_ticket_lock( lck, gtid );
774 
775     lck->lk.owner_id = gtid + 1;
776 }
777 
778 int
779 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
780 {
781     kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
782     if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
783         kmp_uint32 next_ticket = my_ticket + 1;
784         if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
785           my_ticket, next_ticket ) ) {
786             KMP_FSYNC_ACQUIRED( lck );
787             return TRUE;
788         }
789     }
790     return FALSE;
791 }
792 
793 static int
794 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
795 {
796     char const * const func = "omp_test_lock";
797     if ( lck->lk.initialized != lck ) {
798         KMP_FATAL( LockIsUninitialized, func );
799     }
800     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
801         KMP_FATAL( LockNestableUsedAsSimple, func );
802     }
803 
804     int retval = __kmp_test_ticket_lock( lck, gtid );
805 
806     if ( retval ) {
807         lck->lk.owner_id = gtid + 1;
808     }
809     return retval;
810 }
811 
812 void
813 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
814 {
815     kmp_uint32  distance;
816 
817     KMP_MB();       /* Flush all pending memory write invalidates.  */
818 
819     KMP_FSYNC_RELEASING(lck);
820     distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
821 
822     KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
823 
824     KMP_MB();       /* Flush all pending memory write invalidates.  */
825 
826     KMP_YIELD( distance
827       > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
828 }
829 
830 static void
831 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
832 {
833     char const * const func = "omp_unset_lock";
834     KMP_MB();  /* in case another processor initialized lock */
835     if ( lck->lk.initialized != lck ) {
836         KMP_FATAL( LockIsUninitialized, func );
837     }
838     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
839         KMP_FATAL( LockNestableUsedAsSimple, func );
840     }
841     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
842         KMP_FATAL( LockUnsettingFree, func );
843     }
844     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
845       && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
846         KMP_FATAL( LockUnsettingSetByAnother, func );
847     }
848     lck->lk.owner_id = 0;
849     __kmp_release_ticket_lock( lck, gtid );
850 }
851 
852 void
853 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
854 {
855     lck->lk.location = NULL;
856     TCW_4( lck->lk.next_ticket, 0 );
857     TCW_4( lck->lk.now_serving, 0 );
858     lck->lk.owner_id = 0;      // no thread owns the lock.
859     lck->lk.depth_locked = -1; // -1 => not a nested lock.
860     lck->lk.initialized = (kmp_ticket_lock *)lck;
861 }
862 
863 static void
864 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
865 {
866     __kmp_init_ticket_lock( lck );
867 }
868 
869 void
870 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
871 {
872     lck->lk.initialized = NULL;
873     lck->lk.location    = NULL;
874     lck->lk.next_ticket = 0;
875     lck->lk.now_serving = 0;
876     lck->lk.owner_id = 0;
877     lck->lk.depth_locked = -1;
878 }
879 
880 static void
881 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
882 {
883     char const * const func = "omp_destroy_lock";
884     if ( lck->lk.initialized != lck ) {
885         KMP_FATAL( LockIsUninitialized, func );
886     }
887     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
888         KMP_FATAL( LockNestableUsedAsSimple, func );
889     }
890     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
891         KMP_FATAL( LockStillOwned, func );
892     }
893     __kmp_destroy_ticket_lock( lck );
894 }
895 
896 
897 //
898 // nested ticket locks
899 //
900 
901 void
902 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
903 {
904     KMP_DEBUG_ASSERT( gtid >= 0 );
905 
906     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
907         lck->lk.depth_locked += 1;
908     }
909     else {
910         __kmp_acquire_ticket_lock_timed_template( lck, gtid );
911         KMP_MB();
912         lck->lk.depth_locked = 1;
913         KMP_MB();
914         lck->lk.owner_id = gtid + 1;
915     }
916 }
917 
918 static void
919 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
920 {
921     char const * const func = "omp_set_nest_lock";
922     if ( lck->lk.initialized != lck ) {
923         KMP_FATAL( LockIsUninitialized, func );
924     }
925     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
926         KMP_FATAL( LockSimpleUsedAsNestable, func );
927     }
928     __kmp_acquire_nested_ticket_lock( lck, gtid );
929 }
930 
931 int
932 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
933 {
934     int retval;
935 
936     KMP_DEBUG_ASSERT( gtid >= 0 );
937 
938     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
939         retval = ++lck->lk.depth_locked;
940     }
941     else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
942         retval = 0;
943     }
944     else {
945         KMP_MB();
946         retval = lck->lk.depth_locked = 1;
947         KMP_MB();
948         lck->lk.owner_id = gtid + 1;
949     }
950     return retval;
951 }
952 
953 static int
954 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
955   kmp_int32 gtid )
956 {
957     char const * const func = "omp_test_nest_lock";
958     if ( lck->lk.initialized != lck ) {
959         KMP_FATAL( LockIsUninitialized, func );
960     }
961     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
962         KMP_FATAL( LockSimpleUsedAsNestable, func );
963     }
964     return __kmp_test_nested_ticket_lock( lck, gtid );
965 }
966 
967 void
968 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
969 {
970     KMP_DEBUG_ASSERT( gtid >= 0 );
971 
972     KMP_MB();
973     if ( --(lck->lk.depth_locked) == 0 ) {
974         KMP_MB();
975         lck->lk.owner_id = 0;
976         __kmp_release_ticket_lock( lck, gtid );
977     }
978 }
979 
980 static void
981 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
982 {
983     char const * const func = "omp_unset_nest_lock";
984     KMP_MB();  /* in case another processor initialized lock */
985     if ( lck->lk.initialized != lck ) {
986         KMP_FATAL( LockIsUninitialized, func );
987     }
988     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
989         KMP_FATAL( LockSimpleUsedAsNestable, func );
990     }
991     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
992         KMP_FATAL( LockUnsettingFree, func );
993     }
994     if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
995         KMP_FATAL( LockUnsettingSetByAnother, func );
996     }
997     __kmp_release_nested_ticket_lock( lck, gtid );
998 }
999 
1000 void
1001 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1002 {
1003     __kmp_init_ticket_lock( lck );
1004     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1005 }
1006 
1007 static void
1008 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1009 {
1010     __kmp_init_nested_ticket_lock( lck );
1011 }
1012 
1013 void
1014 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1015 {
1016     __kmp_destroy_ticket_lock( lck );
1017     lck->lk.depth_locked = 0;
1018 }
1019 
1020 static void
1021 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1022 {
1023     char const * const func = "omp_destroy_nest_lock";
1024     if ( lck->lk.initialized != lck ) {
1025         KMP_FATAL( LockIsUninitialized, func );
1026     }
1027     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1028         KMP_FATAL( LockSimpleUsedAsNestable, func );
1029     }
1030     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1031         KMP_FATAL( LockStillOwned, func );
1032     }
1033     __kmp_destroy_nested_ticket_lock( lck );
1034 }
1035 
1036 
1037 //
1038 // access functions to fields which don't exist for all lock kinds.
1039 //
1040 
1041 static int
1042 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1043 {
1044     return lck == lck->lk.initialized;
1045 }
1046 
1047 static const ident_t *
1048 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1049 {
1050     return lck->lk.location;
1051 }
1052 
1053 static void
1054 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1055 {
1056     lck->lk.location = loc;
1057 }
1058 
1059 static kmp_lock_flags_t
1060 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1061 {
1062     return lck->lk.flags;
1063 }
1064 
1065 static void
1066 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1067 {
1068     lck->lk.flags = flags;
1069 }
1070 
1071 /* ------------------------------------------------------------------------ */
1072 /* queuing locks */
1073 
1074 /*
1075  * First the states
1076  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1077  *   UINT_MAX or -1, 0  means lock is held, nobody on queue
1078  *                h, h  means lock is held or about to transition, 1 element on queue
1079  *                h, t  h <> t, means lock is held or about to transition, more than 1 element on queue
1080  *
1081  * Now the transitions
1082  *    Acquire(0,0)  = -1 ,0
1083  *    Release(0,0)  = Error
1084  *    Acquire(-1,0) =  h ,h    h > 0
1085  *    Release(-1,0) =  0 ,0
1086  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1087  *    Release(h,h)  = -1 ,0    h > 0
1088  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1089  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1090  *
1091  * And pictorially
1092  *
1093  *
1094  *          +-----+
1095  *          | 0, 0|------- release -------> Error
1096  *          +-----+
1097  *            |  ^
1098  *     acquire|  |release
1099  *            |  |
1100  *            |  |
1101  *            v  |
1102  *          +-----+
1103  *          |-1, 0|
1104  *          +-----+
1105  *            |  ^
1106  *     acquire|  |release
1107  *            |  |
1108  *            |  |
1109  *            v  |
1110  *          +-----+
1111  *          | h, h|
1112  *          +-----+
1113  *            |  ^
1114  *     acquire|  |release
1115  *            |  |
1116  *            |  |
1117  *            v  |
1118  *          +-----+
1119  *          | h, t|----- acquire, release loopback ---+
1120  *          +-----+                                   |
1121  *               ^                                    |
1122  *               |                                    |
1123  *               +------------------------------------+
1124  *
1125  */
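/*
 * Worked example of the transitions above (illustrative only; thread numbers
 * are made up, and a queue entry for T#n is stored as n+1):
 *
 *    T#5 acquires:  (0,0)  -> (-1,0)   T#5 holds the lock, queue empty
 *    T#7 acquires:  (-1,0) -> (8,8)    T#5 still holds the lock, T#7 is the
 *                                      only queued (spinning) thread
 *    T#5 releases:  (8,8)  -> (-1,0)   T#7 is dequeued and woken; it now
 *                                      holds the lock
 *    T#7 releases:  (-1,0) -> (0,0)    lock free again
 */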
1126 
1127 #ifdef DEBUG_QUEUING_LOCKS
1128 
1129 /* Stuff for circular trace buffer */
1130 #define TRACE_BUF_ELE	1024
1131 static char traces[TRACE_BUF_ELE][128] = { 0 };
1132 static int tc = 0;
1133 #define TRACE_LOCK(X,Y)          sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
1134 #define TRACE_LOCK_T(X,Y,Z)      sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
1135 #define TRACE_LOCK_HT(X,Y,Z,Q)   sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );
1136 
1137 static void
1138 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1139   kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1140 {
1141     kmp_int32 t, i;
1142 
1143     __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1144 
1145     i = tc % TRACE_BUF_ELE;
1146     __kmp_printf_no_lock( "%s\n", traces[i] );
1147     i = (i+1) % TRACE_BUF_ELE;
1148     while ( i != (tc % TRACE_BUF_ELE) ) {
1149         __kmp_printf_no_lock( "%s", traces[i] );
1150         i = (i+1) % TRACE_BUF_ELE;
1151     }
1152     __kmp_printf_no_lock( "\n" );
1153 
1154     __kmp_printf_no_lock(
1155              "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1156              gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1157              head_id, tail_id );
1158 
1159     __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1160 
1161     if ( lck->lk.head_id >= 1 ) {
1162         t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1163         while (t > 0) {
1164             __kmp_printf_no_lock( "-> %d ", t );
1165             t = __kmp_threads[t-1]->th.th_next_waiting;
1166         }
1167     }
1168     __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
1169     __kmp_printf_no_lock( "\n\n" );
1170 }
1171 
1172 #endif /* DEBUG_QUEUING_LOCKS */
1173 
1174 static kmp_int32
1175 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1176 {
1177     return TCR_4( lck->lk.owner_id ) - 1;
1178 }
1179 
1180 static inline bool
1181 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1182 {
1183     return lck->lk.depth_locked != -1;
1184 }
1185 
1186 /* Acquire a lock using the queuing lock implementation */
1187 template <bool takeTime>
1188 /* [TLW] The unused template above is left behind because of what BEB believes is a
1189    potential compiler problem with __forceinline. */
1190 __forceinline static void
1191 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1192   kmp_int32 gtid )
1193 {
1194     register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
1195     volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
1196     volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
1197     volatile kmp_uint32 *spin_here_p;
1198     kmp_int32 need_mf = 1;
1199 
1200     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1201 
1202     KMP_FSYNC_PREPARE( lck );
1203     KMP_DEBUG_ASSERT( this_thr != NULL );
1204     spin_here_p = & this_thr->th.th_spin_here;
1205 
1206 #ifdef DEBUG_QUEUING_LOCKS
1207     TRACE_LOCK( gtid+1, "acq ent" );
1208     if ( *spin_here_p )
1209         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1210     if ( this_thr->th.th_next_waiting != 0 )
1211         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1212 #endif
1213     KMP_DEBUG_ASSERT( !*spin_here_p );
1214     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1215 
1216 
1217     /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1218        that may follow, not just in execution order, but also in visibility order.  This way,
1219        when a releasing thread observes the changes to the queue by this thread, it can
1220        rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1221        spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
1222        to FALSE before this thread sets it to TRUE, this thread will hang.
1223     */
1224     *spin_here_p = TRUE;  /* before enqueuing to prevent race */
1225 
1226     while( 1 ) {
1227         kmp_int32 enqueued;
1228         kmp_int32 head;
1229         kmp_int32 tail;
1230 
1231         head = *head_id_p;
1232 
1233         switch ( head ) {
1234 
1235             case -1:
1236             {
1237 #ifdef DEBUG_QUEUING_LOCKS
1238                 tail = *tail_id_p;
1239                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1240 #endif
1241                 tail = 0;  /* to make sure the next link asynchronously read is not set accidentally;
1242                            this assignment prevents us from entering the if ( tail > 0 )
1243                            condition in the enqueued case below, which is not necessary for
1244                            this state transition */
1245 
1246                 need_mf = 0;
1247                 /* try (-1,0)->(tid,tid) */
1248                 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1249                   KMP_PACK_64( -1, 0 ),
1250                   KMP_PACK_64( gtid+1, gtid+1 ) );
1251 #ifdef DEBUG_QUEUING_LOCKS
1252                   if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1253 #endif
1254             }
1255             break;
1256 
1257             default:
1258             {
1259                 tail = *tail_id_p;
1260                 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1261 
1262 #ifdef DEBUG_QUEUING_LOCKS
1263                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1264 #endif
1265 
1266                 if ( tail == 0 ) {
1267                     enqueued = FALSE;
1268                 }
1269                 else {
1270                     need_mf = 0;
1271                     /* try (h,t) or (h,h)->(h,tid) */
1272                     enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1273 
1274 #ifdef DEBUG_QUEUING_LOCKS
1275                         if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1276 #endif
1277                 }
1278             }
1279             break;
1280 
1281             case 0: /* empty queue */
1282             {
1283                 kmp_int32 grabbed_lock;
1284 
1285 #ifdef DEBUG_QUEUING_LOCKS
1286                 tail = *tail_id_p;
1287                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1288 #endif
1289                 /* try (0,0)->(-1,0) */
1290 
1291                 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1292                 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1293 
1294                 if ( grabbed_lock ) {
1295 
1296                     *spin_here_p = FALSE;
1297 
1298                     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1299                               lck, gtid ));
1300 #ifdef DEBUG_QUEUING_LOCKS
1301                     TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1302 #endif
1303                     KMP_FSYNC_ACQUIRED( lck );
1304                     return; /* lock holder cannot be on queue */
1305                 }
1306                 enqueued = FALSE;
1307             }
1308             break;
1309         }
1310 
1311         if ( enqueued ) {
1312             if ( tail > 0 ) {
1313                 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1314                 KMP_ASSERT( tail_thr != NULL );
1315                 tail_thr->th.th_next_waiting = gtid+1;
1316                 /* corresponding wait for this write in release code */
1317             }
1318             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1319 
1320 
1321             /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
1322              *       throughput only here.
1323              */
1324             KMP_MB();
1325             KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1326 
1327 #ifdef DEBUG_QUEUING_LOCKS
1328             TRACE_LOCK( gtid+1, "acq spin" );
1329 
1330             if ( this_thr->th.th_next_waiting != 0 )
1331                 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1332 #endif
1333             KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1334             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1335                       lck, gtid ));
1336 
1337 #ifdef DEBUG_QUEUING_LOCKS
1338             TRACE_LOCK( gtid+1, "acq exit 2" );
1339 #endif
1340             /* got lock, we were dequeued by the thread that released lock */
1341             return;
1342         }
1343 
1344         /* Yield if number of threads > number of logical processors */
1345         /* ToDo: Not sure why this should only be in oversubscription case,
1346            maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1347         KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1348           __kmp_xproc ) );
1349 #ifdef DEBUG_QUEUING_LOCKS
1350         TRACE_LOCK( gtid+1, "acq retry" );
1351 #endif
1352 
1353     }
1354     KMP_ASSERT2( 0, "should not get here" );
1355 }
1356 
1357 void
1358 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1359 {
1360     KMP_DEBUG_ASSERT( gtid >= 0 );
1361 
1362     __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1363 }
1364 
1365 static void
1366 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1367   kmp_int32 gtid )
1368 {
1369     char const * const func = "omp_set_lock";
1370     if ( lck->lk.initialized != lck ) {
1371         KMP_FATAL( LockIsUninitialized, func );
1372     }
1373     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1374         KMP_FATAL( LockNestableUsedAsSimple, func );
1375     }
1376     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1377         KMP_FATAL( LockIsAlreadyOwned, func );
1378     }
1379 
1380     __kmp_acquire_queuing_lock( lck, gtid );
1381 
1382     lck->lk.owner_id = gtid + 1;
1383 }
1384 
1385 int
1386 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1387 {
1388     volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
1389     kmp_int32 head;
1390 #ifdef KMP_DEBUG
1391     kmp_info_t *this_thr;
1392 #endif
1393 
1394     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1395     KMP_DEBUG_ASSERT( gtid >= 0 );
1396 #ifdef KMP_DEBUG
1397     this_thr = __kmp_thread_from_gtid( gtid );
1398     KMP_DEBUG_ASSERT( this_thr != NULL );
1399     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1400 #endif
1401 
1402     head = *head_id_p;
1403 
1404     if ( head == 0 ) { /* nobody on queue, nobody holding */
1405 
1406         /* try (0,0)->(-1,0) */
1407 
1408         if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1409             KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1410             KMP_FSYNC_ACQUIRED(lck);
1411             return TRUE;
1412         }
1413     }
1414 
1415     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1416     return FALSE;
1417 }
1418 
1419 static int
1420 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1421 {
1422     char const * const func = "omp_test_lock";
1423     if ( lck->lk.initialized != lck ) {
1424         KMP_FATAL( LockIsUninitialized, func );
1425     }
1426     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1427         KMP_FATAL( LockNestableUsedAsSimple, func );
1428     }
1429 
1430     int retval = __kmp_test_queuing_lock( lck, gtid );
1431 
1432     if ( retval ) {
1433         lck->lk.owner_id = gtid + 1;
1434     }
1435     return retval;
1436 }
1437 
1438 void
1439 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1440 {
1441     register kmp_info_t *this_thr;
1442     volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1443     volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1444 
1445     KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1446     KMP_DEBUG_ASSERT( gtid >= 0 );
1447     this_thr    = __kmp_thread_from_gtid( gtid );
1448     KMP_DEBUG_ASSERT( this_thr != NULL );
1449 #ifdef DEBUG_QUEUING_LOCKS
1450     TRACE_LOCK( gtid+1, "rel ent" );
1451 
1452     if ( this_thr->th.th_spin_here )
1453         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1454     if ( this_thr->th.th_next_waiting != 0 )
1455         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1456 #endif
1457     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1458     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1459 
1460     KMP_FSYNC_RELEASING(lck);
1461 
1462     while( 1 ) {
1463         kmp_int32 dequeued;
1464         kmp_int32 head;
1465         kmp_int32 tail;
1466 
1467         head = *head_id_p;
1468 
1469 #ifdef DEBUG_QUEUING_LOCKS
1470         tail = *tail_id_p;
1471         TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1472         if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1473 #endif
1474         KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1475 
1476         if ( head == -1 ) { /* nobody on queue */
1477 
1478             /* try (-1,0)->(0,0) */
1479             if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1480                 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1481                           lck, gtid ));
1482 #ifdef DEBUG_QUEUING_LOCKS
1483                 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1484 #endif
1485                 return;
1486             }
1487             dequeued = FALSE;
1488 
1489         }
1490         else {
1491 
1492             tail = *tail_id_p;
1493             if ( head == tail ) {  /* only one thread on the queue */
1494 
1495 #ifdef DEBUG_QUEUING_LOCKS
1496                 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1497 #endif
1498                 KMP_DEBUG_ASSERT( head > 0 );
1499 
1500                 /* try (h,h)->(-1,0) */
1501                 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1502                   KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1503 #ifdef DEBUG_QUEUING_LOCKS
1504                 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1505 #endif
1506 
1507             }
1508             else {
1509                 volatile kmp_int32 *waiting_id_p;
1510                 kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
1511                 KMP_DEBUG_ASSERT( head_thr != NULL );
1512                 waiting_id_p = & head_thr->th.th_next_waiting;
1513 
1514                 /* Does this require synchronous reads? */
1515 #ifdef DEBUG_QUEUING_LOCKS
1516                 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1517 #endif
1518                 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1519 
1520                 /* try (h,t)->(h',t) or (t,t) */
1521 
1522                 KMP_MB();
1523                 /* make sure enqueuing thread has time to update next waiting thread field */
1524                 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1525 #ifdef DEBUG_QUEUING_LOCKS
1526                 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1527 #endif
1528                 dequeued = TRUE;
1529             }
1530         }
1531 
1532         if ( dequeued ) {
1533             kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1534             KMP_DEBUG_ASSERT( head_thr != NULL );
1535 
1536             /* Does this require synchronous reads? */
1537 #ifdef DEBUG_QUEUING_LOCKS
1538             if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1539 #endif
1540             KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1541 
1542             /* For clean code only.
1543              * The waiting thread is not released until the next statement; this prevents a race with the acquire code.
1544              */
1545             head_thr->th.th_next_waiting = 0;
1546 #ifdef DEBUG_QUEUING_LOCKS
1547             TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1548 #endif
1549 
1550             KMP_MB();
1551             /* reset spin value */
1552             head_thr->th.th_spin_here = FALSE;
1553 
1554             KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1555                       lck, gtid ));
1556 #ifdef DEBUG_QUEUING_LOCKS
1557             TRACE_LOCK( gtid+1, "rel exit 2" );
1558 #endif
1559             return;
1560         }
1561         /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */
1562 
1563 #ifdef DEBUG_QUEUING_LOCKS
1564         TRACE_LOCK( gtid+1, "rel retry" );
1565 #endif
1566 
1567     } /* while */
1568     KMP_ASSERT2( 0, "should not get here" );
1569 }
1570 
1571 static void
1572 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1573   kmp_int32 gtid )
1574 {
1575     char const * const func = "omp_unset_lock";
1576     KMP_MB();  /* in case another processor initialized lock */
1577     if ( lck->lk.initialized != lck ) {
1578         KMP_FATAL( LockIsUninitialized, func );
1579     }
1580     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1581         KMP_FATAL( LockNestableUsedAsSimple, func );
1582     }
1583     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1584         KMP_FATAL( LockUnsettingFree, func );
1585     }
1586     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1587         KMP_FATAL( LockUnsettingSetByAnother, func );
1588     }
1589     lck->lk.owner_id = 0;
1590     __kmp_release_queuing_lock( lck, gtid );
1591 }
1592 
1593 void
1594 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1595 {
1596     lck->lk.location = NULL;
1597     lck->lk.head_id = 0;
1598     lck->lk.tail_id = 0;
1599     lck->lk.next_ticket = 0;
1600     lck->lk.now_serving = 0;
1601     lck->lk.owner_id = 0;      // no thread owns the lock.
1602     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1603     lck->lk.initialized = lck;
1604 
1605     KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1606 }
1607 
1608 static void
1609 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1610 {
1611     __kmp_init_queuing_lock( lck );
1612 }
1613 
1614 void
1615 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1616 {
1617     lck->lk.initialized = NULL;
1618     lck->lk.location = NULL;
1619     lck->lk.head_id = 0;
1620     lck->lk.tail_id = 0;
1621     lck->lk.next_ticket = 0;
1622     lck->lk.now_serving = 0;
1623     lck->lk.owner_id = 0;
1624     lck->lk.depth_locked = -1;
1625 }
1626 
1627 static void
1628 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1629 {
1630     char const * const func = "omp_destroy_lock";
1631     if ( lck->lk.initialized != lck ) {
1632         KMP_FATAL( LockIsUninitialized, func );
1633     }
1634     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1635         KMP_FATAL( LockNestableUsedAsSimple, func );
1636     }
1637     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1638         KMP_FATAL( LockStillOwned, func );
1639     }
1640     __kmp_destroy_queuing_lock( lck );
1641 }
1642 
1643 
1644 //
1645 // nested queuing locks
1646 //
1647 
1648 void
1649 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1650 {
1651     KMP_DEBUG_ASSERT( gtid >= 0 );
1652 
1653     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1654         lck->lk.depth_locked += 1;
1655     }
1656     else {
1657         __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1658         KMP_MB();
1659         lck->lk.depth_locked = 1;
1660         KMP_MB();
1661         lck->lk.owner_id = gtid + 1;
1662     }
1663 }
1664 
1665 static void
1666 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1667 {
1668     char const * const func = "omp_set_nest_lock";
1669     if ( lck->lk.initialized != lck ) {
1670         KMP_FATAL( LockIsUninitialized, func );
1671     }
1672     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1673         KMP_FATAL( LockSimpleUsedAsNestable, func );
1674     }
1675     __kmp_acquire_nested_queuing_lock( lck, gtid );
1676 }
1677 
1678 int
1679 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1680 {
1681     int retval;
1682 
1683     KMP_DEBUG_ASSERT( gtid >= 0 );
1684 
1685     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1686         retval = ++lck->lk.depth_locked;
1687     }
1688     else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1689         retval = 0;
1690     }
1691     else {
1692         KMP_MB();
1693         retval = lck->lk.depth_locked = 1;
1694         KMP_MB();
1695         lck->lk.owner_id = gtid + 1;
1696     }
1697     return retval;
1698 }
1699 
1700 static int
1701 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1702   kmp_int32 gtid )
1703 {
1704     char const * const func = "omp_test_nest_lock";
1705     if ( lck->lk.initialized != lck ) {
1706         KMP_FATAL( LockIsUninitialized, func );
1707     }
1708     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1709         KMP_FATAL( LockSimpleUsedAsNestable, func );
1710     }
1711     return __kmp_test_nested_queuing_lock( lck, gtid );
1712 }
1713 
1714 void
1715 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1716 {
1717     KMP_DEBUG_ASSERT( gtid >= 0 );
1718 
1719     KMP_MB();
1720     if ( --(lck->lk.depth_locked) == 0 ) {
1721         KMP_MB();
1722         lck->lk.owner_id = 0;
1723         __kmp_release_queuing_lock( lck, gtid );
1724     }
1725 }
1726 
1727 static void
1728 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1729 {
1730     char const * const func = "omp_unset_nest_lock";
1731     KMP_MB();  /* in case another processor initialized lock */
1732     if ( lck->lk.initialized != lck ) {
1733         KMP_FATAL( LockIsUninitialized, func );
1734     }
1735     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1736         KMP_FATAL( LockSimpleUsedAsNestable, func );
1737     }
1738     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1739         KMP_FATAL( LockUnsettingFree, func );
1740     }
1741     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1742         KMP_FATAL( LockUnsettingSetByAnother, func );
1743     }
1744     __kmp_release_nested_queuing_lock( lck, gtid );
1745 }
1746 
1747 void
1748 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1749 {
1750     __kmp_init_queuing_lock( lck );
1751     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1752 }
1753 
1754 static void
1755 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1756 {
1757     __kmp_init_nested_queuing_lock( lck );
1758 }
1759 
1760 void
1761 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1762 {
1763     __kmp_destroy_queuing_lock( lck );
1764     lck->lk.depth_locked = 0;
1765 }
1766 
1767 static void
1768 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1769 {
1770     char const * const func = "omp_destroy_nest_lock";
1771     if ( lck->lk.initialized != lck ) {
1772         KMP_FATAL( LockIsUninitialized, func );
1773     }
1774     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1775         KMP_FATAL( LockSimpleUsedAsNestable, func );
1776     }
1777     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1778         KMP_FATAL( LockStillOwned, func );
1779     }
1780     __kmp_destroy_nested_queuing_lock( lck );
1781 }
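/*
   Illustrative sketch (not part of the runtime): how the nested queuing lock
   routines above track recursive ownership through lk.depth_locked.  The
   gtid value (0) and the call sequence are hypothetical.

       kmp_queuing_lock_t lck;
       __kmp_init_nested_queuing_lock( &lck );        // depth_locked == 0
       __kmp_acquire_nested_queuing_lock( &lck, 0 );  // first acquire: depth_locked == 1
       __kmp_acquire_nested_queuing_lock( &lck, 0 );  // same owner:    depth_locked == 2
       __kmp_release_nested_queuing_lock( &lck, 0 );  // depth_locked == 1, still held
       __kmp_release_nested_queuing_lock( &lck, 0 );  // depth_locked == 0, lock released
       __kmp_destroy_nested_queuing_lock( &lck );
*/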
1782 
1783 
1784 //
1785 // access functions to fields which don't exist for all lock kinds.
1786 //
1787 
1788 static int
1789 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1790 {
1791     return lck == lck->lk.initialized;
1792 }
1793 
1794 static const ident_t *
1795 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1796 {
1797     return lck->lk.location;
1798 }
1799 
1800 static void
1801 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1802 {
1803     lck->lk.location = loc;
1804 }
1805 
1806 static kmp_lock_flags_t
1807 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1808 {
1809     return lck->lk.flags;
1810 }
1811 
1812 static void
1813 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1814 {
1815     lck->lk.flags = flags;
1816 }
1817 
1818 #if KMP_USE_ADAPTIVE_LOCKS
1819 
1820 /*
1821     RTM Adaptive locks
1822 */
1823 
1824 // TODO: Use the header for intrinsics below with the compiler 13.0
1825 //#include <immintrin.h>
1826 
1827 // Values from the status register after failed speculation.
1828 #define _XBEGIN_STARTED          (~0u)
1829 #define _XABORT_EXPLICIT         (1 << 0)
1830 #define _XABORT_RETRY            (1 << 1)
1831 #define _XABORT_CONFLICT         (1 << 2)
1832 #define _XABORT_CAPACITY         (1 << 3)
1833 #define _XABORT_DEBUG            (1 << 4)
1834 #define _XABORT_NESTED           (1 << 5)
1835 #define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))
1836 
1837 // Aborts for which it's worth trying again immediately
1838 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
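// For example (informal): a status word of (_XABORT_RETRY | _XABORT_CONFLICT)
// satisfies (status & SOFT_ABORT_MASK) != 0, so an immediate retry may
// succeed, whereas a status with only _XABORT_CAPACITY set is treated as a
// hard failure by the speculation code below.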
1839 
1840 #define STRINGIZE_INTERNAL(arg) #arg
1841 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1842 
1843 // Access to RTM instructions
1844 
1845 /*
1846   A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1847   This is the same definition as the compiler intrinsic that will be supported at some point.
1848 */
1849 static __inline int _xbegin()
1850 {
1851     int res = -1;
1852 
1853 #if KMP_OS_WINDOWS
1854 #if KMP_ARCH_X86_64
1855     _asm {
1856         _emit 0xC7
1857         _emit 0xF8
1858         _emit 2
1859         _emit 0
1860         _emit 0
1861         _emit 0
1862         jmp   L2
1863         mov   res, eax
1864     L2:
1865     }
1866 #else /* IA32 */
1867     _asm {
1868         _emit 0xC7
1869         _emit 0xF8
1870         _emit 2
1871         _emit 0
1872         _emit 0
1873         _emit 0
1874         jmp   L2
1875         mov   res, eax
1876     L2:
1877     }
1878 #endif // KMP_ARCH_X86_64
1879 #else
1880     /* Note that %eax must be noted as killed (clobbered), because
1881      * the XSR is returned in %eax(%rax) on abort.  Other register
1882      * values are restored, so don't need to be killed.
1883      *
1884      * We must also mark 'res' as an input and an output, since otherwise
1885      * 'res=-1' may be dropped as being dead, whereas we do need the
1886      * assignment on the successful (i.e., non-abort) path.
1887      */
1888     __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
1889                       "   .long  1f-1b-6\n"
1890                       "    jmp   2f\n"
1891                       "1:  movl  %%eax,%0\n"
1892                       "2:"
1893                       :"+r"(res)::"memory","%eax");
1894 #endif // KMP_OS_WINDOWS
1895     return res;
1896 }
1897 
1898 /*
1899   Transaction end
1900 */
1901 static __inline void _xend()
1902 {
1903 #if KMP_OS_WINDOWS
1904     __asm  {
1905         _emit 0x0f
1906         _emit 0x01
1907         _emit 0xd5
1908     }
1909 #else
1910     __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1911 #endif
1912 }
1913 
1914 /*
1915   This is a macro; the argument must be a single-byte constant that can
1916   be evaluated by the inline assembler, since it is emitted as a byte
1917   into the assembly code.
1918 */
1919 #if KMP_OS_WINDOWS
1920 #define _xabort(ARG)                            \
1921     _asm _emit 0xc6                             \
1922     _asm _emit 0xf8                             \
1923     _asm _emit ARG
1924 #else
1925 #define _xabort(ARG) \
1926     __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1927 #endif
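/*
   Usage sketch (illustrative only, not part of the runtime): the canonical
   pattern built from the three primitives above.  'protected_counter' is a
   hypothetical shared variable.

       kmp_uint32 status = _xbegin();
       if ( status == _XBEGIN_STARTED ) {
           protected_counter++;            // executes transactionally
           _xend();                        // commit the transaction
       }
       else if ( status & SOFT_ABORT_MASK ) {
           // transient abort: an immediate retry may succeed
       }
       else {
           // hard abort: fall back to a real lock
       }
*/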
1928 
1929 //
1930 //    Statistics are collected for testing purposes
1931 //
1932 #if KMP_DEBUG_ADAPTIVE_LOCKS
1933 
1934 // We accumulate speculative lock statistics when the lock is destroyed.
1935 // We keep locks that haven't been destroyed in the liveLocks list
1936 // so that we can grab their statistics too.
1937 static kmp_adaptive_lock_statistics_t destroyedStats;
1938 
1939 // To hold the list of live locks.
1940 static kmp_adaptive_lock_info_t liveLocks;
1941 
1942 // A lock so we can safely update the list of locks.
1943 static kmp_bootstrap_lock_t chain_lock;
1944 
1945 // Initialize the list of stats.
1946 void
1947 __kmp_init_speculative_stats()
1948 {
1949     kmp_adaptive_lock_info_t *lck = &liveLocks;
1950 
1951     memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
1952     lck->stats.next = lck;
1953     lck->stats.prev = lck;
1954 
1955     KMP_ASSERT( lck->stats.next->stats.prev == lck );
1956     KMP_ASSERT( lck->stats.prev->stats.next == lck );
1957 
1958     __kmp_init_bootstrap_lock( &chain_lock );
1959 
1960 }
1961 
1962 // Insert the lock into the circular list
1963 static void
1964 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
1965 {
1966     __kmp_acquire_bootstrap_lock( &chain_lock );
1967 
1968     lck->stats.next = liveLocks.stats.next;
1969     lck->stats.prev = &liveLocks;
1970 
1971     liveLocks.stats.next = lck;
1972     lck->stats.next->stats.prev  = lck;
1973 
1974     KMP_ASSERT( lck->stats.next->stats.prev == lck );
1975     KMP_ASSERT( lck->stats.prev->stats.next == lck );
1976 
1977     __kmp_release_bootstrap_lock( &chain_lock );
1978 }
1979 
1980 static void
1981 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
1982 {
1983     KMP_ASSERT( lck->stats.next->stats.prev == lck );
1984     KMP_ASSERT( lck->stats.prev->stats.next == lck );
1985 
1986     kmp_adaptive_lock_info_t * n = lck->stats.next;
1987     kmp_adaptive_lock_info_t * p = lck->stats.prev;
1988 
1989     n->stats.prev = p;
1990     p->stats.next = n;
1991 }
1992 
1993 static void
1994 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
1995 {
1996     memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
1997     __kmp_remember_lock( lck );
1998 }
1999 
2000 static void
2001 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2002 {
2003     kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2004 
2005     t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2006     t->successfulSpeculations += s->successfulSpeculations;
2007     t->hardFailedSpeculations += s->hardFailedSpeculations;
2008     t->softFailedSpeculations += s->softFailedSpeculations;
2009     t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2010     t->lemmingYields          += s->lemmingYields;
2011 }
2012 
2013 static void
2014 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2015 {
2016     kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2017 
2018     __kmp_acquire_bootstrap_lock( &chain_lock );
2019 
2020     __kmp_add_stats( t, lck );
2021     __kmp_forget_lock( lck );
2022 
2023     __kmp_release_bootstrap_lock( &chain_lock );
2024 }
2025 
2026 static float
2027 percent (kmp_uint32 count, kmp_uint32 total)
2028 {
2029     return (total == 0) ? 0.0 : (100.0 * count) / total;
2030 }
2031 
2032 static
2033 FILE * __kmp_open_stats_file()
2034 {
2035     if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2036         return stdout;
2037 
2038     size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2039     char buffer[buffLen];
2040     snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile,
2041       (kmp_int32)getpid());
2042     FILE * result = fopen(&buffer[0], "w");
2043 
2044     // Maybe we should issue a warning here...
2045     return result ? result : stdout;
2046 }
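// For example (hypothetical value): if __kmp_speculative_statsfile is
// "/tmp/kmp_specstats.%d", the snprintf() above substitutes the current pid
// for the %d, so the statistics would be written to "/tmp/kmp_specstats.<pid>".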
2047 
2048 void
2049 __kmp_print_speculative_stats()
2050 {
2051     if (__kmp_user_lock_kind != lk_adaptive)
2052         return;
2053 
2054     FILE * statsFile = __kmp_open_stats_file();
2055 
2056     kmp_adaptive_lock_statistics_t total = destroyedStats;
2057     kmp_adaptive_lock_info_t *lck;
2058 
2059     for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2060         __kmp_add_stats( &total, lck );
2061     }
2062     kmp_adaptive_lock_statistics_t *t = &total;
2063     kmp_uint32 totalSections     = t->nonSpeculativeAcquires + t->successfulSpeculations;
2064     kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2065                                    t->softFailedSpeculations;
2066 
2067     fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2068     fprintf ( statsFile, " Lock parameters: \n"
2069              "   max_soft_retries               : %10d\n"
2070              "   max_badness                    : %10d\n",
2071              __kmp_adaptive_backoff_params.max_soft_retries,
2072              __kmp_adaptive_backoff_params.max_badness);
2073     fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2074     fprintf( statsFile, " Total critical sections          : %10d\n", totalSections );
2075     fprintf( statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
2076              t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2077     fprintf( statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
2078              t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2079     fprintf( statsFile, " Lemming yields                   : %10d\n\n", t->lemmingYields );
2080 
2081     fprintf( statsFile, " Speculative acquire attempts     : %10d\n", totalSpeculations );
2082     fprintf( statsFile, " Successes                        : %10d (%5.1f%%)\n",
2083              t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2084     fprintf( statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
2085              t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2086     fprintf( statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
2087              t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2088 
2089     if (statsFile != stdout)
2090         fclose( statsFile );
2091 }
2092 
2093 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2094 #else
2095 # define KMP_INC_STAT(lck,stat)
2096 
2097 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2098 
2099 static inline bool
2100 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2101 {
2102     // It is enough to check that the head_id is zero.
2103     // There is no need to also check the tail.
2104     bool res = lck->lk.head_id == 0;
2105 
2106     // We need a fence here, since we must ensure that no memory operations
2107     // from later in this thread float above that read.
2108 #if KMP_COMPILER_ICC
2109     _mm_mfence();
2110 #else
2111     __sync_synchronize();
2112 #endif
2113 
2114     return res;
2115 }
2116 
2117 // Functions for manipulating the badness
2118 static __inline void
2119 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2120 {
2121     // Reset the badness to zero so we eagerly try to speculate again
2122     lck->lk.adaptive.badness = 0;
2123     KMP_INC_STAT(lck,successfulSpeculations);
2124 }
2125 
2126 // Create a bit mask with one more set bit.
2127 static __inline void
2128 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2129 {
2130     kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2131     if ( newBadness > lck->lk.adaptive.max_badness) {
2132         return;
2133     } else {
2134         lck->lk.adaptive.badness = newBadness;
2135     }
2136 }
2137 
2138 // Check whether speculation should be attempted.
2139 static __inline int
2140 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2141 {
2142     kmp_uint32 badness = lck->lk.adaptive.badness;
2143     kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2144     int res = (attempts & badness) == 0;
2145     return res;
2146 }
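// Worked example (informal): after three consecutive failed speculations the
// badness grows 0 -> 1 -> 3 -> 7 (capped at max_badness), so with badness == 3
// speculation is attempted only when (acquire_attempts & 3) == 0, i.e. on
// every fourth acquire attempt; a single success resets badness to 0.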
2147 
2148 // Attempt to acquire only the speculative lock.
2149 // Does not back off to the non-speculative lock.
2150 //
2151 static int
2152 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2153 {
2154     int retries = lck->lk.adaptive.max_soft_retries;
2155 
2156     // We don't explicitly count the start of speculation, rather we record
2157     // the results (success, hard fail, soft fail). The sum of all of those
2158     // is the total number of times we started speculation since all
2159     // speculations must end one of those ways.
2160     do
2161     {
2162         kmp_uint32 status = _xbegin();
2163         // Switch this in to disable actual speculation but exercise
2164         // at least some of the rest of the code. Useful for debugging...
2165         // kmp_uint32 status = _XABORT_NESTED;
2166 
2167         if (status == _XBEGIN_STARTED )
2168         { /* We have successfully started speculation
2169            * Check that no-one acquired the lock for real between when we last looked
2170            * and now. This also gets the lock cache line into our read-set,
2171            * which we need so that we'll abort if anyone later claims it for real.
2172            */
2173             if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2174             {
2175                 // Lock is now visibly acquired, so someone beat us to it.
2176                 // Abort the transaction so we'll restart from _xbegin with the
2177                 // failure status.
2178                 _xabort(0x01)
2179                 KMP_ASSERT2( 0, "should not get here" );
2180             }
2181             return 1;   // Lock has been acquired (speculatively)
2182         } else {
2183             // We have aborted, update the statistics
2184             if ( status & SOFT_ABORT_MASK)
2185             {
2186                 KMP_INC_STAT(lck,softFailedSpeculations);
2187                 // and loop round to retry.
2188             }
2189             else
2190             {
2191                 KMP_INC_STAT(lck,hardFailedSpeculations);
2192                 // Give up if we had a hard failure.
2193                 break;
2194             }
2195         }
2196     }  while( retries-- ); // Loop while we have retries, and didn't fail hard.
2197 
2198     // Either we had a hard failure or we didn't succeed softly after
2199     // the full set of attempts, so back off the badness.
2200     __kmp_step_badness( lck );
2201     return 0;
2202 }
2203 
2204 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2205 // if the speculative lock cannot be acquired.
2206 // We can succeed speculatively, non-speculatively, or fail.
2207 static int
2208 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2209 {
2210     // First try to acquire the lock speculatively
2211     if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2212         return 1;
2213 
2214     // Speculative acquisition failed, so try to acquire it non-speculatively.
2215     // Count the non-speculative acquire attempt
2216     lck->lk.adaptive.acquire_attempts++;
2217 
2218     // Use base, non-speculative lock.
2219     if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2220     {
2221         KMP_INC_STAT(lck,nonSpeculativeAcquires);
2222         return 1;       // Lock is acquired (non-speculatively)
2223     }
2224     else
2225     {
2226         return 0;       // Failed to acquire the lock, it's already visibly locked.
2227     }
2228 }
2229 
2230 static int
2231 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2232 {
2233     char const * const func = "omp_test_lock";
2234     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2235         KMP_FATAL( LockIsUninitialized, func );
2236     }
2237 
2238     int retval = __kmp_test_adaptive_lock( lck, gtid );
2239 
2240     if ( retval ) {
2241         lck->lk.qlk.owner_id = gtid + 1;
2242     }
2243     return retval;
2244 }
2245 
2246 // Block until we can acquire a speculative, adaptive lock.
2247 // We check whether we should be trying to speculate.
2248 // If we should be, we check the real lock to see if it is free,
2249 // and, if not, pause without attempting to acquire it until it is.
2250 // Then we try the speculative acquire.
2251 // This means that although we suffer from lemmings a little (because we
2252 // can't acquire the lock speculatively until the queue of waiting threads
2253 // has cleared), we don't get into a state where we can never acquire the
2254 // lock speculatively (because we force the queue to clear by preventing
2255 // new arrivals from entering it while we are waiting to speculate).
2256 //
2257 // This does mean that when we're trying to break lemmings, the lock
2258 // is no longer fair. However OpenMP makes no guarantee that its
2259 // locks are fair, so this isn't a real problem.
2260 static void
2261 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2262 {
2263     if ( __kmp_should_speculate( lck, gtid ) )
2264     {
2265         if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2266         {
2267             if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2268                 return;
2269             // We tried speculation and failed, so give up.
2270         }
2271         else
2272         {
2273             // We can't try speculation until the lock is free, so we
2274             // pause here (without suspending on the queuing lock) to
2275             // allow it to drain, then try again.
2276             // All other threads will also see the same result from
2277             // __kmp_should_speculate(), so they will be doing the same
2278             // if they try to claim the lock from now on.
2279             while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2280             {
2281                 KMP_INC_STAT(lck,lemmingYields);
2282                 __kmp_yield (TRUE);
2283             }
2284 
2285             if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2286                 return;
2287         }
2288     }
2289 
2290     // Speculative acquisition failed, so acquire it non-speculatively.
2291     // Count the non-speculative acquire attempt
2292     lck->lk.adaptive.acquire_attempts++;
2293 
2294     __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2295     // We have acquired the base lock, so count that.
2296     KMP_INC_STAT(lck,nonSpeculativeAcquires );
2297 }
2298 
2299 static void
2300 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2301 {
2302     char const * const func = "omp_set_lock";
2303     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2304         KMP_FATAL( LockIsUninitialized, func );
2305     }
2306     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2307         KMP_FATAL( LockIsAlreadyOwned, func );
2308     }
2309 
2310     __kmp_acquire_adaptive_lock( lck, gtid );
2311 
2312     lck->lk.qlk.owner_id = gtid + 1;
2313 }
2314 
2315 static void
2316 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2317 {
2318     if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2319     {   // If the lock doesn't look claimed we must be speculating.
2320         // (Or the user's code is buggy and they're releasing without locking;
2321         // if we had XTEST we'd be able to check that case...)
2322         _xend();        // Exit speculation
2323         __kmp_update_badness_after_success( lck );
2324     }
2325     else
2326     {   // Since the lock *is* visibly locked we're not speculating,
2327         // so should use the underlying lock's release scheme.
2328         __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2329     }
2330 }
2331 
2332 static void
2333 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2334 {
2335     char const * const func = "omp_unset_lock";
2336     KMP_MB();  /* in case another processor initialized lock */
2337     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2338         KMP_FATAL( LockIsUninitialized, func );
2339     }
2340     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2341         KMP_FATAL( LockUnsettingFree, func );
2342     }
2343     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2344         KMP_FATAL( LockUnsettingSetByAnother, func );
2345     }
2346     lck->lk.qlk.owner_id = 0;
2347     __kmp_release_adaptive_lock( lck, gtid );
2348 }
2349 
2350 static void
2351 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2352 {
2353     __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2354     lck->lk.adaptive.badness = 0;
2355     lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2356     lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2357     lck->lk.adaptive.max_badness      = __kmp_adaptive_backoff_params.max_badness;
2358 #if KMP_DEBUG_ADAPTIVE_LOCKS
2359     __kmp_zero_speculative_stats( &lck->lk.adaptive );
2360 #endif
2361     KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2362 }
2363 
2364 static void
2365 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2366 {
2367     __kmp_init_adaptive_lock( lck );
2368 }
2369 
2370 static void
2371 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2372 {
2373 #if KMP_DEBUG_ADAPTIVE_LOCKS
2374     __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2375 #endif
2376     __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2377     // Nothing needed for the speculative part.
2378 }
2379 
2380 static void
2381 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2382 {
2383     char const * const func = "omp_destroy_lock";
2384     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2385         KMP_FATAL( LockIsUninitialized, func );
2386     }
2387     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2388         KMP_FATAL( LockStillOwned, func );
2389     }
2390     __kmp_destroy_adaptive_lock( lck );
2391 }
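/*
   Illustrative sketch (not part of the runtime): the adaptive lock wraps a
   queuing lock for its non-speculative path, so a caller goes through the
   usual init/acquire/release/destroy sequence; whether a given acquire is
   served by RTM speculation or by the underlying queuing lock is decided
   internally via __kmp_should_speculate().  The gtid value is hypothetical.

       kmp_adaptive_lock_t lck;
       __kmp_init_adaptive_lock( &lck );
       __kmp_acquire_adaptive_lock( &lck, 0 );  // speculative or queuing
       __kmp_release_adaptive_lock( &lck, 0 );  // _xend() if speculating
       __kmp_destroy_adaptive_lock( &lck );
*/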
2392 
2393 
2394 #endif // KMP_USE_ADAPTIVE_LOCKS
2395 
2396 
2397 /* ------------------------------------------------------------------------ */
2398 /* DRDPA ticket locks                                                */
2399 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2400 
2401 static kmp_int32
2402 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2403 {
2404     return TCR_4( lck->lk.owner_id ) - 1;
2405 }
2406 
2407 static inline bool
2408 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2409 {
2410     return lck->lk.depth_locked != -1;
2411 }
2412 
2413 __forceinline static void
2414 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2415 {
2416     kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2417     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2418     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2419       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2420       TCR_PTR(lck->lk.polls);                           // volatile load
2421 
2422 #ifdef USE_LOCK_PROFILE
2423     if (TCR_8(polls[ticket & mask].poll) != ticket)
2424         __kmp_printf("LOCK CONTENTION: %p\n", lck);
2425     /* else __kmp_printf( "." );*/
2426 #endif /* USE_LOCK_PROFILE */
2427 
2428     //
2429     // Now spin-wait, but reload the polls pointer and mask, in case the
2430     // polling area has been reconfigured.  Unless it is reconfigured, the
2431     // reloads stay in L1 cache and are cheap.
2432     //
2433     // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2434     //
2435     // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2436     // and poll to be re-read every spin iteration.
2437     //
2438     kmp_uint32 spins;
2439 
2440     KMP_FSYNC_PREPARE(lck);
2441     KMP_INIT_YIELD(spins);
2442     while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2443         // If we are oversubscribed,
2444         // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2445         // CPU Pause is in the macros for yield.
2446         //
2447         KMP_YIELD(TCR_4(__kmp_nth)
2448           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2449         KMP_YIELD_SPIN(spins);
2450 
2451         // Re-read the mask and the poll pointer from the lock structure.
2452         //
2453         // Make certain that "mask" is read before "polls" !!!
2454         //
2455         // If another thread reconfigures the polling area and we pick up
2456         // the new value of mask but the old polls pointer, we could
2457         // access memory beyond the end of the old polling
2458         // area.
2459         //
2460         mask = TCR_8(lck->lk.mask);                     // volatile load
2461         polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2462           TCR_PTR(lck->lk.polls);                       // volatile load
2463     }
2464 
2465     //
2466     // Critical section starts here
2467     //
2468     KMP_FSYNC_ACQUIRED(lck);
2469     KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2470       ticket, lck));
2471     lck->lk.now_serving = ticket;                       // non-volatile store
2472 
2473     //
2474     // Deallocate a garbage polling area if we know that we are the last
2475     // thread that could possibly access it.
2476     //
2477     // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2478     // ticket.
2479     //
2480     if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2481         __kmp_free((void *)lck->lk.old_polls);
2482         lck->lk.old_polls = NULL;
2483         lck->lk.cleanup_ticket = 0;
2484     }
2485 
2486     //
2487     // Check to see if we should reconfigure the polling area.
2488     // If there is still a garbage polling area to be deallocated from a
2489     // previous reconfiguration, let a later thread reconfigure it.
2490     //
2491     if (lck->lk.old_polls == NULL) {
2492         bool reconfigure = false;
2493         volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2494         kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2495 
2496         if (TCR_4(__kmp_nth)
2497           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2498             //
2499             // We are in oversubscription mode.  Contract the polling area
2500             // down to a single location, if that hasn't been done already.
2501             //
2502             if (num_polls > 1) {
2503                 reconfigure = true;
2504                 num_polls = TCR_4(lck->lk.num_polls);
2505                 mask = 0;
2506                 num_polls = 1;
2507                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2508                   __kmp_allocate(num_polls * sizeof(*polls));
2509                 polls[0].poll = ticket;
2510             }
2511         }
2512         else {
2513             //
2514             // We are in under/fully subscribed mode.  Check the number of
2515             // threads waiting on the lock.  The size of the polling area
2516             // should be at least the number of threads waiting.
2517             //
2518             kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2519             if (num_waiting > num_polls) {
2520                 kmp_uint32 old_num_polls = num_polls;
2521                 reconfigure = true;
2522                 do {
2523                     mask = (mask << 1) | 1;
2524                     num_polls *= 2;
2525                 } while (num_polls <= num_waiting);
2526 
2527                 //
2528                 // Allocate the new polling area, and copy the relevant portion
2529                 // of the old polling area to the new area.  __kmp_allocate()
2530                 // zeroes the memory it allocates, and most of the old area is
2531                 // just zero padding, so we only copy the release counters.
2532                 //
2533                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2534                   __kmp_allocate(num_polls * sizeof(*polls));
2535                 kmp_uint32 i;
2536                 for (i = 0; i < old_num_polls; i++) {
2537                     polls[i].poll = old_polls[i].poll;
2538                 }
2539             }
2540         }
2541 
2542         if (reconfigure) {
2543             //
2544             // Now write the updated fields back to the lock structure.
2545             //
2546             // Make certain that "polls" is written before "mask" !!!
2547             //
2548             // If another thread picks up the new value of mask and the old
2549             // polls pointer, it could access memory beyond the end of the
2550             // old polling area.
2551             //
2552             // On x86, we need memory fences.
2553             //
2554             KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2555               ticket, lck, num_polls));
2556 
2557             lck->lk.old_polls = old_polls;              // non-volatile store
2558             lck->lk.polls = polls;                      // volatile store
2559 
2560             KMP_MB();
2561 
2562             lck->lk.num_polls = num_polls;              // non-volatile store
2563             lck->lk.mask = mask;                        // volatile store
2564 
2565             KMP_MB();
2566 
2567             //
2568             // Only after the new polling area and mask have been flushed
2569             // to main memory can we update the cleanup ticket field.
2570             //
2571             // volatile load / non-volatile store
2572             //
2573             lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2574         }
2575     }
2576 }
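/*
   Worked example (informal): with num_polls == 4 the mask is 3, so ticket t
   spins on polls[t & 3], and the holder of ticket t releases by storing t+1
   into polls[(t+1) & 3].  When the area is expanded, the new polls pointer is
   published before the new mask (with KMP_MB() in between), so a spinner that
   re-reads mask first and polls second never indexes the old, smaller area
   with the new, larger mask.
*/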
2577 
2578 void
2579 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2580 {
2581     __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2582 }
2583 
2584 static void
2585 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2586 {
2587     char const * const func = "omp_set_lock";
2588     if ( lck->lk.initialized != lck ) {
2589         KMP_FATAL( LockIsUninitialized, func );
2590     }
2591     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2592         KMP_FATAL( LockNestableUsedAsSimple, func );
2593     }
2594     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2595         KMP_FATAL( LockIsAlreadyOwned, func );
2596     }
2597 
2598     __kmp_acquire_drdpa_lock( lck, gtid );
2599 
2600     lck->lk.owner_id = gtid + 1;
2601 }
2602 
2603 int
2604 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2605 {
2606     //
2607     // First get a ticket, then read the polls pointer and the mask.
2608     // The polls pointer must be read before the mask!!! (See above)
2609     //
2610     kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
2611     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2612       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2613       TCR_PTR(lck->lk.polls);                           // volatile load
2614     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2615     if (TCR_8(polls[ticket & mask].poll) == ticket) {
2616         kmp_uint64 next_ticket = ticket + 1;
2617         if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2618           ticket, next_ticket)) {
2619             KMP_FSYNC_ACQUIRED(lck);
2620             KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2621                ticket, lck));
2622             lck->lk.now_serving = ticket;               // non-volatile store
2623 
2624             //
2625             // Since no threads are waiting, there is no possibility that
2626             // we would want to reconfigure the polling area.  We might
2627             // have the cleanup ticket value (which says that it is now
2628             // safe to deallocate old_polls), but we'll let a later thread
2629             // which calls __kmp_acquire_lock do that - this routine
2630             // isn't supposed to block, and we would risk blocking if we
2631             // called __kmp_free() to do the deallocation.
2632             //
2633             return TRUE;
2634         }
2635     }
2636     return FALSE;
2637 }
2638 
2639 static int
2640 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2641 {
2642     char const * const func = "omp_test_lock";
2643     if ( lck->lk.initialized != lck ) {
2644         KMP_FATAL( LockIsUninitialized, func );
2645     }
2646     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2647         KMP_FATAL( LockNestableUsedAsSimple, func );
2648     }
2649 
2650     int retval = __kmp_test_drdpa_lock( lck, gtid );
2651 
2652     if ( retval ) {
2653         lck->lk.owner_id = gtid + 1;
2654     }
2655     return retval;
2656 }
2657 
2658 void
2659 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2660 {
2661     //
2662     // Read the ticket value from the lock data struct, then the polls
2663     // pointer and the mask.  The polls pointer must be read before the
2664     // mask!!! (See above)
2665     //
2666     kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
2667     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2668       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2669       TCR_PTR(lck->lk.polls);                           // volatile load
2670     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2671     KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2672        ticket - 1, lck));
2673     KMP_FSYNC_RELEASING(lck);
2674     KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2675 }
2676 
2677 static void
2678 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2679 {
2680     char const * const func = "omp_unset_lock";
2681     KMP_MB();  /* in case another processor initialized lock */
2682     if ( lck->lk.initialized != lck ) {
2683         KMP_FATAL( LockIsUninitialized, func );
2684     }
2685     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2686         KMP_FATAL( LockNestableUsedAsSimple, func );
2687     }
2688     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2689         KMP_FATAL( LockUnsettingFree, func );
2690     }
2691     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2692       && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2693         KMP_FATAL( LockUnsettingSetByAnother, func );
2694     }
2695     lck->lk.owner_id = 0;
2696     __kmp_release_drdpa_lock( lck, gtid );
2697 }
2698 
2699 void
2700 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2701 {
2702     lck->lk.location = NULL;
2703     lck->lk.mask = 0;
2704     lck->lk.num_polls = 1;
2705     lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2706       __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2707     lck->lk.cleanup_ticket = 0;
2708     lck->lk.old_polls = NULL;
2709     lck->lk.next_ticket = 0;
2710     lck->lk.now_serving = 0;
2711     lck->lk.owner_id = 0;      // no thread owns the lock.
2712     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2713     lck->lk.initialized = lck;
2714 
2715     KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2716 }
2717 
2718 static void
2719 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2720 {
2721     __kmp_init_drdpa_lock( lck );
2722 }
2723 
2724 void
2725 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2726 {
2727     lck->lk.initialized = NULL;
2728     lck->lk.location    = NULL;
2729     if (lck->lk.polls != NULL) {
2730         __kmp_free((void *)lck->lk.polls);
2731         lck->lk.polls = NULL;
2732     }
2733     if (lck->lk.old_polls != NULL) {
2734         __kmp_free((void *)lck->lk.old_polls);
2735         lck->lk.old_polls = NULL;
2736     }
2737     lck->lk.mask = 0;
2738     lck->lk.num_polls = 0;
2739     lck->lk.cleanup_ticket = 0;
2740     lck->lk.next_ticket = 0;
2741     lck->lk.now_serving = 0;
2742     lck->lk.owner_id = 0;
2743     lck->lk.depth_locked = -1;
2744 }
2745 
2746 static void
2747 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2748 {
2749     char const * const func = "omp_destroy_lock";
2750     if ( lck->lk.initialized != lck ) {
2751         KMP_FATAL( LockIsUninitialized, func );
2752     }
2753     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2754         KMP_FATAL( LockNestableUsedAsSimple, func );
2755     }
2756     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2757         KMP_FATAL( LockStillOwned, func );
2758     }
2759     __kmp_destroy_drdpa_lock( lck );
2760 }
2761 
2762 
2763 //
2764 // nested drdpa ticket locks
2765 //
2766 
2767 void
2768 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2769 {
2770     KMP_DEBUG_ASSERT( gtid >= 0 );
2771 
2772     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2773         lck->lk.depth_locked += 1;
2774     }
2775     else {
2776         __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2777         KMP_MB();
2778         lck->lk.depth_locked = 1;
2779         KMP_MB();
2780         lck->lk.owner_id = gtid + 1;
2781     }
2782 }
2783 
2784 static void
2785 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2786 {
2787     char const * const func = "omp_set_nest_lock";
2788     if ( lck->lk.initialized != lck ) {
2789         KMP_FATAL( LockIsUninitialized, func );
2790     }
2791     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2792         KMP_FATAL( LockSimpleUsedAsNestable, func );
2793     }
2794     __kmp_acquire_nested_drdpa_lock( lck, gtid );
2795 }
2796 
2797 int
2798 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2799 {
2800     int retval;
2801 
2802     KMP_DEBUG_ASSERT( gtid >= 0 );
2803 
2804     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2805         retval = ++lck->lk.depth_locked;
2806     }
2807     else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2808         retval = 0;
2809     }
2810     else {
2811         KMP_MB();
2812         retval = lck->lk.depth_locked = 1;
2813         KMP_MB();
2814         lck->lk.owner_id = gtid + 1;
2815     }
2816     return retval;
2817 }
2818 
2819 static int
2820 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2821 {
2822     char const * const func = "omp_test_nest_lock";
2823     if ( lck->lk.initialized != lck ) {
2824         KMP_FATAL( LockIsUninitialized, func );
2825     }
2826     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2827         KMP_FATAL( LockSimpleUsedAsNestable, func );
2828     }
2829     return __kmp_test_nested_drdpa_lock( lck, gtid );
2830 }
2831 
2832 void
2833 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2834 {
2835     KMP_DEBUG_ASSERT( gtid >= 0 );
2836 
2837     KMP_MB();
2838     if ( --(lck->lk.depth_locked) == 0 ) {
2839         KMP_MB();
2840         lck->lk.owner_id = 0;
2841         __kmp_release_drdpa_lock( lck, gtid );
2842     }
2843 }
2844 
2845 static void
2846 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2847 {
2848     char const * const func = "omp_unset_nest_lock";
2849     KMP_MB();  /* in case another processor initialized lock */
2850     if ( lck->lk.initialized != lck ) {
2851         KMP_FATAL( LockIsUninitialized, func );
2852     }
2853     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2854         KMP_FATAL( LockSimpleUsedAsNestable, func );
2855     }
2856     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2857         KMP_FATAL( LockUnsettingFree, func );
2858     }
2859     if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2860         KMP_FATAL( LockUnsettingSetByAnother, func );
2861     }
2862     __kmp_release_nested_drdpa_lock( lck, gtid );
2863 }
2864 
2865 void
2866 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2867 {
2868     __kmp_init_drdpa_lock( lck );
2869     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2870 }
2871 
2872 static void
2873 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2874 {
2875     __kmp_init_nested_drdpa_lock( lck );
2876 }
2877 
2878 void
2879 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2880 {
2881     __kmp_destroy_drdpa_lock( lck );
2882     lck->lk.depth_locked = 0;
2883 }
2884 
2885 static void
2886 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2887 {
2888     char const * const func = "omp_destroy_nest_lock";
2889     if ( lck->lk.initialized != lck ) {
2890         KMP_FATAL( LockIsUninitialized, func );
2891     }
2892     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2893         KMP_FATAL( LockSimpleUsedAsNestable, func );
2894     }
2895     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2896         KMP_FATAL( LockStillOwned, func );
2897     }
2898     __kmp_destroy_nested_drdpa_lock( lck );
2899 }
2900 
2901 
2902 //
2903 // access functions to fields which don't exist for all lock kinds.
2904 //
2905 
2906 static int
2907 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2908 {
2909     return lck == lck->lk.initialized;
2910 }
2911 
2912 static const ident_t *
2913 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2914 {
2915     return lck->lk.location;
2916 }
2917 
2918 static void
2919 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2920 {
2921     lck->lk.location = loc;
2922 }
2923 
2924 static kmp_lock_flags_t
2925 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
2926 {
2927     return lck->lk.flags;
2928 }
2929 
2930 static void
2931 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
2932 {
2933     lck->lk.flags = flags;
2934 }
2935 
2936 #if KMP_USE_DYNAMIC_LOCK
2937 
2938 // Definitions of lock hints.
2939 # ifndef __OMP_H
2940 typedef enum kmp_lock_hint_t {
2941     kmp_lock_hint_none = 0,
2942     kmp_lock_hint_contended,
2943     kmp_lock_hint_uncontended,
2944     kmp_lock_hint_nonspeculative,
2945     kmp_lock_hint_speculative,
2946     kmp_lock_hint_adaptive,
2947 } kmp_lock_hint_t;
2948 # endif
2949 
2950 // Direct lock initializers. Each initializer simply writes a tag to the low 8 bits of the lock word.
2951 #define expand_init_lock(l, a)                                              \
2952 static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \
2953     *lck = DYNA_LOCK_FREE(l);                                               \
2954     KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck));            \
2955 }
2956 FOREACH_D_LOCK(expand_init_lock, 0)
2957 #undef expand_init_lock
2958 
2959 #if DYNA_HAS_HLE
2960 
2961 // HLE lock functions - imported from the testbed runtime.
2962 #if KMP_MIC
2963 # define machine_pause() _mm_delay_32(10) // TODO: find the right argument
2964 #else
2965 # define machine_pause() _mm_pause()
2966 #endif
2967 #define HLE_ACQUIRE ".byte 0xf2;"
2968 #define HLE_RELEASE ".byte 0xf3;"
2969 
2970 static inline kmp_uint32
2971 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
2972 {
2973     __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
2974                     : "+r"(v), "+m"(*p)
2975                     :
2976                     : "memory");
2977     return v;
2978 }
2979 
2980 static void
2981 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
2982 {
2983     *lck = 0;
2984 }
2985 
2986 static void
2987 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
2988 {
2989     // Use gtid for DYNA_LOCK_BUSY if necessary
2990     if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) {
2991         int delay = 1;
2992         do {
2993             while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) {
2994                 for (int i = delay; i != 0; --i)
2995                     machine_pause();
2996                 delay = ((delay << 1) | 1) & 7;
2997             }
2998         } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle));
2999     }
3000 }
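// Backoff sketch (informal): the delay grows 1 -> 3 -> 7 pause iterations and
// then saturates, because ((delay << 1) | 1) & 7 caps at 7, so a contended
// HLE acquire never executes more than 7 machine_pause() calls between
// successive checks of the lock word.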
3001 
3002 static void
3003 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3004 {
3005     __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3006 }
3007 
3008 static void
3009 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3010 {
3011     __asm__ volatile(HLE_RELEASE "movl %1,%0"
3012                     : "=m"(*lck)
3013                     : "r"(DYNA_LOCK_FREE(hle))
3014                     : "memory");
3015 }
3016 
3017 static void
3018 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3019 {
3020     __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3021 }
3022 
3023 static int
3024 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3025 {
3026     return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle);
3027 }
3028 
3029 static int
3030 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3031 {
3032     return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3033 }
3034 
3035 #endif // DYNA_HAS_HLE
3036 
3037 // Entry functions for indirect locks (first element of direct_*_ops[]).
3038 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3039 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3040 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3041 static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3042 static int  __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3043 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3044 static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3045 static int  __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3046 
3047 //
3048 // Jump tables for the indirect lock functions.
3049 // Only the odd entries are filled in; that avoids the need to shift out the low bit.
3050 //
3051 #define expand_func0(l, op) 0,op##_##l##_##lock,
3052 void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3053     = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) };
3054 
3055 #define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock,
3056 void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *)
3057     = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) };
3058 
3059 // Differentiates *lock and *lock_with_checks.
3060 #define expand_func2(l, op)  0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3061 #define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3062 static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3063     = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire)  },
3064         { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } };
3065 static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3066     = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release)  },
3067         { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } };
3068 
3069 #define expand_func3(l, op)  0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock,
3070 #define expand_func3c(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3071 static int  (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32)
3072     = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test)  },
3073         { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } };
3074 
3075 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3076 void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3077 void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
3078 int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0;
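// Note (informal, restating the comments above): direct lock tags are odd, so
// they index these tables directly with no shift, while element 0 holds the
// entry functions used for indirect locks.  Which row of the *_tab[][] tables
// these pointers expose (the checked or the unchecked variants) is presumably
// selected during runtime initialization, depending on whether consistency
// checking is enabled.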
3079 
3080 //
3081 // Jump tables for the indirect lock functions.
3082 //
3083 #define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3084 void (*__kmp_indirect_init_ops[])(kmp_user_lock_p)
3085     = { FOREACH_I_LOCK(expand_func4, init) };
3086 void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p)
3087     = { FOREACH_I_LOCK(expand_func4, destroy) };
3088 
3089 // Differentiates *lock and *lock_with_checks.
3090 #define expand_func5(l, op)  (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3091 #define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3092 static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3093     = { { FOREACH_I_LOCK(expand_func5, acquire)  },
3094         { FOREACH_I_LOCK(expand_func5c, acquire) } };
3095 static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3096     = { { FOREACH_I_LOCK(expand_func5, release)  },
3097         { FOREACH_I_LOCK(expand_func5c, release) } };
3098 
3099 #define expand_func6(l, op)  (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3100 #define expand_func6c(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3101 static int  (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32)
3102     = { { FOREACH_I_LOCK(expand_func6, test)  },
3103         { FOREACH_I_LOCK(expand_func6c, test) } };
3104 
3105 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3106 void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0;
3107 void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0;
3108 int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0;
3109 
3110 // Lock index table.
3111 kmp_indirect_lock_t **__kmp_indirect_lock_table;
3112 kmp_lock_index_t __kmp_indirect_lock_table_size;
3113 kmp_lock_index_t __kmp_indirect_lock_table_next;
3114 
3115 // Size of indirect locks.
3116 static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = {
3117     sizeof(kmp_ticket_lock_t),      sizeof(kmp_queuing_lock_t),
3118 #if KMP_USE_ADAPTIVE_LOCKS
3119     sizeof(kmp_adaptive_lock_t),
3120 #endif
3121     sizeof(kmp_drdpa_lock_t),
3122     sizeof(kmp_tas_lock_t),
3123 #if DYNA_HAS_FUTEX
3124     sizeof(kmp_futex_lock_t),
3125 #endif
3126     sizeof(kmp_ticket_lock_t),      sizeof(kmp_queuing_lock_t),
3127     sizeof(kmp_drdpa_lock_t)
3128 };
3129 
3130 // Jump tables for lock accessor/modifier.
3131 void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3132 void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3133 const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3134 kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3135 
3136 // Use different lock pools for different lock types.
3137 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 };
3138 
3139 // Inserts the given lock ptr into the lock table.
3140 kmp_lock_index_t
3141 __kmp_insert_indirect_lock(kmp_indirect_lock_t *lck)
3142 {
3143     kmp_lock_index_t next = __kmp_indirect_lock_table_next;
3144     // Check capacity and double the size if required
3145     if (next >= __kmp_indirect_lock_table_size) {
3146         kmp_lock_index_t i;
3147         kmp_lock_index_t size = __kmp_indirect_lock_table_size;
3148         kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table;
3149         __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *));
3150         memcpy(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *));
3151         __kmp_free(old_table);
3152         __kmp_indirect_lock_table_size = 2*next;
3153     }
3154     // Insert lck into the table and return the index.
3155     __kmp_indirect_lock_table[next] = lck;
3156     __kmp_indirect_lock_table_next++;
3157     return next;
3158 }
3159 
3160 // User lock allocator for dynamically dispatched locks.
3161 kmp_indirect_lock_t *
3162 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3163 {
3164     kmp_indirect_lock_t *lck;
3165     kmp_lock_index_t idx;
3166 
3167     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3168 
3169     if (__kmp_indirect_lock_pool[tag] != NULL) {
3170         lck = __kmp_indirect_lock_pool[tag];
3171         if (OMP_LOCK_T_SIZE < sizeof(void *))
3172             idx = lck->lock->pool.index;
3173         __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3174     } else {
3175         lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t));
3176         lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3177         if (OMP_LOCK_T_SIZE < sizeof(void *))
3178             idx = __kmp_insert_indirect_lock(lck);
3179     }
3180 
3181     __kmp_release_lock(&__kmp_global_lock, gtid);
3182 
3183     lck->type = tag;
3184 
3185     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3186         *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3187     } else {
3188         *((kmp_indirect_lock_t **)user_lock) = lck;
3189     }
3190 
3191     return lck;
3192 }
3193 
3194 // User lock lookup for dynamically dispatched locks.
3195 static __forceinline
3196 kmp_indirect_lock_t *
3197 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3198 {
3199     if (__kmp_env_consistency_check) {
3200         kmp_indirect_lock_t *lck = NULL;
3201         if (user_lock == NULL) {
3202             KMP_FATAL(LockIsUninitialized, func);
3203         }
3204         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3205             kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock);
            // kmp_lock_index_t is unsigned, so only the upper bound needs checking.
            if (idx >= __kmp_indirect_lock_table_size) {
3207                 KMP_FATAL(LockIsUninitialized, func);
3208             }
3209             lck = __kmp_indirect_lock_table[idx];
3210         } else {
3211             lck = *((kmp_indirect_lock_t **)user_lock);
3212         }
3213         if (lck == NULL) {
3214             KMP_FATAL(LockIsUninitialized, func);
3215         }
3216         return lck;
3217     } else {
3218         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3219             return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)];
3220         } else {
3221             return *((kmp_indirect_lock_t **)user_lock);
3222         }
3223     }
3224 }
3225 
3226 static void
3227 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3228 {
3229 #if KMP_USE_ADAPTIVE_LOCKS
3230     if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3231         KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3232         seq = lockseq_queuing;
3233     }
3234 #endif
3235     kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq);
3236     kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3237     DYNA_I_LOCK_FUNC(l, init)(l->lock);
3238     KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type));
3239 }
3240 
3241 static void
3242 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3243 {
3244     kmp_uint32 gtid = __kmp_entry_gtid();
3245     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3246     DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3247     kmp_indirect_locktag_t tag = l->type;
3248 
3249     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3250 
3251     // Use the base lock's space to keep the pool chain.
3252     l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3253     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3254         l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock);
3255     }
3256     __kmp_indirect_lock_pool[tag] = l;
3257 
3258     __kmp_release_lock(&__kmp_global_lock, gtid);
3259 }
3260 
3261 static void
3262 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3263 {
3264     kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3265     DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3266 }
3267 
3268 static void
3269 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3270 {
3271     kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3272     DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3273 }
3274 
3275 static int
3276 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3277 {
3278     kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock);
3279     return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3280 }
3281 
3282 static void
3283 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3284 {
3285     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3286     DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid);
3287 }
3288 
3289 static void
3290 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3291 {
3292     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3293     DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3294 }
3295 
3296 static int
3297 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3298 {
3299     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3300     return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid);
3301 }
3302 
3303 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3304 
3305 // Initialize a hinted lock.
3306 void
3307 __kmp_init_lock_hinted(void **lock, int hint)
3308 {
3309     kmp_dyna_lockseq_t seq;
3310     switch (hint) {
3311         case kmp_lock_hint_uncontended:
3312             seq = lockseq_tas;
3313             break;
3314         case kmp_lock_hint_speculative:
3315 #if DYNA_HAS_HLE
3316             seq = lockseq_hle;
3317 #else
3318             seq = lockseq_tas;
3319 #endif
3320             break;
3321         case kmp_lock_hint_adaptive:
3322 #if KMP_USE_ADAPTIVE_LOCKS
3323             seq = lockseq_adaptive;
3324 #else
3325             seq = lockseq_queuing;
3326 #endif
3327             break;
3328         // Defaults to queuing locks.
3329         case kmp_lock_hint_contended:
3330         case kmp_lock_hint_nonspeculative:
3331         default:
3332             seq = lockseq_queuing;
3333             break;
3334     }
3335     if (DYNA_IS_D_LOCK(seq)) {
3336         DYNA_INIT_D_LOCK(lock, seq);
3337 #if USE_ITT_BUILD
3338         __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
3339 #endif
3340     } else {
3341         DYNA_INIT_I_LOCK(lock, seq);
3342 #if USE_ITT_BUILD
3343         kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3344         __kmp_itt_lock_creating(ilk->lock, NULL);
3345 #endif
3346     }
3347 }
3348 
3349 // This is used only in kmp_error.c when consistency checking is on.
3350 kmp_int32
3351 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3352 {
3353     switch (seq) {
3354         case lockseq_tas:
3355         case lockseq_nested_tas:
3356             return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3357 #if DYNA_HAS_FUTEX
3358         case lockseq_futex:
3359         case lockseq_nested_futex:
3360             return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3361 #endif
3362         case lockseq_ticket:
3363         case lockseq_nested_ticket:
3364             return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3365         case lockseq_queuing:
3366         case lockseq_nested_queuing:
#if KMP_USE_ADAPTIVE_LOCKS
        case lockseq_adaptive:
#endif
            return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3371         case lockseq_drdpa:
3372         case lockseq_nested_drdpa:
3373             return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3374         default:
3375             return 0;
3376     }
3377 }
3378 
3379 // The value initialized from KMP_LOCK_KIND needs to be translated to its
3380 // nested version.
3381 void
3382 __kmp_init_nest_lock_hinted(void **lock, int hint)
3383 {
3384     kmp_dyna_lockseq_t seq;
3385     switch (hint) {
3386         case kmp_lock_hint_uncontended:
3387             seq = lockseq_nested_tas;
3388             break;
3389         // Defaults to queuing locks.
3390         case kmp_lock_hint_contended:
3391         case kmp_lock_hint_nonspeculative:
3392         default:
3393             seq = lockseq_nested_queuing;
3394             break;
3395     }
3396     DYNA_INIT_I_LOCK(lock, seq);
3397 #if USE_ITT_BUILD
3398     kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
3399     __kmp_itt_lock_creating(ilk->lock, NULL);
3400 #endif
3401 }
3402 
3403 // Initializes the lock table for indirect locks.
3404 static void
3405 __kmp_init_indirect_lock_table()
3406 {
3407     __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
3408     __kmp_indirect_lock_table_size = 1024;
3409     __kmp_indirect_lock_table_next = 0;
3410 }
3411 
3412 #if KMP_USE_ADAPTIVE_LOCKS
3413 # define init_lock_func(table, expand) {             \
3414     table[locktag_ticket]         = expand(ticket);  \
3415     table[locktag_queuing]        = expand(queuing); \
3416     table[locktag_adaptive]       = expand(queuing); \
3417     table[locktag_drdpa]          = expand(drdpa);   \
3418     table[locktag_nested_ticket]  = expand(ticket);  \
3419     table[locktag_nested_queuing] = expand(queuing); \
3420     table[locktag_nested_drdpa]   = expand(drdpa);   \
3421 }
3422 #else
3423 # define init_lock_func(table, expand) {             \
3424     table[locktag_ticket]         = expand(ticket);  \
3425     table[locktag_queuing]        = expand(queuing); \
3426     table[locktag_drdpa]          = expand(drdpa);   \
3427     table[locktag_nested_ticket]  = expand(ticket);  \
3428     table[locktag_nested_queuing] = expand(queuing); \
3429     table[locktag_nested_drdpa]   = expand(drdpa);   \
3430 }
3431 #endif // KMP_USE_ADAPTIVE_LOCKS
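
// For reference, init_lock_func(__kmp_indirect_set_location, expand_func), with
// the expand_func defined in __kmp_init_dynamic_user_locks() below, expands
// roughly to a sequence of assignments such as
//
//     __kmp_indirect_set_location[locktag_ticket] =
//         (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
//     __kmp_indirect_set_location[locktag_queuing] =
//         (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_queuing_lock_location;
//
// and so on for the remaining (including nested) lock tags.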
3432 
3433 // Initializes data for dynamic user locks.
3434 void
3435 __kmp_init_dynamic_user_locks()
3436 {
    // Select which row of the jump tables to expose: row 0 holds the plain
    // *_lock functions, row 1 the *_lock_with_checks variants.
    int offset = __kmp_env_consistency_check ? 1 : 0;
3439     __kmp_direct_set_ops = direct_set_tab[offset];
3440     __kmp_direct_unset_ops = direct_unset_tab[offset];
3441     __kmp_direct_test_ops = direct_test_tab[offset];
3442     __kmp_indirect_set_ops = indirect_set_tab[offset];
3443     __kmp_indirect_unset_ops = indirect_unset_tab[offset];
3444     __kmp_indirect_test_ops = indirect_test_tab[offset];
3445     __kmp_init_indirect_lock_table();
3446 
3447     // Initialize lock accessor/modifier
3448     // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
3449 #define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3450     init_lock_func(__kmp_indirect_set_location, expand_func);
3451 #undef expand_func
3452 #define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3453     init_lock_func(__kmp_indirect_set_flags, expand_func);
3454 #undef expand_func
3455 #define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3456     init_lock_func(__kmp_indirect_get_location, expand_func);
3457 #undef expand_func
3458 #define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3459     init_lock_func(__kmp_indirect_get_flags, expand_func);
3460 #undef expand_func
3461 
3462     __kmp_init_user_locks = TRUE;
3463 }
3464 
// Cleans up the indirect lock pools, any remaining indirect locks, and the lock table.
3466 void
3467 __kmp_cleanup_indirect_user_locks()
3468 {
3469     kmp_lock_index_t i;
3470     int k;
3471 
3472     // Clean up locks in the pools first (they were already destroyed before going into the pools).
3473     for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) {
3474         kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3475         while (l != NULL) {
3476             kmp_indirect_lock_t *ll = l;
3477             l = (kmp_indirect_lock_t *)l->lock->pool.next;
3478             if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3479                 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL;
3480             }
3481             __kmp_free(ll->lock);
3482             __kmp_free(ll);
3483         }
3484     }
3485     // Clean up the remaining undestroyed locks.
3486     for (i = 0; i < __kmp_indirect_lock_table_next; i++) {
3487         kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i];
3488         if (l != NULL) {
3489             // Locks not destroyed explicitly need to be destroyed here.
3490             DYNA_I_LOCK_FUNC(l, destroy)(l->lock);
3491             __kmp_free(l->lock);
3492             __kmp_free(l);
3493         }
3494     }
3495     // Free the table
3496     __kmp_free(__kmp_indirect_lock_table);
3497 
3498     __kmp_init_user_locks = FALSE;
3499 }
3500 
3501 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3502 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3503 
3504 #else // KMP_USE_DYNAMIC_LOCK
3505 
3506 /* ------------------------------------------------------------------------ */
3507 /* user locks
3508  *
3509  * They are implemented as a table of function pointers which are set to the
3510  * lock functions of the appropriate kind, once that has been determined.
3511  */
3512 
3513 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3514 
3515 size_t __kmp_base_user_lock_size = 0;
3516 size_t __kmp_user_lock_size = 0;
3517 
3518 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3519 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3520 
3521 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3522 void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3523 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3524 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3525 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3526 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3527 
3528 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3529 void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3530 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3531 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3532 
3533 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3534 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3535 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3536 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3537 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
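
// Illustrative sketch (comment only): once __kmp_set_user_lock_vptrs() below has
// bound these pointers to a particular lock kind, callers dispatch through them,
// e.g. (hypothetical call site, with lck and gtid in scope):
//
//     kmp_int32 owner = ( *__kmp_get_user_lock_owner_ )( lck );
//     ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );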
3538 
3539 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3540 {
3541     switch ( user_lock_kind ) {
3542         case lk_default:
3543         default:
3544         KMP_ASSERT( 0 );
3545 
3546         case lk_tas: {
3547             __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3548             __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3549 
3550             __kmp_get_user_lock_owner_ =
3551               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3552               ( &__kmp_get_tas_lock_owner );
3553 
3554             if ( __kmp_env_consistency_check ) {
3555                 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3556                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3557             }
3558             else {
3559                 KMP_BIND_USER_LOCK(tas);
3560                 KMP_BIND_NESTED_USER_LOCK(tas);
3561             }
3562 
3563             __kmp_destroy_user_lock_ =
3564               ( void ( * )( kmp_user_lock_p ) )
3565               ( &__kmp_destroy_tas_lock );
3566 
3567              __kmp_is_user_lock_initialized_ =
3568                ( int ( * )( kmp_user_lock_p ) ) NULL;
3569 
3570              __kmp_get_user_lock_location_ =
3571                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3572 
3573              __kmp_set_user_lock_location_ =
3574                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3575 
3576              __kmp_get_user_lock_flags_ =
3577                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3578 
3579              __kmp_set_user_lock_flags_ =
3580                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3581         }
3582         break;
3583 
3584 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3585 
3586         case lk_futex: {
3587             __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3588             __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3589 
3590             __kmp_get_user_lock_owner_ =
3591               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3592               ( &__kmp_get_futex_lock_owner );
3593 
3594             if ( __kmp_env_consistency_check ) {
3595                 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3596                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3597             }
3598             else {
3599                 KMP_BIND_USER_LOCK(futex);
3600                 KMP_BIND_NESTED_USER_LOCK(futex);
3601             }
3602 
3603             __kmp_destroy_user_lock_ =
3604               ( void ( * )( kmp_user_lock_p ) )
3605               ( &__kmp_destroy_futex_lock );
3606 
3607              __kmp_is_user_lock_initialized_ =
3608                ( int ( * )( kmp_user_lock_p ) ) NULL;
3609 
3610              __kmp_get_user_lock_location_ =
3611                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3612 
3613              __kmp_set_user_lock_location_ =
3614                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3615 
3616              __kmp_get_user_lock_flags_ =
3617                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3618 
3619              __kmp_set_user_lock_flags_ =
3620                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3621         }
3622         break;
3623 
3624 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
3625 
3626         case lk_ticket: {
3627             __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3628             __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3629 
3630             __kmp_get_user_lock_owner_ =
3631               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3632               ( &__kmp_get_ticket_lock_owner );
3633 
3634             if ( __kmp_env_consistency_check ) {
3635                 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3636                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3637             }
3638             else {
3639                 KMP_BIND_USER_LOCK(ticket);
3640                 KMP_BIND_NESTED_USER_LOCK(ticket);
3641             }
3642 
3643             __kmp_destroy_user_lock_ =
3644               ( void ( * )( kmp_user_lock_p ) )
3645               ( &__kmp_destroy_ticket_lock );
3646 
3647              __kmp_is_user_lock_initialized_ =
3648                ( int ( * )( kmp_user_lock_p ) )
3649                ( &__kmp_is_ticket_lock_initialized );
3650 
3651              __kmp_get_user_lock_location_ =
3652                ( const ident_t * ( * )( kmp_user_lock_p ) )
3653                ( &__kmp_get_ticket_lock_location );
3654 
3655              __kmp_set_user_lock_location_ =
3656                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3657                ( &__kmp_set_ticket_lock_location );
3658 
3659              __kmp_get_user_lock_flags_ =
3660                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3661                ( &__kmp_get_ticket_lock_flags );
3662 
3663              __kmp_set_user_lock_flags_ =
3664                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3665                ( &__kmp_set_ticket_lock_flags );
3666         }
3667         break;
3668 
3669         case lk_queuing: {
3670             __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3671             __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3672 
3673             __kmp_get_user_lock_owner_ =
3674               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3675               ( &__kmp_get_queuing_lock_owner );
3676 
3677             if ( __kmp_env_consistency_check ) {
3678                 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3679                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3680             }
3681             else {
3682                 KMP_BIND_USER_LOCK(queuing);
3683                 KMP_BIND_NESTED_USER_LOCK(queuing);
3684             }
3685 
3686             __kmp_destroy_user_lock_ =
3687               ( void ( * )( kmp_user_lock_p ) )
3688               ( &__kmp_destroy_queuing_lock );
3689 
3690              __kmp_is_user_lock_initialized_ =
3691                ( int ( * )( kmp_user_lock_p ) )
3692                ( &__kmp_is_queuing_lock_initialized );
3693 
3694              __kmp_get_user_lock_location_ =
3695                ( const ident_t * ( * )( kmp_user_lock_p ) )
3696                ( &__kmp_get_queuing_lock_location );
3697 
3698              __kmp_set_user_lock_location_ =
3699                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3700                ( &__kmp_set_queuing_lock_location );
3701 
3702              __kmp_get_user_lock_flags_ =
3703                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3704                ( &__kmp_get_queuing_lock_flags );
3705 
3706              __kmp_set_user_lock_flags_ =
3707                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3708                ( &__kmp_set_queuing_lock_flags );
3709         }
3710         break;
3711 
3712 #if KMP_USE_ADAPTIVE_LOCKS
3713         case lk_adaptive: {
3714             __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3715             __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3716 
3717             __kmp_get_user_lock_owner_ =
3718               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3719               ( &__kmp_get_queuing_lock_owner );
3720 
3721             if ( __kmp_env_consistency_check ) {
3722                 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3723             }
3724             else {
3725                 KMP_BIND_USER_LOCK(adaptive);
3726             }
3727 
3728             __kmp_destroy_user_lock_ =
3729               ( void ( * )( kmp_user_lock_p ) )
3730               ( &__kmp_destroy_adaptive_lock );
3731 
3732             __kmp_is_user_lock_initialized_ =
3733               ( int ( * )( kmp_user_lock_p ) )
3734               ( &__kmp_is_queuing_lock_initialized );
3735 
3736             __kmp_get_user_lock_location_ =
3737               ( const ident_t * ( * )( kmp_user_lock_p ) )
3738               ( &__kmp_get_queuing_lock_location );
3739 
3740             __kmp_set_user_lock_location_ =
3741               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3742               ( &__kmp_set_queuing_lock_location );
3743 
3744             __kmp_get_user_lock_flags_ =
3745               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3746               ( &__kmp_get_queuing_lock_flags );
3747 
3748             __kmp_set_user_lock_flags_ =
3749               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3750               ( &__kmp_set_queuing_lock_flags );
3751 
3752         }
3753         break;
3754 #endif // KMP_USE_ADAPTIVE_LOCKS
3755 
3756         case lk_drdpa: {
3757             __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3758             __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3759 
3760             __kmp_get_user_lock_owner_ =
3761               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3762               ( &__kmp_get_drdpa_lock_owner );
3763 
3764             if ( __kmp_env_consistency_check ) {
3765                 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3766                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3767             }
3768             else {
3769                 KMP_BIND_USER_LOCK(drdpa);
3770                 KMP_BIND_NESTED_USER_LOCK(drdpa);
3771             }
3772 
3773             __kmp_destroy_user_lock_ =
3774               ( void ( * )( kmp_user_lock_p ) )
3775               ( &__kmp_destroy_drdpa_lock );
3776 
3777              __kmp_is_user_lock_initialized_ =
3778                ( int ( * )( kmp_user_lock_p ) )
3779                ( &__kmp_is_drdpa_lock_initialized );
3780 
3781              __kmp_get_user_lock_location_ =
3782                ( const ident_t * ( * )( kmp_user_lock_p ) )
3783                ( &__kmp_get_drdpa_lock_location );
3784 
3785              __kmp_set_user_lock_location_ =
3786                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3787                ( &__kmp_set_drdpa_lock_location );
3788 
3789              __kmp_get_user_lock_flags_ =
3790                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3791                ( &__kmp_get_drdpa_lock_flags );
3792 
3793              __kmp_set_user_lock_flags_ =
3794                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3795                ( &__kmp_set_drdpa_lock_flags );
3796         }
3797         break;
3798     }
3799 }
3800 
3801 
3802 // ----------------------------------------------------------------------------
3803 // User lock table & lock allocation
3804 
3805 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3806 kmp_user_lock_p __kmp_lock_pool = NULL;
3807 
3808 // Lock block-allocation support.
3809 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3810 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3811 
3812 static kmp_lock_index_t
3813 __kmp_lock_table_insert( kmp_user_lock_p lck )
3814 {
3815     // Assume that kmp_global_lock is held upon entry/exit.
3816     kmp_lock_index_t index;
3817     if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3818         kmp_lock_index_t size;
3819         kmp_user_lock_p *table;
3820         kmp_lock_index_t i;
3821         // Reallocate lock table.
3822         if ( __kmp_user_lock_table.allocated == 0 ) {
3823             size = 1024;
3824         }
3825         else {
3826             size = __kmp_user_lock_table.allocated * 2;
3827         }
3828         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3829         memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3830         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
            // We cannot free the previous table now, since it may be in use by other
            // threads. So save the pointer to the previous table in the first element of
            // the new table. All the tables are organized into a list, and can be freed
            // when the library is shutting down.
3835         __kmp_user_lock_table.table = table;
3836         __kmp_user_lock_table.allocated = size;
3837     }
3838     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3839     index = __kmp_user_lock_table.used;
3840     __kmp_user_lock_table.table[ index ] = lck;
3841     ++ __kmp_user_lock_table.used;
3842     return index;
3843 }
3844 
3845 static kmp_user_lock_p
3846 __kmp_lock_block_allocate()
3847 {
3848     // Assume that kmp_global_lock is held upon entry/exit.
3849     static int last_index = 0;
3850     if ( ( last_index >= __kmp_num_locks_in_block )
3851       || ( __kmp_lock_blocks == NULL ) ) {
3852         // Restart the index.
3853         last_index = 0;
3854         // Need to allocate a new block.
3855         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3856         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3857         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3858         // Set up the new block.
3859         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3860         new_block->next_block = __kmp_lock_blocks;
3861         new_block->locks = (void *)buffer;
3862         // Publish the new block.
3863         KMP_MB();
3864         __kmp_lock_blocks = new_block;
3865     }
3866     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3867       [ last_index * __kmp_user_lock_size ] ) );
3868     last_index++;
3869     return ret;
3870 }
3871 
3872 //
3873 // Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
3875 //
3876 kmp_user_lock_p
3877 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3878   kmp_lock_flags_t flags )
3879 {
3880     kmp_user_lock_p lck;
3881     kmp_lock_index_t index;
3882     KMP_DEBUG_ASSERT( user_lock );
3883 
3884     __kmp_acquire_lock( &__kmp_global_lock, gtid );
3885 
3886     if ( __kmp_lock_pool == NULL ) {
3887         // Lock pool is empty. Allocate new memory.
3888         if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3889             lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3890         }
3891         else {
3892             lck = __kmp_lock_block_allocate();
3893         }
3894 
        // Insert the lock into the table so that it can be freed in __kmp_cleanup,
        // and so the debugger has info on all allocated locks.
3897         index = __kmp_lock_table_insert( lck );
3898     }
3899     else {
3900         // Pick up lock from pool.
3901         lck = __kmp_lock_pool;
3902         index = __kmp_lock_pool->pool.index;
3903         __kmp_lock_pool = __kmp_lock_pool->pool.next;
3904     }
3905 
3906     //
3907     // We could potentially differentiate between nested and regular locks
3908     // here, and do the lock table lookup for regular locks only.
3909     //
3910     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3911         * ( (kmp_lock_index_t *) user_lock ) = index;
3912     }
3913     else {
3914         * ( (kmp_user_lock_p *) user_lock ) = lck;
3915     }
3916 
    // Mark the lock if it is a critical section lock.
3918     __kmp_set_user_lock_flags( lck, flags );
3919 
    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3921 
3922     return lck;
3923 }
3924 
// Return the lock's memory to the pool for reuse.
3926 void
3927 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3928 {
3929     kmp_lock_pool_t * lock_pool;
3930 
3931     KMP_DEBUG_ASSERT( user_lock != NULL );
3932     KMP_DEBUG_ASSERT( lck != NULL );
3933 
3934     __kmp_acquire_lock( & __kmp_global_lock, gtid );
3935 
3936     lck->pool.next = __kmp_lock_pool;
3937     __kmp_lock_pool = lck;
3938     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3939         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3940         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3941         lck->pool.index = index;
3942     }
3943 
3944     __kmp_release_lock( & __kmp_global_lock, gtid );
3945 }
3946 
3947 kmp_user_lock_p
3948 __kmp_lookup_user_lock( void **user_lock, char const *func )
3949 {
3950     kmp_user_lock_p lck = NULL;
3951 
3952     if ( __kmp_env_consistency_check ) {
3953         if ( user_lock == NULL ) {
3954             KMP_FATAL( LockIsUninitialized, func );
3955         }
3956     }
3957 
3958     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3959         kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3960         if ( __kmp_env_consistency_check ) {
3961             if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3962                 KMP_FATAL( LockIsUninitialized, func );
3963             }
3964         }
3965         KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3966         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3967         lck = __kmp_user_lock_table.table[index];
3968     }
3969     else {
3970         lck = *( (kmp_user_lock_p *)user_lock );
3971     }
3972 
3973     if ( __kmp_env_consistency_check ) {
3974         if ( lck == NULL ) {
3975             KMP_FATAL( LockIsUninitialized, func );
3976         }
3977     }
3978 
3979     return lck;
3980 }
3981 
3982 void
3983 __kmp_cleanup_user_locks( void )
3984 {
3985     //
    // Reset lock pool. Do not worry about locks in the pool -- we will free
    // them when iterating through the lock table (it includes all the locks,
    // dead or alive).
3989     //
3990     __kmp_lock_pool = NULL;
3991 
3992 #define IS_CRITICAL(lck) \
3993         ( ( __kmp_get_user_lock_flags_ != NULL ) && \
3994         ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
3995 
3996     //
3997     // Loop through lock table, free all locks.
3998     //
    // Do not free item [0]; it is reserved for the lock tables list.
4000     //
4001     // FIXME - we are iterating through a list of (pointers to) objects of
4002     // type union kmp_user_lock, but we have no way of knowing whether the
4003     // base type is currently "pool" or whatever the global user lock type
4004     // is.
4005     //
4006     // We are relying on the fact that for all of the user lock types
4007     // (except "tas"), the first field in the lock struct is the "initialized"
4008     // field, which is set to the address of the lock object itself when
4009     // the lock is initialized.  When the union is of type "pool", the
4010     // first field is a pointer to the next object in the free list, which
4011     // will not be the same address as the object itself.
4012     //
4013     // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4014     // will fail for "pool" objects on the free list.  This must happen as
4015     // the "location" field of real user locks overlaps the "index" field
4016     // of "pool" objects.
4017     //
4018     // It would be better to run through the free list, and remove all "pool"
4019     // objects from the lock table before executing this loop.  However,
4020     // "pool" objects do not always have their index field set (only on
4021     // lin_32e), and I don't want to search the lock table for the address
4022     // of every "pool" object on the free list.
4023     //
4024     while ( __kmp_user_lock_table.used > 1 ) {
4025         const ident *loc;
4026 
4027         //
        // Reduce __kmp_user_lock_table.used before freeing the lock,
        // so that the state of the lock table remains consistent.
4030         //
4031         kmp_user_lock_p lck = __kmp_user_lock_table.table[
4032           --__kmp_user_lock_table.used ];
4033 
4034         if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4035           ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4036             //
4037             // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4038             // initialized AND it is NOT a critical section (user is not
4039             // responsible for destroying criticals) AND we know source
4040             // location to report.
4041             //
4042             if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4043               ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4044               ( loc->psource != NULL ) ) {
4045                 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4046                 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4047                 __kmp_str_loc_free( &str_loc);
4048             }
4049 
4050 #ifdef KMP_DEBUG
4051             if ( IS_CRITICAL( lck ) ) {
4052                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4053             }
4054             else {
4055                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4056             }
4057 #endif // KMP_DEBUG
4058 
4059             //
4060             // Cleanup internal lock dynamic resources
4061             // (for drdpa locks particularly).
4062             //
4063             __kmp_destroy_user_lock( lck );
4064         }
4065 
4066         //
4067         // Free the lock if block allocation of locks is not used.
4068         //
4069         if ( __kmp_lock_blocks == NULL ) {
4070             __kmp_free( lck );
4071         }
4072     }
4073 
4074 #undef IS_CRITICAL
4075 
4076     //
4077     // delete lock table(s).
4078     //
4079     kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4080     __kmp_user_lock_table.table = NULL;
4081     __kmp_user_lock_table.allocated = 0;
4082 
4083     while ( table_ptr != NULL ) {
4084         //
4085         // In the first element we saved the pointer to the previous
4086         // (smaller) lock table.
4087         //
4088         kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4089         __kmp_free( table_ptr );
4090         table_ptr = next;
4091     }
4092 
4093     //
4094     // Free buffers allocated for blocks of locks.
4095     //
4096     kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4097     __kmp_lock_blocks = NULL;
4098 
4099     while ( block_ptr != NULL ) {
4100         kmp_block_of_locks_t *next = block_ptr->next_block;
4101         __kmp_free( block_ptr->locks );
4102         //
4103         // *block_ptr itself was allocated at the end of the locks vector.
4104         //
        block_ptr = next;
4106     }
4107 
4108     TCW_4(__kmp_init_user_locks, FALSE);
4109 }
4110 
4111 #endif // KMP_USE_DYNAMIC_LOCK
4112