1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 #include <atomic>
18 
19 #include "kmp.h"
20 #include "kmp_itt.h"
21 #include "kmp_i18n.h"
22 #include "kmp_lock.h"
23 #include "kmp_io.h"
24 
25 #include "tsan_annotations.h"
26 
27 #if KMP_USE_FUTEX
28 # include <unistd.h>
29 # include <sys/syscall.h>
// We should really include <futex.h>, but doing so causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need are the two macros below (which are part of the kernel ABI, so they can't
// change), we just define the constants here and don't include <futex.h>.
35 # ifndef FUTEX_WAIT
36 #  define FUTEX_WAIT    0
37 # endif
38 # ifndef FUTEX_WAKE
39 #  define FUTEX_WAKE    1
40 # endif
41 #endif
42 
43 /* Implement spin locks for internal library use.             */
44 /* The algorithm implemented is Lamport's bakery lock [1974]. */
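
/* A worked example of why the bakery scheme can rely on unsigned wrap-around
   (illustration only, using 32-bit values; the assertion loop in
   __kmp_validate_locks() below checks exactly this property):

       x = ~0u - 2 = 0xFFFFFFFD      y = x - 2 = 0xFFFFFFFB      x - y = 2
       after three increments x has wrapped to 0x00000000, while
       y = 0xFFFFFFFE, and x - y is still 2 (mod 2^32)

   Ticket counters therefore stay comparable even after they pass UINT_MAX. */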
45 
46 void
47 __kmp_validate_locks( void )
48 {
49     int i;
50     kmp_uint32  x, y;
51 
    /* Check that unsigned arithmetic wraps around properly */
53     x = ~((kmp_uint32) 0) - 2;
54     y = x - 2;
55 
56     for (i = 0; i < 8; ++i, ++x, ++y) {
57         kmp_uint32 z = (x - y);
58         KMP_ASSERT( z == 2 );
59     }
60 
61     KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
62 }
63 
64 
65 /* ------------------------------------------------------------------------ */
66 /* test and set locks */
67 
68 //
69 // For the non-nested locks, we can only assume that the first 4 bytes were
70 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
71 // compiler only allocates a 4 byte pointer on IA-32 architecture.  On
72 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
73 //
74 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
75 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
76 //
77 
78 static kmp_int32
79 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
80 {
81     return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
82 }
83 
84 static inline bool
85 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
86 {
87     return lck->lk.depth_locked != -1;
88 }
89 
90 __forceinline static int
91 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
92 {
93     KMP_MB();
94 
95 #ifdef USE_LOCK_PROFILE
96     kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
97     if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
98         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
99     /* else __kmp_printf( "." );*/
100 #endif /* USE_LOCK_PROFILE */
101 
102     if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
103       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
104         KMP_FSYNC_ACQUIRED(lck);
105         return KMP_LOCK_ACQUIRED_FIRST;
106     }
107 
108     kmp_uint32 spins;
109     KMP_FSYNC_PREPARE( lck );
110     KMP_INIT_YIELD( spins );
111     if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
112       __kmp_xproc ) ) {
113         KMP_YIELD( TRUE );
114     }
115     else {
116         KMP_YIELD_SPIN( spins );
117     }
118 
119     kmp_backoff_t backoff = __kmp_spin_backoff_params;
120     while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
121       ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
122 
123         __kmp_spin_backoff(&backoff);
124         if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
125           __kmp_xproc ) ) {
126             KMP_YIELD( TRUE );
127         }
128         else {
129             KMP_YIELD_SPIN( spins );
130         }
131     }
132     KMP_FSYNC_ACQUIRED( lck );
133     return KMP_LOCK_ACQUIRED_FIRST;
134 }
135 
136 int
137 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
138 {
139   int retval = __kmp_acquire_tas_lock_timed_template( lck, gtid );
140   ANNOTATE_TAS_ACQUIRED(lck);
141   return retval;
142 }
143 
144 static int
145 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
146 {
147     char const * const func = "omp_set_lock";
148     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
149       && __kmp_is_tas_lock_nestable( lck ) ) {
150         KMP_FATAL( LockNestableUsedAsSimple, func );
151     }
152     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
153         KMP_FATAL( LockIsAlreadyOwned, func );
154     }
155     return __kmp_acquire_tas_lock( lck, gtid );
156 }
157 
158 int
159 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
160 {
161     if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
162       && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
163         KMP_FSYNC_ACQUIRED( lck );
164         return TRUE;
165     }
166     return FALSE;
167 }
168 
169 static int
170 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
171 {
172     char const * const func = "omp_test_lock";
173     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
174       && __kmp_is_tas_lock_nestable( lck ) ) {
175         KMP_FATAL( LockNestableUsedAsSimple, func );
176     }
177     return __kmp_test_tas_lock( lck, gtid );
178 }
179 
180 int
181 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
182 {
183     KMP_MB();       /* Flush all pending memory write invalidates.  */
184 
185     KMP_FSYNC_RELEASING(lck);
186     ANNOTATE_TAS_RELEASED(lck);
187     KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
188     KMP_MB();       /* Flush all pending memory write invalidates.  */
189 
190     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
191       __kmp_xproc ) );
192     return KMP_LOCK_RELEASED;
193 }
194 
195 static int
196 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
197 {
198     char const * const func = "omp_unset_lock";
199     KMP_MB();  /* in case another processor initialized lock */
200     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
201       && __kmp_is_tas_lock_nestable( lck ) ) {
202         KMP_FATAL( LockNestableUsedAsSimple, func );
203     }
204     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
205         KMP_FATAL( LockUnsettingFree, func );
206     }
207     if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
208       && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
209         KMP_FATAL( LockUnsettingSetByAnother, func );
210     }
211     return __kmp_release_tas_lock( lck, gtid );
212 }
213 
214 void
215 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
216 {
217     TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
218 }
219 
220 static void
221 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
222 {
223     __kmp_init_tas_lock( lck );
224 }
225 
226 void
227 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
228 {
229     lck->lk.poll = 0;
230 }
231 
232 static void
233 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
234 {
235     char const * const func = "omp_destroy_lock";
236     if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
237       && __kmp_is_tas_lock_nestable( lck ) ) {
238         KMP_FATAL( LockNestableUsedAsSimple, func );
239     }
240     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
241         KMP_FATAL( LockStillOwned, func );
242     }
243     __kmp_destroy_tas_lock( lck );
244 }
245 
246 
247 //
248 // nested test and set locks
249 //
250 
251 int
252 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
253 {
254     KMP_DEBUG_ASSERT( gtid >= 0 );
255 
256     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
257         lck->lk.depth_locked += 1;
258         return KMP_LOCK_ACQUIRED_NEXT;
259     }
260     else {
261         __kmp_acquire_tas_lock_timed_template( lck, gtid );
262         ANNOTATE_TAS_ACQUIRED(lck);
263         lck->lk.depth_locked = 1;
264         return KMP_LOCK_ACQUIRED_FIRST;
265     }
266 }
267 
268 static int
269 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
270 {
271     char const * const func = "omp_set_nest_lock";
272     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
273         KMP_FATAL( LockSimpleUsedAsNestable, func );
274     }
275     return __kmp_acquire_nested_tas_lock( lck, gtid );
276 }
277 
278 int
279 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
280 {
281     int retval;
282 
283     KMP_DEBUG_ASSERT( gtid >= 0 );
284 
285     if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
286         retval = ++lck->lk.depth_locked;
287     }
288     else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
289         retval = 0;
290     }
291     else {
292         KMP_MB();
293         retval = lck->lk.depth_locked = 1;
294     }
295     return retval;
296 }
297 
298 static int
299 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
300 {
301     char const * const func = "omp_test_nest_lock";
302     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
303         KMP_FATAL( LockSimpleUsedAsNestable, func );
304     }
305     return __kmp_test_nested_tas_lock( lck, gtid );
306 }
307 
308 int
309 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
310 {
311     KMP_DEBUG_ASSERT( gtid >= 0 );
312 
313     KMP_MB();
314     if ( --(lck->lk.depth_locked) == 0 ) {
315         __kmp_release_tas_lock( lck, gtid );
316         return KMP_LOCK_RELEASED;
317     }
318     return KMP_LOCK_STILL_HELD;
319 }
320 
321 static int
322 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
323 {
324     char const * const func = "omp_unset_nest_lock";
325     KMP_MB();  /* in case another processor initialized lock */
326     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
327         KMP_FATAL( LockSimpleUsedAsNestable, func );
328     }
329     if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
330         KMP_FATAL( LockUnsettingFree, func );
331     }
332     if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
333         KMP_FATAL( LockUnsettingSetByAnother, func );
334     }
335     return __kmp_release_nested_tas_lock( lck, gtid );
336 }
337 
338 void
339 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
340 {
341     __kmp_init_tas_lock( lck );
342     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
343 }
344 
345 static void
346 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
347 {
348     __kmp_init_nested_tas_lock( lck );
349 }
350 
351 void
352 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
353 {
354     __kmp_destroy_tas_lock( lck );
355     lck->lk.depth_locked = 0;
356 }
357 
358 static void
359 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
360 {
361     char const * const func = "omp_destroy_nest_lock";
362     if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
363         KMP_FATAL( LockSimpleUsedAsNestable, func );
364     }
365     if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
366         KMP_FATAL( LockStillOwned, func );
367     }
368     __kmp_destroy_nested_tas_lock( lck );
369 }
370 
371 
372 #if KMP_USE_FUTEX
373 
374 /* ------------------------------------------------------------------------ */
375 /* futex locks */
376 
377 // futex locks are really just test and set locks, with a different method
378 // of handling contention.  They take the same amount of space as test and
379 // set locks, and are allocated the same way (i.e. use the area allocated by
380 // the compiler for non-nested locks / allocate nested locks on the heap).
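
// A minimal sketch of that contention handling (illustration only; the real
// protocol below also encodes the owner's gtid in the upper bits of the poll
// word and goes through the KMP_* atomics): a contended acquirer parks in the
// kernel on the lock word, and the releaser wakes it, via the raw syscall
// already used in this file:
//
//     // sleep while *addr still contains expected_val
//     syscall( __NR_futex, addr, FUTEX_WAIT, expected_val, NULL, NULL, 0 );
//     // wake up to one thread blocked on addr
//     syscall( __NR_futex, addr, FUTEX_WAKE, 1, NULL, NULL, 0 );
//
// So a futex lock costs the same 4 bytes as a test and set lock; only the
// "lock is busy" path differs.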
381 
382 static kmp_int32
383 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
384 {
385     return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
386 }
387 
388 static inline bool
389 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
390 {
391     return lck->lk.depth_locked != -1;
392 }
393 
394 __forceinline static int
395 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
396 {
397     kmp_int32 gtid_code = ( gtid + 1 ) << 1;
398 
399     KMP_MB();
400 
401 #ifdef USE_LOCK_PROFILE
402     kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
403     if ( ( curr != 0 ) && ( curr != gtid_code ) )
404         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
405     /* else __kmp_printf( "." );*/
406 #endif /* USE_LOCK_PROFILE */
407 
408     KMP_FSYNC_PREPARE( lck );
409     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
410       lck, lck->lk.poll, gtid ) );
411 
412     kmp_int32 poll_val;
413 
414     while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
415              KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
416 
417         kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
418         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
419            lck, gtid, poll_val, cond ) );
420 
421         //
422         // NOTE: if you try to use the following condition for this branch
423         //
424         // if ( poll_val & 1 == 0 )
425         //
426         // Then the 12.0 compiler has a bug where the following block will
427         // always be skipped, regardless of the value of the LSB of poll_val.
428         //
429         if ( ! cond ) {
430             //
431             // Try to set the lsb in the poll to indicate to the owner
432             // thread that they need to wake this thread up.
433             //
434             if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
435                 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
436                   lck, lck->lk.poll, gtid ) );
437                 continue;
438             }
439             poll_val |= KMP_LOCK_BUSY(1, futex);
440 
441             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
442               lck, lck->lk.poll, gtid ) );
443         }
444 
445         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
446            lck, gtid, poll_val ) );
447 
448         kmp_int32 rc;
449         if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
450           poll_val, NULL, NULL, 0 ) ) != 0 ) {
451             KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
452                lck, gtid, poll_val, rc, errno ) );
453             continue;
454         }
455 
456         KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
457            lck, gtid, poll_val ) );
458         //
459         // This thread has now done a successful futex wait call and was
460         // entered on the OS futex queue.  We must now perform a futex
461         // wake call when releasing the lock, as we have no idea how many
462         // other threads are in the queue.
463         //
464         gtid_code |= 1;
465     }
466 
467     KMP_FSYNC_ACQUIRED( lck );
468     KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
469       lck, lck->lk.poll, gtid ) );
470     return KMP_LOCK_ACQUIRED_FIRST;
471 }
472 
473 int
474 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
475 {
476   int retval = __kmp_acquire_futex_lock_timed_template( lck, gtid );
477   ANNOTATE_FUTEX_ACQUIRED(lck);
478   return retval;
479 }
480 
481 static int
482 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
483 {
484     char const * const func = "omp_set_lock";
485     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
486       && __kmp_is_futex_lock_nestable( lck ) ) {
487         KMP_FATAL( LockNestableUsedAsSimple, func );
488     }
489     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
490         KMP_FATAL( LockIsAlreadyOwned, func );
491     }
492     return __kmp_acquire_futex_lock( lck, gtid );
493 }
494 
495 int
496 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
497 {
498     if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex) ) ) {
499         KMP_FSYNC_ACQUIRED( lck );
500         return TRUE;
501     }
502     return FALSE;
503 }
504 
505 static int
506 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
507 {
508     char const * const func = "omp_test_lock";
509     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
510       && __kmp_is_futex_lock_nestable( lck ) ) {
511         KMP_FATAL( LockNestableUsedAsSimple, func );
512     }
513     return __kmp_test_futex_lock( lck, gtid );
514 }
515 
516 int
517 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
518 {
519     KMP_MB();       /* Flush all pending memory write invalidates.  */
520 
521     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
522       lck, lck->lk.poll, gtid ) );
523 
524     KMP_FSYNC_RELEASING(lck);
525     ANNOTATE_FUTEX_RELEASED(lck);
526 
527     kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
528 
529     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
530        lck, gtid, poll_val ) );
531 
532     if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
533         KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
534            lck, gtid ) );
535         syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
536     }
537 
538     KMP_MB();       /* Flush all pending memory write invalidates.  */
539 
540     KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
541       lck, lck->lk.poll, gtid ) );
542 
543     KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
544       __kmp_xproc ) );
545     return KMP_LOCK_RELEASED;
546 }
547 
548 static int
549 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
550 {
551     char const * const func = "omp_unset_lock";
552     KMP_MB();  /* in case another processor initialized lock */
553     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
554       && __kmp_is_futex_lock_nestable( lck ) ) {
555         KMP_FATAL( LockNestableUsedAsSimple, func );
556     }
557     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
558         KMP_FATAL( LockUnsettingFree, func );
559     }
560     if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
561       && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
562         KMP_FATAL( LockUnsettingSetByAnother, func );
563     }
564     return __kmp_release_futex_lock( lck, gtid );
565 }
566 
567 void
568 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
569 {
570     TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
571 }
572 
573 static void
574 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
575 {
576     __kmp_init_futex_lock( lck );
577 }
578 
579 void
580 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
581 {
582     lck->lk.poll = 0;
583 }
584 
585 static void
586 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
587 {
588     char const * const func = "omp_destroy_lock";
589     if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
590       && __kmp_is_futex_lock_nestable( lck ) ) {
591         KMP_FATAL( LockNestableUsedAsSimple, func );
592     }
593     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
594         KMP_FATAL( LockStillOwned, func );
595     }
596     __kmp_destroy_futex_lock( lck );
597 }
598 
599 
600 //
601 // nested futex locks
602 //
603 
604 int
605 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
606 {
607     KMP_DEBUG_ASSERT( gtid >= 0 );
608 
609     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
610         lck->lk.depth_locked += 1;
611         return KMP_LOCK_ACQUIRED_NEXT;
612     }
613     else {
614         __kmp_acquire_futex_lock_timed_template( lck, gtid );
615         ANNOTATE_FUTEX_ACQUIRED(lck);
616         lck->lk.depth_locked = 1;
617         return KMP_LOCK_ACQUIRED_FIRST;
618     }
619 }
620 
621 static int
622 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
623 {
624     char const * const func = "omp_set_nest_lock";
625     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
626         KMP_FATAL( LockSimpleUsedAsNestable, func );
627     }
628     return __kmp_acquire_nested_futex_lock( lck, gtid );
629 }
630 
631 int
632 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
633 {
634     int retval;
635 
636     KMP_DEBUG_ASSERT( gtid >= 0 );
637 
638     if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
639         retval = ++lck->lk.depth_locked;
640     }
641     else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
642         retval = 0;
643     }
644     else {
645         KMP_MB();
646         retval = lck->lk.depth_locked = 1;
647     }
648     return retval;
649 }
650 
651 static int
652 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
653 {
654     char const * const func = "omp_test_nest_lock";
655     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
656         KMP_FATAL( LockSimpleUsedAsNestable, func );
657     }
658     return __kmp_test_nested_futex_lock( lck, gtid );
659 }
660 
661 int
662 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
663 {
664     KMP_DEBUG_ASSERT( gtid >= 0 );
665 
666     KMP_MB();
667     if ( --(lck->lk.depth_locked) == 0 ) {
668         __kmp_release_futex_lock( lck, gtid );
669         return KMP_LOCK_RELEASED;
670     }
671     return KMP_LOCK_STILL_HELD;
672 }
673 
674 static int
675 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
676 {
677     char const * const func = "omp_unset_nest_lock";
678     KMP_MB();  /* in case another processor initialized lock */
679     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
680         KMP_FATAL( LockSimpleUsedAsNestable, func );
681     }
682     if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
683         KMP_FATAL( LockUnsettingFree, func );
684     }
685     if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
686         KMP_FATAL( LockUnsettingSetByAnother, func );
687     }
688     return __kmp_release_nested_futex_lock( lck, gtid );
689 }
690 
691 void
692 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
693 {
694     __kmp_init_futex_lock( lck );
695     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
696 }
697 
698 static void
699 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
700 {
701     __kmp_init_nested_futex_lock( lck );
702 }
703 
704 void
705 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
706 {
707     __kmp_destroy_futex_lock( lck );
708     lck->lk.depth_locked = 0;
709 }
710 
711 static void
712 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
713 {
714     char const * const func = "omp_destroy_nest_lock";
715     if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
716         KMP_FATAL( LockSimpleUsedAsNestable, func );
717     }
718     if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
719         KMP_FATAL( LockStillOwned, func );
720     }
721     __kmp_destroy_nested_futex_lock( lck );
722 }
723 
724 #endif // KMP_USE_FUTEX
725 
726 
727 /* ------------------------------------------------------------------------ */
728 /* ticket (bakery) locks */
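
/* A minimal sketch of the protocol the functions below implement, using plain
   std::atomic for illustration (the real code adds owner/nesting bookkeeping,
   location/flags fields, yielding via KMP_WAIT_YIELD_PTR, and the _with_checks
   variants):

       std::atomic<unsigned> next_ticket{0};   // next ticket to hand out
       std::atomic<unsigned> now_serving{0};   // ticket currently being served

       void acquire() {
           unsigned my = next_ticket.fetch_add(1, std::memory_order_relaxed);
           while (now_serving.load(std::memory_order_acquire) != my)
               ;  // spin / yield until our ticket comes up
       }
       void release() {
           now_serving.fetch_add(1, std::memory_order_release);
       }

   Acquisition order is FIFO, and unsigned wrap-around keeps the comparison
   valid after the counters pass UINT_MAX (see __kmp_validate_locks()). */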
729 
730 static kmp_int32
731 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
732 {
733     return std::atomic_load_explicit( &lck->lk.owner_id, std::memory_order_relaxed ) - 1;
734 }
735 
736 static inline bool
737 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
738 {
739     return std::atomic_load_explicit( &lck->lk.depth_locked, std::memory_order_relaxed ) != -1;
740 }
741 
742 static kmp_uint32
743 __kmp_bakery_check( void *now_serving, kmp_uint32 my_ticket )
744 {
745     return std::atomic_load_explicit( (std::atomic<unsigned> *)now_serving, std::memory_order_acquire ) == my_ticket;
746 }
747 
748 __forceinline static int
749 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
750 {
751     kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed );
752 
753 #ifdef USE_LOCK_PROFILE
754     if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) != my_ticket )
755         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
756     /* else __kmp_printf( "." );*/
757 #endif /* USE_LOCK_PROFILE */
758 
759     if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_acquire ) == my_ticket ) {
760         return KMP_LOCK_ACQUIRED_FIRST;
761     }
762     KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
763     return KMP_LOCK_ACQUIRED_FIRST;
764 }
765 
766 int
767 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
768 {
769   int retval = __kmp_acquire_ticket_lock_timed_template( lck, gtid );
770   ANNOTATE_TICKET_ACQUIRED(lck);
771   return retval;
772 }
773 
774 static int
775 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
776 {
777     char const * const func = "omp_set_lock";
778 
779     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
780         KMP_FATAL( LockIsUninitialized, func );
781     }
782     if ( lck->lk.self != lck ) {
783         KMP_FATAL( LockIsUninitialized, func );
784     }
785     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
786         KMP_FATAL( LockNestableUsedAsSimple, func );
787     }
788     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
789         KMP_FATAL( LockIsAlreadyOwned, func );
790     }
791 
792     __kmp_acquire_ticket_lock( lck, gtid );
793 
794     std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
795     return KMP_LOCK_ACQUIRED_FIRST;
796 }
797 
798 int
799 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
800 {
801     kmp_uint32 my_ticket = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed );
802 
803     if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) == my_ticket ) {
804         kmp_uint32 next_ticket = my_ticket + 1;
805         if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket,
806              &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) {
807             return TRUE;
808         }
809     }
810     return FALSE;
811 }
812 
813 static int
814 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
815 {
816     char const * const func = "omp_test_lock";
817 
818     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
819         KMP_FATAL( LockIsUninitialized, func );
820     }
821     if ( lck->lk.self != lck ) {
822         KMP_FATAL( LockIsUninitialized, func );
823     }
824     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
825         KMP_FATAL( LockNestableUsedAsSimple, func );
826     }
827 
828     int retval = __kmp_test_ticket_lock( lck, gtid );
829 
830     if ( retval ) {
831         std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
832     }
833     return retval;
834 }
835 
836 int
837 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
838 {
839     kmp_uint32 distance = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ) - std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed );
840 
841     ANNOTATE_TICKET_RELEASED(lck);
842     std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release );
843 
844     KMP_YIELD( distance
845       > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
846     return KMP_LOCK_RELEASED;
847 }
848 
849 static int
850 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
851 {
852     char const * const func = "omp_unset_lock";
853 
854     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
855         KMP_FATAL( LockIsUninitialized, func );
856     }
857     if ( lck->lk.self != lck ) {
858         KMP_FATAL( LockIsUninitialized, func );
859     }
860     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
861         KMP_FATAL( LockNestableUsedAsSimple, func );
862     }
863     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
864         KMP_FATAL( LockUnsettingFree, func );
865     }
866     if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
867       && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
868         KMP_FATAL( LockUnsettingSetByAnother, func );
869     }
870     std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
871     return __kmp_release_ticket_lock( lck, gtid );
872 }
873 
874 void
875 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
876 {
877     lck->lk.location = NULL;
878     lck->lk.self = lck;
879     std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
880     std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
881     std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); // no thread owns the lock.
882     std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); // -1 => not a nested lock.
883     std::atomic_store_explicit( &lck->lk.initialized, true, std::memory_order_release );
884 }
885 
886 static void
887 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
888 {
889     __kmp_init_ticket_lock( lck );
890 }
891 
892 void
893 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
894 {
895     std::atomic_store_explicit( &lck->lk.initialized, false, std::memory_order_release );
896     lck->lk.self = NULL;
897     lck->lk.location = NULL;
898     std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
899     std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
900     std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
901     std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed );
902 }
903 
904 static void
905 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
906 {
907     char const * const func = "omp_destroy_lock";
908 
909     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
910         KMP_FATAL( LockIsUninitialized, func );
911     }
912     if ( lck->lk.self != lck ) {
913         KMP_FATAL( LockIsUninitialized, func );
914     }
915     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
916         KMP_FATAL( LockNestableUsedAsSimple, func );
917     }
918     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
919         KMP_FATAL( LockStillOwned, func );
920     }
921     __kmp_destroy_ticket_lock( lck );
922 }
923 
924 
925 //
926 // nested ticket locks
927 //
928 
929 int
930 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
931 {
932     KMP_DEBUG_ASSERT( gtid >= 0 );
933 
934     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
935         std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
936         return KMP_LOCK_ACQUIRED_NEXT;
937     }
938     else {
939         __kmp_acquire_ticket_lock_timed_template( lck, gtid );
940         ANNOTATE_TICKET_ACQUIRED(lck);
941         std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
942         std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
943         return KMP_LOCK_ACQUIRED_FIRST;
944     }
945 }
946 
947 static int
948 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
949 {
950     char const * const func = "omp_set_nest_lock";
951 
952     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
953         KMP_FATAL( LockIsUninitialized, func );
954     }
955     if ( lck->lk.self != lck ) {
956         KMP_FATAL( LockIsUninitialized, func );
957     }
958     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
959         KMP_FATAL( LockSimpleUsedAsNestable, func );
960     }
961     return __kmp_acquire_nested_ticket_lock( lck, gtid );
962 }
963 
964 int
965 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
966 {
967     int retval;
968 
969     KMP_DEBUG_ASSERT( gtid >= 0 );
970 
971     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
972         retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ) + 1;
973     }
974     else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
975         retval = 0;
976     }
977     else {
978         std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
979         std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
980         retval = 1;
981     }
982     return retval;
983 }
984 
985 static int
986 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
987   kmp_int32 gtid )
988 {
989     char const * const func = "omp_test_nest_lock";
990 
991     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
992         KMP_FATAL( LockIsUninitialized, func );
993     }
994     if ( lck->lk.self != lck ) {
995         KMP_FATAL( LockIsUninitialized, func );
996     }
997     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
998         KMP_FATAL( LockSimpleUsedAsNestable, func );
999     }
1000     return __kmp_test_nested_ticket_lock( lck, gtid );
1001 }
1002 
1003 int
1004 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1005 {
1006     KMP_DEBUG_ASSERT( gtid >= 0 );
1007 
1008     if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ) - 1 ) == 0 ) {
1009         std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
1010         __kmp_release_ticket_lock( lck, gtid );
1011         return KMP_LOCK_RELEASED;
1012     }
1013     return KMP_LOCK_STILL_HELD;
1014 }
1015 
1016 static int
1017 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1018 {
1019     char const * const func = "omp_unset_nest_lock";
1020 
1021     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1022         KMP_FATAL( LockIsUninitialized, func );
1023     }
1024     if ( lck->lk.self != lck ) {
1025         KMP_FATAL( LockIsUninitialized, func );
1026     }
1027     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1028         KMP_FATAL( LockSimpleUsedAsNestable, func );
1029     }
1030     if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1031         KMP_FATAL( LockUnsettingFree, func );
1032     }
1033     if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1034         KMP_FATAL( LockUnsettingSetByAnother, func );
1035     }
1036     return __kmp_release_nested_ticket_lock( lck, gtid );
1037 }
1038 
1039 void
1040 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1041 {
1042     __kmp_init_ticket_lock( lck );
1043     std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); // >= 0 for nestable locks, -1 for simple locks
1044 }
1045 
1046 static void
1047 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1048 {
1049     __kmp_init_nested_ticket_lock( lck );
1050 }
1051 
1052 void
1053 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1054 {
1055     __kmp_destroy_ticket_lock( lck );
1056     std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed );
1057 }
1058 
1059 static void
1060 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1061 {
1062     char const * const func = "omp_destroy_nest_lock";
1063 
1064     if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1065         KMP_FATAL( LockIsUninitialized, func );
1066     }
1067     if ( lck->lk.self != lck ) {
1068         KMP_FATAL( LockIsUninitialized, func );
1069     }
1070     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1071         KMP_FATAL( LockSimpleUsedAsNestable, func );
1072     }
1073     if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1074         KMP_FATAL( LockStillOwned, func );
1075     }
1076     __kmp_destroy_nested_ticket_lock( lck );
1077 }
1078 
1079 
1080 //
1081 // access functions to fields which don't exist for all lock kinds.
1082 //
1083 
1084 static int
1085 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1086 {
1087     return std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) && ( lck->lk.self == lck);
1088 }
1089 
1090 static const ident_t *
1091 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1092 {
1093     return lck->lk.location;
1094 }
1095 
1096 static void
1097 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1098 {
1099     lck->lk.location = loc;
1100 }
1101 
1102 static kmp_lock_flags_t
1103 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1104 {
1105     return lck->lk.flags;
1106 }
1107 
1108 static void
1109 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1110 {
1111     lck->lk.flags = flags;
1112 }
1113 
1114 /* ------------------------------------------------------------------------ */
1115 /* queuing locks */
1116 
1117 /*
1118  * First the states
1119  * (head,tail) =  0, 0  means lock is unheld, nobody on queue
1120  *   UINT_MAX or -1, 0  means lock is held, nobody on queue
1121  *                h, h  means lock is held or about to transition, 1 element on queue
1122  *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
1123  *
1124  * Now the transitions
1125  *    Acquire(0,0)  = -1 ,0
1126  *    Release(0,0)  = Error
1127  *    Acquire(-1,0) =  h ,h    h > 0
1128  *    Release(-1,0) =  0 ,0
1129  *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
1130  *    Release(h,h)  = -1 ,0    h > 0
1131  *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1132  *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
1133  *
1134  * And pictorially
1135  *
1136  *
1137  *          +-----+
1138  *          | 0, 0|------- release -------> Error
1139  *          +-----+
1140  *            |  ^
1141  *     acquire|  |release
1142  *            |  |
1143  *            |  |
1144  *            v  |
1145  *          +-----+
1146  *          |-1, 0|
1147  *          +-----+
1148  *            |  ^
1149  *     acquire|  |release
1150  *            |  |
1151  *            |  |
1152  *            v  |
1153  *          +-----+
1154  *          | h, h|
1155  *          +-----+
1156  *            |  ^
1157  *     acquire|  |release
1158  *            |  |
1159  *            |  |
1160  *            v  |
1161  *          +-----+
1162  *          | h, t|----- acquire, release loopback ---+
1163  *          +-----+                                   |
1164  *               ^                                    |
1165  *               |                                    |
1166  *               +------------------------------------+
1167  *
1168  */
1169 
1170 #ifdef DEBUG_QUEUING_LOCKS
1171 
1172 /* Stuff for circular trace buffer */
1173 #define TRACE_BUF_ELE	1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
1175 static int tc = 0;
1176 #define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128,  "t%d at %s\n", X, Y );
1177 #define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1178 #define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1179 
1180 static void
1181 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1182   kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1183 {
1184     kmp_int32 t, i;
1185 
1186     __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1187 
1188     i = tc % TRACE_BUF_ELE;
1189     __kmp_printf_no_lock( "%s\n", traces[i] );
1190     i = (i+1) % TRACE_BUF_ELE;
1191     while ( i != (tc % TRACE_BUF_ELE) ) {
1192         __kmp_printf_no_lock( "%s", traces[i] );
1193         i = (i+1) % TRACE_BUF_ELE;
1194     }
1195     __kmp_printf_no_lock( "\n" );
1196 
1197     __kmp_printf_no_lock(
1198              "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1199              gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1200              head_id, tail_id );
1201 
1202     __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1203 
1204     if ( lck->lk.head_id >= 1 ) {
1205         t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1206         while (t > 0) {
1207             __kmp_printf_no_lock( "-> %d ", t );
1208             t = __kmp_threads[t-1]->th.th_next_waiting;
1209         }
1210     }
1211     __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
1212     __kmp_printf_no_lock( "\n\n" );
1213 }
1214 
1215 #endif /* DEBUG_QUEUING_LOCKS */
1216 
1217 static kmp_int32
1218 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1219 {
1220     return TCR_4( lck->lk.owner_id ) - 1;
1221 }
1222 
1223 static inline bool
1224 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1225 {
1226     return lck->lk.depth_locked != -1;
1227 }
1228 
/* Acquire a lock using the queuing lock implementation */
1230 template <bool takeTime>
1231 /* [TLW] The unused template above is left behind because of what BEB believes is a
1232    potential compiler problem with __forceinline. */
1233 __forceinline static int
1234 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1235   kmp_int32 gtid )
1236 {
1237     register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
1238     volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
1239     volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
1240     volatile kmp_uint32 *spin_here_p;
1241     kmp_int32 need_mf = 1;
1242 
1243 #if OMPT_SUPPORT
1244     ompt_state_t prev_state = ompt_state_undefined;
1245 #endif
1246 
1247     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1248 
1249     KMP_FSYNC_PREPARE( lck );
1250     KMP_DEBUG_ASSERT( this_thr != NULL );
1251     spin_here_p = & this_thr->th.th_spin_here;
1252 
1253 #ifdef DEBUG_QUEUING_LOCKS
1254     TRACE_LOCK( gtid+1, "acq ent" );
1255     if ( *spin_here_p )
1256         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1257     if ( this_thr->th.th_next_waiting != 0 )
1258         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1259 #endif
1260     KMP_DEBUG_ASSERT( !*spin_here_p );
1261     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1262 
1263 
1264     /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1265        that may follow, not just in execution order, but also in visibility order.  This way,
1266        when a releasing thread observes the changes to the queue by this thread, it can
1267        rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1268        spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
1269        to FALSE before this thread sets it to TRUE, this thread will hang.
1270     */
1271     *spin_here_p = TRUE;  /* before enqueuing to prevent race */
1272 
1273     while( 1 ) {
1274         kmp_int32 enqueued;
1275         kmp_int32 head;
1276         kmp_int32 tail;
1277 
1278         head = *head_id_p;
1279 
1280         switch ( head ) {
1281 
1282             case -1:
1283             {
1284 #ifdef DEBUG_QUEUING_LOCKS
1285                 tail = *tail_id_p;
1286                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1287 #endif
                tail = 0;  /* to make sure the next link, which is read asynchronously, is not
                           set accidentally; this assignment keeps us out of the
                           if ( tail > 0 ) branch in the enqueued case below, which is
                           not needed for this state transition */
1292 
1293                 need_mf = 0;
1294                 /* try (-1,0)->(tid,tid) */
1295                 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1296                   KMP_PACK_64( -1, 0 ),
1297                   KMP_PACK_64( gtid+1, gtid+1 ) );
1298 #ifdef DEBUG_QUEUING_LOCKS
1299                   if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1300 #endif
1301             }
1302             break;
1303 
1304             default:
1305             {
1306                 tail = *tail_id_p;
1307                 KMP_DEBUG_ASSERT( tail != gtid + 1 );
1308 
1309 #ifdef DEBUG_QUEUING_LOCKS
1310                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1311 #endif
1312 
1313                 if ( tail == 0 ) {
1314                     enqueued = FALSE;
1315                 }
1316                 else {
1317                     need_mf = 0;
1318                     /* try (h,t) or (h,h)->(h,tid) */
1319                     enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1320 
1321 #ifdef DEBUG_QUEUING_LOCKS
1322                         if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1323 #endif
1324                 }
1325             }
1326             break;
1327 
1328             case 0: /* empty queue */
1329             {
1330                 kmp_int32 grabbed_lock;
1331 
1332 #ifdef DEBUG_QUEUING_LOCKS
1333                 tail = *tail_id_p;
1334                 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1335 #endif
1336                 /* try (0,0)->(-1,0) */
1337 
1338                 /* only legal transition out of head = 0 is head = -1 with no change to tail */
1339                 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1340 
1341                 if ( grabbed_lock ) {
1342 
1343                     *spin_here_p = FALSE;
1344 
1345                     KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1346                               lck, gtid ));
1347 #ifdef DEBUG_QUEUING_LOCKS
1348                     TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1349 #endif
1350 
1351 #if OMPT_SUPPORT
1352                     if (ompt_enabled && prev_state != ompt_state_undefined) {
1353                         /* change the state before clearing wait_id */
1354                         this_thr->th.ompt_thread_info.state = prev_state;
1355                         this_thr->th.ompt_thread_info.wait_id = 0;
1356                     }
1357 #endif
1358 
1359                     KMP_FSYNC_ACQUIRED( lck );
1360                     return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1361                 }
1362                 enqueued = FALSE;
1363             }
1364             break;
1365         }
1366 
1367 #if OMPT_SUPPORT
1368         if (ompt_enabled && prev_state == ompt_state_undefined) {
1369             /* this thread will spin; set wait_id before entering wait state */
1370             prev_state = this_thr->th.ompt_thread_info.state;
1371             this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1372             this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1373         }
1374 #endif
1375 
1376         if ( enqueued ) {
1377             if ( tail > 0 ) {
1378                 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1379                 KMP_ASSERT( tail_thr != NULL );
1380                 tail_thr->th.th_next_waiting = gtid+1;
1381                 /* corresponding wait for this write in release code */
1382             }
1383             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1384 
1385 
1386             /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
1387              *       throughput only here.
1388              */
1389             KMP_MB();
1390             KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1391 
1392 #ifdef DEBUG_QUEUING_LOCKS
1393             TRACE_LOCK( gtid+1, "acq spin" );
1394 
1395             if ( this_thr->th.th_next_waiting != 0 )
1396                 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1397 #endif
1398             KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1399             KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1400                       lck, gtid ));
1401 
1402 #ifdef DEBUG_QUEUING_LOCKS
1403             TRACE_LOCK( gtid+1, "acq exit 2" );
1404 #endif
1405 
1406 #if OMPT_SUPPORT
1407             /* change the state before clearing wait_id */
1408             this_thr->th.ompt_thread_info.state = prev_state;
1409             this_thr->th.ompt_thread_info.wait_id = 0;
1410 #endif
1411 
1412             /* got lock, we were dequeued by the thread that released lock */
1413             return KMP_LOCK_ACQUIRED_FIRST;
1414         }
1415 
1416         /* Yield if number of threads > number of logical processors */
1417         /* ToDo: Not sure why this should only be in oversubscription case,
1418            maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1419         KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1420           __kmp_xproc ) );
1421 #ifdef DEBUG_QUEUING_LOCKS
1422         TRACE_LOCK( gtid+1, "acq retry" );
1423 #endif
1424 
1425     }
1426     KMP_ASSERT2( 0, "should not get here" );
1427     return KMP_LOCK_ACQUIRED_FIRST;
1428 }
1429 
1430 int
1431 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1432 {
1433     KMP_DEBUG_ASSERT( gtid >= 0 );
1434 
1435     int retval = __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1436     ANNOTATE_QUEUING_ACQUIRED(lck);
1437     return retval;
1438 }
1439 
1440 static int
1441 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1442   kmp_int32 gtid )
1443 {
1444     char const * const func = "omp_set_lock";
1445     if ( lck->lk.initialized != lck ) {
1446         KMP_FATAL( LockIsUninitialized, func );
1447     }
1448     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1449         KMP_FATAL( LockNestableUsedAsSimple, func );
1450     }
1451     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1452         KMP_FATAL( LockIsAlreadyOwned, func );
1453     }
1454 
1455     __kmp_acquire_queuing_lock( lck, gtid );
1456 
1457     lck->lk.owner_id = gtid + 1;
1458     return KMP_LOCK_ACQUIRED_FIRST;
1459 }
1460 
1461 int
1462 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1463 {
1464     volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
1465     kmp_int32 head;
1466 #ifdef KMP_DEBUG
1467     kmp_info_t *this_thr;
1468 #endif
1469 
1470     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1471     KMP_DEBUG_ASSERT( gtid >= 0 );
1472 #ifdef KMP_DEBUG
1473     this_thr = __kmp_thread_from_gtid( gtid );
1474     KMP_DEBUG_ASSERT( this_thr != NULL );
1475     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1476 #endif
1477 
1478     head = *head_id_p;
1479 
1480     if ( head == 0 ) { /* nobody on queue, nobody holding */
1481 
1482         /* try (0,0)->(-1,0) */
1483 
1484         if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1485             KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1486             KMP_FSYNC_ACQUIRED(lck);
1487             ANNOTATE_QUEUING_ACQUIRED(lck);
1488             return TRUE;
1489         }
1490     }
1491 
1492     KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1493     return FALSE;
1494 }
1495 
1496 static int
1497 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1498 {
1499     char const * const func = "omp_test_lock";
1500     if ( lck->lk.initialized != lck ) {
1501         KMP_FATAL( LockIsUninitialized, func );
1502     }
1503     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1504         KMP_FATAL( LockNestableUsedAsSimple, func );
1505     }
1506 
1507     int retval = __kmp_test_queuing_lock( lck, gtid );
1508 
1509     if ( retval ) {
1510         lck->lk.owner_id = gtid + 1;
1511     }
1512     return retval;
1513 }
1514 
1515 int
1516 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1517 {
1518     register kmp_info_t *this_thr;
1519     volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1520     volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1521 
1522     KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1523     KMP_DEBUG_ASSERT( gtid >= 0 );
1524     this_thr    = __kmp_thread_from_gtid( gtid );
1525     KMP_DEBUG_ASSERT( this_thr != NULL );
1526 #ifdef DEBUG_QUEUING_LOCKS
1527     TRACE_LOCK( gtid+1, "rel ent" );
1528 
1529     if ( this_thr->th.th_spin_here )
1530         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1531     if ( this_thr->th.th_next_waiting != 0 )
1532         __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1533 #endif
1534     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1535     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1536 
1537     KMP_FSYNC_RELEASING(lck);
1538     ANNOTATE_QUEUING_RELEASED(lck);
1539 
1540     while( 1 ) {
1541         kmp_int32 dequeued;
1542         kmp_int32 head;
1543         kmp_int32 tail;
1544 
1545         head = *head_id_p;
1546 
1547 #ifdef DEBUG_QUEUING_LOCKS
1548         tail = *tail_id_p;
1549         TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1550         if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1551 #endif
1552         KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1553 
1554         if ( head == -1 ) { /* nobody on queue */
1555 
1556             /* try (-1,0)->(0,0) */
1557             if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1558                 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1559                           lck, gtid ));
1560 #ifdef DEBUG_QUEUING_LOCKS
1561                 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1562 #endif
1563 
1564 #if OMPT_SUPPORT
1565                 /* nothing to do - no other thread is trying to shift blame */
1566 #endif
1567 
1568                 return KMP_LOCK_RELEASED;
1569             }
1570             dequeued = FALSE;
1571 
1572         }
1573         else {
1574 
1575             tail = *tail_id_p;
1576             if ( head == tail ) {  /* only one thread on the queue */
1577 
1578 #ifdef DEBUG_QUEUING_LOCKS
1579                 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1580 #endif
1581                 KMP_DEBUG_ASSERT( head > 0 );
1582 
1583                 /* try (h,h)->(-1,0) */
1584                 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1585                   KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1586 #ifdef DEBUG_QUEUING_LOCKS
1587                 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1588 #endif
1589 
1590             }
1591             else {
1592                 volatile kmp_int32 *waiting_id_p;
1593                 kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
1594                 KMP_DEBUG_ASSERT( head_thr != NULL );
1595                 waiting_id_p = & head_thr->th.th_next_waiting;
1596 
1597                 /* Does this require synchronous reads? */
1598 #ifdef DEBUG_QUEUING_LOCKS
1599                 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1600 #endif
1601                 KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1602 
1603                 /* try (h,t)->(h',t) or (t,t) */
1604 
1605                 KMP_MB();
1606                 /* make sure enqueuing thread has time to update next waiting thread field */
1607                 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
1608 #ifdef DEBUG_QUEUING_LOCKS
1609                 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1610 #endif
1611                 dequeued = TRUE;
1612             }
1613         }
1614 
1615         if ( dequeued ) {
1616             kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1617             KMP_DEBUG_ASSERT( head_thr != NULL );
1618 
1619             /* Does this require synchronous reads? */
1620 #ifdef DEBUG_QUEUING_LOCKS
1621             if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1622 #endif
1623             KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1624 
            /* For clean code only.
             * The waiting thread is not released until the next statement,
             * which prevents a race with the acquire code.
             */
1628             head_thr->th.th_next_waiting = 0;
1629 #ifdef DEBUG_QUEUING_LOCKS
1630             TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1631 #endif
1632 
1633             KMP_MB();
1634             /* reset spin value */
1635             head_thr->th.th_spin_here = FALSE;
1636 
1637             KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1638                       lck, gtid ));
1639 #ifdef DEBUG_QUEUING_LOCKS
1640             TRACE_LOCK( gtid+1, "rel exit 2" );
1641 #endif
1642             return KMP_LOCK_RELEASED;
1643         }
1644         /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */
1645 
1646 #ifdef DEBUG_QUEUING_LOCKS
1647         TRACE_LOCK( gtid+1, "rel retry" );
1648 #endif
1649 
1650     } /* while */
1651     KMP_ASSERT2( 0, "should not get here" );
1652     return KMP_LOCK_RELEASED;
1653 }
1654 
1655 static int
1656 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1657   kmp_int32 gtid )
1658 {
1659     char const * const func = "omp_unset_lock";
1660     KMP_MB();  /* in case another processor initialized lock */
1661     if ( lck->lk.initialized != lck ) {
1662         KMP_FATAL( LockIsUninitialized, func );
1663     }
1664     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1665         KMP_FATAL( LockNestableUsedAsSimple, func );
1666     }
1667     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1668         KMP_FATAL( LockUnsettingFree, func );
1669     }
1670     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1671         KMP_FATAL( LockUnsettingSetByAnother, func );
1672     }
1673     lck->lk.owner_id = 0;
1674     return __kmp_release_queuing_lock( lck, gtid );
1675 }
1676 
1677 void
1678 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1679 {
1680     lck->lk.location = NULL;
1681     lck->lk.head_id = 0;
1682     lck->lk.tail_id = 0;
1683     lck->lk.next_ticket = 0;
1684     lck->lk.now_serving = 0;
1685     lck->lk.owner_id = 0;      // no thread owns the lock.
1686     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1687     lck->lk.initialized = lck;
1688 
1689     KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1690 }
1691 
1692 static void
1693 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1694 {
1695     __kmp_init_queuing_lock( lck );
1696 }
1697 
1698 void
1699 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1700 {
1701     lck->lk.initialized = NULL;
1702     lck->lk.location = NULL;
1703     lck->lk.head_id = 0;
1704     lck->lk.tail_id = 0;
1705     lck->lk.next_ticket = 0;
1706     lck->lk.now_serving = 0;
1707     lck->lk.owner_id = 0;
1708     lck->lk.depth_locked = -1;
1709 }
1710 
1711 static void
1712 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1713 {
1714     char const * const func = "omp_destroy_lock";
1715     if ( lck->lk.initialized != lck ) {
1716         KMP_FATAL( LockIsUninitialized, func );
1717     }
1718     if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1719         KMP_FATAL( LockNestableUsedAsSimple, func );
1720     }
1721     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1722         KMP_FATAL( LockStillOwned, func );
1723     }
1724     __kmp_destroy_queuing_lock( lck );
1725 }
1726 
1727 
1728 //
1729 // nested queuing locks
1730 //
1731 
1732 int
1733 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1734 {
1735     KMP_DEBUG_ASSERT( gtid >= 0 );
1736 
1737     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1738         lck->lk.depth_locked += 1;
1739         return KMP_LOCK_ACQUIRED_NEXT;
1740     }
1741     else {
1742         __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1743         ANNOTATE_QUEUING_ACQUIRED(lck);
1744         KMP_MB();
1745         lck->lk.depth_locked = 1;
1746         KMP_MB();
1747         lck->lk.owner_id = gtid + 1;
1748         return KMP_LOCK_ACQUIRED_FIRST;
1749     }
1750 }
1751 
1752 static int
1753 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1754 {
1755     char const * const func = "omp_set_nest_lock";
1756     if ( lck->lk.initialized != lck ) {
1757         KMP_FATAL( LockIsUninitialized, func );
1758     }
1759     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1760         KMP_FATAL( LockSimpleUsedAsNestable, func );
1761     }
1762     return __kmp_acquire_nested_queuing_lock( lck, gtid );
1763 }
1764 
1765 int
1766 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1767 {
1768     int retval;
1769 
1770     KMP_DEBUG_ASSERT( gtid >= 0 );
1771 
1772     if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1773         retval = ++lck->lk.depth_locked;
1774     }
1775     else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1776         retval = 0;
1777     }
1778     else {
1779         KMP_MB();
1780         retval = lck->lk.depth_locked = 1;
1781         KMP_MB();
1782         lck->lk.owner_id = gtid + 1;
1783     }
1784     return retval;
1785 }
1786 
1787 static int
1788 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1789   kmp_int32 gtid )
1790 {
1791     char const * const func = "omp_test_nest_lock";
1792     if ( lck->lk.initialized != lck ) {
1793         KMP_FATAL( LockIsUninitialized, func );
1794     }
1795     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1796         KMP_FATAL( LockSimpleUsedAsNestable, func );
1797     }
1798     return __kmp_test_nested_queuing_lock( lck, gtid );
1799 }
1800 
1801 int
1802 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1803 {
1804     KMP_DEBUG_ASSERT( gtid >= 0 );
1805 
1806     KMP_MB();
1807     if ( --(lck->lk.depth_locked) == 0 ) {
1808         KMP_MB();
1809         lck->lk.owner_id = 0;
1810         __kmp_release_queuing_lock( lck, gtid );
1811         return KMP_LOCK_RELEASED;
1812     }
1813     return KMP_LOCK_STILL_HELD;
1814 }
1815 
1816 static int
1817 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1818 {
1819     char const * const func = "omp_unset_nest_lock";
1820     KMP_MB();  /* in case another processor initialized lock */
1821     if ( lck->lk.initialized != lck ) {
1822         KMP_FATAL( LockIsUninitialized, func );
1823     }
1824     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1825         KMP_FATAL( LockSimpleUsedAsNestable, func );
1826     }
1827     if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1828         KMP_FATAL( LockUnsettingFree, func );
1829     }
1830     if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1831         KMP_FATAL( LockUnsettingSetByAnother, func );
1832     }
1833     return __kmp_release_nested_queuing_lock( lck, gtid );
1834 }
1835 
1836 void
1837 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1838 {
1839     __kmp_init_queuing_lock( lck );
1840     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1841 }
1842 
1843 static void
1844 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1845 {
1846     __kmp_init_nested_queuing_lock( lck );
1847 }
1848 
1849 void
1850 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1851 {
1852     __kmp_destroy_queuing_lock( lck );
1853     lck->lk.depth_locked = 0;
1854 }
1855 
1856 static void
1857 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1858 {
1859     char const * const func = "omp_destroy_nest_lock";
1860     if ( lck->lk.initialized != lck ) {
1861         KMP_FATAL( LockIsUninitialized, func );
1862     }
1863     if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1864         KMP_FATAL( LockSimpleUsedAsNestable, func );
1865     }
1866     if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1867         KMP_FATAL( LockStillOwned, func );
1868     }
1869     __kmp_destroy_nested_queuing_lock( lck );
1870 }
1871 
1872 
1873 //
1874 // access functions to fields which don't exist for all lock kinds.
1875 //
1876 
1877 static int
1878 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1879 {
1880     return lck == lck->lk.initialized;
1881 }
1882 
1883 static const ident_t *
1884 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1885 {
1886     return lck->lk.location;
1887 }
1888 
1889 static void
1890 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1891 {
1892     lck->lk.location = loc;
1893 }
1894 
1895 static kmp_lock_flags_t
1896 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1897 {
1898     return lck->lk.flags;
1899 }
1900 
1901 static void
1902 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1903 {
1904     lck->lk.flags = flags;
1905 }
1906 
1907 #if KMP_USE_ADAPTIVE_LOCKS
1908 
1909 /*
1910     RTM Adaptive locks
1911 */
1912 
1913 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1914 
1915 #include <immintrin.h>
1916 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1917 
1918 #else
1919 
1920 // Values from the status register after failed speculation.
1921 #define _XBEGIN_STARTED          (~0u)
1922 #define _XABORT_EXPLICIT         (1 << 0)
1923 #define _XABORT_RETRY            (1 << 1)
1924 #define _XABORT_CONFLICT         (1 << 2)
1925 #define _XABORT_CAPACITY         (1 << 3)
1926 #define _XABORT_DEBUG            (1 << 4)
1927 #define _XABORT_NESTED           (1 << 5)
1928 #define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))
1929 
1930 // Aborts for which it's worth trying again immediately
1931 #define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1932 
1933 #define STRINGIZE_INTERNAL(arg) #arg
1934 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1935 
1936 // Access to RTM instructions
1937 
1938 /*
  A version of XBegin which returns -1 when speculation starts successfully, and the
  value of EAX on an abort. This is the same definition as the compiler intrinsic that
  will be supported at some point.
1941 */
1942 static __inline int _xbegin()
1943 {
1944     int res = -1;
1945 
1946 #if KMP_OS_WINDOWS
1947 #if KMP_ARCH_X86_64
1948     _asm {
1949         _emit 0xC7
1950         _emit 0xF8
1951         _emit 2
1952         _emit 0
1953         _emit 0
1954         _emit 0
1955         jmp   L2
1956         mov   res, eax
1957     L2:
1958     }
1959 #else /* IA32 */
1960     _asm {
1961         _emit 0xC7
1962         _emit 0xF8
1963         _emit 2
1964         _emit 0
1965         _emit 0
1966         _emit 0
1967         jmp   L2
1968         mov   res, eax
1969     L2:
1970     }
1971 #endif // KMP_ARCH_X86_64
1972 #else
1973     /* Note that %eax must be noted as killed (clobbered), because
1974      * the XSR is returned in %eax(%rax) on abort.  Other register
1975      * values are restored, so don't need to be killed.
1976      *
1977      * We must also mark 'res' as an input and an output, since otherwise
1978      * 'res=-1' may be dropped as being dead, whereas we do need the
1979      * assignment on the successful (i.e., non-abort) path.
1980      */
1981     __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
1982                       "   .long  1f-1b-6\n"
1983                       "    jmp   2f\n"
1984                       "1:  movl  %%eax,%0\n"
1985                       "2:"
1986                       :"+r"(res)::"memory","%eax");
1987 #endif // KMP_OS_WINDOWS
1988     return res;
1989 }
1990 
1991 /*
1992   Transaction end
1993 */
1994 static __inline void _xend()
1995 {
1996 #if KMP_OS_WINDOWS
1997     __asm  {
1998         _emit 0x0f
1999         _emit 0x01
2000         _emit 0xd5
2001     }
2002 #else
2003     __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
2004 #endif
2005 }
2006 
2007 /*
  This is a macro; the argument must be a single-byte constant which
  can be evaluated by the inline assembler, since it is emitted as a
  byte into the assembly code.
2011 */
2012 #if KMP_OS_WINDOWS
2013 #define _xabort(ARG)                            \
2014     _asm _emit 0xc6                             \
2015     _asm _emit 0xf8                             \
2016     _asm _emit ARG
2017 #else
2018 #define _xabort(ARG) \
2019     __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2020 #endif
2021 
2022 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
2023 
2024 //
//    Statistics are collected for testing purposes
2026 //
2027 #if KMP_DEBUG_ADAPTIVE_LOCKS
2028 
2029 // We accumulate speculative lock statistics when the lock is destroyed.
2030 // We keep locks that haven't been destroyed in the liveLocks list
2031 // so that we can grab their statistics too.
2032 static kmp_adaptive_lock_statistics_t destroyedStats;
2033 
2034 // To hold the list of live locks.
2035 static kmp_adaptive_lock_info_t liveLocks;
2036 
2037 // A lock so we can safely update the list of locks.
2038 static kmp_bootstrap_lock_t chain_lock;
2039 
2040 // Initialize the list of stats.
2041 void
2042 __kmp_init_speculative_stats()
2043 {
2044     kmp_adaptive_lock_info_t *lck = &liveLocks;
2045 
2046     memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2047     lck->stats.next = lck;
2048     lck->stats.prev = lck;
2049 
2050     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2051     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2052 
2053     __kmp_init_bootstrap_lock( &chain_lock );
2054 
2055 }
2056 
2057 // Insert the lock into the circular list
2058 static void
2059 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
2060 {
2061     __kmp_acquire_bootstrap_lock( &chain_lock );
2062 
2063     lck->stats.next = liveLocks.stats.next;
2064     lck->stats.prev = &liveLocks;
2065 
2066     liveLocks.stats.next = lck;
2067     lck->stats.next->stats.prev  = lck;
2068 
2069     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2070     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2071 
2072     __kmp_release_bootstrap_lock( &chain_lock );
2073 }
2074 
2075 static void
2076 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2077 {
2078     KMP_ASSERT( lck->stats.next->stats.prev == lck );
2079     KMP_ASSERT( lck->stats.prev->stats.next == lck );
2080 
2081     kmp_adaptive_lock_info_t * n = lck->stats.next;
2082     kmp_adaptive_lock_info_t * p = lck->stats.prev;
2083 
2084     n->stats.prev = p;
2085     p->stats.next = n;
2086 }
2087 
2088 static void
2089 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2090 {
2091     memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2092     __kmp_remember_lock( lck );
2093 }
2094 
2095 static void
2096 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2097 {
2098     kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2099 
2100     t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2101     t->successfulSpeculations += s->successfulSpeculations;
2102     t->hardFailedSpeculations += s->hardFailedSpeculations;
2103     t->softFailedSpeculations += s->softFailedSpeculations;
2104     t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2105     t->lemmingYields          += s->lemmingYields;
2106 }
2107 
2108 static void
2109 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2110 {
2111     kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2112 
2113     __kmp_acquire_bootstrap_lock( &chain_lock );
2114 
2115     __kmp_add_stats( &destroyedStats, lck );
2116     __kmp_forget_lock( lck );
2117 
2118     __kmp_release_bootstrap_lock( &chain_lock );
2119 }
2120 
2121 static float
2122 percent (kmp_uint32 count, kmp_uint32 total)
2123 {
2124     return (total == 0) ? 0.0: (100.0 * count)/total;
2125 }
2126 
static FILE *
__kmp_open_stats_file()
2129 {
2130     if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2131         return stdout;
2132 
2133     size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2134     char buffer[buffLen];
2135     KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2136       (kmp_int32)getpid());
2137     FILE * result = fopen(&buffer[0], "w");
2138 
2139     // Maybe we should issue a warning here...
2140     return result ? result : stdout;
2141 }
2142 
2143 void
2144 __kmp_print_speculative_stats()
2145 {
2146     if (__kmp_user_lock_kind != lk_adaptive)
2147         return;
2148 
2149     FILE * statsFile = __kmp_open_stats_file();
2150 
2151     kmp_adaptive_lock_statistics_t total = destroyedStats;
2152     kmp_adaptive_lock_info_t *lck;
2153 
2154     for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2155         __kmp_add_stats( &total, lck );
2156     }
2157     kmp_adaptive_lock_statistics_t *t = &total;
2158     kmp_uint32 totalSections     = t->nonSpeculativeAcquires + t->successfulSpeculations;
2159     kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2160                                    t->softFailedSpeculations;
2161 
2162     fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2163     fprintf ( statsFile, " Lock parameters: \n"
2164              "   max_soft_retries               : %10d\n"
2165              "   max_badness                    : %10d\n",
2166              __kmp_adaptive_backoff_params.max_soft_retries,
2167              __kmp_adaptive_backoff_params.max_badness);
2168     fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2169     fprintf( statsFile, " Total critical sections          : %10d\n", totalSections );
2170     fprintf( statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
2171              t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2172     fprintf( statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
2173              t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2174     fprintf( statsFile, " Lemming yields                   : %10d\n\n", t->lemmingYields );
2175 
2176     fprintf( statsFile, " Speculative acquire attempts     : %10d\n", totalSpeculations );
2177     fprintf( statsFile, " Successes                        : %10d (%5.1f%%)\n",
2178              t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2179     fprintf( statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
2180              t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2181     fprintf( statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
2182              t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2183 
2184     if (statsFile != stdout)
2185         fclose( statsFile );
2186 }
2187 
2188 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2189 #else
2190 # define KMP_INC_STAT(lck,stat)
2191 
2192 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2193 
2194 static inline bool
2195 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2196 {
2197     // It is enough to check that the head_id is zero.
2198     // We don't also need to check the tail.
2199     bool res = lck->lk.head_id == 0;
2200 
2201     // We need a fence here, since we must ensure that no memory operations
2202     // from later in this thread float above that read.
2203 #if KMP_COMPILER_ICC
2204     _mm_mfence();
2205 #else
2206     __sync_synchronize();
2207 #endif
2208 
2209     return res;
2210 }
2211 
2212 // Functions for manipulating the badness
2213 static __inline void
2214 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2215 {
2216     // Reset the badness to zero so we eagerly try to speculate again
2217     lck->lk.adaptive.badness = 0;
2218     KMP_INC_STAT(lck,successfulSpeculations);
2219 }
2220 
2221 // Create a bit mask with one more set bit.
2222 static __inline void
2223 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2224 {
2225     kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2226     if ( newBadness > lck->lk.adaptive.max_badness) {
2227         return;
2228     } else {
2229         lck->lk.adaptive.badness = newBadness;
2230     }
2231 }
2232 
2233 // Check whether speculation should be attempted.
2234 static __inline int
2235 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2236 {
2237     kmp_uint32 badness = lck->lk.adaptive.badness;
2238     kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2239     int res = (attempts & badness) == 0;
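    // E.g. (illustrative only): with badness == 3 (binary 011) this is true
    // only when the low two bits of acquire_attempts are zero, so we try to
    // speculate on roughly every fourth acquire; badness == 0 means we always
    // try.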
2240     return res;
2241 }
2242 
2243 // Attempt to acquire only the speculative lock.
2244 // Does not back off to the non-speculative lock.
2245 //
2246 static int
2247 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2248 {
2249     int retries = lck->lk.adaptive.max_soft_retries;
2250 
    // We don't explicitly count the start of speculation; rather, we record
    // the results (success, hard fail, soft fail). The sum of all of those
    // is the total number of times we started speculation, since all
    // speculations must end in one of those ways.
2255     do
2256     {
2257         kmp_uint32 status = _xbegin();
2258         // Switch this in to disable actual speculation but exercise
2259         // at least some of the rest of the code. Useful for debugging...
2260         // kmp_uint32 status = _XABORT_NESTED;
2261 
2262         if (status == _XBEGIN_STARTED )
2263         { /* We have successfully started speculation
2264            * Check that no-one acquired the lock for real between when we last looked
2265            * and now. This also gets the lock cache line into our read-set,
2266            * which we need so that we'll abort if anyone later claims it for real.
2267            */
2268             if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2269             {
2270                 // Lock is now visibly acquired, so someone beat us to it.
2271                 // Abort the transaction so we'll restart from _xbegin with the
2272                 // failure status.
2273                 _xabort(0x01);
2274                 KMP_ASSERT2( 0, "should not get here" );
2275             }
2276             return 1;   // Lock has been acquired (speculatively)
2277         } else {
2278             // We have aborted, update the statistics
2279             if ( status & SOFT_ABORT_MASK)
2280             {
2281                 KMP_INC_STAT(lck,softFailedSpeculations);
2282                 // and loop round to retry.
2283             }
2284             else
2285             {
2286                 KMP_INC_STAT(lck,hardFailedSpeculations);
2287                 // Give up if we had a hard failure.
2288                 break;
2289             }
2290         }
2291     }  while( retries-- ); // Loop while we have retries, and didn't fail hard.
2292 
2293     // Either we had a hard failure or we didn't succeed softly after
2294     // the full set of attempts, so back off the badness.
2295     __kmp_step_badness( lck );
2296     return 0;
2297 }
2298 
2299 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2300 // if the speculative lock cannot be acquired.
2301 // We can succeed speculatively, non-speculatively, or fail.
2302 static int
2303 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2304 {
2305     // First try to acquire the lock speculatively
2306     if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2307         return 1;
2308 
2309     // Speculative acquisition failed, so try to acquire it non-speculatively.
2310     // Count the non-speculative acquire attempt
2311     lck->lk.adaptive.acquire_attempts++;
2312 
2313     // Use base, non-speculative lock.
2314     if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2315     {
2316         KMP_INC_STAT(lck,nonSpeculativeAcquires);
2317         return 1;       // Lock is acquired (non-speculatively)
2318     }
2319     else
2320     {
2321         return 0;       // Failed to acquire the lock, it's already visibly locked.
2322     }
2323 }
2324 
2325 static int
2326 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2327 {
2328     char const * const func = "omp_test_lock";
2329     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2330         KMP_FATAL( LockIsUninitialized, func );
2331     }
2332 
2333     int retval = __kmp_test_adaptive_lock( lck, gtid );
2334 
2335     if ( retval ) {
2336         lck->lk.qlk.owner_id = gtid + 1;
2337     }
2338     return retval;
2339 }
2340 
2341 // Block until we can acquire a speculative, adaptive lock.
2342 // We check whether we should be trying to speculate.
2343 // If we should be, we check the real lock to see if it is free,
2344 // and, if not, pause without attempting to acquire it until it is.
2345 // Then we try the speculative acquire.
// This means that although we suffer from lemmings a little (because we
// can't acquire the lock speculatively until the queue of waiting threads
// has cleared), we don't get into a state where we can never acquire the
// lock speculatively (because we force the queue to clear by preventing
// new arrivals from entering the queue).
// This does mean that when we're trying to break lemmings, the lock is no
// longer fair. However, OpenMP makes no guarantee that its locks are fair,
// so this isn't a real problem.
2355 static void
2356 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2357 {
2358     if ( __kmp_should_speculate( lck, gtid ) )
2359     {
2360         if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2361         {
2362             if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2363                 return;
2364             // We tried speculation and failed, so give up.
2365         }
2366         else
2367         {
            // We can't try speculation until the lock is free, so we
            // pause here (without suspending on the queuing lock)
            // to allow it to drain, then try again.
            // All other threads will also see the same result from
            // __kmp_should_speculate, so they will be doing the same if
            // they try to claim the lock from now on.
2374             while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2375             {
2376                 KMP_INC_STAT(lck,lemmingYields);
2377                 __kmp_yield (TRUE);
2378             }
2379 
2380             if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2381                 return;
2382         }
2383     }
2384 
2385     // Speculative acquisition failed, so acquire it non-speculatively.
2386     // Count the non-speculative acquire attempt
2387     lck->lk.adaptive.acquire_attempts++;
2388 
2389     __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2390     // We have acquired the base lock, so count that.
2391     KMP_INC_STAT(lck,nonSpeculativeAcquires );
2392     ANNOTATE_QUEUING_ACQUIRED(lck);
2393 }
2394 
2395 static void
2396 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2397 {
2398     char const * const func = "omp_set_lock";
2399     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2400         KMP_FATAL( LockIsUninitialized, func );
2401     }
2402     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2403         KMP_FATAL( LockIsAlreadyOwned, func );
2404     }
2405 
2406     __kmp_acquire_adaptive_lock( lck, gtid );
2407 
2408     lck->lk.qlk.owner_id = gtid + 1;
2409 }
2410 
2411 static int
2412 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2413 {
2414     if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2415     {   // If the lock doesn't look claimed we must be speculating.
2416         // (Or the user's code is buggy and they're releasing without locking;
2417         // if we had XTEST we'd be able to check that case...)
2418         _xend();        // Exit speculation
2419         __kmp_update_badness_after_success( lck );
2420     }
2421     else
    {   // Since the lock *is* visibly locked we're not speculating,
        // so we should use the underlying lock's release scheme.
2424         __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2425     }
2426     return KMP_LOCK_RELEASED;
2427 }
2428 
2429 static int
2430 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2431 {
2432     char const * const func = "omp_unset_lock";
2433     KMP_MB();  /* in case another processor initialized lock */
2434     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2435         KMP_FATAL( LockIsUninitialized, func );
2436     }
2437     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2438         KMP_FATAL( LockUnsettingFree, func );
2439     }
2440     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2441         KMP_FATAL( LockUnsettingSetByAnother, func );
2442     }
2443     lck->lk.qlk.owner_id = 0;
2444     __kmp_release_adaptive_lock( lck, gtid );
2445     return KMP_LOCK_RELEASED;
2446 }
2447 
2448 static void
2449 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2450 {
2451     __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2452     lck->lk.adaptive.badness = 0;
2453     lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2454     lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2455     lck->lk.adaptive.max_badness      = __kmp_adaptive_backoff_params.max_badness;
2456 #if KMP_DEBUG_ADAPTIVE_LOCKS
2457     __kmp_zero_speculative_stats( &lck->lk.adaptive );
2458 #endif
2459     KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2460 }
2461 
2462 static void
2463 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2464 {
2465     __kmp_init_adaptive_lock( lck );
2466 }
2467 
2468 static void
2469 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2470 {
2471 #if KMP_DEBUG_ADAPTIVE_LOCKS
2472     __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2473 #endif
2474     __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2475     // Nothing needed for the speculative part.
2476 }
2477 
2478 static void
2479 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2480 {
2481     char const * const func = "omp_destroy_lock";
2482     if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2483         KMP_FATAL( LockIsUninitialized, func );
2484     }
2485     if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2486         KMP_FATAL( LockStillOwned, func );
2487     }
2488     __kmp_destroy_adaptive_lock( lck );
2489 }
2490 
2491 
2492 #endif // KMP_USE_ADAPTIVE_LOCKS
2493 
2494 
2495 /* ------------------------------------------------------------------------ */
2496 /* DRDPA ticket locks                                                */
2497 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2498 
2499 static kmp_int32
2500 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2501 {
2502     return TCR_4( lck->lk.owner_id ) - 1;
2503 }
2504 
2505 static inline bool
2506 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2507 {
2508     return lck->lk.depth_locked != -1;
2509 }
2510 
2511 __forceinline static int
2512 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2513 {
2514     kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2515     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2516     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2517       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2518       TCR_PTR(lck->lk.polls);                           // volatile load
2519 
2520 #ifdef USE_LOCK_PROFILE
2521     if (TCR_8(polls[ticket & mask].poll) != ticket)
2522         __kmp_printf("LOCK CONTENTION: %p\n", lck);
2523     /* else __kmp_printf( "." );*/
2524 #endif /* USE_LOCK_PROFILE */
2525 
2526     //
2527     // Now spin-wait, but reload the polls pointer and mask, in case the
2528     // polling area has been reconfigured.  Unless it is reconfigured, the
2529     // reloads stay in L1 cache and are cheap.
2530     //
2531     // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
2532     //
2533     // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2534     // and poll to be re-read every spin iteration.
2535     //
2536     kmp_uint32 spins;
2537 
2538     KMP_FSYNC_PREPARE(lck);
2539     KMP_INIT_YIELD(spins);
2540     while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2541         // If we are oversubscribed,
2542         // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2543         // CPU Pause is in the macros for yield.
2544         //
2545         KMP_YIELD(TCR_4(__kmp_nth)
2546           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2547         KMP_YIELD_SPIN(spins);
2548 
2549         // Re-read the mask and the poll pointer from the lock structure.
2550         //
2551         // Make certain that "mask" is read before "polls" !!!
2552         //
        // If another thread reconfigures the polling area and updates its
        // values, and we get the new value of mask but the old polls
        // pointer, we could access memory beyond the end of the old polling
        // area.
2557         //
2558         mask = TCR_8(lck->lk.mask);                     // volatile load
2559         polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2560           TCR_PTR(lck->lk.polls);                       // volatile load
2561     }
2562 
2563     //
2564     // Critical section starts here
2565     //
2566     KMP_FSYNC_ACQUIRED(lck);
2567     KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2568       ticket, lck));
2569     lck->lk.now_serving = ticket;                       // non-volatile store
2570 
2571     //
2572     // Deallocate a garbage polling area if we know that we are the last
2573     // thread that could possibly access it.
2574     //
2575     // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2576     // ticket.
2577     //
2578     if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2579         __kmp_free((void *)lck->lk.old_polls);
2580         lck->lk.old_polls = NULL;
2581         lck->lk.cleanup_ticket = 0;
2582     }
2583 
2584     //
2585     // Check to see if we should reconfigure the polling area.
2586     // If there is still a garbage polling area to be deallocated from a
2587     // previous reconfiguration, let a later thread reconfigure it.
2588     //
2589     if (lck->lk.old_polls == NULL) {
2590         bool reconfigure = false;
2591         volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2592         kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2593 
2594         if (TCR_4(__kmp_nth)
2595           > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2596             //
2597             // We are in oversubscription mode.  Contract the polling area
2598             // down to a single location, if that hasn't been done already.
2599             //
2600             if (num_polls > 1) {
2601                 reconfigure = true;
2602                 num_polls = TCR_4(lck->lk.num_polls);
2603                 mask = 0;
2604                 num_polls = 1;
2605                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2606                   __kmp_allocate(num_polls * sizeof(*polls));
2607                 polls[0].poll = ticket;
2608             }
2609         }
2610         else {
2611             //
2612             // We are in under/fully subscribed mode.  Check the number of
2613             // threads waiting on the lock.  The size of the polling area
2614             // should be at least the number of threads waiting.
2615             //
2616             kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2617             if (num_waiting > num_polls) {
2618                 kmp_uint32 old_num_polls = num_polls;
2619                 reconfigure = true;
2620                 do {
2621                     mask = (mask << 1) | 1;
2622                     num_polls *= 2;
2623                 } while (num_polls <= num_waiting);
2624 
2625                 //
2626                 // Allocate the new polling area, and copy the relevant portion
2627                 // of the old polling area to the new area.  __kmp_allocate()
2628                 // zeroes the memory it allocates, and most of the old area is
2629                 // just zero padding, so we only copy the release counters.
2630                 //
2631                 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2632                   __kmp_allocate(num_polls * sizeof(*polls));
2633                 kmp_uint32 i;
2634                 for (i = 0; i < old_num_polls; i++) {
2635                     polls[i].poll = old_polls[i].poll;
2636                 }
2637             }
2638         }
2639 
2640         if (reconfigure) {
2641             //
2642             // Now write the updated fields back to the lock structure.
2643             //
2644             // Make certain that "polls" is written before "mask" !!!
2645             //
2646             // If another thread picks up the new value of mask and the old
            // polls pointer, it could access memory beyond the end of the
2648             // old polling area.
2649             //
2650             // On x86, we need memory fences.
2651             //
2652             KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2653               ticket, lck, num_polls));
2654 
2655             lck->lk.old_polls = old_polls;              // non-volatile store
2656             lck->lk.polls = polls;                      // volatile store
2657 
2658             KMP_MB();
2659 
2660             lck->lk.num_polls = num_polls;              // non-volatile store
2661             lck->lk.mask = mask;                        // volatile store
2662 
2663             KMP_MB();
2664 
2665             //
2666             // Only after the new polling area and mask have been flushed
2667             // to main memory can we update the cleanup ticket field.
2668             //
2669             // volatile load / non-volatile store
2670             //
2671             lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2672         }
2673     }
2674     return KMP_LOCK_ACQUIRED_FIRST;
2675 }
2676 
2677 int
2678 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2679 {
2680     int retval = __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2681     ANNOTATE_DRDPA_ACQUIRED(lck);
2682     return retval;
2683 }
2684 
2685 static int
2686 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2687 {
2688     char const * const func = "omp_set_lock";
2689     if ( lck->lk.initialized != lck ) {
2690         KMP_FATAL( LockIsUninitialized, func );
2691     }
2692     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2693         KMP_FATAL( LockNestableUsedAsSimple, func );
2694     }
2695     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2696         KMP_FATAL( LockIsAlreadyOwned, func );
2697     }
2698 
2699     __kmp_acquire_drdpa_lock( lck, gtid );
2700 
2701     lck->lk.owner_id = gtid + 1;
2702     return KMP_LOCK_ACQUIRED_FIRST;
2703 }
2704 
2705 int
2706 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2707 {
2708     //
2709     // First get a ticket, then read the polls pointer and the mask.
2710     // The polls pointer must be read before the mask!!! (See above)
2711     //
2712     kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
2713     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2714       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2715       TCR_PTR(lck->lk.polls);                           // volatile load
2716     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2717     if (TCR_8(polls[ticket & mask].poll) == ticket) {
2718         kmp_uint64 next_ticket = ticket + 1;
2719         if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2720           ticket, next_ticket)) {
2721             KMP_FSYNC_ACQUIRED(lck);
2722             KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2723                ticket, lck));
2724             lck->lk.now_serving = ticket;               // non-volatile store
2725 
2726             //
2727             // Since no threads are waiting, there is no possibility that
2728             // we would want to reconfigure the polling area.  We might
2729             // have the cleanup ticket value (which says that it is now
2730             // safe to deallocate old_polls), but we'll let a later thread
2731             // which calls __kmp_acquire_lock do that - this routine
            // isn't supposed to block, and we would risk blocking if we
2733             // called __kmp_free() to do the deallocation.
2734             //
2735             return TRUE;
2736         }
2737     }
2738     return FALSE;
2739 }
2740 
2741 static int
2742 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2743 {
2744     char const * const func = "omp_test_lock";
2745     if ( lck->lk.initialized != lck ) {
2746         KMP_FATAL( LockIsUninitialized, func );
2747     }
2748     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2749         KMP_FATAL( LockNestableUsedAsSimple, func );
2750     }
2751 
2752     int retval = __kmp_test_drdpa_lock( lck, gtid );
2753 
2754     if ( retval ) {
2755         lck->lk.owner_id = gtid + 1;
2756     }
2757     return retval;
2758 }
2759 
2760 int
2761 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2762 {
2763     //
2764     // Read the ticket value from the lock data struct, then the polls
2765     // pointer and the mask.  The polls pointer must be read before the
2766     // mask!!! (See above)
2767     //
2768     kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
2769     volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2770       = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2771       TCR_PTR(lck->lk.polls);                           // volatile load
2772     kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
2773     KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2774        ticket - 1, lck));
2775     KMP_FSYNC_RELEASING(lck);
2776     ANNOTATE_DRDPA_RELEASED(lck);
2777     KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2778     return KMP_LOCK_RELEASED;
2779 }
2780 
2781 static int
2782 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2783 {
2784     char const * const func = "omp_unset_lock";
2785     KMP_MB();  /* in case another processor initialized lock */
2786     if ( lck->lk.initialized != lck ) {
2787         KMP_FATAL( LockIsUninitialized, func );
2788     }
2789     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2790         KMP_FATAL( LockNestableUsedAsSimple, func );
2791     }
2792     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2793         KMP_FATAL( LockUnsettingFree, func );
2794     }
2795     if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2796       && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2797         KMP_FATAL( LockUnsettingSetByAnother, func );
2798     }
2799     lck->lk.owner_id = 0;
2800     return __kmp_release_drdpa_lock( lck, gtid );
2801 }
2802 
2803 void
2804 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2805 {
2806     lck->lk.location = NULL;
2807     lck->lk.mask = 0;
2808     lck->lk.num_polls = 1;
2809     lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2810       __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2811     lck->lk.cleanup_ticket = 0;
2812     lck->lk.old_polls = NULL;
2813     lck->lk.next_ticket = 0;
2814     lck->lk.now_serving = 0;
2815     lck->lk.owner_id = 0;      // no thread owns the lock.
2816     lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2817     lck->lk.initialized = lck;
2818 
2819     KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2820 }
2821 
2822 static void
2823 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2824 {
2825     __kmp_init_drdpa_lock( lck );
2826 }
2827 
2828 void
2829 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2830 {
2831     lck->lk.initialized = NULL;
2832     lck->lk.location    = NULL;
2833     if (lck->lk.polls != NULL) {
2834         __kmp_free((void *)lck->lk.polls);
2835         lck->lk.polls = NULL;
2836     }
2837     if (lck->lk.old_polls != NULL) {
2838         __kmp_free((void *)lck->lk.old_polls);
2839         lck->lk.old_polls = NULL;
2840     }
2841     lck->lk.mask = 0;
2842     lck->lk.num_polls = 0;
2843     lck->lk.cleanup_ticket = 0;
2844     lck->lk.next_ticket = 0;
2845     lck->lk.now_serving = 0;
2846     lck->lk.owner_id = 0;
2847     lck->lk.depth_locked = -1;
2848 }
2849 
2850 static void
2851 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2852 {
2853     char const * const func = "omp_destroy_lock";
2854     if ( lck->lk.initialized != lck ) {
2855         KMP_FATAL( LockIsUninitialized, func );
2856     }
2857     if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2858         KMP_FATAL( LockNestableUsedAsSimple, func );
2859     }
2860     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2861         KMP_FATAL( LockStillOwned, func );
2862     }
2863     __kmp_destroy_drdpa_lock( lck );
2864 }
2865 
2866 
2867 //
2868 // nested drdpa ticket locks
2869 //
2870 
2871 int
2872 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2873 {
2874     KMP_DEBUG_ASSERT( gtid >= 0 );
2875 
2876     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2877         lck->lk.depth_locked += 1;
2878         return KMP_LOCK_ACQUIRED_NEXT;
2879     }
2880     else {
2881         __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2882         ANNOTATE_DRDPA_ACQUIRED(lck);
2883         KMP_MB();
2884         lck->lk.depth_locked = 1;
2885         KMP_MB();
2886         lck->lk.owner_id = gtid + 1;
2887         return KMP_LOCK_ACQUIRED_FIRST;
2888     }
2889 }
2890 
2891 static void
2892 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2893 {
2894     char const * const func = "omp_set_nest_lock";
2895     if ( lck->lk.initialized != lck ) {
2896         KMP_FATAL( LockIsUninitialized, func );
2897     }
2898     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2899         KMP_FATAL( LockSimpleUsedAsNestable, func );
2900     }
2901     __kmp_acquire_nested_drdpa_lock( lck, gtid );
2902 }
2903 
2904 int
2905 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2906 {
2907     int retval;
2908 
2909     KMP_DEBUG_ASSERT( gtid >= 0 );
2910 
2911     if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2912         retval = ++lck->lk.depth_locked;
2913     }
2914     else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2915         retval = 0;
2916     }
2917     else {
2918         KMP_MB();
2919         retval = lck->lk.depth_locked = 1;
2920         KMP_MB();
2921         lck->lk.owner_id = gtid + 1;
2922     }
2923     return retval;
2924 }
2925 
2926 static int
2927 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2928 {
2929     char const * const func = "omp_test_nest_lock";
2930     if ( lck->lk.initialized != lck ) {
2931         KMP_FATAL( LockIsUninitialized, func );
2932     }
2933     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2934         KMP_FATAL( LockSimpleUsedAsNestable, func );
2935     }
2936     return __kmp_test_nested_drdpa_lock( lck, gtid );
2937 }
2938 
2939 int
2940 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2941 {
2942     KMP_DEBUG_ASSERT( gtid >= 0 );
2943 
2944     KMP_MB();
2945     if ( --(lck->lk.depth_locked) == 0 ) {
2946         KMP_MB();
2947         lck->lk.owner_id = 0;
2948         __kmp_release_drdpa_lock( lck, gtid );
2949         return KMP_LOCK_RELEASED;
2950     }
2951     return KMP_LOCK_STILL_HELD;
2952 }
2953 
2954 static int
2955 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2956 {
2957     char const * const func = "omp_unset_nest_lock";
2958     KMP_MB();  /* in case another processor initialized lock */
2959     if ( lck->lk.initialized != lck ) {
2960         KMP_FATAL( LockIsUninitialized, func );
2961     }
2962     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2963         KMP_FATAL( LockSimpleUsedAsNestable, func );
2964     }
2965     if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2966         KMP_FATAL( LockUnsettingFree, func );
2967     }
2968     if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2969         KMP_FATAL( LockUnsettingSetByAnother, func );
2970     }
2971     return __kmp_release_nested_drdpa_lock( lck, gtid );
2972 }
2973 
2974 void
2975 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2976 {
2977     __kmp_init_drdpa_lock( lck );
2978     lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2979 }
2980 
2981 static void
2982 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2983 {
2984     __kmp_init_nested_drdpa_lock( lck );
2985 }
2986 
2987 void
2988 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2989 {
2990     __kmp_destroy_drdpa_lock( lck );
2991     lck->lk.depth_locked = 0;
2992 }
2993 
2994 static void
2995 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2996 {
2997     char const * const func = "omp_destroy_nest_lock";
2998     if ( lck->lk.initialized != lck ) {
2999         KMP_FATAL( LockIsUninitialized, func );
3000     }
3001     if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
3002         KMP_FATAL( LockSimpleUsedAsNestable, func );
3003     }
3004     if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
3005         KMP_FATAL( LockStillOwned, func );
3006     }
3007     __kmp_destroy_nested_drdpa_lock( lck );
3008 }
3009 
3010 
3011 //
3012 // access functions to fields which don't exist for all lock kinds.
3013 //
3014 
3015 static int
3016 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
3017 {
3018     return lck == lck->lk.initialized;
3019 }
3020 
3021 static const ident_t *
3022 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
3023 {
3024     return lck->lk.location;
3025 }
3026 
3027 static void
3028 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3029 {
3030     lck->lk.location = loc;
3031 }
3032 
3033 static kmp_lock_flags_t
3034 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3035 {
3036     return lck->lk.flags;
3037 }
3038 
3039 static void
3040 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3041 {
3042     lck->lk.flags = flags;
3043 }
3044 
3045 // Time stamp counter
3046 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3047 # define __kmp_tsc() __kmp_hardware_timestamp()
3048 // Runtime's default backoff parameters
3049 kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 };
3050 #else
3051 // Use nanoseconds for other platforms
3052 extern kmp_uint64 __kmp_now_nsec();
3053 kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 };
3054 # define __kmp_tsc() __kmp_now_nsec()
3055 #endif
3056 
3057 // A useful predicate for dealing with timestamps that may wrap.
3058 // Is a before b?
3059 // Since the timestamps may wrap, this is asking whether it's
3060 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
3061 // Times where going clockwise is less distance than going anti-clockwise
3062 // are in the future, others are in the past.
3063 // e.g.) a = MAX-1, b = MAX+1 (=0), then a > b (true) does not mean a reached b
3064 //       whereas signed(a) = -2, signed(b) = 0 captures the actual difference
3065 static inline bool before(kmp_uint64 a, kmp_uint64 b)
3066 {
3067     return ((kmp_int64)b - (kmp_int64)a) > 0;
3068 }
3069 
3070 // Truncated binary exponential backoff function
3071 void
3072 __kmp_spin_backoff(kmp_backoff_t *boff)
3073 {
    // We could flatten this loop, but making it a nested loop gives better results.
3075     kmp_uint32 i;
3076     for (i = boff->step; i > 0; i--) {
3077         kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
3078         do {
3079             KMP_CPU_PAUSE();
3080         } while (before(__kmp_tsc(), goal));
3081     }
3082     boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1);
3083 }
3084 
3085 #if KMP_USE_DYNAMIC_LOCK
3086 
// Direct lock initializers simply write a tag to the low 8 bits of the lock word.
3088 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
3089 {
3090     TCW_4(*lck, KMP_GET_D_TAG(seq));
3091     KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
3092 }
3093 
3094 #if KMP_USE_TSX
3095 
3096 // HLE lock functions - imported from the testbed runtime.
3097 #define HLE_ACQUIRE ".byte 0xf2;"
3098 #define HLE_RELEASE ".byte 0xf3;"
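// 0xF2 and 0xF3 are the XACQUIRE and XRELEASE prefixes. On hardware without
// HLE they are ignored as ordinary REPNE/REP prefixes, so these routines
// degrade gracefully to a plain test-and-set spin lock.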
3099 
3100 static inline kmp_uint32
3101 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3102 {
3103     __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3104                     : "+r"(v), "+m"(*p)
3105                     :
3106                     : "memory");
3107     return v;
3108 }
3109 
3110 static void
3111 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3112 {
3113     TCW_4(*lck, 0);
3114 }
3115 
3116 static void
3117 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3118 {
3119     // Use gtid for KMP_LOCK_BUSY if necessary
3120     if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
3121         int delay = 1;
3122         do {
3123             while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
3124                 for (int i = delay; i != 0; --i)
3125                     KMP_CPU_PAUSE();
3126                 delay = ((delay << 1) | 1) & 7;
3127             }
3128         } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
3129     }
3130 }
3131 
3132 static void
3133 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3134 {
3135     __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3136 }
3137 
3138 static int
3139 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3140 {
3141     __asm__ volatile(HLE_RELEASE "movl %1,%0"
3142                     : "=m"(*lck)
3143                     : "r"(KMP_LOCK_FREE(hle))
3144                     : "memory");
3145     return KMP_LOCK_RELEASED;
3146 }
3147 
3148 static int
3149 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3150 {
3151     return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3152 }
3153 
3154 static int
3155 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3156 {
3157     return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
3158 }
3159 
3160 static int
3161 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3162 {
3163     return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3164 }
3165 
3166 static void
3167 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
3168 {
3169     __kmp_init_queuing_lock(lck);
3170 }
3171 
3172 static void
3173 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
3174 {
3175     __kmp_destroy_queuing_lock(lck);
3176 }
3177 
3178 static void
3179 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3180 {
3181     unsigned retries=3, status;
3182     do {
3183         status = _xbegin();
3184         if (status == _XBEGIN_STARTED) {
3185             if (__kmp_is_unlocked_queuing_lock(lck))
3186                 return;
3187             _xabort(0xff);
3188         }
3189         if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
3190             // Wait until lock becomes free
3191             while (! __kmp_is_unlocked_queuing_lock(lck))
3192                 __kmp_yield(TRUE);
3193         }
3194         else if (!(status & _XABORT_RETRY))
3195             break;
3196     } while (retries--);
3197 
    // Fall back to the non-speculative queuing lock.
3199     __kmp_acquire_queuing_lock(lck, gtid);
3200 }
3201 
3202 static void
3203 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3204 {
3205     __kmp_acquire_rtm_lock(lck, gtid);
3206 }
3207 
3208 static int
3209 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3210 {
3211     if (__kmp_is_unlocked_queuing_lock(lck)) {
3212         // Releasing from speculation
3213         _xend();
3214     }
3215     else {
3216         // Releasing from a real lock
3217         __kmp_release_queuing_lock(lck, gtid);
3218     }
3219     return KMP_LOCK_RELEASED;
3220 }
3221 
3222 static int
3223 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3224 {
3225     return __kmp_release_rtm_lock(lck, gtid);
3226 }
3227 
3228 static int
3229 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3230 {
3231     unsigned retries=3, status;
3232     do {
3233         status = _xbegin();
3234         if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
3235             return 1;
3236         }
3237         if (!(status & _XABORT_RETRY))
3238             break;
3239     } while (retries--);
3240 
    // Fall back to the non-speculative test of the queuing lock.
    return __kmp_test_queuing_lock(lck, gtid);
3242 }
3243 
3244 static int
3245 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3246 {
3247     return __kmp_test_rtm_lock(lck, gtid);
3248 }
3249 
3250 #endif // KMP_USE_TSX
3251 
3252 // Entry functions for indirect locks (first element of direct lock jump tables).
3253 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3254 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3255 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3256 static int  __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3257 static int  __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3258 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3259 static int  __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3260 static int  __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3261 
3262 //
// Jump tables for the direct lock functions.
// Only fill in the odd entries; that avoids the need to shift out the low bit.
3265 //
3266 
3267 // init functions
3268 #define expand(l, op) 0,__kmp_init_direct_lock,
3269 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3270     = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
3271 #undef expand
3272 
3273 // destroy functions
3274 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3275 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
3276     = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
3277 #undef expand
3278 
3279 // set/acquire functions
3280 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3281 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
3282     = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3283 #undef expand
3284 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3285 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
3286     = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3287 #undef expand
3288 
3289 // unset/release and test functions
3290 #define expand(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3291 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
3292     = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
3293 static int  (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
3294     = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
3295 #undef expand
3296 #define expand(l, op) 0,(int  (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3297 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
3298     = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
3299 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
3300     = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
3301 #undef expand
3302 
3303 // Exposes only one set of jump tables (*lock or *lock_with_checks).
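// __kmp_init_dynamic_user_locks() binds these pointers to either the checked or
// unchecked tables defined above.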
3304 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
3305 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
3306 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
3307 
3308 //
3309 // Jump tables for the indirect lock functions.
3310 //
3311 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3312 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
3313 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
3314 #undef expand
3315 
3316 // set/acquire functions
3317 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3318 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3319 #undef expand
3320 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3321 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3322 #undef expand
3323 
3324 // unset/release and test functions
3325 #define expand(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3326 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3327 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3328 #undef expand
3329 #define expand(l, op) (int  (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3330 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3331 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3332 #undef expand
3333 
// Exposes only one set of jump tables (*lock or *lock_with_checks).
3335 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3336 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3337 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
3338 
3339 // Lock index table.
3340 kmp_indirect_lock_table_t __kmp_i_lock_table;
3341 
3342 // Size of indirect locks.
3343 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
3344 
3345 // Jump tables for lock accessor/modifier.
3346 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3347 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3348 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3349 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
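// These tables are filled in by __kmp_init_dynamic_user_locks() below; entries
// it does not cover remain NULL.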
3350 
3351 // Use different lock pools for different lock types.
3352 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
3353 
3354 // User lock allocator for dynamically dispatched indirect locks.
// Every entry of the indirect lock table holds the address and type of the allocated indirect lock
// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
// object is returned to the reusable pool of locks, which is unique to each lock type.
3358 kmp_indirect_lock_t *
3359 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3360 {
3361     kmp_indirect_lock_t *lck;
3362     kmp_lock_index_t idx;
3363 
3364     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3365 
3366     if (__kmp_indirect_lock_pool[tag] != NULL) {
        // Reuse a previously allocated and destroyed lock object
3368         lck = __kmp_indirect_lock_pool[tag];
3369         if (OMP_LOCK_T_SIZE < sizeof(void *))
3370             idx = lck->lock->pool.index;
3371         __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3372         KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
3373     } else {
3374         idx = __kmp_i_lock_table.next;
3375         // Check capacity and double the size if it is full
3376         if (idx == __kmp_i_lock_table.size) {
3377             // Double up the space for block pointers
3378             int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
3379             kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3380             __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
3381             KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
3382             __kmp_free(old_table);
3383             // Allocate new objects in the new blocks
3384             for (int i = row; i < 2*row; ++i)
3385                 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
3386                                                   __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
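            // idx equals the old table size at this point, so this doubles the capacity.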
3387             __kmp_i_lock_table.size = 2*idx;
3388         }
3389         __kmp_i_lock_table.next++;
3390         lck = KMP_GET_I_LOCK(idx);
3391         // Allocate a new base lock object
3392         lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3393         KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3394     }
3395 
3396     __kmp_release_lock(&__kmp_global_lock, gtid);
3397 
3398     lck->type = tag;
3399 
3400     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3401         *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3402     } else {
3403         *((kmp_indirect_lock_t **)user_lock) = lck;
3404     }
3405 
3406     return lck;
3407 }
3408 
3409 // User lock lookup for dynamically dispatched locks.
3410 static __forceinline
3411 kmp_indirect_lock_t *
3412 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3413 {
3414     if (__kmp_env_consistency_check) {
3415         kmp_indirect_lock_t *lck = NULL;
3416         if (user_lock == NULL) {
3417             KMP_FATAL(LockIsUninitialized, func);
3418         }
3419         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3420             kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3421             if (idx >= __kmp_i_lock_table.size) {
3422                 KMP_FATAL(LockIsUninitialized, func);
3423             }
3424             lck = KMP_GET_I_LOCK(idx);
3425         } else {
3426             lck = *((kmp_indirect_lock_t **)user_lock);
3427         }
3428         if (lck == NULL) {
3429             KMP_FATAL(LockIsUninitialized, func);
3430         }
3431         return lck;
3432     } else {
3433         if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3434             return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3435         } else {
3436             return *((kmp_indirect_lock_t **)user_lock);
3437         }
3438     }
3439 }
3440 
3441 static void
3442 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3443 {
3444 #if KMP_USE_ADAPTIVE_LOCKS
3445     if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3446         KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3447         seq = lockseq_queuing;
3448     }
3449 #endif
3450 #if KMP_USE_TSX
3451     if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3452         seq = lockseq_queuing;
3453     }
3454 #endif
3455     kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3456     kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3457     KMP_I_LOCK_FUNC(l, init)(l->lock);
3458     KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
3459 }
3460 
3461 static void
3462 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3463 {
3464     kmp_uint32 gtid = __kmp_entry_gtid();
3465     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3466     KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3467     kmp_indirect_locktag_t tag = l->type;
3468 
3469     __kmp_acquire_lock(&__kmp_global_lock, gtid);
3470 
3471     // Use the base lock's space to keep the pool chain.
3472     l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3473     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3474         l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3475     }
3476     __kmp_indirect_lock_pool[tag] = l;
3477 
3478     __kmp_release_lock(&__kmp_global_lock, gtid);
3479 }
3480 
3481 static void
3482 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3483 {
3484     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3485     KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3486 }
3487 
3488 static int
3489 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3490 {
3491     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3492     return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3493 }
3494 
3495 static int
3496 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3497 {
3498     kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3499     return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3500 }
3501 
3502 static void
3503 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3504 {
3505     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3506     KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3507 }
3508 
3509 static int
3510 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3511 {
3512     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3513     return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3514 }
3515 
3516 static int
3517 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3518 {
3519     kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3520     return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3521 }
3522 
3523 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3524 
3525 // This is used only in kmp_error.cpp when consistency checking is on.
3526 kmp_int32
3527 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3528 {
3529     switch (seq) {
3530         case lockseq_tas:
3531         case lockseq_nested_tas:
3532             return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3533 #if KMP_USE_FUTEX
3534         case lockseq_futex:
3535         case lockseq_nested_futex:
3536             return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3537 #endif
3538         case lockseq_ticket:
3539         case lockseq_nested_ticket:
3540             return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3541         case lockseq_queuing:
3542         case lockseq_nested_queuing:
3543 #if KMP_USE_ADAPTIVE_LOCKS
3544         case lockseq_adaptive:
3545 #endif
3546             return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3547         case lockseq_drdpa:
3548         case lockseq_nested_drdpa:
3549             return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3550         default:
3551             return 0;
3552     }
3553 }
3554 
3555 // Initializes data for dynamic user locks.
3556 void
3557 __kmp_init_dynamic_user_locks()
3558 {
3559     // Initialize jump table for the lock functions
3560     if (__kmp_env_consistency_check) {
3561         __kmp_direct_set     = direct_set_check;
3562         __kmp_direct_unset   = direct_unset_check;
3563         __kmp_direct_test    = direct_test_check;
3564         __kmp_indirect_set   = indirect_set_check;
3565         __kmp_indirect_unset = indirect_unset_check;
3566         __kmp_indirect_test  = indirect_test_check;
3567     }
3568     else {
3569         __kmp_direct_set     = direct_set;
3570         __kmp_direct_unset   = direct_unset;
3571         __kmp_direct_test    = direct_test;
3572         __kmp_indirect_set   = indirect_set;
3573         __kmp_indirect_unset = indirect_unset;
3574         __kmp_indirect_test  = indirect_test;
3575     }
3576     // If the user locks have already been initialized, then return.
3577     // Allow the switch between different KMP_CONSISTENCY_CHECK values,
3578     // but do not allocate new lock tables if they have already been
3579     // allocated.
3580     if (__kmp_init_user_locks)
3581         return;
3582 
3583     // Initialize lock index table
3584     __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3585     __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3586     *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
3587                                   __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3588     __kmp_i_lock_table.next = 0;
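    // The table starts with a single chunk of KMP_I_LOCK_CHUNK lock slots;
    // __kmp_allocate_indirect_lock() doubles the capacity when it fills up.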
3589 
3590     // Indirect lock size
3591     __kmp_indirect_lock_size[locktag_ticket]         = sizeof(kmp_ticket_lock_t);
3592     __kmp_indirect_lock_size[locktag_queuing]        = sizeof(kmp_queuing_lock_t);
3593 #if KMP_USE_ADAPTIVE_LOCKS
3594     __kmp_indirect_lock_size[locktag_adaptive]       = sizeof(kmp_adaptive_lock_t);
3595 #endif
3596     __kmp_indirect_lock_size[locktag_drdpa]          = sizeof(kmp_drdpa_lock_t);
3597 #if KMP_USE_TSX
3598     __kmp_indirect_lock_size[locktag_rtm]            = sizeof(kmp_queuing_lock_t);
3599 #endif
3600     __kmp_indirect_lock_size[locktag_nested_tas]     = sizeof(kmp_tas_lock_t);
3601 #if KMP_USE_FUTEX
3602     __kmp_indirect_lock_size[locktag_nested_futex]   = sizeof(kmp_futex_lock_t);
3603 #endif
3604     __kmp_indirect_lock_size[locktag_nested_ticket]  = sizeof(kmp_ticket_lock_t);
3605     __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3606     __kmp_indirect_lock_size[locktag_nested_drdpa]   = sizeof(kmp_drdpa_lock_t);
3607 
3608     // Initialize lock accessor/modifier
3609 #define fill_jumps(table, expand, sep) {            \
3610     table[locktag##sep##ticket]  = expand(ticket);  \
3611     table[locktag##sep##queuing] = expand(queuing); \
3612     table[locktag##sep##drdpa]   = expand(drdpa);   \
3613 }
3614 
3615 #if KMP_USE_ADAPTIVE_LOCKS
3616 # define fill_table(table, expand) {           \
3617     fill_jumps(table, expand, _);              \
3618     table[locktag_adaptive] = expand(queuing); \
3619     fill_jumps(table, expand, _nested_);       \
3620 }
3621 #else
3622 # define fill_table(table, expand) {           \
3623     fill_jumps(table, expand, _);              \
3624     fill_jumps(table, expand, _nested_);       \
3625 }
3626 #endif // KMP_USE_ADAPTIVE_LOCKS
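// For example, fill_table(__kmp_indirect_set_location, expand) assigns the
// ticket/queuing/drdpa accessors (and their nested variants) cast to the generic
// signature; the adaptive entry reuses the queuing accessor when adaptive locks
// are enabled.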
3627 
3628 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3629     fill_table(__kmp_indirect_set_location, expand);
3630 #undef expand
3631 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3632     fill_table(__kmp_indirect_set_flags, expand);
3633 #undef expand
3634 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3635     fill_table(__kmp_indirect_get_location, expand);
3636 #undef expand
3637 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3638     fill_table(__kmp_indirect_get_flags, expand);
3639 #undef expand
3640 
3641     __kmp_init_user_locks = TRUE;
3642 }
3643 
3644 // Clean up the lock table.
3645 void
3646 __kmp_cleanup_indirect_user_locks()
3647 {
3648     kmp_lock_index_t i;
3649     int k;
3650 
3651     // Clean up locks in the pools first (they were already destroyed before going into the pools).
3652     for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3653         kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3654         while (l != NULL) {
3655             kmp_indirect_lock_t *ll = l;
3656             l = (kmp_indirect_lock_t *)l->lock->pool.next;
3657             KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
3658             __kmp_free(ll->lock);
3659             ll->lock = NULL;
3660         }
3661         __kmp_indirect_lock_pool[k] = NULL;
3662     }
3663     // Clean up the remaining undestroyed locks.
3664     for (i = 0; i < __kmp_i_lock_table.next; i++) {
3665         kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3666         if (l->lock != NULL) {
3667             // Locks not destroyed explicitly need to be destroyed here.
3668             KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3669             KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
3670             __kmp_free(l->lock);
3671         }
3672     }
3673     // Free the table
3674     for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3675         __kmp_free(__kmp_i_lock_table.table[i]);
3676     __kmp_free(__kmp_i_lock_table.table);
3677 
3678     __kmp_init_user_locks = FALSE;
3679 }
3680 
3681 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3682 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3683 
3684 #else // KMP_USE_DYNAMIC_LOCK
3685 
3686 /* ------------------------------------------------------------------------ */
3687 /* user locks
3688  *
3689  * They are implemented as a table of function pointers which are set to the
3690  * lock functions of the appropriate kind, once that has been determined.
3691  */
3692 
3693 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3694 
3695 size_t __kmp_base_user_lock_size = 0;
3696 size_t __kmp_user_lock_size = 0;
3697 
3698 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3699 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3700 
3701 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3702 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3703 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3704 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3705 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3706 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3707 
3708 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3709 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3710 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3711 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3712 
3713 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3714 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3715 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3716 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3717 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
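// __kmp_set_user_lock_vptrs() below binds all of the pointers above to the
// implementation of the lock kind selected at runtime.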
3718 
3719 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3720 {
3721     switch ( user_lock_kind ) {
3722         case lk_default:
3723         default:
3724         KMP_ASSERT( 0 );
3725 
3726         case lk_tas: {
3727             __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3728             __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3729 
3730             __kmp_get_user_lock_owner_ =
3731               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3732               ( &__kmp_get_tas_lock_owner );
3733 
3734             if ( __kmp_env_consistency_check ) {
3735                 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3736                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3737             }
3738             else {
3739                 KMP_BIND_USER_LOCK(tas);
3740                 KMP_BIND_NESTED_USER_LOCK(tas);
3741             }
3742 
3743             __kmp_destroy_user_lock_ =
3744               ( void ( * )( kmp_user_lock_p ) )
3745               ( &__kmp_destroy_tas_lock );
3746 
3747              __kmp_is_user_lock_initialized_ =
3748                ( int ( * )( kmp_user_lock_p ) ) NULL;
3749 
3750              __kmp_get_user_lock_location_ =
3751                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3752 
3753              __kmp_set_user_lock_location_ =
3754                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3755 
3756              __kmp_get_user_lock_flags_ =
3757                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3758 
3759              __kmp_set_user_lock_flags_ =
3760                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3761         }
3762         break;
3763 
3764 #if KMP_USE_FUTEX
3765 
3766         case lk_futex: {
3767             __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3768             __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3769 
3770             __kmp_get_user_lock_owner_ =
3771               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3772               ( &__kmp_get_futex_lock_owner );
3773 
3774             if ( __kmp_env_consistency_check ) {
3775                 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3776                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3777             }
3778             else {
3779                 KMP_BIND_USER_LOCK(futex);
3780                 KMP_BIND_NESTED_USER_LOCK(futex);
3781             }
3782 
3783             __kmp_destroy_user_lock_ =
3784               ( void ( * )( kmp_user_lock_p ) )
3785               ( &__kmp_destroy_futex_lock );
3786 
3787              __kmp_is_user_lock_initialized_ =
3788                ( int ( * )( kmp_user_lock_p ) ) NULL;
3789 
3790              __kmp_get_user_lock_location_ =
3791                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3792 
3793              __kmp_set_user_lock_location_ =
3794                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3795 
3796              __kmp_get_user_lock_flags_ =
3797                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3798 
3799              __kmp_set_user_lock_flags_ =
3800                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3801         }
3802         break;
3803 
3804 #endif // KMP_USE_FUTEX
3805 
3806         case lk_ticket: {
3807             __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3808             __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3809 
3810             __kmp_get_user_lock_owner_ =
3811               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3812               ( &__kmp_get_ticket_lock_owner );
3813 
3814             if ( __kmp_env_consistency_check ) {
3815                 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3816                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3817             }
3818             else {
3819                 KMP_BIND_USER_LOCK(ticket);
3820                 KMP_BIND_NESTED_USER_LOCK(ticket);
3821             }
3822 
3823             __kmp_destroy_user_lock_ =
3824               ( void ( * )( kmp_user_lock_p ) )
3825               ( &__kmp_destroy_ticket_lock );
3826 
3827              __kmp_is_user_lock_initialized_ =
3828                ( int ( * )( kmp_user_lock_p ) )
3829                ( &__kmp_is_ticket_lock_initialized );
3830 
3831              __kmp_get_user_lock_location_ =
3832                ( const ident_t * ( * )( kmp_user_lock_p ) )
3833                ( &__kmp_get_ticket_lock_location );
3834 
3835              __kmp_set_user_lock_location_ =
3836                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3837                ( &__kmp_set_ticket_lock_location );
3838 
3839              __kmp_get_user_lock_flags_ =
3840                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3841                ( &__kmp_get_ticket_lock_flags );
3842 
3843              __kmp_set_user_lock_flags_ =
3844                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3845                ( &__kmp_set_ticket_lock_flags );
3846         }
3847         break;
3848 
3849         case lk_queuing: {
3850             __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3851             __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3852 
3853             __kmp_get_user_lock_owner_ =
3854               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3855               ( &__kmp_get_queuing_lock_owner );
3856 
3857             if ( __kmp_env_consistency_check ) {
3858                 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3859                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3860             }
3861             else {
3862                 KMP_BIND_USER_LOCK(queuing);
3863                 KMP_BIND_NESTED_USER_LOCK(queuing);
3864             }
3865 
3866             __kmp_destroy_user_lock_ =
3867               ( void ( * )( kmp_user_lock_p ) )
3868               ( &__kmp_destroy_queuing_lock );
3869 
3870              __kmp_is_user_lock_initialized_ =
3871                ( int ( * )( kmp_user_lock_p ) )
3872                ( &__kmp_is_queuing_lock_initialized );
3873 
3874              __kmp_get_user_lock_location_ =
3875                ( const ident_t * ( * )( kmp_user_lock_p ) )
3876                ( &__kmp_get_queuing_lock_location );
3877 
3878              __kmp_set_user_lock_location_ =
3879                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3880                ( &__kmp_set_queuing_lock_location );
3881 
3882              __kmp_get_user_lock_flags_ =
3883                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3884                ( &__kmp_get_queuing_lock_flags );
3885 
3886              __kmp_set_user_lock_flags_ =
3887                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3888                ( &__kmp_set_queuing_lock_flags );
3889         }
3890         break;
3891 
3892 #if KMP_USE_ADAPTIVE_LOCKS
3893         case lk_adaptive: {
3894             __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3895             __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3896 
3897             __kmp_get_user_lock_owner_ =
3898               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3899               ( &__kmp_get_queuing_lock_owner );
3900 
3901             if ( __kmp_env_consistency_check ) {
3902                 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3903             }
3904             else {
3905                 KMP_BIND_USER_LOCK(adaptive);
3906             }
3907 
3908             __kmp_destroy_user_lock_ =
3909               ( void ( * )( kmp_user_lock_p ) )
3910               ( &__kmp_destroy_adaptive_lock );
3911 
3912             __kmp_is_user_lock_initialized_ =
3913               ( int ( * )( kmp_user_lock_p ) )
3914               ( &__kmp_is_queuing_lock_initialized );
3915 
3916             __kmp_get_user_lock_location_ =
3917               ( const ident_t * ( * )( kmp_user_lock_p ) )
3918               ( &__kmp_get_queuing_lock_location );
3919 
3920             __kmp_set_user_lock_location_ =
3921               ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3922               ( &__kmp_set_queuing_lock_location );
3923 
3924             __kmp_get_user_lock_flags_ =
3925               ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3926               ( &__kmp_get_queuing_lock_flags );
3927 
3928             __kmp_set_user_lock_flags_ =
3929               ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3930               ( &__kmp_set_queuing_lock_flags );
3931 
3932         }
3933         break;
3934 #endif // KMP_USE_ADAPTIVE_LOCKS
3935 
3936         case lk_drdpa: {
3937             __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3938             __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3939 
3940             __kmp_get_user_lock_owner_ =
3941               ( kmp_int32 ( * )( kmp_user_lock_p ) )
3942               ( &__kmp_get_drdpa_lock_owner );
3943 
3944             if ( __kmp_env_consistency_check ) {
3945                 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3946                 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3947             }
3948             else {
3949                 KMP_BIND_USER_LOCK(drdpa);
3950                 KMP_BIND_NESTED_USER_LOCK(drdpa);
3951             }
3952 
3953             __kmp_destroy_user_lock_ =
3954               ( void ( * )( kmp_user_lock_p ) )
3955               ( &__kmp_destroy_drdpa_lock );
3956 
3957              __kmp_is_user_lock_initialized_ =
3958                ( int ( * )( kmp_user_lock_p ) )
3959                ( &__kmp_is_drdpa_lock_initialized );
3960 
3961              __kmp_get_user_lock_location_ =
3962                ( const ident_t * ( * )( kmp_user_lock_p ) )
3963                ( &__kmp_get_drdpa_lock_location );
3964 
3965              __kmp_set_user_lock_location_ =
3966                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3967                ( &__kmp_set_drdpa_lock_location );
3968 
3969              __kmp_get_user_lock_flags_ =
3970                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3971                ( &__kmp_get_drdpa_lock_flags );
3972 
3973              __kmp_set_user_lock_flags_ =
3974                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3975                ( &__kmp_set_drdpa_lock_flags );
3976         }
3977         break;
3978     }
3979 }
3980 
3981 
3982 // ----------------------------------------------------------------------------
3983 // User lock table & lock allocation
3984 
3985 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3986 kmp_user_lock_p __kmp_lock_pool = NULL;
3987 
3988 // Lock block-allocation support.
3989 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3990 int __kmp_num_locks_in_block = 1;             // FIXME - tune this value
3991 
3992 static kmp_lock_index_t
3993 __kmp_lock_table_insert( kmp_user_lock_p lck )
3994 {
3995     // Assume that kmp_global_lock is held upon entry/exit.
3996     kmp_lock_index_t index;
3997     if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3998         kmp_lock_index_t size;
3999         kmp_user_lock_p *table;
4000         // Reallocate lock table.
4001         if ( __kmp_user_lock_table.allocated == 0 ) {
4002             size = 1024;
4003         }
4004         else {
4005             size = __kmp_user_lock_table.allocated * 2;
4006         }
4007         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
4008         KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
4009         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
            // We cannot free the previous table now, since it may be in use by other
            // threads. So save the pointer to the previous table in the first element of
            // the new table. All the tables will be organized into a list, and can be
            // freed when the library shuts down.
4014         __kmp_user_lock_table.table = table;
4015         __kmp_user_lock_table.allocated = size;
4016     }
4017     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
4018     index = __kmp_user_lock_table.used;
4019     __kmp_user_lock_table.table[ index ] = lck;
4020     ++ __kmp_user_lock_table.used;
4021     return index;
4022 }
4023 
4024 static kmp_user_lock_p
4025 __kmp_lock_block_allocate()
4026 {
4027     // Assume that kmp_global_lock is held upon entry/exit.
4028     static int last_index = 0;
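    // Locks are carved sequentially out of the most recent block; the
    // kmp_block_of_locks header lives just past the lock storage at the end of
    // the same allocation.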
4029     if ( ( last_index >= __kmp_num_locks_in_block )
4030       || ( __kmp_lock_blocks == NULL ) ) {
4031         // Restart the index.
4032         last_index = 0;
4033         // Need to allocate a new block.
4034         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4035         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
4036         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
4037         // Set up the new block.
4038         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
4039         new_block->next_block = __kmp_lock_blocks;
4040         new_block->locks = (void *)buffer;
4041         // Publish the new block.
4042         KMP_MB();
4043         __kmp_lock_blocks = new_block;
4044     }
4045     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
4046       [ last_index * __kmp_user_lock_size ] ) );
4047     last_index++;
4048     return ret;
4049 }
4050 
4051 //
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
4054 //
4055 kmp_user_lock_p
4056 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
4057   kmp_lock_flags_t flags )
4058 {
4059     kmp_user_lock_p lck;
4060     kmp_lock_index_t index;
4061     KMP_DEBUG_ASSERT( user_lock );
4062 
4063     __kmp_acquire_lock( &__kmp_global_lock, gtid );
4064 
4065     if ( __kmp_lock_pool == NULL ) {
4066         // Lock pool is empty. Allocate new memory.
4067 
        // ANNOTATION: Found no good way to express the synchronisation
        // between allocation and usage, so ignore the allocation
4070         ANNOTATE_IGNORE_WRITES_BEGIN();
4071         if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
4072             lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
4073         }
4074         else {
4075             lck = __kmp_lock_block_allocate();
4076         }
4077         ANNOTATE_IGNORE_WRITES_END();
4078 
        // Insert the lock into the table so that it can be freed in __kmp_cleanup,
        // and so the debugger has info on all allocated locks.
4081         index = __kmp_lock_table_insert( lck );
4082     }
4083     else {
4084         // Pick up lock from pool.
4085         lck = __kmp_lock_pool;
4086         index = __kmp_lock_pool->pool.index;
4087         __kmp_lock_pool = __kmp_lock_pool->pool.next;
4088     }
4089 
4090     //
4091     // We could potentially differentiate between nested and regular locks
4092     // here, and do the lock table lookup for regular locks only.
4093     //
4094     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4095         * ( (kmp_lock_index_t *) user_lock ) = index;
4096     }
4097     else {
4098         * ( (kmp_user_lock_p *) user_lock ) = lck;
4099     }
4100 
    // Mark the lock if it is a critical section lock.
4102     __kmp_set_user_lock_flags( lck, flags );
4103 
    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
4105 
4106     return lck;
4107 }
4108 
4109 // Put lock's memory to pool for reusing.
4110 void
4111 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
4112 {
4113     KMP_DEBUG_ASSERT( user_lock != NULL );
4114     KMP_DEBUG_ASSERT( lck != NULL );
4115 
4116     __kmp_acquire_lock( & __kmp_global_lock, gtid );
4117 
4118     lck->pool.next = __kmp_lock_pool;
4119     __kmp_lock_pool = lck;
4120     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4121         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4122         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4123         lck->pool.index = index;
4124     }
4125 
4126     __kmp_release_lock( & __kmp_global_lock, gtid );
4127 }
4128 
4129 kmp_user_lock_p
4130 __kmp_lookup_user_lock( void **user_lock, char const *func )
4131 {
4132     kmp_user_lock_p lck = NULL;
4133 
4134     if ( __kmp_env_consistency_check ) {
4135         if ( user_lock == NULL ) {
4136             KMP_FATAL( LockIsUninitialized, func );
4137         }
4138     }
4139 
4140     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4141         kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4142         if ( __kmp_env_consistency_check ) {
4143             if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4144                 KMP_FATAL( LockIsUninitialized, func );
4145             }
4146         }
4147         KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4148         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4149         lck = __kmp_user_lock_table.table[index];
4150     }
4151     else {
4152         lck = *( (kmp_user_lock_p *)user_lock );
4153     }
4154 
4155     if ( __kmp_env_consistency_check ) {
4156         if ( lck == NULL ) {
4157             KMP_FATAL( LockIsUninitialized, func );
4158         }
4159     }
4160 
4161     return lck;
4162 }
4163 
4164 void
4165 __kmp_cleanup_user_locks( void )
4166 {
4167     //
    // Reset the lock pool. Do not worry about the locks in the pool -- we will
    // free them when iterating through the lock table (it includes all the locks,
    // dead or alive).
4171     //
4172     __kmp_lock_pool = NULL;
4173 
4174 #define IS_CRITICAL(lck) \
4175         ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4176         ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
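    // Locks flagged as critical-section locks are owned by the runtime; users are
    // not expected to destroy them, so they are excluded from the warning below.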
4177 
4178     //
4179     // Loop through lock table, free all locks.
4180     //
4181     // Do not free item [0], it is reserved for lock tables list.
4182     //
4183     // FIXME - we are iterating through a list of (pointers to) objects of
4184     // type union kmp_user_lock, but we have no way of knowing whether the
4185     // base type is currently "pool" or whatever the global user lock type
4186     // is.
4187     //
4188     // We are relying on the fact that for all of the user lock types
4189     // (except "tas"), the first field in the lock struct is the "initialized"
4190     // field, which is set to the address of the lock object itself when
4191     // the lock is initialized.  When the union is of type "pool", the
4192     // first field is a pointer to the next object in the free list, which
4193     // will not be the same address as the object itself.
4194     //
4195     // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4196     // will fail for "pool" objects on the free list.  This must happen as
4197     // the "location" field of real user locks overlaps the "index" field
4198     // of "pool" objects.
4199     //
4200     // It would be better to run through the free list, and remove all "pool"
4201     // objects from the lock table before executing this loop.  However,
4202     // "pool" objects do not always have their index field set (only on
4203     // lin_32e), and I don't want to search the lock table for the address
4204     // of every "pool" object on the free list.
4205     //
4206     while ( __kmp_user_lock_table.used > 1 ) {
4207         const ident *loc;
4208 
4209         //
4210         // reduce __kmp_user_lock_table.used before freeing the lock,
4211         // so that state of locks is consistent
4212         //
4213         kmp_user_lock_p lck = __kmp_user_lock_table.table[
4214           --__kmp_user_lock_table.used ];
4215 
4216         if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4217           ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4218             //
4219             // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4220             // initialized AND it is NOT a critical section (user is not
4221             // responsible for destroying criticals) AND we know source
4222             // location to report.
4223             //
4224             if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4225               ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4226               ( loc->psource != NULL ) ) {
4227                 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4228                 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4229                 __kmp_str_loc_free( &str_loc);
4230             }
4231 
4232 #ifdef KMP_DEBUG
4233             if ( IS_CRITICAL( lck ) ) {
4234                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4235             }
4236             else {
4237                 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4238             }
4239 #endif // KMP_DEBUG
4240 
4241             //
4242             // Cleanup internal lock dynamic resources
4243             // (for drdpa locks particularly).
4244             //
4245             __kmp_destroy_user_lock( lck );
4246         }
4247 
4248         //
4249         // Free the lock if block allocation of locks is not used.
4250         //
4251         if ( __kmp_lock_blocks == NULL ) {
4252             __kmp_free( lck );
4253         }
4254     }
4255 
4256 #undef IS_CRITICAL
4257 
4258     //
4259     // delete lock table(s).
4260     //
4261     kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4262     __kmp_user_lock_table.table = NULL;
4263     __kmp_user_lock_table.allocated = 0;
4264 
4265     while ( table_ptr != NULL ) {
4266         //
4267         // In the first element we saved the pointer to the previous
4268         // (smaller) lock table.
4269         //
4270         kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4271         __kmp_free( table_ptr );
4272         table_ptr = next;
4273     }
4274 
4275     //
4276     // Free buffers allocated for blocks of locks.
4277     //
4278     kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4279     __kmp_lock_blocks = NULL;
4280 
4281     while ( block_ptr != NULL ) {
4282         kmp_block_of_locks_t *next = block_ptr->next_block;
4283         __kmp_free( block_ptr->locks );
4284         //
4285         // *block_ptr itself was allocated at the end of the locks vector.
4286         //
        block_ptr = next;
4288     }
4289 
4290     TCW_4(__kmp_init_user_locks, FALSE);
4291 }
4292 
4293 #endif // KMP_USE_DYNAMIC_LOCK
4294