/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
// we just define the constants here and don't include <futex.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT    0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE    1
# endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32  x, y;

    /* Check to make sure unsigned arithmetic wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}


/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//
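//
// A test-and-set lock is a single 32-bit poll word.  KMP_LOCK_FREE(tas) marks
// the lock as free; an owner stores KMP_LOCK_BUSY(gtid+1, tas), i.e. its
// global thread id biased by one so that gtid 0 is distinguishable from
// "free".  __kmp_get_tas_lock_owner() strips the tag and undoes the bias,
// returning -1 when the lock is unowned.
//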
static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    kmp_backoff_t backoff = __kmp_spin_backoff_params;
    while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {

        __kmp_spin_backoff(&backoff);
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval = __kmp_acquire_tas_lock_timed_template( lck, gtid );
    ANNOTATE_TAS_ACQUIRED(lck);
    return retval;
}

static int
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_tas_lock( lck, gtid );
}

int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}

int
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    ANNOTATE_TAS_RELEASED(lck);
    KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ?
      __kmp_avail_proc : __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_tas_lock( lck, gtid );
}

void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
}

static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}

void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}


//
// nested test and set locks
//
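//
// Nested (omp_nest_lock_t) variants add a depth_locked counter on top of the
// same poll word: a re-acquire by the owning thread just bumps the depth and
// returns KMP_LOCK_ACQUIRED_NEXT, while the outermost acquire takes the
// underlying lock and sets the depth to 1.  depth_locked is -1 for simple
// locks, which is what the *_with_checks routines test when diagnosing
// simple/nestable misuse.
//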
int
__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_tas_lock_timed_template( lck, gtid );
        ANNOTATE_TAS_ACQUIRED(lck);
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_tas_lock( lck, gtid );
}

int
__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}

int
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_tas_lock( lck, gtid );
}

void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}

void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}


#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).
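//
// The futex poll word encodes the owner as (gtid+1) << 1, keeping bit 0 free
// as a "waiters present" flag.  A contended acquirer first tries to set bit 0
// so the owner knows a wake-up is required, then blocks in futex(FUTEX_WAIT)
// on the current poll value.  The release path exchanges the word back to
// KMP_LOCK_FREE(futex) and, if bit 0 was set, issues a futex(FUTEX_WAKE) for
// one waiter.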

static kmp_int32
__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
{
    return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
}

static inline bool
__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    kmp_int32 gtid_code = ( gtid + 1 ) << 1;

    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
    if ( ( curr != 0 ) && ( curr != gtid_code ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    KMP_FSYNC_PREPARE( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    kmp_int32 poll_val;

    while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
      KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {

        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
           lck, gtid, poll_val, cond ) );

        //
        // NOTE: if you try to use the following condition for this branch
        //
        // if ( poll_val & 1 == 0 )
        //
        // Then the 12.0 compiler has a bug where the following block will
        // always be skipped, regardless of the value of the LSB of poll_val.
        //
        if ( ! cond ) {
            //
            // Try to set the lsb in the poll to indicate to the owner
            // thread that they need to wake this thread up.
            //
            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
                  lck, lck->lk.poll, gtid ) );
                continue;
            }
            poll_val |= KMP_LOCK_BUSY(1, futex);

            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
              lck, lck->lk.poll, gtid ) );
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );

        kmp_int32 rc;
        if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
          poll_val, NULL, NULL, 0 ) ) != 0 ) {
            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
               lck, gtid, poll_val, rc, errno ) );
            continue;
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );
        //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
        // wake call when releasing the lock, as we have no idea how many
        // other threads are in the queue.
        //
        gtid_code |= 1;
    }

    KMP_FSYNC_ACQUIRED( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval = __kmp_acquire_futex_lock_timed_template( lck, gtid );
    ANNOTATE_FUTEX_ACQUIRED(lck);
    return retval;
}

static int
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_futex_lock( lck, gtid );
}

int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}

int
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);
    ANNOTATE_FUTEX_RELEASED(lck);

    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
       lck, gtid, poll_val ) );

    if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
           lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
    }

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ?
      __kmp_avail_proc : __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_futex_lock( lck, gtid );
}

void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
}

static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}

void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}


//
// nested futex locks
//

int
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        ANNOTATE_FUTEX_ACQUIRED(lck);
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_futex_lock( lck, gtid );
}

int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}

int
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_futex_lock( lck, gtid );
}

void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}

void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}

#endif // KMP_USE_FUTEX


/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */
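//
// A ticket lock is the bakery algorithm specialized to a single "now serving"
// counter: each acquirer atomically takes a ticket from lk.next_ticket and
// then waits until lk.now_serving reaches that ticket, which makes the lock
// FIFO-fair.  Correct behaviour under ticket wrap-around is exactly the
// unsigned-arithmetic property asserted in __kmp_validate_locks().
//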

static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return std::atomic_load_explicit( &lck->lk.owner_id, std::memory_order_relaxed ) - 1;
}

static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return std::atomic_load_explicit( &lck->lk.depth_locked, std::memory_order_relaxed ) != -1;
}

static kmp_uint32
__kmp_bakery_check( void *now_serving, kmp_uint32 my_ticket )
{
    return std::atomic_load_explicit( (std::atomic<unsigned> *)now_serving, std::memory_order_acquire ) == my_ticket;
}

__forceinline static int
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed );

#ifdef USE_LOCK_PROFILE
    if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_acquire ) == my_ticket ) {
        return KMP_LOCK_ACQUIRED_FIRST;
    }
    KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval = __kmp_acquire_ticket_lock_timed_template( lck, gtid );
    ANNOTATE_TICKET_ACQUIRED(lck);
    return retval;
}

static int
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed );

    if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket,
          &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) {
            return TRUE;
        }
    }
    return FALSE;
}

static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
    }
    return retval;
}

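//
// Releasing a ticket lock is a release-ordered increment of lk.now_serving.
// The (next_ticket - now_serving) distance sampled below estimates how many
// threads are still queued; if it exceeds the number of available processors
// we yield, since some of those waiters are probably not currently running.
//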
int
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 distance = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ) - std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed );

    ANNOTATE_TICKET_RELEASED(lck);
    std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release );

    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
    return __kmp_release_ticket_lock( lck, gtid );
}

void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    lck->lk.self = lck;
    std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );       // no thread owns the lock.
    std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed );  // -1 => not a nested lock.
    std::atomic_store_explicit( &lck->lk.initialized, true, std::memory_order_release );
}

static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}

void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    std::atomic_store_explicit( &lck->lk.initialized, false, std::memory_order_release );
    lck->lk.self = NULL;
    lck->lk.location = NULL;
    std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
    std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed );
}

static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}


//
// nested ticket locks
//

int
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        ANNOTATE_TICKET_ACQUIRED(lck);
        std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
        std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_ticket_lock( lck, gtid );
}

int
__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ) + 1;
    }
    else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
        std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
        retval = 1;
    }
    return retval;
}

static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}

int
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ) - 1 ) == 0 ) {
        std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
        __kmp_release_ticket_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_ticket_lock( lck, gtid );
}

void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_nested_ticket_lock( lck );
}

void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
    std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed );
}

static void
__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";

    if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( lck->lk.self != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_ticket_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
{
    return std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) && ( lck->lk.self == lck);
}

static const ident_t *
__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/*
 * First the states
 * (head,tail) =              0, 0  means lock is unheld, nobody on queue
 *              UINT_MAX or -1, 0   means lock is held, nobody on queue
 *                            h, h  means lock is held or about to transition, 1 element on queue
 *                            h, t  h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *    Acquire(0,0)  = -1 ,0
 *    Release(0,0)  = Error
 *    Acquire(-1,0) =  h ,h    h > 0
 *    Release(-1,0) =  0 ,0
 *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *    Release(h,h)  = -1 ,0    h > 0
 *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *               ^                                    |
 *               |                                    |
 *               +------------------------------------+
 *
 */
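/*
 * Implementation note: head_id/tail_id hold global thread ids (biased by 1),
 * and the waiting threads themselves form the queue, linked through each
 * waiter's th.th_next_waiting field.  The two ids are kept adjacent (see the
 * offsetof assertion in __kmp_validate_locks) so the empty-queue transitions
 * can be made with a single 64-bit compare-and-store via KMP_PACK_64.  Each
 * waiter spins only on its own th.th_spin_here flag, so a release touches the
 * departing waiter's flag rather than a shared poll word.
 */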

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE   1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
static int tc = 0;
#define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z );
#define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );

static void
__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
{
    kmp_int32 t, i;

    __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );

    i = tc % TRACE_BUF_ELE;
    __kmp_printf_no_lock( "%s\n", traces[i] );
    i = (i+1) % TRACE_BUF_ELE;
    while ( i != (tc % TRACE_BUF_ELE) ) {
        __kmp_printf_no_lock( "%s", traces[i] );
        i = (i+1) % TRACE_BUF_ELE;
    }
    __kmp_printf_no_lock( "\n" );

    __kmp_printf_no_lock(
             "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
             gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
             head_id, tail_id );

    __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );

    if ( lck->lk.head_id >= 1 ) {
        t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
        while (t > 0) {
            __kmp_printf_no_lock( "-> %d ", t );
            t = __kmp_threads[t-1]->th.th_next_waiting;
        }
    }
    __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
    __kmp_printf_no_lock( "\n\n" );
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32
__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
    volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
    volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
    volatile kmp_uint32 *spin_here_p;
    kmp_int32 need_mf = 1;

#if OMPT_SUPPORT
    ompt_state_t prev_state = ompt_state_undefined;
#endif

    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));

    KMP_FSYNC_PREPARE( lck );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    spin_here_p = & this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "acq ent" );
    if ( *spin_here_p )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !*spin_here_p );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );


    /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
       that may follow, not just in execution order, but also in visibility order.  This way,
       when a releasing thread observes the changes to the queue by this thread, it can
       rightly assume that spin_here_p has already been set to TRUE, so that when it sets
       spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
       to FALSE before this thread sets it to TRUE, this thread will hang.
    */
    *spin_here_p = TRUE;  /* before enqueuing to prevent race */

    while( 1 ) {
        kmp_int32 enqueued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

        switch ( head ) {

            case -1:
            {
#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                              this assignment prevents us from entering the if ( t > 0 )
                              condition in the enqueued case below, which is not necessary for
                              this state transition */

                need_mf = 0;
                /* try (-1,0)->(tid,tid) */
                enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
                  KMP_PACK_64( -1, 0 ),
                  KMP_PACK_64( gtid+1, gtid+1 ) );
#ifdef DEBUG_QUEUING_LOCKS
                if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
#endif
            }
            break;

            default:
            {
                tail = *tail_id_p;
                KMP_DEBUG_ASSERT( tail != gtid + 1 );

#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif

                if ( tail == 0 ) {
                    enqueued = FALSE;
                }
                else {
                    need_mf = 0;
                    /* try (h,t) or (h,h)->(h,tid) */
                    enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );

#ifdef DEBUG_QUEUING_LOCKS
                    if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
#endif
                }
            }
            break;

            case 0: /* empty queue */
            {
                kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                /* try (0,0)->(-1,0) */

                /* only legal transition out of head = 0 is head = -1 with no change to tail */
                grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );

                if ( grabbed_lock ) {

                    *spin_here_p = FALSE;

                    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
                              lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                    TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif

#if OMPT_SUPPORT
                    if (ompt_enabled && prev_state != ompt_state_undefined) {
                        /* change the state before clearing wait_id */
                        this_thr->th.ompt_thread_info.state = prev_state;
                        this_thr->th.ompt_thread_info.wait_id = 0;
                    }
#endif

                    KMP_FSYNC_ACQUIRED( lck );
                    return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
                }
                enqueued = FALSE;
            }
            break;
        }

#if OMPT_SUPPORT
        if (ompt_enabled && prev_state == ompt_state_undefined) {
            /* this thread will spin; set wait_id before entering wait state */
            prev_state = this_thr->th.ompt_thread_info.state;
            this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
            this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
        }
#endif

        if ( enqueued ) {
            if ( tail > 0 ) {
                kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
                KMP_ASSERT( tail_thr != NULL );
                tail_thr->th.th_next_waiting = gtid+1;
                /* corresponding wait for this write in release code */
            }
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));


            /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
             *       throughput only here.
             */
            KMP_MB();
            KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq spin" );

            if ( this_thr->th.th_next_waiting != 0 )
                __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
            KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
                      lck, gtid ));

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq exit 2" );
#endif

#if OMPT_SUPPORT
            /* change the state before clearing wait_id */
            this_thr->th.ompt_thread_info.state = prev_state;
            this_thr->th.ompt_thread_info.wait_id = 0;
#endif

            /* got lock, we were dequeued by the thread that released lock */
            return KMP_LOCK_ACQUIRED_FIRST;
        }

        /* Yield if number of threads > number of logical processors */
        /* ToDo: Not sure why this should only be in oversubscription case,
           maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
        KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) );
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "acq retry" );
#endif

    }
    KMP_ASSERT2( 0, "should not get here" );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    int retval = __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
    ANNOTATE_QUEUING_ACQUIRED(lck);
    return retval;
}

static int
__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_queuing_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
    kmp_int32 head;
#ifdef KMP_DEBUG
    kmp_info_t *this_thr;
#endif

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
#ifdef KMP_DEBUG
    this_thr = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
#endif

    head = *head_id_p;

    if ( head == 0 ) { /* nobody on queue, nobody holding */

        /* try (0,0)->(-1,0) */

        if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
            KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
            KMP_FSYNC_ACQUIRED(lck);
            ANNOTATE_QUEUING_ACQUIRED(lck);
            return TRUE;
        }
    }

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
    return FALSE;
}

static int
__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_queuing_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

int
__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    register kmp_info_t *this_thr;
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;

    KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
    this_thr    = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "rel ent" );

    if ( this_thr->th.th_spin_here )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );

    KMP_FSYNC_RELEASING(lck);
    ANNOTATE_QUEUING_RELEASED(lck);

    while( 1 ) {
        kmp_int32 dequeued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
        tail = *tail_id_p;
        TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
        if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
        KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */

        if ( head == -1 ) { /* nobody on queue */

            /* try (-1,0)->(0,0) */
            if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
                KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
                          lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
#endif

#if OMPT_SUPPORT
                /* nothing to do - no other thread is trying to shift blame */
#endif

                return KMP_LOCK_RELEASED;
            }
            dequeued = FALSE;

        }
        else {

            tail = *tail_id_p;
            if ( head == tail ) {  /* only one thread on the queue */

#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 );

                /* try (h,h)->(-1,0) */
                dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
                  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
#endif

            }
            else {
                volatile kmp_int32 *waiting_id_p;
                kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
                KMP_DEBUG_ASSERT( head_thr != NULL );
                waiting_id_p = & head_thr->th.th_next_waiting;

                /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

                /* try (h,t)->(h',t) or (t,t) */

                KMP_MB();
                /* make sure enqueuing thread has time to update next waiting thread field */
                *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
#endif
                dequeued = TRUE;
            }
        }

        if ( dequeued ) {
            kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
            KMP_DEBUG_ASSERT( head_thr != NULL );

            /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
            if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
            KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

            /* For clean code only.
             * Thread not released until next statement prevents race with acquire code.
             */
            head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
#endif

            KMP_MB();
            /* reset spin value */
            head_thr->th.th_spin_here = FALSE;

            KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
                      lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "rel exit 2" );
#endif
            return KMP_LOCK_RELEASED;
        }
        /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */

#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "rel retry" );
#endif

    } /* while */
    KMP_ASSERT2( 0, "should not get here" );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_queuing_lock( lck, gtid );
}

void
__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
    lck->lk.initialized = lck;

    KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}

static void
__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
}

void
__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_queuing_lock( lck );
}


//
// nested queuing locks
//

int
__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
        ANNOTATE_QUEUING_ACQUIRED(lck);
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_queuing_lock( lck, gtid );
}

int
__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_queuing_lock( lck, gtid );
}

int
__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_queuing_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_queuing_lock( lck, gtid );
}

void
__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_nested_queuing_lock( lck );
}

void
__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
{
    __kmp_destroy_queuing_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_queuing_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

static const ident_t *
__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

#if KMP_USE_ADAPTIVE_LOCKS

/*
    RTM Adaptive locks
*/

#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300

#include <immintrin.h>
#define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#else

// Values from the status register after failed speculation.
1921 #define _XBEGIN_STARTED (~0u) 1922 #define _XABORT_EXPLICIT (1 << 0) 1923 #define _XABORT_RETRY (1 << 1) 1924 #define _XABORT_CONFLICT (1 << 2) 1925 #define _XABORT_CAPACITY (1 << 3) 1926 #define _XABORT_DEBUG (1 << 4) 1927 #define _XABORT_NESTED (1 << 5) 1928 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1929 1930 // Aborts for which it's worth trying again immediately 1931 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1932 1933 #define STRINGIZE_INTERNAL(arg) #arg 1934 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1935 1936 // Access to RTM instructions 1937 1938 /* 1939 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. 1940 This is the same definition as the compiler intrinsic that will be supported at some point. 1941 */ 1942 static __inline int _xbegin() 1943 { 1944 int res = -1; 1945 1946 #if KMP_OS_WINDOWS 1947 #if KMP_ARCH_X86_64 1948 _asm { 1949 _emit 0xC7 1950 _emit 0xF8 1951 _emit 2 1952 _emit 0 1953 _emit 0 1954 _emit 0 1955 jmp L2 1956 mov res, eax 1957 L2: 1958 } 1959 #else /* IA32 */ 1960 _asm { 1961 _emit 0xC7 1962 _emit 0xF8 1963 _emit 2 1964 _emit 0 1965 _emit 0 1966 _emit 0 1967 jmp L2 1968 mov res, eax 1969 L2: 1970 } 1971 #endif // KMP_ARCH_X86_64 1972 #else 1973 /* Note that %eax must be noted as killed (clobbered), because 1974 * the XSR is returned in %eax(%rax) on abort. Other register 1975 * values are restored, so don't need to be killed. 1976 * 1977 * We must also mark 'res' as an input and an output, since otherwise 1978 * 'res=-1' may be dropped as being dead, whereas we do need the 1979 * assignment on the successful (i.e., non-abort) path. 1980 */ 1981 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" 1982 " .long 1f-1b-6\n" 1983 " jmp 2f\n" 1984 "1: movl %%eax,%0\n" 1985 "2:" 1986 :"+r"(res)::"memory","%eax"); 1987 #endif // KMP_OS_WINDOWS 1988 return res; 1989 } 1990 1991 /* 1992 Transaction end 1993 */ 1994 static __inline void _xend() 1995 { 1996 #if KMP_OS_WINDOWS 1997 __asm { 1998 _emit 0x0f 1999 _emit 0x01 2000 _emit 0xd5 2001 } 2002 #else 2003 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory"); 2004 #endif 2005 } 2006 2007 /* 2008 This is a macro, the argument must be a single byte constant which 2009 can be evaluated by the inline assembler, since it is emitted as a 2010 byte into the assembly code. 2011 */ 2012 #if KMP_OS_WINDOWS 2013 #define _xabort(ARG) \ 2014 _asm _emit 0xc6 \ 2015 _asm _emit 0xf8 \ 2016 _asm _emit ARG 2017 #else 2018 #define _xabort(ARG) \ 2019 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory"); 2020 #endif 2021 2022 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 2023 2024 // 2025 // Statistics is collected for testing purpose 2026 // 2027 #if KMP_DEBUG_ADAPTIVE_LOCKS 2028 2029 // We accumulate speculative lock statistics when the lock is destroyed. 2030 // We keep locks that haven't been destroyed in the liveLocks list 2031 // so that we can grab their statistics too. 2032 static kmp_adaptive_lock_statistics_t destroyedStats; 2033 2034 // To hold the list of live locks. 2035 static kmp_adaptive_lock_info_t liveLocks; 2036 2037 // A lock so we can safely update the list of locks. 2038 static kmp_bootstrap_lock_t chain_lock; 2039 2040 // Initialize the list of stats. 
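// liveLocks is the sentinel of a circular, doubly linked list threaded
// through the stats.next/stats.prev fields, so insertion and removal in
// __kmp_remember_lock/__kmp_forget_lock need no NULL checks.  The invariant
// the assertions below re-check, in sketch form (illustrative only):
//
//   for every node n on the ring:  n->stats.next->stats.prev == n
//                                  n->stats.prev->stats.next == n
//
// and an empty list is simply the sentinel pointing at itself both ways.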
2041 void 2042 __kmp_init_speculative_stats() 2043 { 2044 kmp_adaptive_lock_info_t *lck = &liveLocks; 2045 2046 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); 2047 lck->stats.next = lck; 2048 lck->stats.prev = lck; 2049 2050 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2051 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2052 2053 __kmp_init_bootstrap_lock( &chain_lock ); 2054 2055 } 2056 2057 // Insert the lock into the circular list 2058 static void 2059 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) 2060 { 2061 __kmp_acquire_bootstrap_lock( &chain_lock ); 2062 2063 lck->stats.next = liveLocks.stats.next; 2064 lck->stats.prev = &liveLocks; 2065 2066 liveLocks.stats.next = lck; 2067 lck->stats.next->stats.prev = lck; 2068 2069 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2070 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2071 2072 __kmp_release_bootstrap_lock( &chain_lock ); 2073 } 2074 2075 static void 2076 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) 2077 { 2078 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2079 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2080 2081 kmp_adaptive_lock_info_t * n = lck->stats.next; 2082 kmp_adaptive_lock_info_t * p = lck->stats.prev; 2083 2084 n->stats.prev = p; 2085 p->stats.next = n; 2086 } 2087 2088 static void 2089 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) 2090 { 2091 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); 2092 __kmp_remember_lock( lck ); 2093 } 2094 2095 static void 2096 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) 2097 { 2098 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 2099 2100 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 2101 t->successfulSpeculations += s->successfulSpeculations; 2102 t->hardFailedSpeculations += s->hardFailedSpeculations; 2103 t->softFailedSpeculations += s->softFailedSpeculations; 2104 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 2105 t->lemmingYields += s->lemmingYields; 2106 } 2107 2108 static void 2109 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) 2110 { 2111 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 2112 2113 __kmp_acquire_bootstrap_lock( &chain_lock ); 2114 2115 __kmp_add_stats( &destroyedStats, lck ); 2116 __kmp_forget_lock( lck ); 2117 2118 __kmp_release_bootstrap_lock( &chain_lock ); 2119 } 2120 2121 static float 2122 percent (kmp_uint32 count, kmp_uint32 total) 2123 { 2124 return (total == 0) ? 0.0: (100.0 * count)/total; 2125 } 2126 2127 static 2128 FILE * __kmp_open_stats_file() 2129 { 2130 if (strcmp (__kmp_speculative_statsfile, "-") == 0) 2131 return stdout; 2132 2133 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20; 2134 char buffer[buffLen]; 2135 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile, 2136 (kmp_int32)getpid()); 2137 FILE * result = fopen(&buffer[0], "w"); 2138 2139 // Maybe we should issue a warning here... 2140 return result ? 
result : stdout; 2141 } 2142 2143 void 2144 __kmp_print_speculative_stats() 2145 { 2146 if (__kmp_user_lock_kind != lk_adaptive) 2147 return; 2148 2149 FILE * statsFile = __kmp_open_stats_file(); 2150 2151 kmp_adaptive_lock_statistics_t total = destroyedStats; 2152 kmp_adaptive_lock_info_t *lck; 2153 2154 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 2155 __kmp_add_stats( &total, lck ); 2156 } 2157 kmp_adaptive_lock_statistics_t *t = &total; 2158 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; 2159 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + 2160 t->softFailedSpeculations; 2161 2162 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); 2163 fprintf ( statsFile, " Lock parameters: \n" 2164 " max_soft_retries : %10d\n" 2165 " max_badness : %10d\n", 2166 __kmp_adaptive_backoff_params.max_soft_retries, 2167 __kmp_adaptive_backoff_params.max_badness); 2168 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); 2169 fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); 2170 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", 2171 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); 2172 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 2173 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); 2174 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); 2175 2176 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); 2177 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", 2178 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); 2179 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", 2180 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); 2181 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", 2182 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); 2183 2184 if (statsFile != stdout) 2185 fclose( statsFile ); 2186 } 2187 2188 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) 2189 #else 2190 # define KMP_INC_STAT(lck,stat) 2191 2192 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2193 2194 static inline bool 2195 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) 2196 { 2197 // It is enough to check that the head_id is zero. 2198 // We don't also need to check the tail. 2199 bool res = lck->lk.head_id == 0; 2200 2201 // We need a fence here, since we must ensure that no memory operations 2202 // from later in this thread float above that read. 2203 #if KMP_COMPILER_ICC 2204 _mm_mfence(); 2205 #else 2206 __sync_synchronize(); 2207 #endif 2208 2209 return res; 2210 } 2211 2212 // Functions for manipulating the badness 2213 static __inline void 2214 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) 2215 { 2216 // Reset the badness to zero so we eagerly try to speculate again 2217 lck->lk.adaptive.badness = 0; 2218 KMP_INC_STAT(lck,successfulSpeculations); 2219 } 2220 2221 // Create a bit mask with one more set bit. 
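// Because badness always has the form 2^k - 1 and acquire_attempts counts
// non-speculative acquire attempts, the (attempts & badness) == 0 test in
// __kmp_should_speculate() roughly admits one speculative try per 2^k
// fallback attempts.  A worked example (illustrative only):
//
//   badness = 0  ->  speculate on every attempt
//   badness = 1  ->  speculate on every 2nd attempt
//   badness = 3  ->  speculate on every 4th attempt
//   badness = 7  ->  speculate on every 8th attempt
//
// capped at max_badness; a successful speculation resets badness to zero.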
2222 static __inline void 2223 __kmp_step_badness( kmp_adaptive_lock_t *lck ) 2224 { 2225 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; 2226 if ( newBadness > lck->lk.adaptive.max_badness) { 2227 return; 2228 } else { 2229 lck->lk.adaptive.badness = newBadness; 2230 } 2231 } 2232 2233 // Check whether speculation should be attempted. 2234 static __inline int 2235 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2236 { 2237 kmp_uint32 badness = lck->lk.adaptive.badness; 2238 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; 2239 int res = (attempts & badness) == 0; 2240 return res; 2241 } 2242 2243 // Attempt to acquire only the speculative lock. 2244 // Does not back off to the non-speculative lock. 2245 // 2246 static int 2247 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2248 { 2249 int retries = lck->lk.adaptive.max_soft_retries; 2250 2251 // We don't explicitly count the start of speculation, rather we record 2252 // the results (success, hard fail, soft fail). The sum of all of those 2253 // is the total number of times we started speculation since all 2254 // speculations must end one of those ways. 2255 do 2256 { 2257 kmp_uint32 status = _xbegin(); 2258 // Switch this in to disable actual speculation but exercise 2259 // at least some of the rest of the code. Useful for debugging... 2260 // kmp_uint32 status = _XABORT_NESTED; 2261 2262 if (status == _XBEGIN_STARTED ) 2263 { /* We have successfully started speculation 2264 * Check that no-one acquired the lock for real between when we last looked 2265 * and now. This also gets the lock cache line into our read-set, 2266 * which we need so that we'll abort if anyone later claims it for real. 2267 */ 2268 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2269 { 2270 // Lock is now visibly acquired, so someone beat us to it. 2271 // Abort the transaction so we'll restart from _xbegin with the 2272 // failure status. 2273 _xabort(0x01); 2274 KMP_ASSERT2( 0, "should not get here" ); 2275 } 2276 return 1; // Lock has been acquired (speculatively) 2277 } else { 2278 // We have aborted, update the statistics 2279 if ( status & SOFT_ABORT_MASK) 2280 { 2281 KMP_INC_STAT(lck,softFailedSpeculations); 2282 // and loop round to retry. 2283 } 2284 else 2285 { 2286 KMP_INC_STAT(lck,hardFailedSpeculations); 2287 // Give up if we had a hard failure. 2288 break; 2289 } 2290 } 2291 } while( retries-- ); // Loop while we have retries, and didn't fail hard. 2292 2293 // Either we had a hard failure or we didn't succeed softly after 2294 // the full set of attempts, so back off the badness. 2295 __kmp_step_badness( lck ); 2296 return 0; 2297 } 2298 2299 // Attempt to acquire the speculative lock, or back off to the non-speculative one 2300 // if the speculative lock cannot be acquired. 2301 // We can succeed speculatively, non-speculatively, or fail. 2302 static int 2303 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2304 { 2305 // First try to acquire the lock speculatively 2306 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) 2307 return 1; 2308 2309 // Speculative acquisition failed, so try to acquire it non-speculatively. 2310 // Count the non-speculative acquire attempt 2311 lck->lk.adaptive.acquire_attempts++; 2312 2313 // Use base, non-speculative lock. 
2314 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) 2315 { 2316 KMP_INC_STAT(lck,nonSpeculativeAcquires); 2317 return 1; // Lock is acquired (non-speculatively) 2318 } 2319 else 2320 { 2321 return 0; // Failed to acquire the lock, it's already visibly locked. 2322 } 2323 } 2324 2325 static int 2326 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2327 { 2328 char const * const func = "omp_test_lock"; 2329 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2330 KMP_FATAL( LockIsUninitialized, func ); 2331 } 2332 2333 int retval = __kmp_test_adaptive_lock( lck, gtid ); 2334 2335 if ( retval ) { 2336 lck->lk.qlk.owner_id = gtid + 1; 2337 } 2338 return retval; 2339 } 2340 2341 // Block until we can acquire a speculative, adaptive lock. 2342 // We check whether we should be trying to speculate. 2343 // If we should be, we check the real lock to see if it is free, 2344 // and, if not, pause without attempting to acquire it until it is. 2345 // Then we try the speculative acquire. 2346 // This means that although we suffer from lemmings a little ( 2347 // because all we can't acquire the lock speculatively until 2348 // the queue of threads waiting has cleared), we don't get into a 2349 // state where we can never acquire the lock speculatively (because we 2350 // force the queue to clear by preventing new arrivals from entering the 2351 // queue). 2352 // This does mean that when we're trying to break lemmings, the lock 2353 // is no longer fair. However OpenMP makes no guarantee that its 2354 // locks are fair, so this isn't a real problem. 2355 static void 2356 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2357 { 2358 if ( __kmp_should_speculate( lck, gtid ) ) 2359 { 2360 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2361 { 2362 if ( __kmp_test_adaptive_lock_only( lck , gtid ) ) 2363 return; 2364 // We tried speculation and failed, so give up. 2365 } 2366 else 2367 { 2368 // We can't try speculation until the lock is free, so we 2369 // pause here (without suspending on the queueing lock, 2370 // to allow it to drain, then try again. 2371 // All other threads will also see the same result for 2372 // shouldSpeculate, so will be doing the same if they 2373 // try to claim the lock from now on. 2374 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2375 { 2376 KMP_INC_STAT(lck,lemmingYields); 2377 __kmp_yield (TRUE); 2378 } 2379 2380 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2381 return; 2382 } 2383 } 2384 2385 // Speculative acquisition failed, so acquire it non-speculatively. 2386 // Count the non-speculative acquire attempt 2387 lck->lk.adaptive.acquire_attempts++; 2388 2389 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid ); 2390 // We have acquired the base lock, so count that. 
2391 KMP_INC_STAT(lck,nonSpeculativeAcquires ); 2392 ANNOTATE_QUEUING_ACQUIRED(lck); 2393 } 2394 2395 static void 2396 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2397 { 2398 char const * const func = "omp_set_lock"; 2399 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2400 KMP_FATAL( LockIsUninitialized, func ); 2401 } 2402 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { 2403 KMP_FATAL( LockIsAlreadyOwned, func ); 2404 } 2405 2406 __kmp_acquire_adaptive_lock( lck, gtid ); 2407 2408 lck->lk.qlk.owner_id = gtid + 1; 2409 } 2410 2411 static int 2412 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2413 { 2414 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2415 { // If the lock doesn't look claimed we must be speculating. 2416 // (Or the user's code is buggy and they're releasing without locking; 2417 // if we had XTEST we'd be able to check that case...) 2418 _xend(); // Exit speculation 2419 __kmp_update_badness_after_success( lck ); 2420 } 2421 else 2422 { // Since the lock *is* visibly locked we're not speculating, 2423 // so should use the underlying lock's release scheme. 2424 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); 2425 } 2426 return KMP_LOCK_RELEASED; 2427 } 2428 2429 static int 2430 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2431 { 2432 char const * const func = "omp_unset_lock"; 2433 KMP_MB(); /* in case another processor initialized lock */ 2434 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2435 KMP_FATAL( LockIsUninitialized, func ); 2436 } 2437 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { 2438 KMP_FATAL( LockUnsettingFree, func ); 2439 } 2440 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { 2441 KMP_FATAL( LockUnsettingSetByAnother, func ); 2442 } 2443 lck->lk.qlk.owner_id = 0; 2444 __kmp_release_adaptive_lock( lck, gtid ); 2445 return KMP_LOCK_RELEASED; 2446 } 2447 2448 static void 2449 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) 2450 { 2451 __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); 2452 lck->lk.adaptive.badness = 0; 2453 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; 2454 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; 2455 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2456 #if KMP_DEBUG_ADAPTIVE_LOCKS 2457 __kmp_zero_speculative_stats( &lck->lk.adaptive ); 2458 #endif 2459 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2460 } 2461 2462 static void 2463 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) 2464 { 2465 __kmp_init_adaptive_lock( lck ); 2466 } 2467 2468 static void 2469 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) 2470 { 2471 #if KMP_DEBUG_ADAPTIVE_LOCKS 2472 __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); 2473 #endif 2474 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); 2475 // Nothing needed for the speculative part. 
2476 } 2477 2478 static void 2479 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) 2480 { 2481 char const * const func = "omp_destroy_lock"; 2482 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2483 KMP_FATAL( LockIsUninitialized, func ); 2484 } 2485 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { 2486 KMP_FATAL( LockStillOwned, func ); 2487 } 2488 __kmp_destroy_adaptive_lock( lck ); 2489 } 2490 2491 2492 #endif // KMP_USE_ADAPTIVE_LOCKS 2493 2494 2495 /* ------------------------------------------------------------------------ */ 2496 /* DRDPA ticket locks */ 2497 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2498 2499 static kmp_int32 2500 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) 2501 { 2502 return TCR_4( lck->lk.owner_id ) - 1; 2503 } 2504 2505 static inline bool 2506 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) 2507 { 2508 return lck->lk.depth_locked != -1; 2509 } 2510 2511 __forceinline static int 2512 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2513 { 2514 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); 2515 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2516 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2517 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2518 TCR_PTR(lck->lk.polls); // volatile load 2519 2520 #ifdef USE_LOCK_PROFILE 2521 if (TCR_8(polls[ticket & mask].poll) != ticket) 2522 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2523 /* else __kmp_printf( "." );*/ 2524 #endif /* USE_LOCK_PROFILE */ 2525 2526 // 2527 // Now spin-wait, but reload the polls pointer and mask, in case the 2528 // polling area has been reconfigured. Unless it is reconfigured, the 2529 // reloads stay in L1 cache and are cheap. 2530 // 2531 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!! 2532 // 2533 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2534 // and poll to be re-read every spin iteration. 2535 // 2536 kmp_uint32 spins; 2537 2538 KMP_FSYNC_PREPARE(lck); 2539 KMP_INIT_YIELD(spins); 2540 while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load 2541 // If we are oversubscribed, 2542 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2543 // CPU Pause is in the macros for yield. 2544 // 2545 KMP_YIELD(TCR_4(__kmp_nth) 2546 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2547 KMP_YIELD_SPIN(spins); 2548 2549 // Re-read the mask and the poll pointer from the lock structure. 2550 // 2551 // Make certain that "mask" is read before "polls" !!! 2552 // 2553 // If another thread picks reconfigures the polling area and updates 2554 // their values, and we get the new value of mask and the old polls 2555 // pointer, we could access memory beyond the end of the old polling 2556 // area. 2557 // 2558 mask = TCR_8(lck->lk.mask); // volatile load 2559 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2560 TCR_PTR(lck->lk.polls); // volatile load 2561 } 2562 2563 // 2564 // Critical section starts here 2565 // 2566 KMP_FSYNC_ACQUIRED(lck); 2567 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2568 ticket, lck)); 2569 lck->lk.now_serving = ticket; // non-volatile store 2570 2571 // 2572 // Deallocate a garbage polling area if we know that we are the last 2573 // thread that could possibly access it. 2574 // 2575 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2576 // ticket. 
2577 // 2578 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2579 __kmp_free((void *)lck->lk.old_polls); 2580 lck->lk.old_polls = NULL; 2581 lck->lk.cleanup_ticket = 0; 2582 } 2583 2584 // 2585 // Check to see if we should reconfigure the polling area. 2586 // If there is still a garbage polling area to be deallocated from a 2587 // previous reconfiguration, let a later thread reconfigure it. 2588 // 2589 if (lck->lk.old_polls == NULL) { 2590 bool reconfigure = false; 2591 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2592 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2593 2594 if (TCR_4(__kmp_nth) 2595 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2596 // 2597 // We are in oversubscription mode. Contract the polling area 2598 // down to a single location, if that hasn't been done already. 2599 // 2600 if (num_polls > 1) { 2601 reconfigure = true; 2602 num_polls = TCR_4(lck->lk.num_polls); 2603 mask = 0; 2604 num_polls = 1; 2605 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2606 __kmp_allocate(num_polls * sizeof(*polls)); 2607 polls[0].poll = ticket; 2608 } 2609 } 2610 else { 2611 // 2612 // We are in under/fully subscribed mode. Check the number of 2613 // threads waiting on the lock. The size of the polling area 2614 // should be at least the number of threads waiting. 2615 // 2616 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2617 if (num_waiting > num_polls) { 2618 kmp_uint32 old_num_polls = num_polls; 2619 reconfigure = true; 2620 do { 2621 mask = (mask << 1) | 1; 2622 num_polls *= 2; 2623 } while (num_polls <= num_waiting); 2624 2625 // 2626 // Allocate the new polling area, and copy the relevant portion 2627 // of the old polling area to the new area. __kmp_allocate() 2628 // zeroes the memory it allocates, and most of the old area is 2629 // just zero padding, so we only copy the release counters. 2630 // 2631 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2632 __kmp_allocate(num_polls * sizeof(*polls)); 2633 kmp_uint32 i; 2634 for (i = 0; i < old_num_polls; i++) { 2635 polls[i].poll = old_polls[i].poll; 2636 } 2637 } 2638 } 2639 2640 if (reconfigure) { 2641 // 2642 // Now write the updated fields back to the lock structure. 2643 // 2644 // Make certain that "polls" is written before "mask" !!! 2645 // 2646 // If another thread picks up the new value of mask and the old 2647 // polls pointer , it could access memory beyond the end of the 2648 // old polling area. 2649 // 2650 // On x86, we need memory fences. 2651 // 2652 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", 2653 ticket, lck, num_polls)); 2654 2655 lck->lk.old_polls = old_polls; // non-volatile store 2656 lck->lk.polls = polls; // volatile store 2657 2658 KMP_MB(); 2659 2660 lck->lk.num_polls = num_polls; // non-volatile store 2661 lck->lk.mask = mask; // volatile store 2662 2663 KMP_MB(); 2664 2665 // 2666 // Only after the new polling area and mask have been flushed 2667 // to main memory can we update the cleanup ticket field. 
2668 // 2669 // volatile load / non-volatile store 2670 // 2671 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2672 } 2673 } 2674 return KMP_LOCK_ACQUIRED_FIRST; 2675 } 2676 2677 int 2678 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2679 { 2680 int retval = __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2681 ANNOTATE_DRDPA_ACQUIRED(lck); 2682 return retval; 2683 } 2684 2685 static int 2686 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2687 { 2688 char const * const func = "omp_set_lock"; 2689 if ( lck->lk.initialized != lck ) { 2690 KMP_FATAL( LockIsUninitialized, func ); 2691 } 2692 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2693 KMP_FATAL( LockNestableUsedAsSimple, func ); 2694 } 2695 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { 2696 KMP_FATAL( LockIsAlreadyOwned, func ); 2697 } 2698 2699 __kmp_acquire_drdpa_lock( lck, gtid ); 2700 2701 lck->lk.owner_id = gtid + 1; 2702 return KMP_LOCK_ACQUIRED_FIRST; 2703 } 2704 2705 int 2706 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2707 { 2708 // 2709 // First get a ticket, then read the polls pointer and the mask. 2710 // The polls pointer must be read before the mask!!! (See above) 2711 // 2712 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2713 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2714 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2715 TCR_PTR(lck->lk.polls); // volatile load 2716 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2717 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2718 kmp_uint64 next_ticket = ticket + 1; 2719 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, 2720 ticket, next_ticket)) { 2721 KMP_FSYNC_ACQUIRED(lck); 2722 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2723 ticket, lck)); 2724 lck->lk.now_serving = ticket; // non-volatile store 2725 2726 // 2727 // Since no threads are waiting, there is no possibility that 2728 // we would want to reconfigure the polling area. We might 2729 // have the cleanup ticket value (which says that it is now 2730 // safe to deallocate old_polls), but we'll let a later thread 2731 // which calls __kmp_acquire_lock do that - this routine 2732 // isn't supposed to block, and we would risk blocks if we 2733 // called __kmp_free() to do the deallocation. 2734 // 2735 return TRUE; 2736 } 2737 } 2738 return FALSE; 2739 } 2740 2741 static int 2742 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2743 { 2744 char const * const func = "omp_test_lock"; 2745 if ( lck->lk.initialized != lck ) { 2746 KMP_FATAL( LockIsUninitialized, func ); 2747 } 2748 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2749 KMP_FATAL( LockNestableUsedAsSimple, func ); 2750 } 2751 2752 int retval = __kmp_test_drdpa_lock( lck, gtid ); 2753 2754 if ( retval ) { 2755 lck->lk.owner_id = gtid + 1; 2756 } 2757 return retval; 2758 } 2759 2760 int 2761 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2762 { 2763 // 2764 // Read the ticket value from the lock data struct, then the polls 2765 // pointer and the mask. The polls pointer must be read before the 2766 // mask!!! 
(See above) 2767 // 2768 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2769 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2770 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2771 TCR_PTR(lck->lk.polls); // volatile load 2772 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2773 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2774 ticket - 1, lck)); 2775 KMP_FSYNC_RELEASING(lck); 2776 ANNOTATE_DRDPA_RELEASED(lck); 2777 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2778 return KMP_LOCK_RELEASED; 2779 } 2780 2781 static int 2782 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2783 { 2784 char const * const func = "omp_unset_lock"; 2785 KMP_MB(); /* in case another processor initialized lock */ 2786 if ( lck->lk.initialized != lck ) { 2787 KMP_FATAL( LockIsUninitialized, func ); 2788 } 2789 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2790 KMP_FATAL( LockNestableUsedAsSimple, func ); 2791 } 2792 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2793 KMP_FATAL( LockUnsettingFree, func ); 2794 } 2795 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) 2796 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { 2797 KMP_FATAL( LockUnsettingSetByAnother, func ); 2798 } 2799 lck->lk.owner_id = 0; 2800 return __kmp_release_drdpa_lock( lck, gtid ); 2801 } 2802 2803 void 2804 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) 2805 { 2806 lck->lk.location = NULL; 2807 lck->lk.mask = 0; 2808 lck->lk.num_polls = 1; 2809 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2810 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2811 lck->lk.cleanup_ticket = 0; 2812 lck->lk.old_polls = NULL; 2813 lck->lk.next_ticket = 0; 2814 lck->lk.now_serving = 0; 2815 lck->lk.owner_id = 0; // no thread owns the lock. 2816 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
2817 lck->lk.initialized = lck; 2818 2819 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2820 } 2821 2822 static void 2823 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2824 { 2825 __kmp_init_drdpa_lock( lck ); 2826 } 2827 2828 void 2829 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) 2830 { 2831 lck->lk.initialized = NULL; 2832 lck->lk.location = NULL; 2833 if (lck->lk.polls != NULL) { 2834 __kmp_free((void *)lck->lk.polls); 2835 lck->lk.polls = NULL; 2836 } 2837 if (lck->lk.old_polls != NULL) { 2838 __kmp_free((void *)lck->lk.old_polls); 2839 lck->lk.old_polls = NULL; 2840 } 2841 lck->lk.mask = 0; 2842 lck->lk.num_polls = 0; 2843 lck->lk.cleanup_ticket = 0; 2844 lck->lk.next_ticket = 0; 2845 lck->lk.now_serving = 0; 2846 lck->lk.owner_id = 0; 2847 lck->lk.depth_locked = -1; 2848 } 2849 2850 static void 2851 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2852 { 2853 char const * const func = "omp_destroy_lock"; 2854 if ( lck->lk.initialized != lck ) { 2855 KMP_FATAL( LockIsUninitialized, func ); 2856 } 2857 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2858 KMP_FATAL( LockNestableUsedAsSimple, func ); 2859 } 2860 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2861 KMP_FATAL( LockStillOwned, func ); 2862 } 2863 __kmp_destroy_drdpa_lock( lck ); 2864 } 2865 2866 2867 // 2868 // nested drdpa ticket locks 2869 // 2870 2871 int 2872 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2873 { 2874 KMP_DEBUG_ASSERT( gtid >= 0 ); 2875 2876 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2877 lck->lk.depth_locked += 1; 2878 return KMP_LOCK_ACQUIRED_NEXT; 2879 } 2880 else { 2881 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2882 ANNOTATE_DRDPA_ACQUIRED(lck); 2883 KMP_MB(); 2884 lck->lk.depth_locked = 1; 2885 KMP_MB(); 2886 lck->lk.owner_id = gtid + 1; 2887 return KMP_LOCK_ACQUIRED_FIRST; 2888 } 2889 } 2890 2891 static void 2892 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2893 { 2894 char const * const func = "omp_set_nest_lock"; 2895 if ( lck->lk.initialized != lck ) { 2896 KMP_FATAL( LockIsUninitialized, func ); 2897 } 2898 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2899 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2900 } 2901 __kmp_acquire_nested_drdpa_lock( lck, gtid ); 2902 } 2903 2904 int 2905 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2906 { 2907 int retval; 2908 2909 KMP_DEBUG_ASSERT( gtid >= 0 ); 2910 2911 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2912 retval = ++lck->lk.depth_locked; 2913 } 2914 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { 2915 retval = 0; 2916 } 2917 else { 2918 KMP_MB(); 2919 retval = lck->lk.depth_locked = 1; 2920 KMP_MB(); 2921 lck->lk.owner_id = gtid + 1; 2922 } 2923 return retval; 2924 } 2925 2926 static int 2927 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2928 { 2929 char const * const func = "omp_test_nest_lock"; 2930 if ( lck->lk.initialized != lck ) { 2931 KMP_FATAL( LockIsUninitialized, func ); 2932 } 2933 if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { 2934 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2935 } 2936 return __kmp_test_nested_drdpa_lock( lck, gtid ); 2937 } 2938 2939 int 2940 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2941 { 2942 KMP_DEBUG_ASSERT( gtid >= 0 ); 2943 2944 KMP_MB(); 2945 if ( --(lck->lk.depth_locked) == 0 ) { 2946 KMP_MB(); 2947 lck->lk.owner_id = 0; 2948 __kmp_release_drdpa_lock( lck, gtid ); 2949 return KMP_LOCK_RELEASED; 2950 } 2951 return KMP_LOCK_STILL_HELD; 2952 } 2953 2954 static int 2955 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2956 { 2957 char const * const func = "omp_unset_nest_lock"; 2958 KMP_MB(); /* in case another processor initialized lock */ 2959 if ( lck->lk.initialized != lck ) { 2960 KMP_FATAL( LockIsUninitialized, func ); 2961 } 2962 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2963 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2964 } 2965 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2966 KMP_FATAL( LockUnsettingFree, func ); 2967 } 2968 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { 2969 KMP_FATAL( LockUnsettingSetByAnother, func ); 2970 } 2971 return __kmp_release_nested_drdpa_lock( lck, gtid ); 2972 } 2973 2974 void 2975 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) 2976 { 2977 __kmp_init_drdpa_lock( lck ); 2978 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2979 } 2980 2981 static void 2982 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2983 { 2984 __kmp_init_nested_drdpa_lock( lck ); 2985 } 2986 2987 void 2988 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) 2989 { 2990 __kmp_destroy_drdpa_lock( lck ); 2991 lck->lk.depth_locked = 0; 2992 } 2993 2994 static void 2995 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2996 { 2997 char const * const func = "omp_destroy_nest_lock"; 2998 if ( lck->lk.initialized != lck ) { 2999 KMP_FATAL( LockIsUninitialized, func ); 3000 } 3001 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 3002 KMP_FATAL( LockSimpleUsedAsNestable, func ); 3003 } 3004 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 3005 KMP_FATAL( LockStillOwned, func ); 3006 } 3007 __kmp_destroy_nested_drdpa_lock( lck ); 3008 } 3009 3010 3011 // 3012 // access functions to fields which don't exist for all lock kinds. 
3013 // 3014 3015 static int 3016 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) 3017 { 3018 return lck == lck->lk.initialized; 3019 } 3020 3021 static const ident_t * 3022 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) 3023 { 3024 return lck->lk.location; 3025 } 3026 3027 static void 3028 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) 3029 { 3030 lck->lk.location = loc; 3031 } 3032 3033 static kmp_lock_flags_t 3034 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) 3035 { 3036 return lck->lk.flags; 3037 } 3038 3039 static void 3040 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) 3041 { 3042 lck->lk.flags = flags; 3043 } 3044 3045 // Time stamp counter 3046 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3047 # define __kmp_tsc() __kmp_hardware_timestamp() 3048 // Runtime's default backoff parameters 3049 kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 }; 3050 #else 3051 // Use nanoseconds for other platforms 3052 extern kmp_uint64 __kmp_now_nsec(); 3053 kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 }; 3054 # define __kmp_tsc() __kmp_now_nsec() 3055 #endif 3056 3057 // A useful predicate for dealing with timestamps that may wrap. 3058 // Is a before b? 3059 // Since the timestamps may wrap, this is asking whether it's 3060 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 3061 // Times where going clockwise is less distance than going anti-clockwise 3062 // are in the future, others are in the past. 3063 // e.g.) a = MAX-1, b = MAX+1 (=0), then a > b (true) does not mean a reached b 3064 // whereas signed(a) = -2, signed(b) = 0 captures the actual difference 3065 static inline bool before(kmp_uint64 a, kmp_uint64 b) 3066 { 3067 return ((kmp_int64)b - (kmp_int64)a) > 0; 3068 } 3069 3070 // Truncated binary exponential backoff function 3071 void 3072 __kmp_spin_backoff(kmp_backoff_t *boff) 3073 { 3074 // We could flatten this loop, but making it a nested loop gives better result. 3075 kmp_uint32 i; 3076 for (i = boff->step; i > 0; i--) { 3077 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 3078 do { 3079 KMP_CPU_PAUSE(); 3080 } while (before(__kmp_tsc(), goal)); 3081 } 3082 boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1); 3083 } 3084 3085 #if KMP_USE_DYNAMIC_LOCK 3086 3087 // Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word. 3088 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) 3089 { 3090 TCW_4(*lck, KMP_GET_D_TAG(seq)); 3091 KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 3092 } 3093 3094 #if KMP_USE_TSX 3095 3096 // HLE lock functions - imported from the testbed runtime. 
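// HLE_ACQUIRE and HLE_RELEASE below emit the XACQUIRE (0xF2) and XRELEASE
// (0xF3) instruction prefixes in front of the xchg/mov that touch the lock
// word.  On processors without HLE the prefixes are ignored (they decode as
// the legacy REPNE/REP prefixes), so the same code degrades gracefully to a
// plain test-and-set lock.  Sketch of the emitted sequence (illustrative):
//
//   acquire:  xacquire xchg  lockword, BUSY    ; elision attempted here
//   release:  xrelease mov   lockword, FREE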
3097 #define HLE_ACQUIRE ".byte 0xf2;" 3098 #define HLE_RELEASE ".byte 0xf3;" 3099 3100 static inline kmp_uint32 3101 swap4(kmp_uint32 volatile *p, kmp_uint32 v) 3102 { 3103 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" 3104 : "+r"(v), "+m"(*p) 3105 : 3106 : "memory"); 3107 return v; 3108 } 3109 3110 static void 3111 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) 3112 { 3113 TCW_4(*lck, 0); 3114 } 3115 3116 static void 3117 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3118 { 3119 // Use gtid for KMP_LOCK_BUSY if necessary 3120 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 3121 int delay = 1; 3122 do { 3123 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 3124 for (int i = delay; i != 0; --i) 3125 KMP_CPU_PAUSE(); 3126 delay = ((delay << 1) | 1) & 7; 3127 } 3128 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 3129 } 3130 } 3131 3132 static void 3133 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3134 { 3135 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 3136 } 3137 3138 static int 3139 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3140 { 3141 __asm__ volatile(HLE_RELEASE "movl %1,%0" 3142 : "=m"(*lck) 3143 : "r"(KMP_LOCK_FREE(hle)) 3144 : "memory"); 3145 return KMP_LOCK_RELEASED; 3146 } 3147 3148 static int 3149 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3150 { 3151 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 3152 } 3153 3154 static int 3155 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3156 { 3157 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 3158 } 3159 3160 static int 3161 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3162 { 3163 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 3164 } 3165 3166 static void 3167 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) 3168 { 3169 __kmp_init_queuing_lock(lck); 3170 } 3171 3172 static void 3173 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) 3174 { 3175 __kmp_destroy_queuing_lock(lck); 3176 } 3177 3178 static void 3179 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3180 { 3181 unsigned retries=3, status; 3182 do { 3183 status = _xbegin(); 3184 if (status == _XBEGIN_STARTED) { 3185 if (__kmp_is_unlocked_queuing_lock(lck)) 3186 return; 3187 _xabort(0xff); 3188 } 3189 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 3190 // Wait until lock becomes free 3191 while (! 
__kmp_is_unlocked_queuing_lock(lck)) 3192 __kmp_yield(TRUE); 3193 } 3194 else if (!(status & _XABORT_RETRY)) 3195 break; 3196 } while (retries--); 3197 3198 // Fall-back non-speculative lock (xchg) 3199 __kmp_acquire_queuing_lock(lck, gtid); 3200 } 3201 3202 static void 3203 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3204 { 3205 __kmp_acquire_rtm_lock(lck, gtid); 3206 } 3207 3208 static int 3209 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3210 { 3211 if (__kmp_is_unlocked_queuing_lock(lck)) { 3212 // Releasing from speculation 3213 _xend(); 3214 } 3215 else { 3216 // Releasing from a real lock 3217 __kmp_release_queuing_lock(lck, gtid); 3218 } 3219 return KMP_LOCK_RELEASED; 3220 } 3221 3222 static int 3223 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3224 { 3225 return __kmp_release_rtm_lock(lck, gtid); 3226 } 3227 3228 static int 3229 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3230 { 3231 unsigned retries=3, status; 3232 do { 3233 status = _xbegin(); 3234 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 3235 return 1; 3236 } 3237 if (!(status & _XABORT_RETRY)) 3238 break; 3239 } while (retries--); 3240 3241 return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0; 3242 } 3243 3244 static int 3245 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3246 { 3247 return __kmp_test_rtm_lock(lck, gtid); 3248 } 3249 3250 #endif // KMP_USE_TSX 3251 3252 // Entry functions for indirect locks (first element of direct lock jump tables). 3253 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); 3254 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); 3255 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3256 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3257 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3258 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3259 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3260 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3261 3262 // 3263 // Jump tables for the indirect lock functions. 3264 // Only fill in the odd entries, that avoids the need to shift out the low bit. 
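// How the encoding fits together (based on the comments in this file; the
// extraction macros themselves live in kmp_lock.h): a direct lock keeps its
// tag in the low byte of the lock word with bit 0 set, so the tag is always
// odd and can index these tables without shifting, while an indirect lock
// stores an even value (index << 1, see __kmp_allocate_indirect_lock below)
// and is therefore routed to entry 0, the *_indirect_lock entry functions
// declared above.  Illustrative decode (not the actual macro):
//
//   idx = (word & 1) ? (word & 0xff) : 0;  // odd -> direct tag, even -> indirect
//   (*direct_set[idx])(lock, gtid);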
3265 // 3266 3267 // init functions 3268 #define expand(l, op) 0,__kmp_init_direct_lock, 3269 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) 3270 = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) }; 3271 #undef expand 3272 3273 // destroy functions 3274 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 3275 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) 3276 = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) }; 3277 #undef expand 3278 3279 // set/acquire functions 3280 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 3281 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) 3282 = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) }; 3283 #undef expand 3284 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 3285 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) 3286 = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) }; 3287 #undef expand 3288 3289 // unset/release and test functions 3290 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 3291 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) 3292 = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) }; 3293 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) 3294 = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) }; 3295 #undef expand 3296 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 3297 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) 3298 = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) }; 3299 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) 3300 = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) }; 3301 #undef expand 3302 3303 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3304 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; 3305 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; 3306 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; 3307 3308 // 3309 // Jump tables for the indirect lock functions. 
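// Each expand(l, op) entry below token-pastes an existing function name:
// with l = queuing and op = init, for example, the entry becomes
// __kmp_init_queuing_lock (defined earlier in this file), and the
// *_with_checks tables are generated the same way from the checked variants.
// KMP_FOREACH_I_LOCK (kmp_lock.h) is assumed to invoke expand once per
// indirect lock kind, in the same order as the kmp_indirect_locktag_t
// values, so that a lock's type tag can be used directly as the index into
// these tables (see KMP_I_LOCK_FUNC).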
3310 // 3311 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, 3312 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) }; 3313 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) }; 3314 #undef expand 3315 3316 // set/acquire functions 3317 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3318 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; 3319 #undef expand 3320 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3321 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; 3322 #undef expand 3323 3324 // unset/release and test functions 3325 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3326 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; 3327 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; 3328 #undef expand 3329 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3330 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; 3331 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; 3332 #undef expand 3333 3334 // Exposes only one jump tables (*lock or *lock_with_checks). 3335 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; 3336 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; 3337 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; 3338 3339 // Lock index table. 3340 kmp_indirect_lock_table_t __kmp_i_lock_table; 3341 3342 // Size of indirect locks. 3343 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 }; 3344 3345 // Jump tables for lock accessor/modifier. 3346 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; 3347 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; 3348 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3349 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3350 3351 // Use different lock pools for different lock types. 3352 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 }; 3353 3354 // User lock allocator for dynamically dispatched indirect locks. 3355 // Every entry of the indirect lock table holds the address and type of the allocated indrect lock 3356 // (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock 3357 // object is returned to the reusable pool of locks, unique to each lock type. 
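// A sketch of the growth step in __kmp_allocate_indirect_lock (illustrative;
// locks are kept in blocks of KMP_I_LOCK_CHUNK and only the array of block
// pointers is reallocated):
//
//   rows   = size / KMP_I_LOCK_CHUNK;          // current number of blocks
//   table  = new array of 2*rows pointers, old pointers copied over;
//   blocks rows .. 2*rows-1 are freshly allocated;
//   size   = 2 * size;                         // lock capacity doubles
//
// Existing kmp_indirect_lock_t objects therefore never move, which keeps the
// indices already handed out to user lock words valid.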
3358 kmp_indirect_lock_t * 3359 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) 3360 { 3361 kmp_indirect_lock_t *lck; 3362 kmp_lock_index_t idx; 3363 3364 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3365 3366 if (__kmp_indirect_lock_pool[tag] != NULL) { 3367 // Reuse the allocated and destroyed lock object 3368 lck = __kmp_indirect_lock_pool[tag]; 3369 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3370 idx = lck->lock->pool.index; 3371 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3372 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck)); 3373 } else { 3374 idx = __kmp_i_lock_table.next; 3375 // Check capacity and double the size if it is full 3376 if (idx == __kmp_i_lock_table.size) { 3377 // Double up the space for block pointers 3378 int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK; 3379 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; 3380 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *)); 3381 KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *)); 3382 __kmp_free(old_table); 3383 // Allocate new objects in the new blocks 3384 for (int i = row; i < 2*row; ++i) 3385 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *) 3386 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); 3387 __kmp_i_lock_table.size = 2*idx; 3388 } 3389 __kmp_i_lock_table.next++; 3390 lck = KMP_GET_I_LOCK(idx); 3391 // Allocate a new base lock object 3392 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3393 KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); 3394 } 3395 3396 __kmp_release_lock(&__kmp_global_lock, gtid); 3397 3398 lck->type = tag; 3399 3400 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3401 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. 3402 } else { 3403 *((kmp_indirect_lock_t **)user_lock) = lck; 3404 } 3405 3406 return lck; 3407 } 3408 3409 // User lock lookup for dynamically dispatched locks. 
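// The allocator above leaves one of two encodings in the user's lock
// variable: if omp_lock_t cannot hold a pointer (OMP_LOCK_T_SIZE <
// sizeof(void *)) it stores the table index shifted left by one (kept even
// so it stays distinguishable from direct locks); otherwise it stores the
// kmp_indirect_lock_t pointer itself.  A minimal sketch of the decode done
// below (the consistency-check path additionally validates the index and
// rejects NULL):
//
//   if (OMP_LOCK_T_SIZE < sizeof(void *))
//       lck = KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));  // index path
//   else
//       lck = *(kmp_indirect_lock_t **)user_lock;              // pointer path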
3410 static __forceinline 3411 kmp_indirect_lock_t * 3412 __kmp_lookup_indirect_lock(void **user_lock, const char *func) 3413 { 3414 if (__kmp_env_consistency_check) { 3415 kmp_indirect_lock_t *lck = NULL; 3416 if (user_lock == NULL) { 3417 KMP_FATAL(LockIsUninitialized, func); 3418 } 3419 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3420 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3421 if (idx >= __kmp_i_lock_table.size) { 3422 KMP_FATAL(LockIsUninitialized, func); 3423 } 3424 lck = KMP_GET_I_LOCK(idx); 3425 } else { 3426 lck = *((kmp_indirect_lock_t **)user_lock); 3427 } 3428 if (lck == NULL) { 3429 KMP_FATAL(LockIsUninitialized, func); 3430 } 3431 return lck; 3432 } else { 3433 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3434 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3435 } else { 3436 return *((kmp_indirect_lock_t **)user_lock); 3437 } 3438 } 3439 } 3440 3441 static void 3442 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) 3443 { 3444 #if KMP_USE_ADAPTIVE_LOCKS 3445 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3446 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3447 seq = lockseq_queuing; 3448 } 3449 #endif 3450 #if KMP_USE_TSX 3451 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3452 seq = lockseq_queuing; 3453 } 3454 #endif 3455 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3456 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3457 KMP_I_LOCK_FUNC(l, init)(l->lock); 3458 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq)); 3459 } 3460 3461 static void 3462 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) 3463 { 3464 kmp_uint32 gtid = __kmp_entry_gtid(); 3465 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3466 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3467 kmp_indirect_locktag_t tag = l->type; 3468 3469 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3470 3471 // Use the base lock's space to keep the pool chain. 
3472 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3473 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3474 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3475 } 3476 __kmp_indirect_lock_pool[tag] = l; 3477 3478 __kmp_release_lock(&__kmp_global_lock, gtid); 3479 } 3480 3481 static void 3482 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3483 { 3484 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3485 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3486 } 3487 3488 static int 3489 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3490 { 3491 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3492 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3493 } 3494 3495 static int 3496 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3497 { 3498 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3499 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3500 } 3501 3502 static void 3503 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3504 { 3505 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3506 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3507 } 3508 3509 static int 3510 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3511 { 3512 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3513 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3514 } 3515 3516 static int 3517 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3518 { 3519 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3520 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3521 } 3522 3523 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3524 3525 // This is used only in kmp_error.cpp when consistency checking is on. 3526 kmp_int32 3527 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) 3528 { 3529 switch (seq) { 3530 case lockseq_tas: 3531 case lockseq_nested_tas: 3532 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3533 #if KMP_USE_FUTEX 3534 case lockseq_futex: 3535 case lockseq_nested_futex: 3536 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3537 #endif 3538 case lockseq_ticket: 3539 case lockseq_nested_ticket: 3540 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3541 case lockseq_queuing: 3542 case lockseq_nested_queuing: 3543 #if KMP_USE_ADAPTIVE_LOCKS 3544 case lockseq_adaptive: 3545 #endif 3546 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3547 case lockseq_drdpa: 3548 case lockseq_nested_drdpa: 3549 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3550 default: 3551 return 0; 3552 } 3553 } 3554 3555 // Initializes data for dynamic user locks. 3556 void 3557 __kmp_init_dynamic_user_locks() 3558 { 3559 // Initialize jump table for the lock functions 3560 if (__kmp_env_consistency_check) { 3561 __kmp_direct_set = direct_set_check; 3562 __kmp_direct_unset = direct_unset_check; 3563 __kmp_direct_test = direct_test_check; 3564 __kmp_indirect_set = indirect_set_check; 3565 __kmp_indirect_unset = indirect_unset_check; 3566 __kmp_indirect_test = indirect_test_check; 3567 } 3568 else { 3569 __kmp_direct_set = direct_set; 3570 __kmp_direct_unset = direct_unset; 3571 __kmp_direct_test = direct_test; 3572 __kmp_indirect_set = indirect_set; 3573 __kmp_indirect_unset = indirect_unset; 3574 __kmp_indirect_test = indirect_test; 3575 } 3576 // If the user locks have already been initialized, then return. 
3577 // Allow the switch between different KMP_CONSISTENCY_CHECK values, 3578 // but do not allocate new lock tables if they have already been 3579 // allocated. 3580 if (__kmp_init_user_locks) 3581 return; 3582 3583 // Initialize lock index table 3584 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3585 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3586 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *) 3587 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); 3588 __kmp_i_lock_table.next = 0; 3589 3590 // Indirect lock size 3591 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3592 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3593 #if KMP_USE_ADAPTIVE_LOCKS 3594 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3595 #endif 3596 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3597 #if KMP_USE_TSX 3598 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3599 #endif 3600 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3601 #if KMP_USE_FUTEX 3602 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3603 #endif 3604 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3605 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3606 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3607 3608 // Initialize lock accessor/modifier 3609 #define fill_jumps(table, expand, sep) { \ 3610 table[locktag##sep##ticket] = expand(ticket); \ 3611 table[locktag##sep##queuing] = expand(queuing); \ 3612 table[locktag##sep##drdpa] = expand(drdpa); \ 3613 } 3614 3615 #if KMP_USE_ADAPTIVE_LOCKS 3616 # define fill_table(table, expand) { \ 3617 fill_jumps(table, expand, _); \ 3618 table[locktag_adaptive] = expand(queuing); \ 3619 fill_jumps(table, expand, _nested_); \ 3620 } 3621 #else 3622 # define fill_table(table, expand) { \ 3623 fill_jumps(table, expand, _); \ 3624 fill_jumps(table, expand, _nested_); \ 3625 } 3626 #endif // KMP_USE_ADAPTIVE_LOCKS 3627 3628 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location 3629 fill_table(__kmp_indirect_set_location, expand); 3630 #undef expand 3631 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags 3632 fill_table(__kmp_indirect_set_flags, expand); 3633 #undef expand 3634 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location 3635 fill_table(__kmp_indirect_get_location, expand); 3636 #undef expand 3637 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags 3638 fill_table(__kmp_indirect_get_flags, expand); 3639 #undef expand 3640 3641 __kmp_init_user_locks = TRUE; 3642 } 3643 3644 // Clean up the lock table. 3645 void 3646 __kmp_cleanup_indirect_user_locks() 3647 { 3648 kmp_lock_index_t i; 3649 int k; 3650 3651 // Clean up locks in the pools first (they were already destroyed before going into the pools). 3652 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { 3653 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; 3654 while (l != NULL) { 3655 kmp_indirect_lock_t *ll = l; 3656 l = (kmp_indirect_lock_t *)l->lock->pool.next; 3657 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll)); 3658 __kmp_free(ll->lock); 3659 ll->lock = NULL; 3660 } 3661 __kmp_indirect_lock_pool[k] = NULL; 3662 } 3663 // Clean up the remaining undestroyed locks. 
// Clean up the lock table.
void
__kmp_cleanup_indirect_user_locks()
{
    kmp_lock_index_t i;
    int k;

    // Clean up locks in the pools first (they were already destroyed before going into the pools).
    for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
        kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
        while (l != NULL) {
            kmp_indirect_lock_t *ll = l;
            l = (kmp_indirect_lock_t *)l->lock->pool.next;
            KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
            __kmp_free(ll->lock);
            ll->lock = NULL;
        }
        __kmp_indirect_lock_pool[k] = NULL;
    }
    // Clean up the remaining undestroyed locks.
    for (i = 0; i < __kmp_i_lock_table.next; i++) {
        kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
        if (l->lock != NULL) {
            // Locks not destroyed explicitly need to be destroyed here.
            KMP_I_LOCK_FUNC(l, destroy)(l->lock);
            KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
            __kmp_free(l->lock);
        }
    }
    // Free the table
    for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
        __kmp_free(__kmp_i_lock_table.table[i]);
    __kmp_free(__kmp_i_lock_table.table);

    __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1;             // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

/* ------------------------------------------------------------------------ */
/* user locks
 *
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined.
 */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;

int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
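// Descriptive note: these pointers stay NULL until __kmp_set_user_lock_vptrs() below binds
// them for the chosen lock kind; for lk_ticket, for example, __kmp_get_user_lock_owner_
// ends up pointing at __kmp_get_ticket_lock_owner. The acquire/test/release/init entries
// are filled in by the KMP_BIND_USER_LOCK* macros, which are presumed to be defined in
// kmp_lock.h (not shown in this file).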
void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
{
    switch ( user_lock_kind ) {
        case lk_default:
        default:
        KMP_ASSERT( 0 );

        case lk_tas: {
            __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
            __kmp_user_lock_size = sizeof( kmp_tas_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_tas_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
            }
            else {
                KMP_BIND_USER_LOCK(tas);
                KMP_BIND_NESTED_USER_LOCK(tas);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_tas_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#if KMP_USE_FUTEX

        case lk_futex: {
            __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
            __kmp_user_lock_size = sizeof( kmp_futex_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_futex_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
            }
            else {
                KMP_BIND_USER_LOCK(futex);
                KMP_BIND_NESTED_USER_LOCK(futex);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_futex_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#endif // KMP_USE_FUTEX

        case lk_ticket: {
            __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
            __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
            }
            else {
                KMP_BIND_USER_LOCK(ticket);
                KMP_BIND_NESTED_USER_LOCK(ticket);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_ticket_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_ticket_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_ticket_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_ticket_lock_flags );
        }
        break;

        case lk_queuing: {
            __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
            __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
            }
            else {
                KMP_BIND_USER_LOCK(queuing);
                KMP_BIND_NESTED_USER_LOCK(queuing);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_queuing_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_queuing_lock_flags );
        }
        break;
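        // Note: the lk_adaptive case below reuses the queuing lock's owner, location and
        // flags accessors as well as __kmp_is_queuing_lock_initialized; kmp_adaptive_lock_t
        // is presumed to embed a queuing lock as its fallback path (see kmp_lock.h), so
        // only the acquire/destroy entry points differ.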
#if KMP_USE_ADAPTIVE_LOCKS
        case lk_adaptive: {
            __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
            __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
            }
            else {
                KMP_BIND_USER_LOCK(adaptive);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_adaptive_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_queuing_lock_flags );
        }
        break;
#endif // KMP_USE_ADAPTIVE_LOCKS

        case lk_drdpa: {
            __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
            __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
            }
            else {
                KMP_BIND_USER_LOCK(drdpa);
                KMP_BIND_NESTED_USER_LOCK(drdpa);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_drdpa_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_drdpa_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_drdpa_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_drdpa_lock_flags );
        }
        break;
    }
}
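// Descriptive note: for lk_tas and (when enabled) lk_futex the is_initialized, location
// and flags accessors above are deliberately left NULL, since those lock types carry no
// such bookkeeping. Callers such as __kmp_cleanup_user_locks() below therefore test these
// function pointers for NULL before calling through them.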
// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks* __kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1;             // FIXME - tune this value

static kmp_lock_index_t
__kmp_lock_table_insert( kmp_user_lock_p lck )
{
    // Assume that kmp_global_lock is held upon entry/exit.
    kmp_lock_index_t index;
    if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
        kmp_lock_index_t size;
        kmp_user_lock_p *table;
        // Reallocate lock table.
        if ( __kmp_user_lock_table.allocated == 0 ) {
            size = 1024;
        }
        else {
            size = __kmp_user_lock_table.allocated * 2;
        }
        table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
        KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1,
                    sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
        table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
        // We cannot free the previous table now, since it may be in use by other
        // threads. So save the pointer to the previous table in the first element of
        // the new table. All the tables are thus organized into a list, and can be
        // freed when the library is shutting down.
        __kmp_user_lock_table.table = table;
        __kmp_user_lock_table.allocated = size;
    }
    KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
    index = __kmp_user_lock_table.used;
    __kmp_user_lock_table.table[ index ] = lck;
    ++ __kmp_user_lock_table.used;
    return index;
}

static kmp_user_lock_p
__kmp_lock_block_allocate()
{
    // Assume that kmp_global_lock is held upon entry/exit.
    static int last_index = 0;
    if ( ( last_index >= __kmp_num_locks_in_block )
      || ( __kmp_lock_blocks == NULL ) ) {
        // Restart the index.
        last_index = 0;
        // Need to allocate a new block.
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
        char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
        // Set up the new block.
        kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
        new_block->next_block = __kmp_lock_blocks;
        new_block->locks = (void *)buffer;
        // Publish the new block.
        KMP_MB();
        __kmp_lock_blocks = new_block;
    }
    kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
                                                [ last_index * __kmp_user_lock_size ] ) );
    last_index++;
    return ret;
}
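// Layout of a block allocated above (sketch): the locks occupy the first
// __kmp_user_lock_size * __kmp_num_locks_in_block bytes of the buffer, and the
// kmp_block_of_locks bookkeeping record is placed immediately after them:
//
//     [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
//
// Because the record lives inside the same allocation, freeing block_ptr->locks in
// __kmp_cleanup_user_locks() below releases the block header as well.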
//
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
//
kmp_user_lock_p
__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
  kmp_lock_flags_t flags )
{
    kmp_user_lock_p lck;
    kmp_lock_index_t index;
    KMP_DEBUG_ASSERT( user_lock );

    __kmp_acquire_lock( &__kmp_global_lock, gtid );

    if ( __kmp_lock_pool == NULL ) {
        // Lock pool is empty. Allocate new memory.

        // ANNOTATION: Found no good way to express the synchronisation
        // between allocation and usage, so ignore the allocation.
        ANNOTATE_IGNORE_WRITES_BEGIN();
        if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
            lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
        }
        else {
            lck = __kmp_lock_block_allocate();
        }
        ANNOTATE_IGNORE_WRITES_END();

        // Insert lock in the table so that it can be freed in __kmp_cleanup,
        // and the debugger has info on all allocated locks.
        index = __kmp_lock_table_insert( lck );
    }
    else {
        // Pick up lock from pool.
        lck = __kmp_lock_pool;
        index = __kmp_lock_pool->pool.index;
        __kmp_lock_pool = __kmp_lock_pool->pool.next;
    }

    //
    // We could potentially differentiate between nested and regular locks
    // here, and do the lock table lookup for regular locks only.
    //
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        * ( (kmp_lock_index_t *) user_lock ) = index;
    }
    else {
        * ( (kmp_user_lock_p *) user_lock ) = lck;
    }

    // Mark the lock if it is a critical section lock.
    __kmp_set_user_lock_flags( lck, flags );

    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up

    return lck;
}

// Put the lock's memory into the pool for reuse.
void
__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( user_lock != NULL );
    KMP_DEBUG_ASSERT( lck != NULL );

    __kmp_acquire_lock( & __kmp_global_lock, gtid );

    lck->pool.next = __kmp_lock_pool;
    __kmp_lock_pool = lck;
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
        KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
        lck->pool.index = index;
    }

    __kmp_release_lock( & __kmp_global_lock, gtid );
}
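// Descriptive note on the user_lock encoding used above and below: when
// OMP_LOCK_T_SIZE < sizeof(void *) (e.g. a 4-byte omp_lock_t on a 64-bit target), the
// user's lock variable stores a 1-based index into __kmp_user_lock_table rather than the
// lock pointer itself; otherwise it stores the pointer directly. __kmp_lookup_user_lock()
// below decodes whichever representation is in use.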
kmp_user_lock_p
__kmp_lookup_user_lock( void **user_lock, char const *func )
{
    kmp_user_lock_p lck = NULL;

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
        if ( __kmp_env_consistency_check ) {
            if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
                KMP_FATAL( LockIsUninitialized, func );
            }
        }
        KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        lck = __kmp_user_lock_table.table[index];
    }
    else {
        lck = *( (kmp_user_lock_p *)user_lock );
    }

    if ( __kmp_env_consistency_check ) {
        if ( lck == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    return lck;
}

void
__kmp_cleanup_user_locks( void )
{
    //
    // Reset lock pool. Do not worry about locks in the pool -- we will free
    // them when iterating through the lock table (it includes all the locks,
    // dead or alive).
    //
    __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck) \
        ( ( __kmp_get_user_lock_flags_ != NULL ) && \
        ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )

    //
    // Loop through lock table, free all locks.
    //
    // Do not free item [0]; it is reserved for the lock tables list.
    //
    // FIXME - we are iterating through a list of (pointers to) objects of
    // type union kmp_user_lock, but we have no way of knowing whether the
    // base type is currently "pool" or whatever the global user lock type
    // is.
    //
    // We are relying on the fact that for all of the user lock types
    // (except "tas"), the first field in the lock struct is the "initialized"
    // field, which is set to the address of the lock object itself when
    // the lock is initialized. When the union is of type "pool", the
    // first field is a pointer to the next object in the free list, which
    // will not be the same address as the object itself.
    //
    // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
    // will fail for "pool" objects on the free list. This must happen as
    // the "location" field of real user locks overlaps the "index" field
    // of "pool" objects.
    //
    // It would be better to run through the free list, and remove all "pool"
    // objects from the lock table before executing this loop. However,
    // "pool" objects do not always have their index field set (only on
    // lin_32e), and I don't want to search the lock table for the address
    // of every "pool" object on the free list.
    //
    while ( __kmp_user_lock_table.used > 1 ) {
        const ident *loc;

        //
        // Reduce __kmp_user_lock_table.used before freeing the lock,
        // so that the state of the lock table stays consistent.
        //
        kmp_user_lock_p lck = __kmp_user_lock_table.table[
          --__kmp_user_lock_table.used ];

        if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
          ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
            //
            // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
            // initialized AND it is NOT a critical section (user is not
            // responsible for destroying criticals) AND we know the source
            // location to report.
            //
            if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
              ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
              ( loc->psource != NULL ) ) {
                kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
                KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
                __kmp_str_loc_free( &str_loc );
            }

#ifdef KMP_DEBUG
            if ( IS_CRITICAL( lck ) ) {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
            }
            else {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
            }
#endif // KMP_DEBUG

            //
            // Cleanup internal lock dynamic resources
            // (for drdpa locks particularly).
            //
            __kmp_destroy_user_lock( lck );
        }

        //
        // Free the lock if block allocation of locks is not used.
        //
        if ( __kmp_lock_blocks == NULL ) {
            __kmp_free( lck );
        }
    }

#undef IS_CRITICAL

    //
    // Delete lock table(s).
    //
    kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
    __kmp_user_lock_table.table = NULL;
    __kmp_user_lock_table.allocated = 0;

    while ( table_ptr != NULL ) {
        //
        // In the first element we saved the pointer to the previous
        // (smaller) lock table.
        //
        kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
        __kmp_free( table_ptr );
        table_ptr = next;
    }

    //
    // Free buffers allocated for blocks of locks.
    //
    kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
    __kmp_lock_blocks = NULL;

    while ( block_ptr != NULL ) {
        kmp_block_of_locks_t *next = block_ptr->next_block;
        __kmp_free( block_ptr->locks );
        //
        // *block_ptr itself was allocated at the end of the locks vector.
        //
        block_ptr = next;
    }

    TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK