1 /* 2 * kmp_lock.cpp -- lock-related functions 3 * $Revision: 42810 $ 4 * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $ 5 */ 6 7 8 //===----------------------------------------------------------------------===// 9 // 10 // The LLVM Compiler Infrastructure 11 // 12 // This file is dual licensed under the MIT and the University of Illinois Open 13 // Source Licenses. See LICENSE.txt for details. 14 // 15 //===----------------------------------------------------------------------===// 16 17 18 #include <stddef.h> 19 20 #include "kmp.h" 21 #include "kmp_itt.h" 22 #include "kmp_i18n.h" 23 #include "kmp_lock.h" 24 #include "kmp_io.h" 25 26 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 27 # include <unistd.h> 28 # include <sys/syscall.h> 29 // We should really include <futex.h>, but that causes compatibility problems on different 30 // Linux* OS distributions that either require that you include (or break when you try to include) 31 // <pci/types.h>. 32 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change) 33 // we just define the constants here and don't include <futex.h> 34 # ifndef FUTEX_WAIT 35 # define FUTEX_WAIT 0 36 # endif 37 # ifndef FUTEX_WAKE 38 # define FUTEX_WAKE 1 39 # endif 40 #endif 41 42 43 #ifndef KMP_DEBUG 44 # define __kmp_static_delay( arg ) /* nothing to do */ 45 #else 46 47 static void 48 __kmp_static_delay( int arg ) 49 { 50 /* Work around weird code-gen bug that causes assert to trip */ 51 # if KMP_ARCH_X86_64 && KMP_OS_LINUX 52 KMP_ASSERT( arg != 0 ); 53 # else 54 KMP_ASSERT( arg >= 0 ); 55 # endif 56 } 57 #endif /* KMP_DEBUG */ 58 59 static void 60 __kmp_static_yield( int arg ) 61 { 62 __kmp_yield( arg ); 63 } 64 65 /* Implement spin locks for internal library use. */ 66 /* The algorithm implemented is Lamport's bakery lock [1974]. */ 67 68 void 69 __kmp_validate_locks( void ) 70 { 71 int i; 72 kmp_uint32 x, y; 73 74 /* Check to make sure unsigned arithmetic does wraps properly */ 75 x = ~((kmp_uint32) 0) - 2; 76 y = x - 2; 77 78 for (i = 0; i < 8; ++i, ++x, ++y) { 79 kmp_uint32 z = (x - y); 80 KMP_ASSERT( z == 2 ); 81 } 82 83 KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 ); 84 } 85 86 87 /* ------------------------------------------------------------------------ */ 88 /* test and set locks */ 89 90 // 91 // For the non-nested locks, we can only assume that the first 4 bytes were 92 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel 93 // compiler only allocates a 4 byte pointer on IA-32 architecture. On 94 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated. 95 // 96 // gcc reserves >= 8 bytes for nested locks, so we can assume that the 97 // entire 8 bytes were allocated for nested locks on all 64-bit platforms. 98 // 99 100 static kmp_int32 101 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck ) 102 { 103 return TCR_4( lck->lk.poll ) - 1; 104 } 105 106 static inline bool 107 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck ) 108 { 109 return lck->lk.depth_locked != -1; 110 } 111 112 __forceinline static void 113 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid ) 114 { 115 KMP_MB(); 116 117 #ifdef USE_LOCK_PROFILE 118 kmp_uint32 curr = TCR_4( lck->lk.poll ); 119 if ( ( curr != 0 ) && ( curr != gtid + 1 ) ) 120 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 121 /* else __kmp_printf( "." 
);*/ 122 #endif /* USE_LOCK_PROFILE */ 123 124 if ( ( lck->lk.poll == 0 ) 125 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) { 126 KMP_FSYNC_ACQUIRED(lck); 127 return; 128 } 129 130 kmp_uint32 spins; 131 KMP_FSYNC_PREPARE( lck ); 132 KMP_INIT_YIELD( spins ); 133 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 134 __kmp_xproc ) ) { 135 KMP_YIELD( TRUE ); 136 } 137 else { 138 KMP_YIELD_SPIN( spins ); 139 } 140 141 while ( ( lck->lk.poll != 0 ) || 142 ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) { 143 // 144 // FIXME - use exponential backoff here 145 // 146 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 147 __kmp_xproc ) ) { 148 KMP_YIELD( TRUE ); 149 } 150 else { 151 KMP_YIELD_SPIN( spins ); 152 } 153 } 154 KMP_FSYNC_ACQUIRED( lck ); 155 } 156 157 void 158 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 159 { 160 __kmp_acquire_tas_lock_timed_template( lck, gtid ); 161 } 162 163 static void 164 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 165 { 166 if ( __kmp_env_consistency_check ) { 167 char const * const func = "omp_set_lock"; 168 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 169 && __kmp_is_tas_lock_nestable( lck ) ) { 170 KMP_FATAL( LockNestableUsedAsSimple, func ); 171 } 172 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) { 173 KMP_FATAL( LockIsAlreadyOwned, func ); 174 } 175 } 176 __kmp_acquire_tas_lock( lck, gtid ); 177 } 178 179 int 180 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 181 { 182 if ( ( lck->lk.poll == 0 ) 183 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) { 184 KMP_FSYNC_ACQUIRED( lck ); 185 return TRUE; 186 } 187 return FALSE; 188 } 189 190 static int 191 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 192 { 193 if ( __kmp_env_consistency_check ) { 194 char const * const func = "omp_test_lock"; 195 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 196 && __kmp_is_tas_lock_nestable( lck ) ) { 197 KMP_FATAL( LockNestableUsedAsSimple, func ); 198 } 199 } 200 return __kmp_test_tas_lock( lck, gtid ); 201 } 202 203 void 204 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 205 { 206 KMP_MB(); /* Flush all pending memory write invalidates. */ 207 208 KMP_FSYNC_RELEASING(lck); 209 KMP_ST_REL32( &(lck->lk.poll), 0 ); 210 211 KMP_MB(); /* Flush all pending memory write invalidates. */ 212 213 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? 
__kmp_avail_proc : 214 __kmp_xproc ) ); 215 } 216 217 static void 218 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 219 { 220 if ( __kmp_env_consistency_check ) { 221 char const * const func = "omp_unset_lock"; 222 KMP_MB(); /* in case another processor initialized lock */ 223 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 224 && __kmp_is_tas_lock_nestable( lck ) ) { 225 KMP_FATAL( LockNestableUsedAsSimple, func ); 226 } 227 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 228 KMP_FATAL( LockUnsettingFree, func ); 229 } 230 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 ) 231 && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) { 232 KMP_FATAL( LockUnsettingSetByAnother, func ); 233 } 234 } 235 __kmp_release_tas_lock( lck, gtid ); 236 } 237 238 void 239 __kmp_init_tas_lock( kmp_tas_lock_t * lck ) 240 { 241 TCW_4( lck->lk.poll, 0 ); 242 } 243 244 static void 245 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck ) 246 { 247 __kmp_init_tas_lock( lck ); 248 } 249 250 void 251 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ) 252 { 253 lck->lk.poll = 0; 254 } 255 256 static void 257 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck ) 258 { 259 if ( __kmp_env_consistency_check ) { 260 char const * const func = "omp_destroy_lock"; 261 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 262 && __kmp_is_tas_lock_nestable( lck ) ) { 263 KMP_FATAL( LockNestableUsedAsSimple, func ); 264 } 265 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 266 KMP_FATAL( LockStillOwned, func ); 267 } 268 } 269 __kmp_destroy_tas_lock( lck ); 270 } 271 272 273 // 274 // nested test and set locks 275 // 276 277 void 278 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 279 { 280 KMP_DEBUG_ASSERT( gtid >= 0 ); 281 282 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 283 lck->lk.depth_locked += 1; 284 } 285 else { 286 __kmp_acquire_tas_lock_timed_template( lck, gtid ); 287 lck->lk.depth_locked = 1; 288 } 289 } 290 291 static void 292 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 293 { 294 if ( __kmp_env_consistency_check ) { 295 char const * const func = "omp_set_nest_lock"; 296 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 297 KMP_FATAL( LockSimpleUsedAsNestable, func ); 298 } 299 } 300 __kmp_acquire_nested_tas_lock( lck, gtid ); 301 } 302 303 int 304 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 305 { 306 int retval; 307 308 KMP_DEBUG_ASSERT( gtid >= 0 ); 309 310 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 311 retval = ++lck->lk.depth_locked; 312 } 313 else if ( !__kmp_test_tas_lock( lck, gtid ) ) { 314 retval = 0; 315 } 316 else { 317 KMP_MB(); 318 retval = lck->lk.depth_locked = 1; 319 } 320 return retval; 321 } 322 323 static int 324 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 325 { 326 if ( __kmp_env_consistency_check ) { 327 char const * const func = "omp_test_nest_lock"; 328 if ( ! 
__kmp_is_tas_lock_nestable( lck ) ) { 329 KMP_FATAL( LockSimpleUsedAsNestable, func ); 330 } 331 } 332 return __kmp_test_nested_tas_lock( lck, gtid ); 333 } 334 335 void 336 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 337 { 338 KMP_DEBUG_ASSERT( gtid >= 0 ); 339 340 KMP_MB(); 341 if ( --(lck->lk.depth_locked) == 0 ) { 342 __kmp_release_tas_lock( lck, gtid ); 343 } 344 } 345 346 static void 347 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 348 { 349 if ( __kmp_env_consistency_check ) { 350 char const * const func = "omp_unset_nest_lock"; 351 KMP_MB(); /* in case another processor initialized lock */ 352 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 353 KMP_FATAL( LockSimpleUsedAsNestable, func ); 354 } 355 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 356 KMP_FATAL( LockUnsettingFree, func ); 357 } 358 if ( __kmp_get_tas_lock_owner( lck ) != gtid ) { 359 KMP_FATAL( LockUnsettingSetByAnother, func ); 360 } 361 } 362 __kmp_release_nested_tas_lock( lck, gtid ); 363 } 364 365 void 366 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck ) 367 { 368 __kmp_init_tas_lock( lck ); 369 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 370 } 371 372 static void 373 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck ) 374 { 375 __kmp_init_nested_tas_lock( lck ); 376 } 377 378 void 379 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ) 380 { 381 __kmp_destroy_tas_lock( lck ); 382 lck->lk.depth_locked = 0; 383 } 384 385 static void 386 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) 387 { 388 if ( __kmp_env_consistency_check ) { 389 char const * const func = "omp_destroy_nest_lock"; 390 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 391 KMP_FATAL( LockSimpleUsedAsNestable, func ); 392 } 393 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 394 KMP_FATAL( LockStillOwned, func ); 395 } 396 } 397 __kmp_destroy_nested_tas_lock( lck ); 398 } 399 400 401 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 402 403 /* ------------------------------------------------------------------------ */ 404 /* futex locks */ 405 406 // futex locks are really just test and set locks, with a different method 407 // of handling contention. They take the same amount of space as test and 408 // set locks, and are allocated the same way (i.e. use the area allocated by 409 // the compiler for non-nested locks / allocate nested locks on the heap). 410 411 static kmp_int32 412 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck ) 413 { 414 return ( TCR_4( lck->lk.poll ) >> 1 ) - 1; 415 } 416 417 static inline bool 418 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck ) 419 { 420 return lck->lk.depth_locked != -1; 421 } 422 423 __forceinline static void 424 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid ) 425 { 426 kmp_int32 gtid_code = ( gtid + 1 ) << 1; 427 428 KMP_MB(); 429 430 #ifdef USE_LOCK_PROFILE 431 kmp_uint32 curr = TCR_4( lck->lk.poll ); 432 if ( ( curr != 0 ) && ( curr != gtid_code ) ) 433 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 434 /* else __kmp_printf( "." 
);*/ 435 #endif /* USE_LOCK_PROFILE */ 436 437 KMP_FSYNC_PREPARE( lck ); 438 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", 439 lck, lck->lk.poll, gtid ) ); 440 441 kmp_int32 poll_val; 442 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0, 443 gtid_code ) ) != 0 ) { 444 kmp_int32 cond = poll_val & 1; 445 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", 446 lck, gtid, poll_val, cond ) ); 447 448 // 449 // NOTE: if you try to use the following condition for this branch 450 // 451 // if ( poll_val & 1 == 0 ) 452 // 453 // Then the 12.0 compiler has a bug where the following block will 454 // always be skipped, regardless of the value of the LSB of poll_val. 455 // 456 if ( ! cond ) { 457 // 458 // Try to set the lsb in the poll to indicate to the owner 459 // thread that they need to wake this thread up. 460 // 461 if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), 462 poll_val, poll_val | 1 ) ) { 463 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", 464 lck, lck->lk.poll, gtid ) ); 465 continue; 466 } 467 poll_val |= 1; 468 469 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", 470 lck, lck->lk.poll, gtid ) ); 471 } 472 473 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", 474 lck, gtid, poll_val ) ); 475 476 kmp_int32 rc; 477 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, 478 poll_val, NULL, NULL, 0 ) ) != 0 ) { 479 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n", 480 lck, gtid, poll_val, rc, errno ) ); 481 continue; 482 } 483 484 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", 485 lck, gtid, poll_val ) ); 486 // 487 // This thread has now done a successful futex wait call and was 488 // entered on the OS futex queue. We must now perform a futex 489 // wake call when releasing the lock, as we have no idea how many 490 // other threads are in the queue. 
491 // 492 gtid_code |= 1; 493 } 494 495 KMP_FSYNC_ACQUIRED( lck ); 496 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", 497 lck, lck->lk.poll, gtid ) ); 498 } 499 500 void 501 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 502 { 503 __kmp_acquire_futex_lock_timed_template( lck, gtid ); 504 } 505 506 static void 507 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 508 { 509 if ( __kmp_env_consistency_check ) { 510 char const * const func = "omp_set_lock"; 511 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 512 && __kmp_is_futex_lock_nestable( lck ) ) { 513 KMP_FATAL( LockNestableUsedAsSimple, func ); 514 } 515 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) { 516 KMP_FATAL( LockIsAlreadyOwned, func ); 517 } 518 } 519 __kmp_acquire_futex_lock( lck, gtid ); 520 } 521 522 int 523 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 524 { 525 if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) { 526 KMP_FSYNC_ACQUIRED( lck ); 527 return TRUE; 528 } 529 return FALSE; 530 } 531 532 static int 533 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 534 { 535 if ( __kmp_env_consistency_check ) { 536 char const * const func = "omp_test_lock"; 537 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 538 && __kmp_is_futex_lock_nestable( lck ) ) { 539 KMP_FATAL( LockNestableUsedAsSimple, func ); 540 } 541 } 542 return __kmp_test_futex_lock( lck, gtid ); 543 } 544 545 void 546 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 547 { 548 KMP_MB(); /* Flush all pending memory write invalidates. */ 549 550 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", 551 lck, lck->lk.poll, gtid ) ); 552 553 KMP_FSYNC_RELEASING(lck); 554 555 kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 ); 556 557 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", 558 lck, gtid, poll_val ) ); 559 560 if ( poll_val & 1 ) { 561 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", 562 lck, gtid ) ); 563 syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 ); 564 } 565 566 KMP_MB(); /* Flush all pending memory write invalidates. */ 567 568 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", 569 lck, lck->lk.poll, gtid ) ); 570 571 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? 
__kmp_avail_proc : 572 __kmp_xproc ) ); 573 } 574 575 static void 576 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 577 { 578 if ( __kmp_env_consistency_check ) { 579 char const * const func = "omp_unset_lock"; 580 KMP_MB(); /* in case another processor initialized lock */ 581 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 582 && __kmp_is_futex_lock_nestable( lck ) ) { 583 KMP_FATAL( LockNestableUsedAsSimple, func ); 584 } 585 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 586 KMP_FATAL( LockUnsettingFree, func ); 587 } 588 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 ) 589 && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) { 590 KMP_FATAL( LockUnsettingSetByAnother, func ); 591 } 592 } 593 __kmp_release_futex_lock( lck, gtid ); 594 } 595 596 void 597 __kmp_init_futex_lock( kmp_futex_lock_t * lck ) 598 { 599 TCW_4( lck->lk.poll, 0 ); 600 } 601 602 static void 603 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck ) 604 { 605 __kmp_init_futex_lock( lck ); 606 } 607 608 void 609 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ) 610 { 611 lck->lk.poll = 0; 612 } 613 614 static void 615 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck ) 616 { 617 if ( __kmp_env_consistency_check ) { 618 char const * const func = "omp_destroy_lock"; 619 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 620 && __kmp_is_futex_lock_nestable( lck ) ) { 621 KMP_FATAL( LockNestableUsedAsSimple, func ); 622 } 623 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 624 KMP_FATAL( LockStillOwned, func ); 625 } 626 } 627 __kmp_destroy_futex_lock( lck ); 628 } 629 630 631 // 632 // nested futex locks 633 // 634 635 void 636 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 637 { 638 KMP_DEBUG_ASSERT( gtid >= 0 ); 639 640 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 641 lck->lk.depth_locked += 1; 642 } 643 else { 644 __kmp_acquire_futex_lock_timed_template( lck, gtid ); 645 lck->lk.depth_locked = 1; 646 } 647 } 648 649 static void 650 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 651 { 652 if ( __kmp_env_consistency_check ) { 653 char const * const func = "omp_set_nest_lock"; 654 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 655 KMP_FATAL( LockSimpleUsedAsNestable, func ); 656 } 657 } 658 __kmp_acquire_nested_futex_lock( lck, gtid ); 659 } 660 661 int 662 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 663 { 664 int retval; 665 666 KMP_DEBUG_ASSERT( gtid >= 0 ); 667 668 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 669 retval = ++lck->lk.depth_locked; 670 } 671 else if ( !__kmp_test_futex_lock( lck, gtid ) ) { 672 retval = 0; 673 } 674 else { 675 KMP_MB(); 676 retval = lck->lk.depth_locked = 1; 677 } 678 return retval; 679 } 680 681 static int 682 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 683 { 684 if ( __kmp_env_consistency_check ) { 685 char const * const func = "omp_test_nest_lock"; 686 if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { 687 KMP_FATAL( LockSimpleUsedAsNestable, func ); 688 } 689 } 690 return __kmp_test_nested_futex_lock( lck, gtid ); 691 } 692 693 void 694 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 695 { 696 KMP_DEBUG_ASSERT( gtid >= 0 ); 697 698 KMP_MB(); 699 if ( --(lck->lk.depth_locked) == 0 ) { 700 __kmp_release_futex_lock( lck, gtid ); 701 } 702 } 703 704 static void 705 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 706 { 707 if ( __kmp_env_consistency_check ) { 708 char const * const func = "omp_unset_nest_lock"; 709 KMP_MB(); /* in case another processor initialized lock */ 710 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 711 KMP_FATAL( LockSimpleUsedAsNestable, func ); 712 } 713 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 714 KMP_FATAL( LockUnsettingFree, func ); 715 } 716 if ( __kmp_get_futex_lock_owner( lck ) != gtid ) { 717 KMP_FATAL( LockUnsettingSetByAnother, func ); 718 } 719 } 720 __kmp_release_nested_futex_lock( lck, gtid ); 721 } 722 723 void 724 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck ) 725 { 726 __kmp_init_futex_lock( lck ); 727 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 728 } 729 730 static void 731 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck ) 732 { 733 __kmp_init_nested_futex_lock( lck ); 734 } 735 736 void 737 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ) 738 { 739 __kmp_destroy_futex_lock( lck ); 740 lck->lk.depth_locked = 0; 741 } 742 743 static void 744 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck ) 745 { 746 if ( __kmp_env_consistency_check ) { 747 char const * const func = "omp_destroy_nest_lock"; 748 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 749 KMP_FATAL( LockSimpleUsedAsNestable, func ); 750 } 751 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 752 KMP_FATAL( LockStillOwned, func ); 753 } 754 } 755 __kmp_destroy_nested_futex_lock( lck ); 756 } 757 758 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 759 760 761 /* ------------------------------------------------------------------------ */ 762 /* ticket (bakery) locks */ 763 764 static kmp_int32 765 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck ) 766 { 767 return TCR_4( lck->lk.owner_id ) - 1; 768 } 769 770 static inline bool 771 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck ) 772 { 773 return lck->lk.depth_locked != -1; 774 } 775 776 static kmp_uint32 777 __kmp_bakery_check(kmp_uint value, kmp_uint checker) 778 { 779 register kmp_uint32 pause; 780 781 if (value == checker) { 782 return TRUE; 783 } 784 for (pause = checker - value; pause != 0; --pause) { 785 __kmp_static_delay(TRUE); 786 } 787 return FALSE; 788 } 789 790 __forceinline static void 791 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 792 { 793 kmp_uint32 my_ticket; 794 KMP_MB(); 795 796 my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket ); 797 798 #ifdef USE_LOCK_PROFILE 799 if ( TCR_4( lck->lk.now_serving ) != my_ticket ) 800 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 801 /* else __kmp_printf( "." 
);*/ 802 #endif /* USE_LOCK_PROFILE */ 803 804 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { 805 KMP_FSYNC_ACQUIRED(lck); 806 return; 807 } 808 KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck ); 809 KMP_FSYNC_ACQUIRED(lck); 810 } 811 812 void 813 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 814 { 815 __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 816 } 817 818 static void 819 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 820 { 821 if ( __kmp_env_consistency_check ) { 822 char const * const func = "omp_set_lock"; 823 if ( lck->lk.initialized != lck ) { 824 KMP_FATAL( LockIsUninitialized, func ); 825 } 826 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 827 KMP_FATAL( LockNestableUsedAsSimple, func ); 828 } 829 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) { 830 KMP_FATAL( LockIsAlreadyOwned, func ); 831 } 832 } 833 834 __kmp_acquire_ticket_lock( lck, gtid ); 835 836 if ( __kmp_env_consistency_check ) { 837 lck->lk.owner_id = gtid + 1; 838 } 839 } 840 841 int 842 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 843 { 844 kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket ); 845 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { 846 kmp_uint32 next_ticket = my_ticket + 1; 847 if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket, 848 my_ticket, next_ticket ) ) { 849 KMP_FSYNC_ACQUIRED( lck ); 850 return TRUE; 851 } 852 } 853 return FALSE; 854 } 855 856 static int 857 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 858 { 859 if ( __kmp_env_consistency_check ) { 860 char const * const func = "omp_test_lock"; 861 if ( lck->lk.initialized != lck ) { 862 KMP_FATAL( LockIsUninitialized, func ); 863 } 864 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 865 KMP_FATAL( LockNestableUsedAsSimple, func ); 866 } 867 } 868 869 int retval = __kmp_test_ticket_lock( lck, gtid ); 870 871 if ( __kmp_env_consistency_check && retval ) { 872 lck->lk.owner_id = gtid + 1; 873 } 874 return retval; 875 } 876 877 void 878 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 879 { 880 kmp_uint32 distance; 881 882 KMP_MB(); /* Flush all pending memory write invalidates. */ 883 884 KMP_FSYNC_RELEASING(lck); 885 distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) ); 886 887 KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 ); 888 889 KMP_MB(); /* Flush all pending memory write invalidates. */ 890 891 KMP_YIELD( distance 892 > (kmp_uint32) (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc) ); 893 } 894 895 static void 896 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 897 { 898 if ( __kmp_env_consistency_check ) { 899 char const * const func = "omp_unset_lock"; 900 KMP_MB(); /* in case another processor initialized lock */ 901 if ( lck->lk.initialized != lck ) { 902 KMP_FATAL( LockIsUninitialized, func ); 903 } 904 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 905 KMP_FATAL( LockNestableUsedAsSimple, func ); 906 } 907 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { 908 KMP_FATAL( LockUnsettingFree, func ); 909 } 910 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 ) 911 && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) { 912 KMP_FATAL( LockUnsettingSetByAnother, func ); 913 } 914 lck->lk.owner_id = 0; 915 } 916 __kmp_release_ticket_lock( lck, gtid ); 917 } 918 919 void 920 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck ) 921 { 922 lck->lk.location = NULL; 923 TCW_4( lck->lk.next_ticket, 0 ); 924 TCW_4( lck->lk.now_serving, 0 ); 925 lck->lk.owner_id = 0; // no thread owns the lock. 926 lck->lk.depth_locked = -1; // -1 => not a nested lock. 927 lck->lk.initialized = (kmp_ticket_lock *)lck; 928 } 929 930 static void 931 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) 932 { 933 __kmp_init_ticket_lock( lck ); 934 } 935 936 void 937 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ) 938 { 939 lck->lk.initialized = NULL; 940 lck->lk.location = NULL; 941 lck->lk.next_ticket = 0; 942 lck->lk.now_serving = 0; 943 lck->lk.owner_id = 0; 944 lck->lk.depth_locked = -1; 945 } 946 947 static void 948 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) 949 { 950 if ( __kmp_env_consistency_check ) { 951 char const * const func = "omp_destroy_lock"; 952 if ( lck->lk.initialized != lck ) { 953 KMP_FATAL( LockIsUninitialized, func ); 954 } 955 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 956 KMP_FATAL( LockNestableUsedAsSimple, func ); 957 } 958 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { 959 KMP_FATAL( LockStillOwned, func ); 960 } 961 } 962 __kmp_destroy_ticket_lock( lck ); 963 } 964 965 966 // 967 // nested ticket locks 968 // 969 970 void 971 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 972 { 973 KMP_DEBUG_ASSERT( gtid >= 0 ); 974 975 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 976 lck->lk.depth_locked += 1; 977 } 978 else { 979 __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 980 KMP_MB(); 981 lck->lk.depth_locked = 1; 982 KMP_MB(); 983 lck->lk.owner_id = gtid + 1; 984 } 985 } 986 987 static void 988 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 989 { 990 if ( __kmp_env_consistency_check ) { 991 char const * const func = "omp_set_nest_lock"; 992 if ( lck->lk.initialized != lck ) { 993 KMP_FATAL( LockIsUninitialized, func ); 994 } 995 if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { 996 KMP_FATAL( LockSimpleUsedAsNestable, func ); 997 } 998 } 999 __kmp_acquire_nested_ticket_lock( lck, gtid ); 1000 } 1001 1002 int 1003 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 1004 { 1005 int retval; 1006 1007 KMP_DEBUG_ASSERT( gtid >= 0 ); 1008 1009 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 1010 retval = ++lck->lk.depth_locked; 1011 } 1012 else if ( !__kmp_test_ticket_lock( lck, gtid ) ) { 1013 retval = 0; 1014 } 1015 else { 1016 KMP_MB(); 1017 retval = lck->lk.depth_locked = 1; 1018 KMP_MB(); 1019 lck->lk.owner_id = gtid + 1; 1020 } 1021 return retval; 1022 } 1023 1024 static int 1025 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, 1026 kmp_int32 gtid ) 1027 { 1028 if ( __kmp_env_consistency_check ) { 1029 char const * const func = "omp_test_nest_lock"; 1030 if ( lck->lk.initialized != lck ) { 1031 KMP_FATAL( LockIsUninitialized, func ); 1032 } 1033 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 1034 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1035 } 1036 } 1037 return __kmp_test_nested_ticket_lock( lck, gtid ); 1038 } 1039 1040 void 1041 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 1042 { 1043 KMP_DEBUG_ASSERT( gtid >= 0 ); 1044 1045 KMP_MB(); 1046 if ( --(lck->lk.depth_locked) == 0 ) { 1047 KMP_MB(); 1048 lck->lk.owner_id = 0; 1049 __kmp_release_ticket_lock( lck, gtid ); 1050 } 1051 } 1052 1053 static void 1054 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 1055 { 1056 if ( __kmp_env_consistency_check ) { 1057 char const * const func = "omp_unset_nest_lock"; 1058 KMP_MB(); /* in case another processor initialized lock */ 1059 if ( lck->lk.initialized != lck ) { 1060 KMP_FATAL( LockIsUninitialized, func ); 1061 } 1062 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 1063 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1064 } 1065 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { 1066 KMP_FATAL( LockUnsettingFree, func ); 1067 } 1068 if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) { 1069 KMP_FATAL( LockUnsettingSetByAnother, func ); 1070 } 1071 } 1072 __kmp_release_nested_ticket_lock( lck, gtid ); 1073 } 1074 1075 void 1076 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck ) 1077 { 1078 __kmp_init_ticket_lock( lck ); 1079 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1080 } 1081 1082 static void 1083 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) 1084 { 1085 __kmp_init_nested_ticket_lock( lck ); 1086 } 1087 1088 void 1089 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ) 1090 { 1091 __kmp_destroy_ticket_lock( lck ); 1092 lck->lk.depth_locked = 0; 1093 } 1094 1095 static void 1096 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) 1097 { 1098 if ( __kmp_env_consistency_check ) { 1099 char const * const func = "omp_destroy_nest_lock"; 1100 if ( lck->lk.initialized != lck ) { 1101 KMP_FATAL( LockIsUninitialized, func ); 1102 } 1103 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 1104 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1105 } 1106 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { 1107 KMP_FATAL( LockStillOwned, func ); 1108 } 1109 } 1110 __kmp_destroy_nested_ticket_lock( lck ); 1111 } 1112 1113 1114 // 1115 // access functions to fields which don't exist for all lock kinds. 
1116 // 1117 1118 static int 1119 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck ) 1120 { 1121 return lck == lck->lk.initialized; 1122 } 1123 1124 static const ident_t * 1125 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck ) 1126 { 1127 return lck->lk.location; 1128 } 1129 1130 static void 1131 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc ) 1132 { 1133 lck->lk.location = loc; 1134 } 1135 1136 static kmp_lock_flags_t 1137 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck ) 1138 { 1139 return lck->lk.flags; 1140 } 1141 1142 static void 1143 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags ) 1144 { 1145 lck->lk.flags = flags; 1146 } 1147 1148 /* ------------------------------------------------------------------------ */ 1149 /* queuing locks */ 1150 1151 /* 1152 * First the states 1153 * (head,tail) = 0, 0 means lock is unheld, nobody on queue 1154 * UINT_MAX or -1, 0 means lock is held, nobody on queue 1155 * h, h means lock is held or about to transition, 1 element on queue 1156 * h, t h <> t, means lock is held or about to transition, >1 elements on queue 1157 * 1158 * Now the transitions 1159 * Acquire(0,0) = -1 ,0 1160 * Release(0,0) = Error 1161 * Acquire(-1,0) = h ,h h > 0 1162 * Release(-1,0) = 0 ,0 1163 * Acquire(h,h) = h ,t h > 0, t > 0, h <> t 1164 * Release(h,h) = -1 ,0 h > 0 1165 * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' 1166 * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t 1167 * 1168 * And pictorially 1169 * 1170 * 1171 * +-----+ 1172 * | 0, 0|------- release -------> Error 1173 * +-----+ 1174 * | ^ 1175 * acquire| |release 1176 * | | 1177 * | | 1178 * v | 1179 * +-----+ 1180 * |-1, 0| 1181 * +-----+ 1182 * | ^ 1183 * acquire| |release 1184 * | | 1185 * | | 1186 * v | 1187 * +-----+ 1188 * | h, h| 1189 * +-----+ 1190 * | ^ 1191 * acquire| |release 1192 * | | 1193 * | | 1194 * v | 1195 * +-----+ 1196 * | h, t|----- acquire, release loopback ---+ 1197 * +-----+ | 1198 * ^ | 1199 * | | 1200 * +------------------------------------+ 1201 * 1202 */ 1203 1204 #ifdef DEBUG_QUEUING_LOCKS 1205 1206 /* Stuff for circular trace buffer */ 1207 #define TRACE_BUF_ELE 1024 1208 static char traces[TRACE_BUF_ELE][128] = { 0 } 1209 static int tc = 0; 1210 #define TRACE_LOCK(X,Y) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y ); 1211 #define TRACE_LOCK_T(X,Y,Z) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z ); 1212 #define TRACE_LOCK_HT(X,Y,Z,Q) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q ); 1213 1214 static void 1215 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid, 1216 kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id ) 1217 { 1218 kmp_int32 t, i; 1219 1220 __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! 
\n" ); 1221 1222 i = tc % TRACE_BUF_ELE; 1223 __kmp_printf_no_lock( "%s\n", traces[i] ); 1224 i = (i+1) % TRACE_BUF_ELE; 1225 while ( i != (tc % TRACE_BUF_ELE) ) { 1226 __kmp_printf_no_lock( "%s", traces[i] ); 1227 i = (i+1) % TRACE_BUF_ELE; 1228 } 1229 __kmp_printf_no_lock( "\n" ); 1230 1231 __kmp_printf_no_lock( 1232 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n", 1233 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting, 1234 head_id, tail_id ); 1235 1236 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id ); 1237 1238 if ( lck->lk.head_id >= 1 ) { 1239 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting; 1240 while (t > 0) { 1241 __kmp_printf_no_lock( "-> %d ", t ); 1242 t = __kmp_threads[t-1]->th.th_next_waiting; 1243 } 1244 } 1245 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id ); 1246 __kmp_printf_no_lock( "\n\n" ); 1247 } 1248 1249 #endif /* DEBUG_QUEUING_LOCKS */ 1250 1251 static kmp_int32 1252 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck ) 1253 { 1254 return TCR_4( lck->lk.owner_id ) - 1; 1255 } 1256 1257 static inline bool 1258 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck ) 1259 { 1260 return lck->lk.depth_locked != -1; 1261 } 1262 1263 /* Acquire a lock using a the queuing lock implementation */ 1264 template <bool takeTime> 1265 /* [TLW] The unused template above is left behind because of what BEB believes is a 1266 potential compiler problem with __forceinline. */ 1267 __forceinline static void 1268 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck, 1269 kmp_int32 gtid ) 1270 { 1271 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid ); 1272 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1273 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1274 volatile kmp_uint32 *spin_here_p; 1275 kmp_int32 need_mf = 1; 1276 1277 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1278 1279 KMP_FSYNC_PREPARE( lck ); 1280 KMP_DEBUG_ASSERT( this_thr != NULL ); 1281 spin_here_p = & this_thr->th.th_spin_here; 1282 1283 #ifdef DEBUG_QUEUING_LOCKS 1284 TRACE_LOCK( gtid+1, "acq ent" ); 1285 if ( *spin_here_p ) 1286 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1287 if ( this_thr->th.th_next_waiting != 0 ) 1288 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1289 #endif 1290 KMP_DEBUG_ASSERT( !*spin_here_p ); 1291 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1292 1293 1294 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p 1295 that may follow, not just in execution order, but also in visibility order. This way, 1296 when a releasing thread observes the changes to the queue by this thread, it can 1297 rightly assume that spin_here_p has already been set to TRUE, so that when it sets 1298 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p 1299 to FALSE before this thread sets it to TRUE, this thread will hang. 
1300 */ 1301 *spin_here_p = TRUE; /* before enqueuing to prevent race */ 1302 1303 while( 1 ) { 1304 kmp_int32 enqueued; 1305 kmp_int32 head; 1306 kmp_int32 tail; 1307 1308 head = *head_id_p; 1309 1310 switch ( head ) { 1311 1312 case -1: 1313 { 1314 #ifdef DEBUG_QUEUING_LOCKS 1315 tail = *tail_id_p; 1316 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1317 #endif 1318 tail = 0; /* to make sure next link asynchronously read is not set accidentally; 1319 this assignment prevents us from entering the if ( t > 0 ) 1320 condition in the enqueued case below, which is not necessary for 1321 this state transition */ 1322 1323 need_mf = 0; 1324 /* try (-1,0)->(tid,tid) */ 1325 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p, 1326 KMP_PACK_64( -1, 0 ), 1327 KMP_PACK_64( gtid+1, gtid+1 ) ); 1328 #ifdef DEBUG_QUEUING_LOCKS 1329 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" ); 1330 #endif 1331 } 1332 break; 1333 1334 default: 1335 { 1336 tail = *tail_id_p; 1337 KMP_DEBUG_ASSERT( tail != gtid + 1 ); 1338 1339 #ifdef DEBUG_QUEUING_LOCKS 1340 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1341 #endif 1342 1343 if ( tail == 0 ) { 1344 enqueued = FALSE; 1345 } 1346 else { 1347 need_mf = 0; 1348 /* try (h,t) or (h,h)->(h,tid) */ 1349 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 ); 1350 1351 #ifdef DEBUG_QUEUING_LOCKS 1352 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" ); 1353 #endif 1354 } 1355 } 1356 break; 1357 1358 case 0: /* empty queue */ 1359 { 1360 kmp_int32 grabbed_lock; 1361 1362 #ifdef DEBUG_QUEUING_LOCKS 1363 tail = *tail_id_p; 1364 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1365 #endif 1366 /* try (0,0)->(-1,0) */ 1367 1368 /* only legal transition out of head = 0 is head = -1 with no change to tail */ 1369 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ); 1370 1371 if ( grabbed_lock ) { 1372 1373 *spin_here_p = FALSE; 1374 1375 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", 1376 lck, gtid )); 1377 #ifdef DEBUG_QUEUING_LOCKS 1378 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 ); 1379 #endif 1380 KMP_FSYNC_ACQUIRED( lck ); 1381 return; /* lock holder cannot be on queue */ 1382 } 1383 enqueued = FALSE; 1384 } 1385 break; 1386 } 1387 1388 if ( enqueued ) { 1389 if ( tail > 0 ) { 1390 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 ); 1391 KMP_ASSERT( tail_thr != NULL ); 1392 tail_thr->th.th_next_waiting = gtid+1; 1393 /* corresponding wait for this write in release code */ 1394 } 1395 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid )); 1396 1397 1398 /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for 1399 * throughput only here. 
1400 */ 1401 KMP_MB(); 1402 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); 1403 1404 #ifdef DEBUG_QUEUING_LOCKS 1405 TRACE_LOCK( gtid+1, "acq spin" ); 1406 1407 if ( this_thr->th.th_next_waiting != 0 ) 1408 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1409 #endif 1410 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1411 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n", 1412 lck, gtid )); 1413 1414 #ifdef DEBUG_QUEUING_LOCKS 1415 TRACE_LOCK( gtid+1, "acq exit 2" ); 1416 #endif 1417 /* got lock, we were dequeued by the thread that released lock */ 1418 return; 1419 } 1420 1421 /* Yield if number of threads > number of logical processors */ 1422 /* ToDo: Not sure why this should only be in oversubscription case, 1423 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ 1424 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc : 1425 __kmp_xproc ) ); 1426 #ifdef DEBUG_QUEUING_LOCKS 1427 TRACE_LOCK( gtid+1, "acq retry" ); 1428 #endif 1429 1430 } 1431 KMP_ASSERT2( 0, "should not get here" ); 1432 } 1433 1434 void 1435 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1436 { 1437 KMP_DEBUG_ASSERT( gtid >= 0 ); 1438 1439 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1440 } 1441 1442 static void 1443 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1444 kmp_int32 gtid ) 1445 { 1446 if ( __kmp_env_consistency_check ) { 1447 char const * const func = "omp_set_lock"; 1448 if ( lck->lk.initialized != lck ) { 1449 KMP_FATAL( LockIsUninitialized, func ); 1450 } 1451 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1452 KMP_FATAL( LockNestableUsedAsSimple, func ); 1453 } 1454 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1455 KMP_FATAL( LockIsAlreadyOwned, func ); 1456 } 1457 } 1458 1459 __kmp_acquire_queuing_lock( lck, gtid ); 1460 1461 if ( __kmp_env_consistency_check ) { 1462 lck->lk.owner_id = gtid + 1; 1463 } 1464 } 1465 1466 int 1467 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1468 { 1469 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1470 kmp_int32 head; 1471 #ifdef KMP_DEBUG 1472 kmp_info_t *this_thr; 1473 #endif 1474 1475 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid )); 1476 KMP_DEBUG_ASSERT( gtid >= 0 ); 1477 #ifdef KMP_DEBUG 1478 this_thr = __kmp_thread_from_gtid( gtid ); 1479 KMP_DEBUG_ASSERT( this_thr != NULL ); 1480 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1481 #endif 1482 1483 head = *head_id_p; 1484 1485 if ( head == 0 ) { /* nobody on queue, nobody holding */ 1486 1487 /* try (0,0)->(-1,0) */ 1488 1489 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) { 1490 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid )); 1491 KMP_FSYNC_ACQUIRED(lck); 1492 return TRUE; 1493 } 1494 } 1495 1496 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid )); 1497 return FALSE; 1498 } 1499 1500 static int 1501 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1502 { 1503 if ( __kmp_env_consistency_check ) { 1504 char const * const func = "omp_test_lock"; 1505 if ( lck->lk.initialized != lck ) { 1506 KMP_FATAL( LockIsUninitialized, func ); 1507 } 1508 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1509 KMP_FATAL( LockNestableUsedAsSimple, func ); 1510 } 1511 } 1512 1513 int retval = __kmp_test_queuing_lock( lck, gtid ); 1514 1515 if ( __kmp_env_consistency_check && retval ) { 1516 lck->lk.owner_id = gtid + 1; 
1517 } 1518 return retval; 1519 } 1520 1521 void 1522 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1523 { 1524 register kmp_info_t *this_thr; 1525 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1526 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1527 1528 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1529 KMP_DEBUG_ASSERT( gtid >= 0 ); 1530 this_thr = __kmp_thread_from_gtid( gtid ); 1531 KMP_DEBUG_ASSERT( this_thr != NULL ); 1532 #ifdef DEBUG_QUEUING_LOCKS 1533 TRACE_LOCK( gtid+1, "rel ent" ); 1534 1535 if ( this_thr->th.th_spin_here ) 1536 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1537 if ( this_thr->th.th_next_waiting != 0 ) 1538 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1539 #endif 1540 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1541 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1542 1543 KMP_FSYNC_RELEASING(lck); 1544 1545 while( 1 ) { 1546 kmp_int32 dequeued; 1547 kmp_int32 head; 1548 kmp_int32 tail; 1549 1550 head = *head_id_p; 1551 1552 #ifdef DEBUG_QUEUING_LOCKS 1553 tail = *tail_id_p; 1554 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail ); 1555 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1556 #endif 1557 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */ 1558 1559 if ( head == -1 ) { /* nobody on queue */ 1560 1561 /* try (-1,0)->(0,0) */ 1562 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) { 1563 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1564 lck, gtid )); 1565 #ifdef DEBUG_QUEUING_LOCKS 1566 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 ); 1567 #endif 1568 return; 1569 } 1570 dequeued = FALSE; 1571 1572 } 1573 else { 1574 1575 tail = *tail_id_p; 1576 if ( head == tail ) { /* only one thread on the queue */ 1577 1578 #ifdef DEBUG_QUEUING_LOCKS 1579 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1580 #endif 1581 KMP_DEBUG_ASSERT( head > 0 ); 1582 1583 /* try (h,h)->(-1,0) */ 1584 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p, 1585 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) ); 1586 #ifdef DEBUG_QUEUING_LOCKS 1587 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" ); 1588 #endif 1589 1590 } 1591 else { 1592 volatile kmp_int32 *waiting_id_p; 1593 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1594 KMP_DEBUG_ASSERT( head_thr != NULL ); 1595 waiting_id_p = & head_thr->th.th_next_waiting; 1596 1597 /* Does this require synchronous reads? */ 1598 #ifdef DEBUG_QUEUING_LOCKS 1599 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1600 #endif 1601 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1602 1603 /* try (h,t)->(h',t) or (t,t) */ 1604 1605 KMP_MB(); 1606 /* make sure enqueuing thread has time to update next waiting thread field */ 1607 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL); 1608 #ifdef DEBUG_QUEUING_LOCKS 1609 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" ); 1610 #endif 1611 dequeued = TRUE; 1612 } 1613 } 1614 1615 if ( dequeued ) { 1616 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1617 KMP_DEBUG_ASSERT( head_thr != NULL ); 1618 1619 /* Does this require synchronous reads? 
*/ 1620 #ifdef DEBUG_QUEUING_LOCKS 1621 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1622 #endif 1623 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1624 1625 /* For clean code only. 1626 * Thread not released until next statement prevents race with acquire code. 1627 */ 1628 head_thr->th.th_next_waiting = 0; 1629 #ifdef DEBUG_QUEUING_LOCKS 1630 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head ); 1631 #endif 1632 1633 KMP_MB(); 1634 /* reset spin value */ 1635 head_thr->th.th_spin_here = FALSE; 1636 1637 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n", 1638 lck, gtid )); 1639 #ifdef DEBUG_QUEUING_LOCKS 1640 TRACE_LOCK( gtid+1, "rel exit 2" ); 1641 #endif 1642 return; 1643 } 1644 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */ 1645 1646 #ifdef DEBUG_QUEUING_LOCKS 1647 TRACE_LOCK( gtid+1, "rel retry" ); 1648 #endif 1649 1650 } /* while */ 1651 KMP_ASSERT2( 0, "should not get here" ); 1652 } 1653 1654 static void 1655 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1656 kmp_int32 gtid ) 1657 { 1658 if ( __kmp_env_consistency_check ) { 1659 char const * const func = "omp_unset_lock"; 1660 KMP_MB(); /* in case another processor initialized lock */ 1661 if ( lck->lk.initialized != lck ) { 1662 KMP_FATAL( LockIsUninitialized, func ); 1663 } 1664 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1665 KMP_FATAL( LockNestableUsedAsSimple, func ); 1666 } 1667 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1668 KMP_FATAL( LockUnsettingFree, func ); 1669 } 1670 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1671 KMP_FATAL( LockUnsettingSetByAnother, func ); 1672 } 1673 lck->lk.owner_id = 0; 1674 } 1675 __kmp_release_queuing_lock( lck, gtid ); 1676 } 1677 1678 void 1679 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ) 1680 { 1681 lck->lk.location = NULL; 1682 lck->lk.head_id = 0; 1683 lck->lk.tail_id = 0; 1684 lck->lk.next_ticket = 0; 1685 lck->lk.now_serving = 0; 1686 lck->lk.owner_id = 0; // no thread owns the lock. 1687 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
1688 lck->lk.initialized = lck; 1689 1690 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1691 } 1692 1693 static void 1694 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1695 { 1696 __kmp_init_queuing_lock( lck ); 1697 } 1698 1699 void 1700 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ) 1701 { 1702 lck->lk.initialized = NULL; 1703 lck->lk.location = NULL; 1704 lck->lk.head_id = 0; 1705 lck->lk.tail_id = 0; 1706 lck->lk.next_ticket = 0; 1707 lck->lk.now_serving = 0; 1708 lck->lk.owner_id = 0; 1709 lck->lk.depth_locked = -1; 1710 } 1711 1712 static void 1713 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1714 { 1715 if ( __kmp_env_consistency_check ) { 1716 char const * const func = "omp_destroy_lock"; 1717 if ( lck->lk.initialized != lck ) { 1718 KMP_FATAL( LockIsUninitialized, func ); 1719 } 1720 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1721 KMP_FATAL( LockNestableUsedAsSimple, func ); 1722 } 1723 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1724 KMP_FATAL( LockStillOwned, func ); 1725 } 1726 } 1727 __kmp_destroy_queuing_lock( lck ); 1728 } 1729 1730 1731 // 1732 // nested queuing locks 1733 // 1734 1735 void 1736 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1737 { 1738 KMP_DEBUG_ASSERT( gtid >= 0 ); 1739 1740 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1741 lck->lk.depth_locked += 1; 1742 } 1743 else { 1744 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1745 KMP_MB(); 1746 lck->lk.depth_locked = 1; 1747 KMP_MB(); 1748 lck->lk.owner_id = gtid + 1; 1749 } 1750 } 1751 1752 static void 1753 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1754 { 1755 if ( __kmp_env_consistency_check ) { 1756 char const * const func = "omp_set_nest_lock"; 1757 if ( lck->lk.initialized != lck ) { 1758 KMP_FATAL( LockIsUninitialized, func ); 1759 } 1760 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1761 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1762 } 1763 } 1764 __kmp_acquire_nested_queuing_lock( lck, gtid ); 1765 } 1766 1767 int 1768 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1769 { 1770 int retval; 1771 1772 KMP_DEBUG_ASSERT( gtid >= 0 ); 1773 1774 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1775 retval = ++lck->lk.depth_locked; 1776 } 1777 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) { 1778 retval = 0; 1779 } 1780 else { 1781 KMP_MB(); 1782 retval = lck->lk.depth_locked = 1; 1783 KMP_MB(); 1784 lck->lk.owner_id = gtid + 1; 1785 } 1786 return retval; 1787 } 1788 1789 static int 1790 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1791 kmp_int32 gtid ) 1792 { 1793 if ( __kmp_env_consistency_check ) { 1794 char const * const func = "omp_test_nest_lock"; 1795 if ( lck->lk.initialized != lck ) { 1796 KMP_FATAL( LockIsUninitialized, func ); 1797 } 1798 if ( ! 
__kmp_is_queuing_lock_nestable( lck ) ) { 1799 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1800 } 1801 } 1802 return __kmp_test_nested_queuing_lock( lck, gtid ); 1803 } 1804 1805 void 1806 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1807 { 1808 KMP_DEBUG_ASSERT( gtid >= 0 ); 1809 1810 KMP_MB(); 1811 if ( --(lck->lk.depth_locked) == 0 ) { 1812 KMP_MB(); 1813 lck->lk.owner_id = 0; 1814 __kmp_release_queuing_lock( lck, gtid ); 1815 } 1816 } 1817 1818 static void 1819 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1820 { 1821 if ( __kmp_env_consistency_check ) { 1822 char const * const func = "omp_unset_nest_lock"; 1823 KMP_MB(); /* in case another processor initialized lock */ 1824 if ( lck->lk.initialized != lck ) { 1825 KMP_FATAL( LockIsUninitialized, func ); 1826 } 1827 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1828 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1829 } 1830 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1831 KMP_FATAL( LockUnsettingFree, func ); 1832 } 1833 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1834 KMP_FATAL( LockUnsettingSetByAnother, func ); 1835 } 1836 } 1837 __kmp_release_nested_queuing_lock( lck, gtid ); 1838 } 1839 1840 void 1841 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck ) 1842 { 1843 __kmp_init_queuing_lock( lck ); 1844 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1845 } 1846 1847 static void 1848 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1849 { 1850 __kmp_init_nested_queuing_lock( lck ); 1851 } 1852 1853 void 1854 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ) 1855 { 1856 __kmp_destroy_queuing_lock( lck ); 1857 lck->lk.depth_locked = 0; 1858 } 1859 1860 static void 1861 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1862 { 1863 if ( __kmp_env_consistency_check ) { 1864 char const * const func = "omp_destroy_nest_lock"; 1865 if ( lck->lk.initialized != lck ) { 1866 KMP_FATAL( LockIsUninitialized, func ); 1867 } 1868 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1869 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1870 } 1871 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1872 KMP_FATAL( LockStillOwned, func ); 1873 } 1874 } 1875 __kmp_destroy_nested_queuing_lock( lck ); 1876 } 1877 1878 1879 // 1880 // access functions to fields which don't exist for all lock kinds. 1881 // 1882 1883 static int 1884 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck ) 1885 { 1886 return lck == lck->lk.initialized; 1887 } 1888 1889 static const ident_t * 1890 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck ) 1891 { 1892 return lck->lk.location; 1893 } 1894 1895 static void 1896 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc ) 1897 { 1898 lck->lk.location = loc; 1899 } 1900 1901 static kmp_lock_flags_t 1902 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck ) 1903 { 1904 return lck->lk.flags; 1905 } 1906 1907 static void 1908 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags ) 1909 { 1910 lck->lk.flags = flags; 1911 } 1912 1913 #if KMP_USE_ADAPTIVE_LOCKS 1914 1915 /* 1916 RTM Adaptive locks 1917 */ 1918 1919 // TODO: Use the header for intrinsics below with the compiler 13.0 1920 //#include <immintrin.h> 1921 1922 // Values from the status register after failed speculation. 
1923 #define _XBEGIN_STARTED (~0u) 1924 #define _XABORT_EXPLICIT (1 << 0) 1925 #define _XABORT_RETRY (1 << 1) 1926 #define _XABORT_CONFLICT (1 << 2) 1927 #define _XABORT_CAPACITY (1 << 3) 1928 #define _XABORT_DEBUG (1 << 4) 1929 #define _XABORT_NESTED (1 << 5) 1930 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1931 1932 // Aborts for which it's worth trying again immediately 1933 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1934 1935 #define STRINGIZE_INTERNAL(arg) #arg 1936 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1937 1938 // Access to RTM instructions 1939 1940 /* 1941 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. 1942 This is the same definition as the compiler intrinsic that will be supported at some point. 1943 */ 1944 static __inline int _xbegin() 1945 { 1946 int res = -1; 1947 1948 #if KMP_OS_WINDOWS 1949 #if KMP_ARCH_X86_64 1950 _asm { 1951 _emit 0xC7 1952 _emit 0xF8 1953 _emit 2 1954 _emit 0 1955 _emit 0 1956 _emit 0 1957 jmp L2 1958 mov res, eax 1959 L2: 1960 } 1961 #else /* IA32 */ 1962 _asm { 1963 _emit 0xC7 1964 _emit 0xF8 1965 _emit 2 1966 _emit 0 1967 _emit 0 1968 _emit 0 1969 jmp L2 1970 mov res, eax 1971 L2: 1972 } 1973 #endif // KMP_ARCH_X86_64 1974 #else 1975 /* Note that %eax must be noted as killed (clobbered), because 1976 * the XSR is returned in %eax(%rax) on abort. Other register 1977 * values are restored, so don't need to be killed. 1978 * 1979 * We must also mark 'res' as an input and an output, since otherwise 1980 * 'res=-1' may be dropped as being dead, whereas we do need the 1981 * assignment on the successful (i.e., non-abort) path. 1982 */ 1983 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" 1984 " .long 1f-1b-6\n" 1985 " jmp 2f\n" 1986 "1: movl %%eax,%0\n" 1987 "2:" 1988 :"+r"(res)::"memory","%eax"); 1989 #endif // KMP_OS_WINDOWS 1990 return res; 1991 } 1992 1993 /* 1994 Transaction end 1995 */ 1996 static __inline void _xend() 1997 { 1998 #if KMP_OS_WINDOWS 1999 __asm { 2000 _emit 0x0f 2001 _emit 0x01 2002 _emit 0xd5 2003 } 2004 #else 2005 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory"); 2006 #endif 2007 } 2008 2009 /* 2010 This is a macro, the argument must be a single byte constant which 2011 can be evaluated by the inline assembler, since it is emitted as a 2012 byte into the assembly code. 2013 */ 2014 #if KMP_OS_WINDOWS 2015 #define _xabort(ARG) \ 2016 _asm _emit 0xc6 \ 2017 _asm _emit 0xf8 \ 2018 _asm _emit ARG 2019 #else 2020 #define _xabort(ARG) \ 2021 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory"); 2022 #endif 2023 2024 // 2025 // Statistics is collected for testing purpose 2026 // 2027 #if KMP_DEBUG_ADAPTIVE_LOCKS 2028 2029 // We accumulate speculative lock statistics when the lock is destroyed. 2030 // We keep locks that haven't been destroyed in the liveLocks list 2031 // so that we can grab their statistics too. 2032 static kmp_adaptive_lock_statistics_t destroyedStats; 2033 2034 // To hold the list of live locks. 2035 static kmp_adaptive_lock_t liveLocks; 2036 2037 // A lock so we can safely update the list of locks. 2038 static kmp_bootstrap_lock_t chain_lock; 2039 2040 // Initialize the list of stats. 
2041 void 2042 __kmp_init_speculative_stats() 2043 { 2044 kmp_adaptive_lock *lck = &liveLocks; 2045 2046 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); 2047 lck->stats.next = lck; 2048 lck->stats.prev = lck; 2049 2050 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2051 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2052 2053 __kmp_init_bootstrap_lock( &chain_lock ); 2054 2055 } 2056 2057 // Insert the lock into the circular list 2058 static void 2059 __kmp_remember_lock( kmp_adaptive_lock * lck ) 2060 { 2061 __kmp_acquire_bootstrap_lock( &chain_lock ); 2062 2063 lck->stats.next = liveLocks.stats.next; 2064 lck->stats.prev = &liveLocks; 2065 2066 liveLocks.stats.next = lck; 2067 lck->stats.next->stats.prev = lck; 2068 2069 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2070 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2071 2072 __kmp_release_bootstrap_lock( &chain_lock ); 2073 } 2074 2075 static void 2076 __kmp_forget_lock( kmp_adaptive_lock * lck ) 2077 { 2078 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2079 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2080 2081 kmp_adaptive_lock * n = lck->stats.next; 2082 kmp_adaptive_lock * p = lck->stats.prev; 2083 2084 n->stats.prev = p; 2085 p->stats.next = n; 2086 } 2087 2088 static void 2089 __kmp_zero_speculative_stats( kmp_adaptive_lock * lck ) 2090 { 2091 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); 2092 __kmp_remember_lock( lck ); 2093 } 2094 2095 static void 2096 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_t * lck ) 2097 { 2098 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 2099 2100 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 2101 t->successfulSpeculations += s->successfulSpeculations; 2102 t->hardFailedSpeculations += s->hardFailedSpeculations; 2103 t->softFailedSpeculations += s->softFailedSpeculations; 2104 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 2105 t->lemmingYields += s->lemmingYields; 2106 } 2107 2108 static void 2109 __kmp_accumulate_speculative_stats( kmp_adaptive_lock * lck) 2110 { 2111 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 2112 2113 __kmp_acquire_bootstrap_lock( &chain_lock ); 2114 2115 __kmp_add_stats( &destroyedStats, lck ); 2116 __kmp_forget_lock( lck ); 2117 2118 __kmp_release_bootstrap_lock( &chain_lock ); 2119 } 2120 2121 static float 2122 percent (kmp_uint32 count, kmp_uint32 total) 2123 { 2124 return (total == 0) ? 0.0: (100.0 * count)/total; 2125 } 2126 2127 static 2128 FILE * __kmp_open_stats_file() 2129 { 2130 if (strcmp (__kmp_speculative_statsfile, "-") == 0) 2131 return stdout; 2132 2133 size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20; 2134 char buffer[buffLen]; 2135 snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile, getpid()); 2136 FILE * result = fopen(&buffer[0], "w"); 2137 2138 // Maybe we should issue a warning here... 2139 return result ? 
result : stdout; 2140 } 2141 2142 void 2143 __kmp_print_speculative_stats() 2144 { 2145 if (__kmp_user_lock_kind != lk_adaptive) 2146 return; 2147 2148 FILE * statsFile = __kmp_open_stats_file(); 2149 2150 kmp_adaptive_lock_statistics_t total = destroyedStats; 2151 kmp_adaptive_lock *lck; 2152 2153 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 2154 __kmp_add_stats( &total, lck ); 2155 } 2156 kmp_adaptive_lock_statistics_t *t = &total; 2157 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; 2158 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + 2159 t->softFailedSpeculations; 2160 2161 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); 2162 fprintf ( statsFile, " Lock parameters: \n" 2163 " max_soft_retries : %10d\n" 2164 " max_badness : %10d\n", 2165 __kmp_adaptive_backoff_params.max_soft_retries, 2166 __kmp_adaptive_backoff_params.max_badness); 2167 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); 2168 fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); 2169 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", 2170 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); 2171 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 2172 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); 2173 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); 2174 2175 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); 2176 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", 2177 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); 2178 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", 2179 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); 2180 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", 2181 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); 2182 2183 if (statsFile != stdout) 2184 fclose( statsFile ); 2185 } 2186 2187 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) 2188 #else 2189 # define KMP_INC_STAT(lck,stat) 2190 2191 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2192 2193 static inline bool 2194 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) 2195 { 2196 // It is enough to check that the head_id is zero. 2197 // We don't also need to check the tail. 2198 bool res = lck->lk.head_id == 0; 2199 2200 // We need a fence here, since we must ensure that no memory operations 2201 // from later in this thread float above that read. 2202 #if KMP_COMPILER_ICC 2203 _mm_mfence(); 2204 #else 2205 __sync_synchronize(); 2206 #endif 2207 2208 return res; 2209 } 2210 2211 // Functions for manipulating the badness 2212 static __inline void 2213 __kmp_update_badness_after_success( kmp_queuing_lock_t *lck ) 2214 { 2215 // Reset the badness to zero so we eagerly try to speculate again 2216 lck->lk.adaptive.badness = 0; 2217 KMP_INC_STAT(lck,successfulSpeculations); 2218 } 2219 2220 // Create a bit mask with one more set bit. 
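//
// Because __kmp_step_badness below always shifts a one in from the bottom,
// 'badness' only ever takes values of the form 2^k - 1 (0, 1, 3, 7, ...),
// capped at max_badness.  __kmp_should_speculate then tests
// (acquire_attempts & badness) == 0, so, for example, a badness of 3 allows
// speculation only on every fourth non-speculative acquire attempt, while a
// badness of 0 allows it on every attempt.  A successful speculation resets
// badness to 0 (see __kmp_update_badness_after_success above), so the backoff
// is exponential on failure and recovers fully on the first success.
//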
2221 static __inline void 2222 __kmp_step_badness( kmp_queuing_lock_t *lck ) 2223 { 2224 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; 2225 if ( newBadness > lck->lk.adaptive.max_badness) { 2226 return; 2227 } else { 2228 lck->lk.adaptive.badness = newBadness; 2229 } 2230 } 2231 2232 // Check whether speculation should be attempted. 2233 static __inline int 2234 __kmp_should_speculate( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 2235 { 2236 kmp_uint32 badness = lck->lk.adaptive.badness; 2237 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; 2238 int res = (attempts & badness) == 0; 2239 return res; 2240 } 2241 2242 // Attempt to acquire only the speculative lock. 2243 // Does not back off to the non-speculative lock. 2244 // 2245 static int 2246 __kmp_test_adaptive_lock_only( kmp_queuing_lock_t * lck, kmp_int32 gtid ) 2247 { 2248 int retries = lck->lk.adaptive.max_soft_retries; 2249 2250 // We don't explicitly count the start of speculation, rather we record 2251 // the results (success, hard fail, soft fail). The sum of all of those 2252 // is the total number of times we started speculation since all 2253 // speculations must end one of those ways. 2254 do 2255 { 2256 kmp_uint32 status = _xbegin(); 2257 // Switch this in to disable actual speculation but exercise 2258 // at least some of the rest of the code. Useful for debugging... 2259 // kmp_uint32 status = _XABORT_NESTED; 2260 2261 if (status == _XBEGIN_STARTED ) 2262 { /* We have successfully started speculation 2263 * Check that no-one acquired the lock for real between when we last looked 2264 * and now. This also gets the lock cache line into our read-set, 2265 * which we need so that we'll abort if anyone later claims it for real. 2266 */ 2267 if (! __kmp_is_unlocked_queuing_lock( lck ) ) 2268 { 2269 // Lock is now visibly acquired, so someone beat us to it. 2270 // Abort the transaction so we'll restart from _xbegin with the 2271 // failure status. 2272 _xabort(0x01) 2273 KMP_ASSERT2( 0, "should not get here" ); 2274 } 2275 return 1; // Lock has been acquired (speculatively) 2276 } else { 2277 // We have aborted, update the statistics 2278 if ( status & SOFT_ABORT_MASK) 2279 { 2280 KMP_INC_STAT(lck,softFailedSpeculations); 2281 // and loop round to retry. 2282 } 2283 else 2284 { 2285 KMP_INC_STAT(lck,hardFailedSpeculations); 2286 // Give up if we had a hard failure. 2287 break; 2288 } 2289 } 2290 } while( retries-- ); // Loop while we have retries, and didn't fail hard. 2291 2292 // Either we had a hard failure or we didn't succeed softly after 2293 // the full set of attempts, so back off the badness. 2294 __kmp_step_badness( lck ); 2295 return 0; 2296 } 2297 2298 // Attempt to acquire the speculative lock, or back off to the non-speculative one 2299 // if the speculative lock cannot be acquired. 2300 // We can succeed speculatively, non-speculatively, or fail. 2301 static int 2302 __kmp_test_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 2303 { 2304 // First try to acquire the lock speculatively 2305 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) 2306 return 1; 2307 2308 // Speculative acquisition failed, so try to acquire it non-speculatively. 2309 // Count the non-speculative acquire attempt 2310 lck->lk.adaptive.acquire_attempts++; 2311 2312 // Use base, non-speculative lock. 
if ( __kmp_test_queuing_lock( lck, gtid ) ) 2314 { 2315 KMP_INC_STAT(lck,nonSpeculativeAcquires); 2316 return 1; // Lock is acquired (non-speculatively) 2317 } 2318 else 2319 { 2320 return 0; // Failed to acquire the lock, it's already visibly locked. 2321 } 2322 } 2323 2324 static int 2325 __kmp_test_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 2326 { 2327 if ( __kmp_env_consistency_check ) { 2328 char const * const func = "omp_test_lock"; 2329 if ( lck->lk.initialized != lck ) { 2330 KMP_FATAL( LockIsUninitialized, func ); 2331 } 2332 } 2333 2334 int retval = __kmp_test_adaptive_lock( lck, gtid ); 2335 2336 if ( __kmp_env_consistency_check && retval ) { 2337 lck->lk.owner_id = gtid + 1; 2338 } 2339 return retval; 2340 } 2341 2342 // Block until we can acquire a speculative, adaptive lock. 2343 // We check whether we should be trying to speculate. 2344 // If we should be, we check the real lock to see if it is free, 2345 // and, if not, pause without attempting to acquire it until it is. 2346 // Then we try the speculative acquire. 2347 // This means that although we suffer from lemmings a little (because 2348 // we can't acquire the lock speculatively until the queue of waiting 2349 // threads has cleared), we don't get into a 2350 // state where we can never acquire the lock speculatively (because we 2351 // force the queue to clear by preventing new arrivals from entering the 2352 // queue). 2353 // This does mean that when we're trying to break lemmings, the lock 2354 // is no longer fair. However, OpenMP makes no guarantee that its 2355 // locks are fair, so this isn't a real problem. 2356 static void 2357 __kmp_acquire_adaptive_lock( kmp_queuing_lock_t * lck, kmp_int32 gtid ) 2358 { 2359 if ( __kmp_should_speculate( lck, gtid ) ) 2360 { 2361 if ( __kmp_is_unlocked_queuing_lock( lck ) ) 2362 { 2363 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2364 return; 2365 // We tried speculation and failed, so give up. 2366 } 2367 else 2368 { 2369 // We can't try speculation until the lock is free, so we 2370 // pause here (without suspending on the queuing lock) 2371 // to allow it to drain, then try again. 2372 // All other threads will also see the same result from 2373 // __kmp_should_speculate, so they will be doing the same if they 2374 // try to claim the lock from now on. 2375 while ( ! __kmp_is_unlocked_queuing_lock( lck ) ) 2376 { 2377 KMP_INC_STAT(lck,lemmingYields); 2378 __kmp_yield (TRUE); 2379 } 2380 2381 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2382 return; 2383 } 2384 } 2385 2386 // Speculative acquisition failed, so acquire it non-speculatively. 2387 // Count the non-speculative acquire attempt. 2388 lck->lk.adaptive.acquire_attempts++; 2389 2390 __kmp_acquire_queuing_lock_timed_template<FALSE>( lck, gtid ); 2391 // We have acquired the base lock, so count that.
2392 KMP_INC_STAT(lck,nonSpeculativeAcquires ); 2393 } 2394 2395 static void 2396 __kmp_acquire_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 2397 { 2398 if ( __kmp_env_consistency_check ) { 2399 char const * const func = "omp_set_lock"; 2400 if ( lck->lk.initialized != lck ) { 2401 KMP_FATAL( LockIsUninitialized, func ); 2402 } 2403 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 2404 KMP_FATAL( LockIsAlreadyOwned, func ); 2405 } 2406 } 2407 2408 __kmp_acquire_adaptive_lock( lck, gtid ); 2409 2410 if ( __kmp_env_consistency_check ) { 2411 lck->lk.owner_id = gtid + 1; 2412 } 2413 } 2414 2415 static void 2416 __kmp_release_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 2417 { 2418 if ( __kmp_is_unlocked_queuing_lock( lck ) ) 2419 { // If the lock doesn't look claimed we must be speculating. 2420 // (Or the user's code is buggy and they're releasing without locking; 2421 // if we had XTEST we'd be able to check that case...) 2422 _xend(); // Exit speculation 2423 __kmp_update_badness_after_success( lck ); 2424 } 2425 else 2426 { // Since the lock *is* visibly locked we're not speculating, 2427 // so should use the underlying lock's release scheme. 2428 __kmp_release_queuing_lock( lck, gtid ); 2429 } 2430 } 2431 2432 static void 2433 __kmp_release_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 2434 { 2435 if ( __kmp_env_consistency_check ) { 2436 char const * const func = "omp_unset_lock"; 2437 KMP_MB(); /* in case another processor initialized lock */ 2438 if ( lck->lk.initialized != lck ) { 2439 KMP_FATAL( LockIsUninitialized, func ); 2440 } 2441 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 2442 KMP_FATAL( LockUnsettingFree, func ); 2443 } 2444 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 2445 KMP_FATAL( LockUnsettingSetByAnother, func ); 2446 } 2447 lck->lk.owner_id = 0; 2448 } 2449 __kmp_release_adaptive_lock( lck, gtid ); 2450 } 2451 2452 static void 2453 __kmp_init_adaptive_lock( kmp_queuing_lock_t *lck ) 2454 { 2455 __kmp_init_queuing_lock( lck ); 2456 lck->lk.adaptive.badness = 0; 2457 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; 2458 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; 2459 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2460 #if KMP_DEBUG_ADAPTIVE_LOCKS 2461 __kmp_zero_speculative_stats( &lck->lk.adaptive ); 2462 #endif 2463 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2464 } 2465 2466 static void 2467 __kmp_init_adaptive_lock_with_checks( kmp_queuing_lock_t * lck ) 2468 { 2469 __kmp_init_adaptive_lock( lck ); 2470 } 2471 2472 static void 2473 __kmp_destroy_adaptive_lock( kmp_queuing_lock_t *lck ) 2474 { 2475 #if KMP_DEBUG_ADAPTIVE_LOCKS 2476 __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); 2477 #endif 2478 __kmp_destroy_queuing_lock (lck); 2479 // Nothing needed for the speculative part. 
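// (The adaptive fields -- badness, acquire_attempts and the retry/badness
// limits -- live inline in the lock structure and hold no external resources,
// so only the optional debug statistics accumulated above require cleanup.)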
2480 } 2481 2482 static void 2483 __kmp_destroy_adaptive_lock_with_checks( kmp_queuing_lock_t *lck ) 2484 { 2485 if ( __kmp_env_consistency_check ) { 2486 char const * const func = "omp_destroy_lock"; 2487 if ( lck->lk.initialized != lck ) { 2488 KMP_FATAL( LockIsUninitialized, func ); 2489 } 2490 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 2491 KMP_FATAL( LockStillOwned, func ); 2492 } 2493 } 2494 __kmp_destroy_adaptive_lock( lck ); 2495 } 2496 2497 2498 #endif // KMP_USE_ADAPTIVE_LOCKS 2499 2500 2501 /* ------------------------------------------------------------------------ */ 2502 /* DRDPA ticket locks */ 2503 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2504 2505 static kmp_int32 2506 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) 2507 { 2508 return TCR_4( lck->lk.owner_id ) - 1; 2509 } 2510 2511 static inline bool 2512 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) 2513 { 2514 return lck->lk.depth_locked != -1; 2515 } 2516 2517 __forceinline static void 2518 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2519 { 2520 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); 2521 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2522 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2523 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2524 TCR_PTR(lck->lk.polls); // volatile load 2525 2526 #ifdef USE_LOCK_PROFILE 2527 if (TCR_8(polls[ticket & mask].poll) != ticket) 2528 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2529 /* else __kmp_printf( "." );*/ 2530 #endif /* USE_LOCK_PROFILE */ 2531 2532 // 2533 // Now spin-wait, but reload the polls pointer and mask, in case the 2534 // polling area has been reconfigured. Unless it is reconfigured, the 2535 // reloads stay in L1 cache and are cheap. 2536 // 2537 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!! 2538 // 2539 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2540 // and poll to be re-read every spin iteration. 2541 // 2542 kmp_uint32 spins; 2543 2544 KMP_FSYNC_PREPARE(lck); 2545 KMP_INIT_YIELD(spins); 2546 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load 2547 __kmp_static_delay(TRUE); 2548 2549 // 2550 // If we are oversubscribed, 2551 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2552 // CPU Pause is in the macros for yield. 2553 // 2554 KMP_YIELD(TCR_4(__kmp_nth) 2555 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2556 KMP_YIELD_SPIN(spins); 2557 2558 // Re-read the mask and the poll pointer from the lock structure. 2559 // 2560 // Make certain that "mask" is read before "polls" !!! 2561 // 2562 // If another thread picks reconfigures the polling area and updates 2563 // their values, and we get the new value of mask and the old polls 2564 // pointer, we could access memory beyond the end of the old polling 2565 // area. 2566 // 2567 mask = TCR_8(lck->lk.mask); // volatile load 2568 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2569 TCR_PTR(lck->lk.polls); // volatile load 2570 } 2571 2572 // 2573 // Critical section starts here 2574 // 2575 KMP_FSYNC_ACQUIRED(lck); 2576 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2577 ticket, lck)); 2578 lck->lk.now_serving = ticket; // non-volatile store 2579 2580 // 2581 // Deallocate a garbage polling area if we know that we are the last 2582 // thread that could possibly access it. 
2583 // 2584 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2585 // ticket. 2586 // 2587 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2588 __kmp_free((void *)lck->lk.old_polls); 2589 lck->lk.old_polls = NULL; 2590 lck->lk.cleanup_ticket = 0; 2591 } 2592 2593 // 2594 // Check to see if we should reconfigure the polling area. 2595 // If there is still a garbage polling area to be deallocated from a 2596 // previous reconfiguration, let a later thread reconfigure it. 2597 // 2598 if (lck->lk.old_polls == NULL) { 2599 bool reconfigure = false; 2600 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2601 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2602 2603 if (TCR_4(__kmp_nth) 2604 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2605 // 2606 // We are in oversubscription mode. Contract the polling area 2607 // down to a single location, if that hasn't been done already. 2608 // 2609 if (num_polls > 1) { 2610 reconfigure = true; 2611 num_polls = TCR_4(lck->lk.num_polls); 2612 mask = 0; 2613 num_polls = 1; 2614 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2615 __kmp_allocate(num_polls * sizeof(*polls)); 2616 polls[0].poll = ticket; 2617 } 2618 } 2619 else { 2620 // 2621 // We are in under/fully subscribed mode. Check the number of 2622 // threads waiting on the lock. The size of the polling area 2623 // should be at least the number of threads waiting. 2624 // 2625 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2626 if (num_waiting > num_polls) { 2627 kmp_uint32 old_num_polls = num_polls; 2628 reconfigure = true; 2629 do { 2630 mask = (mask << 1) | 1; 2631 num_polls *= 2; 2632 } while (num_polls <= num_waiting); 2633 2634 // 2635 // Allocate the new polling area, and copy the relevant portion 2636 // of the old polling area to the new area. __kmp_allocate() 2637 // zeroes the memory it allocates, and most of the old area is 2638 // just zero padding, so we only copy the release counters. 2639 // 2640 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2641 __kmp_allocate(num_polls * sizeof(*polls)); 2642 kmp_uint32 i; 2643 for (i = 0; i < old_num_polls; i++) { 2644 polls[i].poll = old_polls[i].poll; 2645 } 2646 } 2647 } 2648 2649 if (reconfigure) { 2650 // 2651 // Now write the updated fields back to the lock structure. 2652 // 2653 // Make certain that "polls" is written before "mask" !!! 2654 // 2655 // If another thread picks up the new value of mask and the old 2656 // polls pointer , it could access memory beyond the end of the 2657 // old polling area. 2658 // 2659 // On x86, we need memory fences. 2660 // 2661 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", 2662 ticket, lck, num_polls)); 2663 2664 lck->lk.old_polls = old_polls; // non-volatile store 2665 lck->lk.polls = polls; // volatile store 2666 2667 KMP_MB(); 2668 2669 lck->lk.num_polls = num_polls; // non-volatile store 2670 lck->lk.mask = mask; // volatile store 2671 2672 KMP_MB(); 2673 2674 // 2675 // Only after the new polling area and mask have been flushed 2676 // to main memory can we update the cleanup ticket field. 
2677 // 2678 // volatile load / non-volatile store 2679 // 2680 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2681 } 2682 } 2683 } 2684 2685 void 2686 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2687 { 2688 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2689 } 2690 2691 static void 2692 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2693 { 2694 if ( __kmp_env_consistency_check ) { 2695 char const * const func = "omp_set_lock"; 2696 if ( lck->lk.initialized != lck ) { 2697 KMP_FATAL( LockIsUninitialized, func ); 2698 } 2699 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2700 KMP_FATAL( LockNestableUsedAsSimple, func ); 2701 } 2702 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { 2703 KMP_FATAL( LockIsAlreadyOwned, func ); 2704 } 2705 } 2706 2707 __kmp_acquire_drdpa_lock( lck, gtid ); 2708 2709 if ( __kmp_env_consistency_check ) { 2710 lck->lk.owner_id = gtid + 1; 2711 } 2712 } 2713 2714 int 2715 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2716 { 2717 // 2718 // First get a ticket, then read the polls pointer and the mask. 2719 // The polls pointer must be read before the mask!!! (See above) 2720 // 2721 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2722 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2723 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2724 TCR_PTR(lck->lk.polls); // volatile load 2725 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2726 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2727 kmp_uint64 next_ticket = ticket + 1; 2728 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, 2729 ticket, next_ticket)) { 2730 KMP_FSYNC_ACQUIRED(lck); 2731 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2732 ticket, lck)); 2733 lck->lk.now_serving = ticket; // non-volatile store 2734 2735 // 2736 // Since no threads are waiting, there is no possibility that 2737 // we would want to reconfigure the polling area. We might 2738 // have the cleanup ticket value (which says that it is now 2739 // safe to deallocate old_polls), but we'll let a later thread 2740 // which calls __kmp_acquire_lock do that - this routine 2741 // isn't supposed to block, and we would risk blocks if we 2742 // called __kmp_free() to do the deallocation. 2743 // 2744 return TRUE; 2745 } 2746 } 2747 return FALSE; 2748 } 2749 2750 static int 2751 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2752 { 2753 if ( __kmp_env_consistency_check ) { 2754 char const * const func = "omp_test_lock"; 2755 if ( lck->lk.initialized != lck ) { 2756 KMP_FATAL( LockIsUninitialized, func ); 2757 } 2758 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2759 KMP_FATAL( LockNestableUsedAsSimple, func ); 2760 } 2761 } 2762 2763 int retval = __kmp_test_drdpa_lock( lck, gtid ); 2764 2765 if ( __kmp_env_consistency_check && retval ) { 2766 lck->lk.owner_id = gtid + 1; 2767 } 2768 return retval; 2769 } 2770 2771 void 2772 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2773 { 2774 // 2775 // Read the ticket value from the lock data struct, then the polls 2776 // pointer and the mask. The polls pointer must be read before the 2777 // mask!!! 
(See above) 2778 // 2779 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2780 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2781 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2782 TCR_PTR(lck->lk.polls); // volatile load 2783 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2784 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2785 ticket - 1, lck)); 2786 KMP_FSYNC_RELEASING(lck); 2787 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2788 } 2789 2790 static void 2791 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2792 { 2793 if ( __kmp_env_consistency_check ) { 2794 char const * const func = "omp_unset_lock"; 2795 KMP_MB(); /* in case another processor initialized lock */ 2796 if ( lck->lk.initialized != lck ) { 2797 KMP_FATAL( LockIsUninitialized, func ); 2798 } 2799 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2800 KMP_FATAL( LockNestableUsedAsSimple, func ); 2801 } 2802 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2803 KMP_FATAL( LockUnsettingFree, func ); 2804 } 2805 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) 2806 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { 2807 KMP_FATAL( LockUnsettingSetByAnother, func ); 2808 } 2809 lck->lk.owner_id = 0; 2810 } 2811 __kmp_release_drdpa_lock( lck, gtid ); 2812 } 2813 2814 void 2815 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) 2816 { 2817 lck->lk.location = NULL; 2818 lck->lk.mask = 0; 2819 lck->lk.num_polls = 1; 2820 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2821 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2822 lck->lk.cleanup_ticket = 0; 2823 lck->lk.old_polls = NULL; 2824 lck->lk.next_ticket = 0; 2825 lck->lk.now_serving = 0; 2826 lck->lk.owner_id = 0; // no thread owns the lock. 2827 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
2828 lck->lk.initialized = lck; 2829 2830 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2831 } 2832 2833 static void 2834 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2835 { 2836 __kmp_init_drdpa_lock( lck ); 2837 } 2838 2839 void 2840 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) 2841 { 2842 lck->lk.initialized = NULL; 2843 lck->lk.location = NULL; 2844 if (lck->lk.polls != NULL) { 2845 __kmp_free((void *)lck->lk.polls); 2846 lck->lk.polls = NULL; 2847 } 2848 if (lck->lk.old_polls != NULL) { 2849 __kmp_free((void *)lck->lk.old_polls); 2850 lck->lk.old_polls = NULL; 2851 } 2852 lck->lk.mask = 0; 2853 lck->lk.num_polls = 0; 2854 lck->lk.cleanup_ticket = 0; 2855 lck->lk.next_ticket = 0; 2856 lck->lk.now_serving = 0; 2857 lck->lk.owner_id = 0; 2858 lck->lk.depth_locked = -1; 2859 } 2860 2861 static void 2862 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2863 { 2864 if ( __kmp_env_consistency_check ) { 2865 char const * const func = "omp_destroy_lock"; 2866 if ( lck->lk.initialized != lck ) { 2867 KMP_FATAL( LockIsUninitialized, func ); 2868 } 2869 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2870 KMP_FATAL( LockNestableUsedAsSimple, func ); 2871 } 2872 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2873 KMP_FATAL( LockStillOwned, func ); 2874 } 2875 } 2876 __kmp_destroy_drdpa_lock( lck ); 2877 } 2878 2879 2880 // 2881 // nested drdpa ticket locks 2882 // 2883 2884 void 2885 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2886 { 2887 KMP_DEBUG_ASSERT( gtid >= 0 ); 2888 2889 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2890 lck->lk.depth_locked += 1; 2891 } 2892 else { 2893 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2894 KMP_MB(); 2895 lck->lk.depth_locked = 1; 2896 KMP_MB(); 2897 lck->lk.owner_id = gtid + 1; 2898 } 2899 } 2900 2901 static void 2902 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2903 { 2904 if ( __kmp_env_consistency_check ) { 2905 char const * const func = "omp_set_nest_lock"; 2906 if ( lck->lk.initialized != lck ) { 2907 KMP_FATAL( LockIsUninitialized, func ); 2908 } 2909 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2910 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2911 } 2912 } 2913 __kmp_acquire_nested_drdpa_lock( lck, gtid ); 2914 } 2915 2916 int 2917 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2918 { 2919 int retval; 2920 2921 KMP_DEBUG_ASSERT( gtid >= 0 ); 2922 2923 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2924 retval = ++lck->lk.depth_locked; 2925 } 2926 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { 2927 retval = 0; 2928 } 2929 else { 2930 KMP_MB(); 2931 retval = lck->lk.depth_locked = 1; 2932 KMP_MB(); 2933 lck->lk.owner_id = gtid + 1; 2934 } 2935 return retval; 2936 } 2937 2938 static int 2939 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2940 { 2941 if ( __kmp_env_consistency_check ) { 2942 char const * const func = "omp_test_nest_lock"; 2943 if ( lck->lk.initialized != lck ) { 2944 KMP_FATAL( LockIsUninitialized, func ); 2945 } 2946 if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { 2947 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2948 } 2949 } 2950 return __kmp_test_nested_drdpa_lock( lck, gtid ); 2951 } 2952 2953 void 2954 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2955 { 2956 KMP_DEBUG_ASSERT( gtid >= 0 ); 2957 2958 KMP_MB(); 2959 if ( --(lck->lk.depth_locked) == 0 ) { 2960 KMP_MB(); 2961 lck->lk.owner_id = 0; 2962 __kmp_release_drdpa_lock( lck, gtid ); 2963 } 2964 } 2965 2966 static void 2967 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2968 { 2969 if ( __kmp_env_consistency_check ) { 2970 char const * const func = "omp_unset_nest_lock"; 2971 KMP_MB(); /* in case another processor initialized lock */ 2972 if ( lck->lk.initialized != lck ) { 2973 KMP_FATAL( LockIsUninitialized, func ); 2974 } 2975 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2976 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2977 } 2978 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2979 KMP_FATAL( LockUnsettingFree, func ); 2980 } 2981 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { 2982 KMP_FATAL( LockUnsettingSetByAnother, func ); 2983 } 2984 } 2985 __kmp_release_nested_drdpa_lock( lck, gtid ); 2986 } 2987 2988 void 2989 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) 2990 { 2991 __kmp_init_drdpa_lock( lck ); 2992 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2993 } 2994 2995 static void 2996 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2997 { 2998 __kmp_init_nested_drdpa_lock( lck ); 2999 } 3000 3001 void 3002 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) 3003 { 3004 __kmp_destroy_drdpa_lock( lck ); 3005 lck->lk.depth_locked = 0; 3006 } 3007 3008 static void 3009 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 3010 { 3011 if ( __kmp_env_consistency_check ) { 3012 char const * const func = "omp_destroy_nest_lock"; 3013 if ( lck->lk.initialized != lck ) { 3014 KMP_FATAL( LockIsUninitialized, func ); 3015 } 3016 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 3017 KMP_FATAL( LockSimpleUsedAsNestable, func ); 3018 } 3019 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 3020 KMP_FATAL( LockStillOwned, func ); 3021 } 3022 } 3023 __kmp_destroy_nested_drdpa_lock( lck ); 3024 } 3025 3026 3027 // 3028 // access functions to fields which don't exist for all lock kinds. 3029 // 3030 3031 static int 3032 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) 3033 { 3034 return lck == lck->lk.initialized; 3035 } 3036 3037 static const ident_t * 3038 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) 3039 { 3040 return lck->lk.location; 3041 } 3042 3043 static void 3044 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) 3045 { 3046 lck->lk.location = loc; 3047 } 3048 3049 static kmp_lock_flags_t 3050 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) 3051 { 3052 return lck->lk.flags; 3053 } 3054 3055 static void 3056 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) 3057 { 3058 lck->lk.flags = flags; 3059 } 3060 3061 /* ------------------------------------------------------------------------ */ 3062 /* user locks 3063 * 3064 * They are implemented as a table of function pointers which are set to the 3065 * lock functions of the appropriate kind, once that has been determined. 
3066 */ 3067 3068 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3069 3070 size_t __kmp_base_user_lock_size = 0; 3071 size_t __kmp_user_lock_size = 0; 3072 3073 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL; 3074 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3075 3076 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3077 void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3078 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3079 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL; 3080 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3081 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3082 3083 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3084 void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3085 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3086 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3087 3088 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL; 3089 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL; 3090 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL; 3091 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL; 3092 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL; 3093 3094 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) 3095 { 3096 switch ( user_lock_kind ) { 3097 case lk_default: 3098 default: 3099 KMP_ASSERT( 0 ); 3100 3101 case lk_tas: { 3102 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t ); 3103 __kmp_user_lock_size = sizeof( kmp_tas_lock_t ); 3104 3105 __kmp_get_user_lock_owner_ = 3106 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3107 ( &__kmp_get_tas_lock_owner ); 3108 3109 __kmp_acquire_user_lock_with_checks_ = 3110 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3111 ( &__kmp_acquire_tas_lock_with_checks ); 3112 3113 __kmp_test_user_lock_with_checks_ = 3114 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3115 ( &__kmp_test_tas_lock_with_checks ); 3116 3117 __kmp_release_user_lock_with_checks_ = 3118 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3119 ( &__kmp_release_tas_lock_with_checks ); 3120 3121 __kmp_init_user_lock_with_checks_ = 3122 ( void ( * )( kmp_user_lock_p ) ) 3123 ( &__kmp_init_tas_lock_with_checks ); 3124 3125 __kmp_destroy_user_lock_ = 3126 ( void ( * )( kmp_user_lock_p ) ) 3127 ( &__kmp_destroy_tas_lock ); 3128 3129 __kmp_destroy_user_lock_with_checks_ = 3130 ( void ( * )( kmp_user_lock_p ) ) 3131 ( &__kmp_destroy_tas_lock_with_checks ); 3132 3133 __kmp_acquire_nested_user_lock_with_checks_ = 3134 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3135 ( &__kmp_acquire_nested_tas_lock_with_checks ); 3136 3137 __kmp_test_nested_user_lock_with_checks_ = 3138 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3139 ( &__kmp_test_nested_tas_lock_with_checks ); 3140 3141 __kmp_release_nested_user_lock_with_checks_ = 3142 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3143 ( &__kmp_release_nested_tas_lock_with_checks ); 3144 3145 __kmp_init_nested_user_lock_with_checks_ = 3146 ( void ( * )( kmp_user_lock_p ) ) 3147 ( 
&__kmp_init_nested_tas_lock_with_checks ); 3148 3149 __kmp_destroy_nested_user_lock_with_checks_ = 3150 ( void ( * )( kmp_user_lock_p ) ) 3151 ( &__kmp_destroy_nested_tas_lock_with_checks ); 3152 3153 __kmp_is_user_lock_initialized_ = 3154 ( int ( * )( kmp_user_lock_p ) ) NULL; 3155 3156 __kmp_get_user_lock_location_ = 3157 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; 3158 3159 __kmp_set_user_lock_location_ = 3160 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; 3161 3162 __kmp_get_user_lock_flags_ = 3163 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; 3164 3165 __kmp_set_user_lock_flags_ = 3166 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; 3167 } 3168 break; 3169 3170 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 3171 3172 case lk_futex: { 3173 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t ); 3174 __kmp_user_lock_size = sizeof( kmp_futex_lock_t ); 3175 3176 __kmp_get_user_lock_owner_ = 3177 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3178 ( &__kmp_get_futex_lock_owner ); 3179 3180 __kmp_acquire_user_lock_with_checks_ = 3181 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3182 ( &__kmp_acquire_futex_lock_with_checks ); 3183 3184 __kmp_test_user_lock_with_checks_ = 3185 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3186 ( &__kmp_test_futex_lock_with_checks ); 3187 3188 __kmp_release_user_lock_with_checks_ = 3189 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3190 ( &__kmp_release_futex_lock_with_checks ); 3191 3192 __kmp_init_user_lock_with_checks_ = 3193 ( void ( * )( kmp_user_lock_p ) ) 3194 ( &__kmp_init_futex_lock_with_checks ); 3195 3196 __kmp_destroy_user_lock_ = 3197 ( void ( * )( kmp_user_lock_p ) ) 3198 ( &__kmp_destroy_futex_lock ); 3199 3200 __kmp_destroy_user_lock_with_checks_ = 3201 ( void ( * )( kmp_user_lock_p ) ) 3202 ( &__kmp_destroy_futex_lock_with_checks ); 3203 3204 __kmp_acquire_nested_user_lock_with_checks_ = 3205 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3206 ( &__kmp_acquire_nested_futex_lock_with_checks ); 3207 3208 __kmp_test_nested_user_lock_with_checks_ = 3209 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3210 ( &__kmp_test_nested_futex_lock_with_checks ); 3211 3212 __kmp_release_nested_user_lock_with_checks_ = 3213 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3214 ( &__kmp_release_nested_futex_lock_with_checks ); 3215 3216 __kmp_init_nested_user_lock_with_checks_ = 3217 ( void ( * )( kmp_user_lock_p ) ) 3218 ( &__kmp_init_nested_futex_lock_with_checks ); 3219 3220 __kmp_destroy_nested_user_lock_with_checks_ = 3221 ( void ( * )( kmp_user_lock_p ) ) 3222 ( &__kmp_destroy_nested_futex_lock_with_checks ); 3223 3224 __kmp_is_user_lock_initialized_ = 3225 ( int ( * )( kmp_user_lock_p ) ) NULL; 3226 3227 __kmp_get_user_lock_location_ = 3228 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; 3229 3230 __kmp_set_user_lock_location_ = 3231 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; 3232 3233 __kmp_get_user_lock_flags_ = 3234 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; 3235 3236 __kmp_set_user_lock_flags_ = 3237 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; 3238 } 3239 break; 3240 3241 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 3242 3243 case lk_ticket: { 3244 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t ); 3245 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t ); 3246 3247 __kmp_get_user_lock_owner_ = 3248 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3249 ( &__kmp_get_ticket_lock_owner ); 3250 3251 __kmp_acquire_user_lock_with_checks_ = 3252 ( 
void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3253 ( &__kmp_acquire_ticket_lock_with_checks ); 3254 3255 __kmp_test_user_lock_with_checks_ = 3256 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3257 ( &__kmp_test_ticket_lock_with_checks ); 3258 3259 __kmp_release_user_lock_with_checks_ = 3260 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3261 ( &__kmp_release_ticket_lock_with_checks ); 3262 3263 __kmp_init_user_lock_with_checks_ = 3264 ( void ( * )( kmp_user_lock_p ) ) 3265 ( &__kmp_init_ticket_lock_with_checks ); 3266 3267 __kmp_destroy_user_lock_ = 3268 ( void ( * )( kmp_user_lock_p ) ) 3269 ( &__kmp_destroy_ticket_lock ); 3270 3271 __kmp_destroy_user_lock_with_checks_ = 3272 ( void ( * )( kmp_user_lock_p ) ) 3273 ( &__kmp_destroy_ticket_lock_with_checks ); 3274 3275 __kmp_acquire_nested_user_lock_with_checks_ = 3276 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3277 ( &__kmp_acquire_nested_ticket_lock_with_checks ); 3278 3279 __kmp_test_nested_user_lock_with_checks_ = 3280 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3281 ( &__kmp_test_nested_ticket_lock_with_checks ); 3282 3283 __kmp_release_nested_user_lock_with_checks_ = 3284 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3285 ( &__kmp_release_nested_ticket_lock_with_checks ); 3286 3287 __kmp_init_nested_user_lock_with_checks_ = 3288 ( void ( * )( kmp_user_lock_p ) ) 3289 ( &__kmp_init_nested_ticket_lock_with_checks ); 3290 3291 __kmp_destroy_nested_user_lock_with_checks_ = 3292 ( void ( * )( kmp_user_lock_p ) ) 3293 ( &__kmp_destroy_nested_ticket_lock_with_checks ); 3294 3295 __kmp_is_user_lock_initialized_ = 3296 ( int ( * )( kmp_user_lock_p ) ) 3297 ( &__kmp_is_ticket_lock_initialized ); 3298 3299 __kmp_get_user_lock_location_ = 3300 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3301 ( &__kmp_get_ticket_lock_location ); 3302 3303 __kmp_set_user_lock_location_ = 3304 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3305 ( &__kmp_set_ticket_lock_location ); 3306 3307 __kmp_get_user_lock_flags_ = 3308 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3309 ( &__kmp_get_ticket_lock_flags ); 3310 3311 __kmp_set_user_lock_flags_ = 3312 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3313 ( &__kmp_set_ticket_lock_flags ); 3314 } 3315 break; 3316 3317 case lk_queuing: { 3318 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t ); 3319 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t ); 3320 3321 __kmp_get_user_lock_owner_ = 3322 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3323 ( &__kmp_get_queuing_lock_owner ); 3324 3325 __kmp_acquire_user_lock_with_checks_ = 3326 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3327 ( &__kmp_acquire_queuing_lock_with_checks ); 3328 3329 __kmp_test_user_lock_with_checks_ = 3330 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3331 ( &__kmp_test_queuing_lock_with_checks ); 3332 3333 __kmp_release_user_lock_with_checks_ = 3334 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3335 ( &__kmp_release_queuing_lock_with_checks ); 3336 3337 __kmp_init_user_lock_with_checks_ = 3338 ( void ( * )( kmp_user_lock_p ) ) 3339 ( &__kmp_init_queuing_lock_with_checks ); 3340 3341 __kmp_destroy_user_lock_ = 3342 ( void ( * )( kmp_user_lock_p ) ) 3343 ( &__kmp_destroy_queuing_lock ); 3344 3345 __kmp_destroy_user_lock_with_checks_ = 3346 ( void ( * )( kmp_user_lock_p ) ) 3347 ( &__kmp_destroy_queuing_lock_with_checks ); 3348 3349 __kmp_acquire_nested_user_lock_with_checks_ = 3350 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3351 ( &__kmp_acquire_nested_queuing_lock_with_checks ); 3352 3353 __kmp_test_nested_user_lock_with_checks_ = 3354 ( int ( * 
)( kmp_user_lock_p, kmp_int32 ) ) 3355 ( &__kmp_test_nested_queuing_lock_with_checks ); 3356 3357 __kmp_release_nested_user_lock_with_checks_ = 3358 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3359 ( &__kmp_release_nested_queuing_lock_with_checks ); 3360 3361 __kmp_init_nested_user_lock_with_checks_ = 3362 ( void ( * )( kmp_user_lock_p ) ) 3363 ( &__kmp_init_nested_queuing_lock_with_checks ); 3364 3365 __kmp_destroy_nested_user_lock_with_checks_ = 3366 ( void ( * )( kmp_user_lock_p ) ) 3367 ( &__kmp_destroy_nested_queuing_lock_with_checks ); 3368 3369 __kmp_is_user_lock_initialized_ = 3370 ( int ( * )( kmp_user_lock_p ) ) 3371 ( &__kmp_is_queuing_lock_initialized ); 3372 3373 __kmp_get_user_lock_location_ = 3374 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3375 ( &__kmp_get_queuing_lock_location ); 3376 3377 __kmp_set_user_lock_location_ = 3378 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3379 ( &__kmp_set_queuing_lock_location ); 3380 3381 __kmp_get_user_lock_flags_ = 3382 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3383 ( &__kmp_get_queuing_lock_flags ); 3384 3385 __kmp_set_user_lock_flags_ = 3386 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3387 ( &__kmp_set_queuing_lock_flags ); 3388 } 3389 break; 3390 3391 #if KMP_USE_ADAPTIVE_LOCKS 3392 case lk_adaptive: { 3393 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t ); 3394 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t ); 3395 3396 __kmp_get_user_lock_owner_ = 3397 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3398 ( &__kmp_get_queuing_lock_owner ); 3399 3400 __kmp_acquire_user_lock_with_checks_ = 3401 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3402 ( &__kmp_acquire_adaptive_lock_with_checks ); 3403 3404 __kmp_test_user_lock_with_checks_ = 3405 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3406 ( &__kmp_test_adaptive_lock_with_checks ); 3407 3408 __kmp_release_user_lock_with_checks_ = 3409 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3410 ( &__kmp_release_adaptive_lock_with_checks ); 3411 3412 __kmp_init_user_lock_with_checks_ = 3413 ( void ( * )( kmp_user_lock_p ) ) 3414 ( &__kmp_init_adaptive_lock_with_checks ); 3415 3416 __kmp_destroy_user_lock_with_checks_ = 3417 ( void ( * )( kmp_user_lock_p ) ) 3418 ( &__kmp_destroy_adaptive_lock_with_checks ); 3419 3420 __kmp_destroy_user_lock_ = 3421 ( void ( * )( kmp_user_lock_p ) ) 3422 ( &__kmp_destroy_adaptive_lock ); 3423 3424 __kmp_is_user_lock_initialized_ = 3425 ( int ( * )( kmp_user_lock_p ) ) 3426 ( &__kmp_is_queuing_lock_initialized ); 3427 3428 __kmp_get_user_lock_location_ = 3429 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3430 ( &__kmp_get_queuing_lock_location ); 3431 3432 __kmp_set_user_lock_location_ = 3433 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3434 ( &__kmp_set_queuing_lock_location ); 3435 3436 __kmp_get_user_lock_flags_ = 3437 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3438 ( &__kmp_get_queuing_lock_flags ); 3439 3440 __kmp_set_user_lock_flags_ = 3441 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3442 ( &__kmp_set_queuing_lock_flags ); 3443 3444 } 3445 break; 3446 #endif // KMP_USE_ADAPTIVE_LOCKS 3447 3448 case lk_drdpa: { 3449 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t ); 3450 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t ); 3451 3452 __kmp_get_user_lock_owner_ = 3453 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3454 ( &__kmp_get_drdpa_lock_owner ); 3455 3456 __kmp_acquire_user_lock_with_checks_ = 3457 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3458 ( &__kmp_acquire_drdpa_lock_with_checks ); 3459 3460 
__kmp_test_user_lock_with_checks_ = 3461 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3462 ( &__kmp_test_drdpa_lock_with_checks ); 3463 3464 __kmp_release_user_lock_with_checks_ = 3465 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3466 ( &__kmp_release_drdpa_lock_with_checks ); 3467 3468 __kmp_init_user_lock_with_checks_ = 3469 ( void ( * )( kmp_user_lock_p ) ) 3470 ( &__kmp_init_drdpa_lock_with_checks ); 3471 3472 __kmp_destroy_user_lock_ = 3473 ( void ( * )( kmp_user_lock_p ) ) 3474 ( &__kmp_destroy_drdpa_lock ); 3475 3476 __kmp_destroy_user_lock_with_checks_ = 3477 ( void ( * )( kmp_user_lock_p ) ) 3478 ( &__kmp_destroy_drdpa_lock_with_checks ); 3479 3480 __kmp_acquire_nested_user_lock_with_checks_ = 3481 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3482 ( &__kmp_acquire_nested_drdpa_lock_with_checks ); 3483 3484 __kmp_test_nested_user_lock_with_checks_ = 3485 ( int ( * )( kmp_user_lock_p, kmp_int32 ) ) 3486 ( &__kmp_test_nested_drdpa_lock_with_checks ); 3487 3488 __kmp_release_nested_user_lock_with_checks_ = 3489 ( void ( * )( kmp_user_lock_p, kmp_int32 ) ) 3490 ( &__kmp_release_nested_drdpa_lock_with_checks ); 3491 3492 __kmp_init_nested_user_lock_with_checks_ = 3493 ( void ( * )( kmp_user_lock_p ) ) 3494 ( &__kmp_init_nested_drdpa_lock_with_checks ); 3495 3496 __kmp_destroy_nested_user_lock_with_checks_ = 3497 ( void ( * )( kmp_user_lock_p ) ) 3498 ( &__kmp_destroy_nested_drdpa_lock_with_checks ); 3499 3500 __kmp_is_user_lock_initialized_ = 3501 ( int ( * )( kmp_user_lock_p ) ) 3502 ( &__kmp_is_drdpa_lock_initialized ); 3503 3504 __kmp_get_user_lock_location_ = 3505 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3506 ( &__kmp_get_drdpa_lock_location ); 3507 3508 __kmp_set_user_lock_location_ = 3509 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3510 ( &__kmp_set_drdpa_lock_location ); 3511 3512 __kmp_get_user_lock_flags_ = 3513 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3514 ( &__kmp_get_drdpa_lock_flags ); 3515 3516 __kmp_set_user_lock_flags_ = 3517 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3518 ( &__kmp_set_drdpa_lock_flags ); 3519 } 3520 break; 3521 } 3522 } 3523 3524 3525 // ---------------------------------------------------------------------------- 3526 // User lock table & lock allocation 3527 3528 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL }; 3529 kmp_user_lock_p __kmp_lock_pool = NULL; 3530 3531 // Lock block-allocation support. 3532 kmp_block_of_locks* __kmp_lock_blocks = NULL; 3533 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3534 3535 static kmp_lock_index_t 3536 __kmp_lock_table_insert( kmp_user_lock_p lck ) 3537 { 3538 // Assume that kmp_global_lock is held upon entry/exit. 3539 kmp_lock_index_t index; 3540 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) { 3541 kmp_lock_index_t size; 3542 kmp_user_lock_p *table; 3543 kmp_lock_index_t i; 3544 // Reallocate lock table. 3545 if ( __kmp_user_lock_table.allocated == 0 ) { 3546 size = 1024; 3547 } 3548 else { 3549 size = __kmp_user_lock_table.allocated * 2; 3550 } 3551 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size ); 3552 memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) ); 3553 table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table; 3554 // We cannot free the previos table now, sinse it may be in use by other 3555 // threads. So save the pointer to the previous table in in the first element of the 3556 // new table. 
All the tables will be organized into a list, and could be freed when 3557 // library shutting down. 3558 __kmp_user_lock_table.table = table; 3559 __kmp_user_lock_table.allocated = size; 3560 } 3561 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated ); 3562 index = __kmp_user_lock_table.used; 3563 __kmp_user_lock_table.table[ index ] = lck; 3564 ++ __kmp_user_lock_table.used; 3565 return index; 3566 } 3567 3568 static kmp_user_lock_p 3569 __kmp_lock_block_allocate() 3570 { 3571 // Assume that kmp_global_lock is held upon entry/exit. 3572 static int last_index = 0; 3573 if ( ( last_index >= __kmp_num_locks_in_block ) 3574 || ( __kmp_lock_blocks == NULL ) ) { 3575 // Restart the index. 3576 last_index = 0; 3577 // Need to allocate a new block. 3578 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); 3579 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; 3580 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) ); 3581 // Set up the new block. 3582 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]); 3583 new_block->next_block = __kmp_lock_blocks; 3584 new_block->locks = (void *)buffer; 3585 // Publish the new block. 3586 KMP_MB(); 3587 __kmp_lock_blocks = new_block; 3588 } 3589 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) ) 3590 [ last_index * __kmp_user_lock_size ] ) ); 3591 last_index++; 3592 return ret; 3593 } 3594 3595 // 3596 // Get memory for a lock. It may be freshly allocated memory or reused memory 3597 // from lock pool. 3598 // 3599 kmp_user_lock_p 3600 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, 3601 kmp_lock_flags_t flags ) 3602 { 3603 kmp_user_lock_p lck; 3604 kmp_lock_index_t index; 3605 KMP_DEBUG_ASSERT( user_lock ); 3606 3607 __kmp_acquire_lock( &__kmp_global_lock, gtid ); 3608 3609 if ( __kmp_lock_pool == NULL ) { 3610 // Lock pool is empty. Allocate new memory. 3611 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point. 3612 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size ); 3613 } 3614 else { 3615 lck = __kmp_lock_block_allocate(); 3616 } 3617 3618 // Insert lock in the table so that it can be freed in __kmp_cleanup, 3619 // and debugger has info on all allocated locks. 3620 index = __kmp_lock_table_insert( lck ); 3621 } 3622 else { 3623 // Pick up lock from pool. 3624 lck = __kmp_lock_pool; 3625 index = __kmp_lock_pool->pool.index; 3626 __kmp_lock_pool = __kmp_lock_pool->pool.next; 3627 } 3628 3629 // 3630 // We could potentially differentiate between nested and regular locks 3631 // here, and do the lock table lookup for regular locks only. 3632 // 3633 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { 3634 * ( (kmp_lock_index_t *) user_lock ) = index; 3635 } 3636 else { 3637 * ( (kmp_user_lock_p *) user_lock ) = lck; 3638 } 3639 3640 // mark the lock if it is critical section lock. 3641 __kmp_set_user_lock_flags( lck, flags ); 3642 3643 __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper 3644 3645 return lck; 3646 } 3647 3648 // Put lock's memory to pool for reusing. 
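//
// Note on the user lock representation shared by the allocate / free / lookup
// routines: when OMP_LOCK_T_SIZE < sizeof(void *) the user's lock variable is
// too small to hold a pointer, so it stores a 1-based index into
// __kmp_user_lock_table instead; otherwise it stores the kmp_user_lock_p
// directly.  A sketch of the decoding step, mirroring __kmp_lookup_user_lock
// below:
//
//     kmp_user_lock_p lck;
//     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
//         kmp_lock_index_t index = *( (kmp_lock_index_t *) user_lock );
//         lck = __kmp_user_lock_table.table[ index ];   // entry 0 is reserved
//     }
//     else {
//         lck = *( (kmp_user_lock_p *) user_lock );
//     }
//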
3649 void 3650 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ) 3651 { 3652 kmp_lock_pool_t * lock_pool; 3653 3654 KMP_DEBUG_ASSERT( user_lock != NULL ); 3655 KMP_DEBUG_ASSERT( lck != NULL ); 3656 3657 __kmp_acquire_lock( & __kmp_global_lock, gtid ); 3658 3659 lck->pool.next = __kmp_lock_pool; 3660 __kmp_lock_pool = lck; 3661 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { 3662 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock ); 3663 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used ); 3664 lck->pool.index = index; 3665 } 3666 3667 __kmp_release_lock( & __kmp_global_lock, gtid ); 3668 } 3669 3670 kmp_user_lock_p 3671 __kmp_lookup_user_lock( void **user_lock, char const *func ) 3672 { 3673 kmp_user_lock_p lck = NULL; 3674 3675 if ( __kmp_env_consistency_check ) { 3676 if ( user_lock == NULL ) { 3677 KMP_FATAL( LockIsUninitialized, func ); 3678 } 3679 } 3680 3681 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { 3682 kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock ); 3683 if ( __kmp_env_consistency_check ) { 3684 if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) { 3685 KMP_FATAL( LockIsUninitialized, func ); 3686 } 3687 } 3688 KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used ); 3689 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); 3690 lck = __kmp_user_lock_table.table[index]; 3691 } 3692 else { 3693 lck = *( (kmp_user_lock_p *)user_lock ); 3694 } 3695 3696 if ( __kmp_env_consistency_check ) { 3697 if ( lck == NULL ) { 3698 KMP_FATAL( LockIsUninitialized, func ); 3699 } 3700 } 3701 3702 return lck; 3703 } 3704 3705 void 3706 __kmp_cleanup_user_locks( void ) 3707 { 3708 // 3709 // Reset lock pool. Do not worry about lock in the pool -- we will free 3710 // them when iterating through lock table (it includes all the locks, 3711 // dead or alive). 3712 // 3713 __kmp_lock_pool = NULL; 3714 3715 #define IS_CRITICAL(lck) \ 3716 ( ( __kmp_get_user_lock_flags_ != NULL ) && \ 3717 ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) ) 3718 3719 // 3720 // Loop through lock table, free all locks. 3721 // 3722 // Do not free item [0], it is reserved for lock tables list. 3723 // 3724 // FIXME - we are iterating through a list of (pointers to) objects of 3725 // type union kmp_user_lock, but we have no way of knowing whether the 3726 // base type is currently "pool" or whatever the global user lock type 3727 // is. 3728 // 3729 // We are relying on the fact that for all of the user lock types 3730 // (except "tas"), the first field in the lock struct is the "initialized" 3731 // field, which is set to the address of the lock object itself when 3732 // the lock is initialized. When the union is of type "pool", the 3733 // first field is a pointer to the next object in the free list, which 3734 // will not be the same address as the object itself. 3735 // 3736 // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck ) 3737 // will fail for "pool" objects on the free list. This must happen as 3738 // the "location" field of real user locks overlaps the "index" field 3739 // of "pool" objects. 3740 // 3741 // It would be better to run through the free list, and remove all "pool" 3742 // objects from the lock table before executing this loop. However, 3743 // "pool" objects do not always have their index field set (only on 3744 // lin_32e), and I don't want to search the lock table for the address 3745 // of every "pool" object on the free list. 
3746 // 3747 while ( __kmp_user_lock_table.used > 1 ) { 3748 const ident *loc; 3749 3750 // 3751 // reduce __kmp_user_lock_table.used before freeing the lock, 3752 // so that state of locks is consistent 3753 // 3754 kmp_user_lock_p lck = __kmp_user_lock_table.table[ 3755 --__kmp_user_lock_table.used ]; 3756 3757 if ( ( __kmp_is_user_lock_initialized_ != NULL ) && 3758 ( *__kmp_is_user_lock_initialized_ )( lck ) ) { 3759 // 3760 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is 3761 // initialized AND it is NOT a critical section (user is not 3762 // responsible for destroying criticals) AND we know source 3763 // location to report. 3764 // 3765 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) && 3766 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) && 3767 ( loc->psource != NULL ) ) { 3768 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 ); 3769 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func, 3770 str_loc.line, str_loc.col ); 3771 __kmp_str_loc_free( &str_loc); 3772 } 3773 3774 #ifdef KMP_DEBUG 3775 if ( IS_CRITICAL( lck ) ) { 3776 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) ); 3777 } 3778 else { 3779 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) ); 3780 } 3781 #endif // KMP_DEBUG 3782 3783 // 3784 // Cleanup internal lock dynamic resources 3785 // (for drdpa locks particularly). 3786 // 3787 __kmp_destroy_user_lock( lck ); 3788 } 3789 3790 // 3791 // Free the lock if block allocation of locks is not used. 3792 // 3793 if ( __kmp_lock_blocks == NULL ) { 3794 __kmp_free( lck ); 3795 } 3796 } 3797 3798 #undef IS_CRITICAL 3799 3800 // 3801 // delete lock table(s). 3802 // 3803 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; 3804 __kmp_user_lock_table.table = NULL; 3805 __kmp_user_lock_table.allocated = 0; 3806 3807 while ( table_ptr != NULL ) { 3808 // 3809 // In the first element we saved the pointer to the previous 3810 // (smaller) lock table. 3811 // 3812 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] ); 3813 __kmp_free( table_ptr ); 3814 table_ptr = next; 3815 } 3816 3817 // 3818 // Free buffers allocated for blocks of locks. 3819 // 3820 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; 3821 __kmp_lock_blocks = NULL; 3822 3823 while ( block_ptr != NULL ) { 3824 kmp_block_of_locks_t *next = block_ptr->next_block; 3825 __kmp_free( block_ptr->locks ); 3826 // 3827 // *block_ptr itself was allocated at the end of the locks vector. 3828 // 3829 block_ptr = next; 3830 } 3831 3832 TCW_4(__kmp_init_user_locks, FALSE); 3833 } 3834 3835
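//
// Illustrative end-to-end sketch (not part of the library) of how the pieces
// in this file fit together.  Error handling and serialization on
// __kmp_global_lock are as in the routines above; 'my_gtid' is a hypothetical
// global thread id and lk_queuing is just one possible choice of lock kind.
//
//     __kmp_set_user_lock_vptrs( lk_queuing );          // pick the lock kind once
//
//     void *user_lock;                                  // storage handed in by the user
//     kmp_user_lock_p lck =
//         __kmp_user_lock_allocate( &user_lock, my_gtid, 0 );
//     ( *__kmp_init_user_lock_with_checks_ )( lck );
//     ( *__kmp_acquire_user_lock_with_checks_ )( lck, my_gtid );
//     ( *__kmp_release_user_lock_with_checks_ )( lck, my_gtid );
//     ( *__kmp_destroy_user_lock_with_checks_ )( lck );
//     __kmp_user_lock_free( &user_lock, my_gtid, lck );
//
//     __kmp_cleanup_user_locks();                       // at library shutdown
//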