/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#if KMP_USE_FUTEX
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change),
// we just define the constants here and don't include <futex.h>.
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT    0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE    1
# endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32 x, y;

    /* Check to make sure unsigned arithmetic wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}


/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//

static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    kmp_backoff_t backoff = __kmp_spin_backoff_params;
    while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
            ( !
KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) { 120 121 __kmp_spin_backoff(&backoff); 122 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 123 __kmp_xproc ) ) { 124 KMP_YIELD( TRUE ); 125 } 126 else { 127 KMP_YIELD_SPIN( spins ); 128 } 129 } 130 KMP_FSYNC_ACQUIRED( lck ); 131 return KMP_LOCK_ACQUIRED_FIRST; 132 } 133 134 int 135 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 136 { 137 return __kmp_acquire_tas_lock_timed_template( lck, gtid ); 138 } 139 140 static int 141 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 142 { 143 char const * const func = "omp_set_lock"; 144 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 145 && __kmp_is_tas_lock_nestable( lck ) ) { 146 KMP_FATAL( LockNestableUsedAsSimple, func ); 147 } 148 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) { 149 KMP_FATAL( LockIsAlreadyOwned, func ); 150 } 151 return __kmp_acquire_tas_lock( lck, gtid ); 152 } 153 154 int 155 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 156 { 157 if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) ) 158 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) { 159 KMP_FSYNC_ACQUIRED( lck ); 160 return TRUE; 161 } 162 return FALSE; 163 } 164 165 static int 166 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 167 { 168 char const * const func = "omp_test_lock"; 169 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 170 && __kmp_is_tas_lock_nestable( lck ) ) { 171 KMP_FATAL( LockNestableUsedAsSimple, func ); 172 } 173 return __kmp_test_tas_lock( lck, gtid ); 174 } 175 176 int 177 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 178 { 179 KMP_MB(); /* Flush all pending memory write invalidates. */ 180 181 KMP_FSYNC_RELEASING(lck); 182 KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) ); 183 KMP_MB(); /* Flush all pending memory write invalidates. */ 184 185 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? 
__kmp_avail_proc : 186 __kmp_xproc ) ); 187 return KMP_LOCK_RELEASED; 188 } 189 190 static int 191 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 192 { 193 char const * const func = "omp_unset_lock"; 194 KMP_MB(); /* in case another processor initialized lock */ 195 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 196 && __kmp_is_tas_lock_nestable( lck ) ) { 197 KMP_FATAL( LockNestableUsedAsSimple, func ); 198 } 199 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 200 KMP_FATAL( LockUnsettingFree, func ); 201 } 202 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 ) 203 && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) { 204 KMP_FATAL( LockUnsettingSetByAnother, func ); 205 } 206 return __kmp_release_tas_lock( lck, gtid ); 207 } 208 209 void 210 __kmp_init_tas_lock( kmp_tas_lock_t * lck ) 211 { 212 TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) ); 213 } 214 215 static void 216 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck ) 217 { 218 __kmp_init_tas_lock( lck ); 219 } 220 221 void 222 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ) 223 { 224 lck->lk.poll = 0; 225 } 226 227 static void 228 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck ) 229 { 230 char const * const func = "omp_destroy_lock"; 231 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 232 && __kmp_is_tas_lock_nestable( lck ) ) { 233 KMP_FATAL( LockNestableUsedAsSimple, func ); 234 } 235 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 236 KMP_FATAL( LockStillOwned, func ); 237 } 238 __kmp_destroy_tas_lock( lck ); 239 } 240 241 242 // 243 // nested test and set locks 244 // 245 246 int 247 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 248 { 249 KMP_DEBUG_ASSERT( gtid >= 0 ); 250 251 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 252 lck->lk.depth_locked += 1; 253 return KMP_LOCK_ACQUIRED_NEXT; 254 } 255 else { 256 __kmp_acquire_tas_lock_timed_template( lck, gtid ); 257 lck->lk.depth_locked = 1; 258 return KMP_LOCK_ACQUIRED_FIRST; 259 } 260 } 261 262 static int 263 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 264 { 265 char const * const func = "omp_set_nest_lock"; 266 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 267 KMP_FATAL( LockSimpleUsedAsNestable, func ); 268 } 269 return __kmp_acquire_nested_tas_lock( lck, gtid ); 270 } 271 272 int 273 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 274 { 275 int retval; 276 277 KMP_DEBUG_ASSERT( gtid >= 0 ); 278 279 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 280 retval = ++lck->lk.depth_locked; 281 } 282 else if ( !__kmp_test_tas_lock( lck, gtid ) ) { 283 retval = 0; 284 } 285 else { 286 KMP_MB(); 287 retval = lck->lk.depth_locked = 1; 288 } 289 return retval; 290 } 291 292 static int 293 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 294 { 295 char const * const func = "omp_test_nest_lock"; 296 if ( ! 
__kmp_is_tas_lock_nestable( lck ) ) { 297 KMP_FATAL( LockSimpleUsedAsNestable, func ); 298 } 299 return __kmp_test_nested_tas_lock( lck, gtid ); 300 } 301 302 int 303 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 304 { 305 KMP_DEBUG_ASSERT( gtid >= 0 ); 306 307 KMP_MB(); 308 if ( --(lck->lk.depth_locked) == 0 ) { 309 __kmp_release_tas_lock( lck, gtid ); 310 return KMP_LOCK_RELEASED; 311 } 312 return KMP_LOCK_STILL_HELD; 313 } 314 315 static int 316 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 317 { 318 char const * const func = "omp_unset_nest_lock"; 319 KMP_MB(); /* in case another processor initialized lock */ 320 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 321 KMP_FATAL( LockSimpleUsedAsNestable, func ); 322 } 323 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 324 KMP_FATAL( LockUnsettingFree, func ); 325 } 326 if ( __kmp_get_tas_lock_owner( lck ) != gtid ) { 327 KMP_FATAL( LockUnsettingSetByAnother, func ); 328 } 329 return __kmp_release_nested_tas_lock( lck, gtid ); 330 } 331 332 void 333 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck ) 334 { 335 __kmp_init_tas_lock( lck ); 336 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 337 } 338 339 static void 340 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck ) 341 { 342 __kmp_init_nested_tas_lock( lck ); 343 } 344 345 void 346 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ) 347 { 348 __kmp_destroy_tas_lock( lck ); 349 lck->lk.depth_locked = 0; 350 } 351 352 static void 353 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) 354 { 355 char const * const func = "omp_destroy_nest_lock"; 356 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 357 KMP_FATAL( LockSimpleUsedAsNestable, func ); 358 } 359 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 360 KMP_FATAL( LockStillOwned, func ); 361 } 362 __kmp_destroy_nested_tas_lock( lck ); 363 } 364 365 366 #if KMP_USE_FUTEX 367 368 /* ------------------------------------------------------------------------ */ 369 /* futex locks */ 370 371 // futex locks are really just test and set locks, with a different method 372 // of handling contention. They take the same amount of space as test and 373 // set locks, and are allocated the same way (i.e. use the area allocated by 374 // the compiler for non-nested locks / allocate nested locks on the heap). 375 376 static kmp_int32 377 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck ) 378 { 379 return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1; 380 } 381 382 static inline bool 383 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck ) 384 { 385 return lck->lk.depth_locked != -1; 386 } 387 388 __forceinline static int 389 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid ) 390 { 391 kmp_int32 gtid_code = ( gtid + 1 ) << 1; 392 393 KMP_MB(); 394 395 #ifdef USE_LOCK_PROFILE 396 kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) ); 397 if ( ( curr != 0 ) && ( curr != gtid_code ) ) 398 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 399 /* else __kmp_printf( "." 
);*/ 400 #endif /* USE_LOCK_PROFILE */ 401 402 KMP_FSYNC_PREPARE( lck ); 403 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", 404 lck, lck->lk.poll, gtid ) ); 405 406 kmp_int32 poll_val; 407 408 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), 409 KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) { 410 411 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; 412 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", 413 lck, gtid, poll_val, cond ) ); 414 415 // 416 // NOTE: if you try to use the following condition for this branch 417 // 418 // if ( poll_val & 1 == 0 ) 419 // 420 // Then the 12.0 compiler has a bug where the following block will 421 // always be skipped, regardless of the value of the LSB of poll_val. 422 // 423 if ( ! cond ) { 424 // 425 // Try to set the lsb in the poll to indicate to the owner 426 // thread that they need to wake this thread up. 427 // 428 if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) { 429 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", 430 lck, lck->lk.poll, gtid ) ); 431 continue; 432 } 433 poll_val |= KMP_LOCK_BUSY(1, futex); 434 435 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", 436 lck, lck->lk.poll, gtid ) ); 437 } 438 439 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", 440 lck, gtid, poll_val ) ); 441 442 kmp_int32 rc; 443 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, 444 poll_val, NULL, NULL, 0 ) ) != 0 ) { 445 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n", 446 lck, gtid, poll_val, rc, errno ) ); 447 continue; 448 } 449 450 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", 451 lck, gtid, poll_val ) ); 452 // 453 // This thread has now done a successful futex wait call and was 454 // entered on the OS futex queue. We must now perform a futex 455 // wake call when releasing the lock, as we have no idea how many 456 // other threads are in the queue. 
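        // Concretely, that is why the low bit is set in gtid_code just below:
        // once this thread has slept in the kernel it always re-arms the
        // "waiter present" bit in any value it installs in lk.poll, so the
        // eventual release path sees bit 0 set and issues the FUTEX_WAKE that
        // lets a queued thread retry the acquire.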
457 // 458 gtid_code |= 1; 459 } 460 461 KMP_FSYNC_ACQUIRED( lck ); 462 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", 463 lck, lck->lk.poll, gtid ) ); 464 return KMP_LOCK_ACQUIRED_FIRST; 465 } 466 467 int 468 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 469 { 470 return __kmp_acquire_futex_lock_timed_template( lck, gtid ); 471 } 472 473 static int 474 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 475 { 476 char const * const func = "omp_set_lock"; 477 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 478 && __kmp_is_futex_lock_nestable( lck ) ) { 479 KMP_FATAL( LockNestableUsedAsSimple, func ); 480 } 481 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) { 482 KMP_FATAL( LockIsAlreadyOwned, func ); 483 } 484 return __kmp_acquire_futex_lock( lck, gtid ); 485 } 486 487 int 488 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 489 { 490 if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex) ) ) { 491 KMP_FSYNC_ACQUIRED( lck ); 492 return TRUE; 493 } 494 return FALSE; 495 } 496 497 static int 498 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 499 { 500 char const * const func = "omp_test_lock"; 501 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 502 && __kmp_is_futex_lock_nestable( lck ) ) { 503 KMP_FATAL( LockNestableUsedAsSimple, func ); 504 } 505 return __kmp_test_futex_lock( lck, gtid ); 506 } 507 508 int 509 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 510 { 511 KMP_MB(); /* Flush all pending memory write invalidates. */ 512 513 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", 514 lck, lck->lk.poll, gtid ) ); 515 516 KMP_FSYNC_RELEASING(lck); 517 518 kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) ); 519 520 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", 521 lck, gtid, poll_val ) ); 522 523 if ( KMP_LOCK_STRIP(poll_val) & 1 ) { 524 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", 525 lck, gtid ) ); 526 syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 ); 527 } 528 529 KMP_MB(); /* Flush all pending memory write invalidates. */ 530 531 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", 532 lck, lck->lk.poll, gtid ) ); 533 534 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? 
__kmp_avail_proc : 535 __kmp_xproc ) ); 536 return KMP_LOCK_RELEASED; 537 } 538 539 static int 540 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 541 { 542 char const * const func = "omp_unset_lock"; 543 KMP_MB(); /* in case another processor initialized lock */ 544 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 545 && __kmp_is_futex_lock_nestable( lck ) ) { 546 KMP_FATAL( LockNestableUsedAsSimple, func ); 547 } 548 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 549 KMP_FATAL( LockUnsettingFree, func ); 550 } 551 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 ) 552 && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) { 553 KMP_FATAL( LockUnsettingSetByAnother, func ); 554 } 555 return __kmp_release_futex_lock( lck, gtid ); 556 } 557 558 void 559 __kmp_init_futex_lock( kmp_futex_lock_t * lck ) 560 { 561 TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) ); 562 } 563 564 static void 565 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck ) 566 { 567 __kmp_init_futex_lock( lck ); 568 } 569 570 void 571 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ) 572 { 573 lck->lk.poll = 0; 574 } 575 576 static void 577 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck ) 578 { 579 char const * const func = "omp_destroy_lock"; 580 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 581 && __kmp_is_futex_lock_nestable( lck ) ) { 582 KMP_FATAL( LockNestableUsedAsSimple, func ); 583 } 584 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 585 KMP_FATAL( LockStillOwned, func ); 586 } 587 __kmp_destroy_futex_lock( lck ); 588 } 589 590 591 // 592 // nested futex locks 593 // 594 595 int 596 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 597 { 598 KMP_DEBUG_ASSERT( gtid >= 0 ); 599 600 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 601 lck->lk.depth_locked += 1; 602 return KMP_LOCK_ACQUIRED_NEXT; 603 } 604 else { 605 __kmp_acquire_futex_lock_timed_template( lck, gtid ); 606 lck->lk.depth_locked = 1; 607 return KMP_LOCK_ACQUIRED_FIRST; 608 } 609 } 610 611 static int 612 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 613 { 614 char const * const func = "omp_set_nest_lock"; 615 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 616 KMP_FATAL( LockSimpleUsedAsNestable, func ); 617 } 618 return __kmp_acquire_nested_futex_lock( lck, gtid ); 619 } 620 621 int 622 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 623 { 624 int retval; 625 626 KMP_DEBUG_ASSERT( gtid >= 0 ); 627 628 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 629 retval = ++lck->lk.depth_locked; 630 } 631 else if ( !__kmp_test_futex_lock( lck, gtid ) ) { 632 retval = 0; 633 } 634 else { 635 KMP_MB(); 636 retval = lck->lk.depth_locked = 1; 637 } 638 return retval; 639 } 640 641 static int 642 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 643 { 644 char const * const func = "omp_test_nest_lock"; 645 if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { 646 KMP_FATAL( LockSimpleUsedAsNestable, func ); 647 } 648 return __kmp_test_nested_futex_lock( lck, gtid ); 649 } 650 651 int 652 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 653 { 654 KMP_DEBUG_ASSERT( gtid >= 0 ); 655 656 KMP_MB(); 657 if ( --(lck->lk.depth_locked) == 0 ) { 658 __kmp_release_futex_lock( lck, gtid ); 659 return KMP_LOCK_RELEASED; 660 } 661 return KMP_LOCK_STILL_HELD; 662 } 663 664 static int 665 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 666 { 667 char const * const func = "omp_unset_nest_lock"; 668 KMP_MB(); /* in case another processor initialized lock */ 669 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 670 KMP_FATAL( LockSimpleUsedAsNestable, func ); 671 } 672 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 673 KMP_FATAL( LockUnsettingFree, func ); 674 } 675 if ( __kmp_get_futex_lock_owner( lck ) != gtid ) { 676 KMP_FATAL( LockUnsettingSetByAnother, func ); 677 } 678 return __kmp_release_nested_futex_lock( lck, gtid ); 679 } 680 681 void 682 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck ) 683 { 684 __kmp_init_futex_lock( lck ); 685 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 686 } 687 688 static void 689 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck ) 690 { 691 __kmp_init_nested_futex_lock( lck ); 692 } 693 694 void 695 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ) 696 { 697 __kmp_destroy_futex_lock( lck ); 698 lck->lk.depth_locked = 0; 699 } 700 701 static void 702 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck ) 703 { 704 char const * const func = "omp_destroy_nest_lock"; 705 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 706 KMP_FATAL( LockSimpleUsedAsNestable, func ); 707 } 708 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 709 KMP_FATAL( LockStillOwned, func ); 710 } 711 __kmp_destroy_nested_futex_lock( lck ); 712 } 713 714 #endif // KMP_USE_FUTEX 715 716 717 /* ------------------------------------------------------------------------ */ 718 /* ticket (bakery) locks */ 719 720 static kmp_int32 721 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck ) 722 { 723 return std::atomic_load_explicit( &lck->lk.owner_id, std::memory_order_relaxed ) - 1; 724 } 725 726 static inline bool 727 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck ) 728 { 729 return std::atomic_load_explicit( &lck->lk.depth_locked, std::memory_order_relaxed ) != -1; 730 } 731 732 static kmp_uint32 733 __kmp_bakery_check( void *now_serving, kmp_uint32 my_ticket ) 734 { 735 return std::atomic_load_explicit( (std::atomic<unsigned> *)now_serving, std::memory_order_acquire ) == my_ticket; 736 } 737 738 __forceinline static int 739 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 740 { 741 kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed ); 742 743 #ifdef USE_LOCK_PROFILE 744 if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) != my_ticket ) 745 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 746 /* else __kmp_printf( "." 
);*/ 747 #endif /* USE_LOCK_PROFILE */ 748 749 if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_acquire ) == my_ticket ) { 750 return KMP_LOCK_ACQUIRED_FIRST; 751 } 752 KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck ); 753 return KMP_LOCK_ACQUIRED_FIRST; 754 } 755 756 int 757 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 758 { 759 return __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 760 } 761 762 static int 763 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 764 { 765 char const * const func = "omp_set_lock"; 766 767 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { 768 KMP_FATAL( LockIsUninitialized, func ); 769 } 770 if ( lck->lk.self != lck ) { 771 KMP_FATAL( LockIsUninitialized, func ); 772 } 773 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 774 KMP_FATAL( LockNestableUsedAsSimple, func ); 775 } 776 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) { 777 KMP_FATAL( LockIsAlreadyOwned, func ); 778 } 779 780 __kmp_acquire_ticket_lock( lck, gtid ); 781 782 std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed ); 783 return KMP_LOCK_ACQUIRED_FIRST; 784 } 785 786 int 787 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 788 { 789 kmp_uint32 my_ticket = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ); 790 791 if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) == my_ticket ) { 792 kmp_uint32 next_ticket = my_ticket + 1; 793 if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket, 794 &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) { 795 return TRUE; 796 } 797 } 798 return FALSE; 799 } 800 801 static int 802 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 803 { 804 char const * const func = "omp_test_lock"; 805 806 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { 807 KMP_FATAL( LockIsUninitialized, func ); 808 } 809 if ( lck->lk.self != lck ) { 810 KMP_FATAL( LockIsUninitialized, func ); 811 } 812 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 813 KMP_FATAL( LockNestableUsedAsSimple, func ); 814 } 815 816 int retval = __kmp_test_ticket_lock( lck, gtid ); 817 818 if ( retval ) { 819 std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed ); 820 } 821 return retval; 822 } 823 824 int 825 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 826 { 827 kmp_uint32 distance = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ) - std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ); 828 829 std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release ); 830 831 KMP_YIELD( distance 832 > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ); 833 return KMP_LOCK_RELEASED; 834 } 835 836 static int 837 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 838 { 839 char const * const func = "omp_unset_lock"; 840 841 if ( ! 
std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { 842 KMP_FATAL( LockIsUninitialized, func ); 843 } 844 if ( lck->lk.self != lck ) { 845 KMP_FATAL( LockIsUninitialized, func ); 846 } 847 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 848 KMP_FATAL( LockNestableUsedAsSimple, func ); 849 } 850 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { 851 KMP_FATAL( LockUnsettingFree, func ); 852 } 853 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 ) 854 && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) { 855 KMP_FATAL( LockUnsettingSetByAnother, func ); 856 } 857 std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); 858 return __kmp_release_ticket_lock( lck, gtid ); 859 } 860 861 void 862 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck ) 863 { 864 lck->lk.location = NULL; 865 lck->lk.self = lck; 866 std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed ); 867 std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed ); 868 std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); // no thread owns the lock. 869 std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); // -1 => not a nested lock. 870 std::atomic_store_explicit( &lck->lk.initialized, true, std::memory_order_release ); 871 } 872 873 static void 874 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) 875 { 876 __kmp_init_ticket_lock( lck ); 877 } 878 879 void 880 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ) 881 { 882 std::atomic_store_explicit( &lck->lk.initialized, false, std::memory_order_release ); 883 lck->lk.self = NULL; 884 lck->lk.location = NULL; 885 std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed ); 886 std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed ); 887 std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); 888 std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); 889 } 890 891 static void 892 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) 893 { 894 char const * const func = "omp_destroy_lock"; 895 896 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { 897 KMP_FATAL( LockIsUninitialized, func ); 898 } 899 if ( lck->lk.self != lck ) { 900 KMP_FATAL( LockIsUninitialized, func ); 901 } 902 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 903 KMP_FATAL( LockNestableUsedAsSimple, func ); 904 } 905 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { 906 KMP_FATAL( LockStillOwned, func ); 907 } 908 __kmp_destroy_ticket_lock( lck ); 909 } 910 911 912 // 913 // nested ticket locks 914 // 915 916 int 917 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 918 { 919 KMP_DEBUG_ASSERT( gtid >= 0 ); 920 921 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 922 std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ); 923 return KMP_LOCK_ACQUIRED_NEXT; 924 } 925 else { 926 __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 927 std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ); 928 std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed ); 929 return KMP_LOCK_ACQUIRED_FIRST; 930 } 931 } 932 933 static int 934 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 935 { 936 char const * const func = "omp_set_nest_lock"; 937 938 if ( ! 
std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { 939 KMP_FATAL( LockIsUninitialized, func ); 940 } 941 if ( lck->lk.self != lck ) { 942 KMP_FATAL( LockIsUninitialized, func ); 943 } 944 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 945 KMP_FATAL( LockSimpleUsedAsNestable, func ); 946 } 947 return __kmp_acquire_nested_ticket_lock( lck, gtid ); 948 } 949 950 int 951 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 952 { 953 int retval; 954 955 KMP_DEBUG_ASSERT( gtid >= 0 ); 956 957 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 958 retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ) + 1; 959 } 960 else if ( !__kmp_test_ticket_lock( lck, gtid ) ) { 961 retval = 0; 962 } 963 else { 964 std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ); 965 std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed ); 966 retval = 1; 967 } 968 return retval; 969 } 970 971 static int 972 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, 973 kmp_int32 gtid ) 974 { 975 char const * const func = "omp_test_nest_lock"; 976 977 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { 978 KMP_FATAL( LockIsUninitialized, func ); 979 } 980 if ( lck->lk.self != lck ) { 981 KMP_FATAL( LockIsUninitialized, func ); 982 } 983 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 984 KMP_FATAL( LockSimpleUsedAsNestable, func ); 985 } 986 return __kmp_test_nested_ticket_lock( lck, gtid ); 987 } 988 989 int 990 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 991 { 992 KMP_DEBUG_ASSERT( gtid >= 0 ); 993 994 if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ) - 1 ) == 0 ) { 995 std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); 996 __kmp_release_ticket_lock( lck, gtid ); 997 return KMP_LOCK_RELEASED; 998 } 999 return KMP_LOCK_STILL_HELD; 1000 } 1001 1002 static int 1003 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 1004 { 1005 char const * const func = "omp_unset_nest_lock"; 1006 1007 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { 1008 KMP_FATAL( LockIsUninitialized, func ); 1009 } 1010 if ( lck->lk.self != lck ) { 1011 KMP_FATAL( LockIsUninitialized, func ); 1012 } 1013 if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { 1014 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1015 } 1016 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { 1017 KMP_FATAL( LockUnsettingFree, func ); 1018 } 1019 if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) { 1020 KMP_FATAL( LockUnsettingSetByAnother, func ); 1021 } 1022 return __kmp_release_nested_ticket_lock( lck, gtid ); 1023 } 1024 1025 void 1026 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck ) 1027 { 1028 __kmp_init_ticket_lock( lck ); 1029 std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); // >= 0 for nestable locks, -1 for simple locks 1030 } 1031 1032 static void 1033 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) 1034 { 1035 __kmp_init_nested_ticket_lock( lck ); 1036 } 1037 1038 void 1039 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ) 1040 { 1041 __kmp_destroy_ticket_lock( lck ); 1042 std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); 1043 } 1044 1045 static void 1046 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) 1047 { 1048 char const * const func = "omp_destroy_nest_lock"; 1049 1050 if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { 1051 KMP_FATAL( LockIsUninitialized, func ); 1052 } 1053 if ( lck->lk.self != lck ) { 1054 KMP_FATAL( LockIsUninitialized, func ); 1055 } 1056 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 1057 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1058 } 1059 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { 1060 KMP_FATAL( LockStillOwned, func ); 1061 } 1062 __kmp_destroy_nested_ticket_lock( lck ); 1063 } 1064 1065 1066 // 1067 // access functions to fields which don't exist for all lock kinds. 1068 // 1069 1070 static int 1071 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck ) 1072 { 1073 return std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) && ( lck->lk.self == lck); 1074 } 1075 1076 static const ident_t * 1077 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck ) 1078 { 1079 return lck->lk.location; 1080 } 1081 1082 static void 1083 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc ) 1084 { 1085 lck->lk.location = loc; 1086 } 1087 1088 static kmp_lock_flags_t 1089 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck ) 1090 { 1091 return lck->lk.flags; 1092 } 1093 1094 static void 1095 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags ) 1096 { 1097 lck->lk.flags = flags; 1098 } 1099 1100 /* ------------------------------------------------------------------------ */ 1101 /* queuing locks */ 1102 1103 /* 1104 * First the states 1105 * (head,tail) = 0, 0 means lock is unheld, nobody on queue 1106 * UINT_MAX or -1, 0 means lock is held, nobody on queue 1107 * h, h means lock is held or about to transition, 1 element on queue 1108 * h, t h <> t, means lock is held or about to transition, >1 elements on queue 1109 * 1110 * Now the transitions 1111 * Acquire(0,0) = -1 ,0 1112 * Release(0,0) = Error 1113 * Acquire(-1,0) = h ,h h > 0 1114 * Release(-1,0) = 0 ,0 1115 * Acquire(h,h) = h ,t h > 0, t > 0, h <> t 1116 * Release(h,h) = -1 ,0 h > 0 1117 * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' 1118 * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t 1119 * 1120 * And pictorially 1121 * 1122 * 1123 * +-----+ 1124 * | 0, 0|------- release -------> Error 1125 * +-----+ 1126 * | ^ 1127 * acquire| |release 1128 * | | 1129 * | | 
 *          v         |
 *        +-----+
 *        |-1, 0|
 *        +-----+
 *          |  ^
 *   acquire|  |release
 *          |  |
 *          |  |
 *          v  |
 *        +-----+
 *        | h, h|
 *        +-----+
 *          |  ^
 *   acquire|  |release
 *          |  |
 *          |  |
 *          v  |
 *        +-----+
 *        | h, t|----- acquire, release loopback ---+
 *        +-----+                                   |
 *             ^                                    |
 *             |                                    |
 *             +------------------------------------+
 *
 */

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE   1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
static int tc = 0;
#define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z );
#define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );

static void
__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
{
    kmp_int32 t, i;

    __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );

    i = tc % TRACE_BUF_ELE;
    __kmp_printf_no_lock( "%s\n", traces[i] );
    i = (i+1) % TRACE_BUF_ELE;
    while ( i != (tc % TRACE_BUF_ELE) ) {
        __kmp_printf_no_lock( "%s", traces[i] );
        i = (i+1) % TRACE_BUF_ELE;
    }
    __kmp_printf_no_lock( "\n" );

    __kmp_printf_no_lock(
        "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
        gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
        head_id, tail_id );

    __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );

    if ( lck->lk.head_id >= 1 ) {
        t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
        while (t > 0) {
            __kmp_printf_no_lock( "-> %d ", t );
            t = __kmp_threads[t-1]->th.th_next_waiting;
        }
    }
    __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
    __kmp_printf_no_lock( "\n\n" );
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32
__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline.
*/ 1219 __forceinline static int 1220 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck, 1221 kmp_int32 gtid ) 1222 { 1223 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid ); 1224 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1225 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1226 volatile kmp_uint32 *spin_here_p; 1227 kmp_int32 need_mf = 1; 1228 1229 #if OMPT_SUPPORT 1230 ompt_state_t prev_state = ompt_state_undefined; 1231 #endif 1232 1233 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1234 1235 KMP_FSYNC_PREPARE( lck ); 1236 KMP_DEBUG_ASSERT( this_thr != NULL ); 1237 spin_here_p = & this_thr->th.th_spin_here; 1238 1239 #ifdef DEBUG_QUEUING_LOCKS 1240 TRACE_LOCK( gtid+1, "acq ent" ); 1241 if ( *spin_here_p ) 1242 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1243 if ( this_thr->th.th_next_waiting != 0 ) 1244 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1245 #endif 1246 KMP_DEBUG_ASSERT( !*spin_here_p ); 1247 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1248 1249 1250 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p 1251 that may follow, not just in execution order, but also in visibility order. This way, 1252 when a releasing thread observes the changes to the queue by this thread, it can 1253 rightly assume that spin_here_p has already been set to TRUE, so that when it sets 1254 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p 1255 to FALSE before this thread sets it to TRUE, this thread will hang. 1256 */ 1257 *spin_here_p = TRUE; /* before enqueuing to prevent race */ 1258 1259 while( 1 ) { 1260 kmp_int32 enqueued; 1261 kmp_int32 head; 1262 kmp_int32 tail; 1263 1264 head = *head_id_p; 1265 1266 switch ( head ) { 1267 1268 case -1: 1269 { 1270 #ifdef DEBUG_QUEUING_LOCKS 1271 tail = *tail_id_p; 1272 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1273 #endif 1274 tail = 0; /* to make sure next link asynchronously read is not set accidentally; 1275 this assignment prevents us from entering the if ( t > 0 ) 1276 condition in the enqueued case below, which is not necessary for 1277 this state transition */ 1278 1279 need_mf = 0; 1280 /* try (-1,0)->(tid,tid) */ 1281 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p, 1282 KMP_PACK_64( -1, 0 ), 1283 KMP_PACK_64( gtid+1, gtid+1 ) ); 1284 #ifdef DEBUG_QUEUING_LOCKS 1285 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" ); 1286 #endif 1287 } 1288 break; 1289 1290 default: 1291 { 1292 tail = *tail_id_p; 1293 KMP_DEBUG_ASSERT( tail != gtid + 1 ); 1294 1295 #ifdef DEBUG_QUEUING_LOCKS 1296 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1297 #endif 1298 1299 if ( tail == 0 ) { 1300 enqueued = FALSE; 1301 } 1302 else { 1303 need_mf = 0; 1304 /* try (h,t) or (h,h)->(h,tid) */ 1305 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 ); 1306 1307 #ifdef DEBUG_QUEUING_LOCKS 1308 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" ); 1309 #endif 1310 } 1311 } 1312 break; 1313 1314 case 0: /* empty queue */ 1315 { 1316 kmp_int32 grabbed_lock; 1317 1318 #ifdef DEBUG_QUEUING_LOCKS 1319 tail = *tail_id_p; 1320 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1321 #endif 1322 /* try (0,0)->(-1,0) */ 1323 1324 /* only legal transition out of head = 0 is head = -1 with no change to tail */ 1325 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ); 1326 1327 if ( grabbed_lock ) 
{ 1328 1329 *spin_here_p = FALSE; 1330 1331 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", 1332 lck, gtid )); 1333 #ifdef DEBUG_QUEUING_LOCKS 1334 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 ); 1335 #endif 1336 1337 #if OMPT_SUPPORT 1338 if (ompt_enabled && prev_state != ompt_state_undefined) { 1339 /* change the state before clearing wait_id */ 1340 this_thr->th.ompt_thread_info.state = prev_state; 1341 this_thr->th.ompt_thread_info.wait_id = 0; 1342 } 1343 #endif 1344 1345 KMP_FSYNC_ACQUIRED( lck ); 1346 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ 1347 } 1348 enqueued = FALSE; 1349 } 1350 break; 1351 } 1352 1353 #if OMPT_SUPPORT 1354 if (ompt_enabled && prev_state == ompt_state_undefined) { 1355 /* this thread will spin; set wait_id before entering wait state */ 1356 prev_state = this_thr->th.ompt_thread_info.state; 1357 this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck; 1358 this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; 1359 } 1360 #endif 1361 1362 if ( enqueued ) { 1363 if ( tail > 0 ) { 1364 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 ); 1365 KMP_ASSERT( tail_thr != NULL ); 1366 tail_thr->th.th_next_waiting = gtid+1; 1367 /* corresponding wait for this write in release code */ 1368 } 1369 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid )); 1370 1371 1372 /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for 1373 * throughput only here. 1374 */ 1375 KMP_MB(); 1376 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); 1377 1378 #ifdef DEBUG_QUEUING_LOCKS 1379 TRACE_LOCK( gtid+1, "acq spin" ); 1380 1381 if ( this_thr->th.th_next_waiting != 0 ) 1382 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1383 #endif 1384 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1385 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n", 1386 lck, gtid )); 1387 1388 #ifdef DEBUG_QUEUING_LOCKS 1389 TRACE_LOCK( gtid+1, "acq exit 2" ); 1390 #endif 1391 1392 #if OMPT_SUPPORT 1393 /* change the state before clearing wait_id */ 1394 this_thr->th.ompt_thread_info.state = prev_state; 1395 this_thr->th.ompt_thread_info.wait_id = 0; 1396 #endif 1397 1398 /* got lock, we were dequeued by the thread that released lock */ 1399 return KMP_LOCK_ACQUIRED_FIRST; 1400 } 1401 1402 /* Yield if number of threads > number of logical processors */ 1403 /* ToDo: Not sure why this should only be in oversubscription case, 1404 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ 1405 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? 
__kmp_avail_proc : 1406 __kmp_xproc ) ); 1407 #ifdef DEBUG_QUEUING_LOCKS 1408 TRACE_LOCK( gtid+1, "acq retry" ); 1409 #endif 1410 1411 } 1412 KMP_ASSERT2( 0, "should not get here" ); 1413 return KMP_LOCK_ACQUIRED_FIRST; 1414 } 1415 1416 int 1417 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1418 { 1419 KMP_DEBUG_ASSERT( gtid >= 0 ); 1420 1421 return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1422 } 1423 1424 static int 1425 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1426 kmp_int32 gtid ) 1427 { 1428 char const * const func = "omp_set_lock"; 1429 if ( lck->lk.initialized != lck ) { 1430 KMP_FATAL( LockIsUninitialized, func ); 1431 } 1432 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1433 KMP_FATAL( LockNestableUsedAsSimple, func ); 1434 } 1435 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1436 KMP_FATAL( LockIsAlreadyOwned, func ); 1437 } 1438 1439 __kmp_acquire_queuing_lock( lck, gtid ); 1440 1441 lck->lk.owner_id = gtid + 1; 1442 return KMP_LOCK_ACQUIRED_FIRST; 1443 } 1444 1445 int 1446 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1447 { 1448 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1449 kmp_int32 head; 1450 #ifdef KMP_DEBUG 1451 kmp_info_t *this_thr; 1452 #endif 1453 1454 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid )); 1455 KMP_DEBUG_ASSERT( gtid >= 0 ); 1456 #ifdef KMP_DEBUG 1457 this_thr = __kmp_thread_from_gtid( gtid ); 1458 KMP_DEBUG_ASSERT( this_thr != NULL ); 1459 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1460 #endif 1461 1462 head = *head_id_p; 1463 1464 if ( head == 0 ) { /* nobody on queue, nobody holding */ 1465 1466 /* try (0,0)->(-1,0) */ 1467 1468 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) { 1469 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid )); 1470 KMP_FSYNC_ACQUIRED(lck); 1471 return TRUE; 1472 } 1473 } 1474 1475 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid )); 1476 return FALSE; 1477 } 1478 1479 static int 1480 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1481 { 1482 char const * const func = "omp_test_lock"; 1483 if ( lck->lk.initialized != lck ) { 1484 KMP_FATAL( LockIsUninitialized, func ); 1485 } 1486 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1487 KMP_FATAL( LockNestableUsedAsSimple, func ); 1488 } 1489 1490 int retval = __kmp_test_queuing_lock( lck, gtid ); 1491 1492 if ( retval ) { 1493 lck->lk.owner_id = gtid + 1; 1494 } 1495 return retval; 1496 } 1497 1498 int 1499 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1500 { 1501 register kmp_info_t *this_thr; 1502 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1503 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1504 1505 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1506 KMP_DEBUG_ASSERT( gtid >= 0 ); 1507 this_thr = __kmp_thread_from_gtid( gtid ); 1508 KMP_DEBUG_ASSERT( this_thr != NULL ); 1509 #ifdef DEBUG_QUEUING_LOCKS 1510 TRACE_LOCK( gtid+1, "rel ent" ); 1511 1512 if ( this_thr->th.th_spin_here ) 1513 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1514 if ( this_thr->th.th_next_waiting != 0 ) 1515 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1516 #endif 1517 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1518 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1519 1520 KMP_FSYNC_RELEASING(lck); 1521 1522 while( 1 ) { 1523 kmp_int32 dequeued; 
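        // Each pass of this loop re-reads the queue head and attempts exactly one
        // of the release transitions from the state table above:
        //   (-1, 0) -> ( 0, 0)   nobody queued: a 32-bit CAS on head_id frees the lock;
        //   ( h, h) -> (-1, 0)   one waiter: a packed 64-bit CAS over the head/tail
        //                        pair empties the queue;
        //   ( h, t) -> ( h', t)  several waiters: wait for the head thread to publish
        //                        th_next_waiting, promote it to the new head, then wake
        //                        the dequeued thread by clearing its th_spin_here flag.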
1524 kmp_int32 head; 1525 kmp_int32 tail; 1526 1527 head = *head_id_p; 1528 1529 #ifdef DEBUG_QUEUING_LOCKS 1530 tail = *tail_id_p; 1531 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail ); 1532 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1533 #endif 1534 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */ 1535 1536 if ( head == -1 ) { /* nobody on queue */ 1537 1538 /* try (-1,0)->(0,0) */ 1539 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) { 1540 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1541 lck, gtid )); 1542 #ifdef DEBUG_QUEUING_LOCKS 1543 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 ); 1544 #endif 1545 1546 #if OMPT_SUPPORT 1547 /* nothing to do - no other thread is trying to shift blame */ 1548 #endif 1549 1550 return KMP_LOCK_RELEASED; 1551 } 1552 dequeued = FALSE; 1553 1554 } 1555 else { 1556 1557 tail = *tail_id_p; 1558 if ( head == tail ) { /* only one thread on the queue */ 1559 1560 #ifdef DEBUG_QUEUING_LOCKS 1561 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1562 #endif 1563 KMP_DEBUG_ASSERT( head > 0 ); 1564 1565 /* try (h,h)->(-1,0) */ 1566 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p, 1567 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) ); 1568 #ifdef DEBUG_QUEUING_LOCKS 1569 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" ); 1570 #endif 1571 1572 } 1573 else { 1574 volatile kmp_int32 *waiting_id_p; 1575 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1576 KMP_DEBUG_ASSERT( head_thr != NULL ); 1577 waiting_id_p = & head_thr->th.th_next_waiting; 1578 1579 /* Does this require synchronous reads? */ 1580 #ifdef DEBUG_QUEUING_LOCKS 1581 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1582 #endif 1583 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1584 1585 /* try (h,t)->(h',t) or (t,t) */ 1586 1587 KMP_MB(); 1588 /* make sure enqueuing thread has time to update next waiting thread field */ 1589 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL); 1590 #ifdef DEBUG_QUEUING_LOCKS 1591 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" ); 1592 #endif 1593 dequeued = TRUE; 1594 } 1595 } 1596 1597 if ( dequeued ) { 1598 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1599 KMP_DEBUG_ASSERT( head_thr != NULL ); 1600 1601 /* Does this require synchronous reads? */ 1602 #ifdef DEBUG_QUEUING_LOCKS 1603 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1604 #endif 1605 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1606 1607 /* For clean code only. 1608 * Thread not released until next statement prevents race with acquire code. 
1609 */ 1610 head_thr->th.th_next_waiting = 0; 1611 #ifdef DEBUG_QUEUING_LOCKS 1612 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head ); 1613 #endif 1614 1615 KMP_MB(); 1616 /* reset spin value */ 1617 head_thr->th.th_spin_here = FALSE; 1618 1619 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n", 1620 lck, gtid )); 1621 #ifdef DEBUG_QUEUING_LOCKS 1622 TRACE_LOCK( gtid+1, "rel exit 2" ); 1623 #endif 1624 return KMP_LOCK_RELEASED; 1625 } 1626 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */ 1627 1628 #ifdef DEBUG_QUEUING_LOCKS 1629 TRACE_LOCK( gtid+1, "rel retry" ); 1630 #endif 1631 1632 } /* while */ 1633 KMP_ASSERT2( 0, "should not get here" ); 1634 return KMP_LOCK_RELEASED; 1635 } 1636 1637 static int 1638 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1639 kmp_int32 gtid ) 1640 { 1641 char const * const func = "omp_unset_lock"; 1642 KMP_MB(); /* in case another processor initialized lock */ 1643 if ( lck->lk.initialized != lck ) { 1644 KMP_FATAL( LockIsUninitialized, func ); 1645 } 1646 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1647 KMP_FATAL( LockNestableUsedAsSimple, func ); 1648 } 1649 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1650 KMP_FATAL( LockUnsettingFree, func ); 1651 } 1652 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1653 KMP_FATAL( LockUnsettingSetByAnother, func ); 1654 } 1655 lck->lk.owner_id = 0; 1656 return __kmp_release_queuing_lock( lck, gtid ); 1657 } 1658 1659 void 1660 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ) 1661 { 1662 lck->lk.location = NULL; 1663 lck->lk.head_id = 0; 1664 lck->lk.tail_id = 0; 1665 lck->lk.next_ticket = 0; 1666 lck->lk.now_serving = 0; 1667 lck->lk.owner_id = 0; // no thread owns the lock. 1668 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
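    // Storing the lock's own address below is what marks the lock as initialized:
    // the *_with_checks routines treat any other lk.initialized value (e.g. the
    // NULL written by __kmp_destroy_queuing_lock) as LockIsUninitialized.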
1669 lck->lk.initialized = lck; 1670 1671 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1672 } 1673 1674 static void 1675 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1676 { 1677 __kmp_init_queuing_lock( lck ); 1678 } 1679 1680 void 1681 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ) 1682 { 1683 lck->lk.initialized = NULL; 1684 lck->lk.location = NULL; 1685 lck->lk.head_id = 0; 1686 lck->lk.tail_id = 0; 1687 lck->lk.next_ticket = 0; 1688 lck->lk.now_serving = 0; 1689 lck->lk.owner_id = 0; 1690 lck->lk.depth_locked = -1; 1691 } 1692 1693 static void 1694 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1695 { 1696 char const * const func = "omp_destroy_lock"; 1697 if ( lck->lk.initialized != lck ) { 1698 KMP_FATAL( LockIsUninitialized, func ); 1699 } 1700 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1701 KMP_FATAL( LockNestableUsedAsSimple, func ); 1702 } 1703 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1704 KMP_FATAL( LockStillOwned, func ); 1705 } 1706 __kmp_destroy_queuing_lock( lck ); 1707 } 1708 1709 1710 // 1711 // nested queuing locks 1712 // 1713 1714 int 1715 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1716 { 1717 KMP_DEBUG_ASSERT( gtid >= 0 ); 1718 1719 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1720 lck->lk.depth_locked += 1; 1721 return KMP_LOCK_ACQUIRED_NEXT; 1722 } 1723 else { 1724 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1725 KMP_MB(); 1726 lck->lk.depth_locked = 1; 1727 KMP_MB(); 1728 lck->lk.owner_id = gtid + 1; 1729 return KMP_LOCK_ACQUIRED_FIRST; 1730 } 1731 } 1732 1733 static int 1734 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1735 { 1736 char const * const func = "omp_set_nest_lock"; 1737 if ( lck->lk.initialized != lck ) { 1738 KMP_FATAL( LockIsUninitialized, func ); 1739 } 1740 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1741 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1742 } 1743 return __kmp_acquire_nested_queuing_lock( lck, gtid ); 1744 } 1745 1746 int 1747 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1748 { 1749 int retval; 1750 1751 KMP_DEBUG_ASSERT( gtid >= 0 ); 1752 1753 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1754 retval = ++lck->lk.depth_locked; 1755 } 1756 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) { 1757 retval = 0; 1758 } 1759 else { 1760 KMP_MB(); 1761 retval = lck->lk.depth_locked = 1; 1762 KMP_MB(); 1763 lck->lk.owner_id = gtid + 1; 1764 } 1765 return retval; 1766 } 1767 1768 static int 1769 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1770 kmp_int32 gtid ) 1771 { 1772 char const * const func = "omp_test_nest_lock"; 1773 if ( lck->lk.initialized != lck ) { 1774 KMP_FATAL( LockIsUninitialized, func ); 1775 } 1776 if ( ! 
__kmp_is_queuing_lock_nestable( lck ) ) { 1777 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1778 } 1779 return __kmp_test_nested_queuing_lock( lck, gtid ); 1780 } 1781 1782 int 1783 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1784 { 1785 KMP_DEBUG_ASSERT( gtid >= 0 ); 1786 1787 KMP_MB(); 1788 if ( --(lck->lk.depth_locked) == 0 ) { 1789 KMP_MB(); 1790 lck->lk.owner_id = 0; 1791 __kmp_release_queuing_lock( lck, gtid ); 1792 return KMP_LOCK_RELEASED; 1793 } 1794 return KMP_LOCK_STILL_HELD; 1795 } 1796 1797 static int 1798 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1799 { 1800 char const * const func = "omp_unset_nest_lock"; 1801 KMP_MB(); /* in case another processor initialized lock */ 1802 if ( lck->lk.initialized != lck ) { 1803 KMP_FATAL( LockIsUninitialized, func ); 1804 } 1805 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1806 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1807 } 1808 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1809 KMP_FATAL( LockUnsettingFree, func ); 1810 } 1811 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1812 KMP_FATAL( LockUnsettingSetByAnother, func ); 1813 } 1814 return __kmp_release_nested_queuing_lock( lck, gtid ); 1815 } 1816 1817 void 1818 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck ) 1819 { 1820 __kmp_init_queuing_lock( lck ); 1821 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1822 } 1823 1824 static void 1825 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1826 { 1827 __kmp_init_nested_queuing_lock( lck ); 1828 } 1829 1830 void 1831 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ) 1832 { 1833 __kmp_destroy_queuing_lock( lck ); 1834 lck->lk.depth_locked = 0; 1835 } 1836 1837 static void 1838 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1839 { 1840 char const * const func = "omp_destroy_nest_lock"; 1841 if ( lck->lk.initialized != lck ) { 1842 KMP_FATAL( LockIsUninitialized, func ); 1843 } 1844 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1845 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1846 } 1847 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1848 KMP_FATAL( LockStillOwned, func ); 1849 } 1850 __kmp_destroy_nested_queuing_lock( lck ); 1851 } 1852 1853 1854 // 1855 // access functions to fields which don't exist for all lock kinds. 1856 // 1857 1858 static int 1859 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck ) 1860 { 1861 return lck == lck->lk.initialized; 1862 } 1863 1864 static const ident_t * 1865 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck ) 1866 { 1867 return lck->lk.location; 1868 } 1869 1870 static void 1871 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc ) 1872 { 1873 lck->lk.location = loc; 1874 } 1875 1876 static kmp_lock_flags_t 1877 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck ) 1878 { 1879 return lck->lk.flags; 1880 } 1881 1882 static void 1883 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags ) 1884 { 1885 lck->lk.flags = flags; 1886 } 1887 1888 #if KMP_USE_ADAPTIVE_LOCKS 1889 1890 /* 1891 RTM Adaptive locks 1892 */ 1893 1894 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1895 1896 #include <immintrin.h> 1897 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1898 1899 #else 1900 1901 // Values from the status register after failed speculation. 
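// A minimal illustrative sketch (kept out of the build with #if 0) of how code
// typically consumes the _xbegin() status word using the abort bits defined just
// below.  The retry budget and the non-speculative fallback shown here are
// assumptions for illustration only, not the adaptive lock's actual policy.
#if 0
static int __toy_try_speculation( int max_retries )
{
    for ( int attempt = 0; attempt < max_retries; ++attempt ) {
        unsigned status = _xbegin();
        if ( status == _XBEGIN_STARTED ) {
            return 1;                  // speculating now; commit later with _xend()
        }
        // Aborted: the mask below identifies causes for which an immediate retry
        // is worthwhile; anything else (e.g. a capacity overflow) will not be
        // helped by simply retrying.
        if ( !( status & SOFT_ABORT_MASK ) )
            break;
    }
    return 0;                          // caller should acquire the real lock instead
}
#endif
// The status-bit definitions follow.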
1902 #define _XBEGIN_STARTED (~0u) 1903 #define _XABORT_EXPLICIT (1 << 0) 1904 #define _XABORT_RETRY (1 << 1) 1905 #define _XABORT_CONFLICT (1 << 2) 1906 #define _XABORT_CAPACITY (1 << 3) 1907 #define _XABORT_DEBUG (1 << 4) 1908 #define _XABORT_NESTED (1 << 5) 1909 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1910 1911 // Aborts for which it's worth trying again immediately 1912 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1913 1914 #define STRINGIZE_INTERNAL(arg) #arg 1915 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1916 1917 // Access to RTM instructions 1918 1919 /* 1920 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. 1921 This is the same definition as the compiler intrinsic that will be supported at some point. 1922 */ 1923 static __inline int _xbegin() 1924 { 1925 int res = -1; 1926 1927 #if KMP_OS_WINDOWS 1928 #if KMP_ARCH_X86_64 1929 _asm { 1930 _emit 0xC7 1931 _emit 0xF8 1932 _emit 2 1933 _emit 0 1934 _emit 0 1935 _emit 0 1936 jmp L2 1937 mov res, eax 1938 L2: 1939 } 1940 #else /* IA32 */ 1941 _asm { 1942 _emit 0xC7 1943 _emit 0xF8 1944 _emit 2 1945 _emit 0 1946 _emit 0 1947 _emit 0 1948 jmp L2 1949 mov res, eax 1950 L2: 1951 } 1952 #endif // KMP_ARCH_X86_64 1953 #else 1954 /* Note that %eax must be noted as killed (clobbered), because 1955 * the XSR is returned in %eax(%rax) on abort. Other register 1956 * values are restored, so don't need to be killed. 1957 * 1958 * We must also mark 'res' as an input and an output, since otherwise 1959 * 'res=-1' may be dropped as being dead, whereas we do need the 1960 * assignment on the successful (i.e., non-abort) path. 1961 */ 1962 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" 1963 " .long 1f-1b-6\n" 1964 " jmp 2f\n" 1965 "1: movl %%eax,%0\n" 1966 "2:" 1967 :"+r"(res)::"memory","%eax"); 1968 #endif // KMP_OS_WINDOWS 1969 return res; 1970 } 1971 1972 /* 1973 Transaction end 1974 */ 1975 static __inline void _xend() 1976 { 1977 #if KMP_OS_WINDOWS 1978 __asm { 1979 _emit 0x0f 1980 _emit 0x01 1981 _emit 0xd5 1982 } 1983 #else 1984 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory"); 1985 #endif 1986 } 1987 1988 /* 1989 This is a macro, the argument must be a single byte constant which 1990 can be evaluated by the inline assembler, since it is emitted as a 1991 byte into the assembly code. 1992 */ 1993 #if KMP_OS_WINDOWS 1994 #define _xabort(ARG) \ 1995 _asm _emit 0xc6 \ 1996 _asm _emit 0xf8 \ 1997 _asm _emit ARG 1998 #else 1999 #define _xabort(ARG) \ 2000 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory"); 2001 #endif 2002 2003 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 2004 2005 // 2006 // Statistics is collected for testing purpose 2007 // 2008 #if KMP_DEBUG_ADAPTIVE_LOCKS 2009 2010 // We accumulate speculative lock statistics when the lock is destroyed. 2011 // We keep locks that haven't been destroyed in the liveLocks list 2012 // so that we can grab their statistics too. 2013 static kmp_adaptive_lock_statistics_t destroyedStats; 2014 2015 // To hold the list of live locks. 2016 static kmp_adaptive_lock_info_t liveLocks; 2017 2018 // A lock so we can safely update the list of locks. 2019 static kmp_bootstrap_lock_t chain_lock; 2020 2021 // Initialize the list of stats. 
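//
// Bookkeeping design used below: live adaptive locks are kept on a circular,
// doubly linked list whose sentinel is 'liveLocks'; insertion and removal are
// serialized by 'chain_lock', and a lock's counters are folded into
// 'destroyedStats' when the lock is destroyed, so the final report can sum
// the still-live locks plus everything already destroyed.  A self-contained
// sketch of the sentinel-list discipline (hypothetical node type, only for
// illustration):
#if 0
struct node { node *next, *prev; };

static void list_init(node *sentinel)       // an empty list points at itself
{
    sentinel->next = sentinel->prev = sentinel;
}

static void list_push_front(node *sentinel, node *n)
{
    n->next = sentinel->next;
    n->prev = sentinel;
    sentinel->next->prev = n;
    sentinel->next = n;
    // Invariant checked by the KMP_ASSERTs below:
    //   n->next->prev == n  &&  n->prev->next == n
}

static void list_remove(node *n)
{
    n->next->prev = n->prev;
    n->prev->next = n->next;
}
#endif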
2022 void 2023 __kmp_init_speculative_stats() 2024 { 2025 kmp_adaptive_lock_info_t *lck = &liveLocks; 2026 2027 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); 2028 lck->stats.next = lck; 2029 lck->stats.prev = lck; 2030 2031 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2032 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2033 2034 __kmp_init_bootstrap_lock( &chain_lock ); 2035 2036 } 2037 2038 // Insert the lock into the circular list 2039 static void 2040 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) 2041 { 2042 __kmp_acquire_bootstrap_lock( &chain_lock ); 2043 2044 lck->stats.next = liveLocks.stats.next; 2045 lck->stats.prev = &liveLocks; 2046 2047 liveLocks.stats.next = lck; 2048 lck->stats.next->stats.prev = lck; 2049 2050 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2051 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2052 2053 __kmp_release_bootstrap_lock( &chain_lock ); 2054 } 2055 2056 static void 2057 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) 2058 { 2059 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2060 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2061 2062 kmp_adaptive_lock_info_t * n = lck->stats.next; 2063 kmp_adaptive_lock_info_t * p = lck->stats.prev; 2064 2065 n->stats.prev = p; 2066 p->stats.next = n; 2067 } 2068 2069 static void 2070 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) 2071 { 2072 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); 2073 __kmp_remember_lock( lck ); 2074 } 2075 2076 static void 2077 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) 2078 { 2079 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 2080 2081 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 2082 t->successfulSpeculations += s->successfulSpeculations; 2083 t->hardFailedSpeculations += s->hardFailedSpeculations; 2084 t->softFailedSpeculations += s->softFailedSpeculations; 2085 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 2086 t->lemmingYields += s->lemmingYields; 2087 } 2088 2089 static void 2090 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) 2091 { 2092 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 2093 2094 __kmp_acquire_bootstrap_lock( &chain_lock ); 2095 2096 __kmp_add_stats( &destroyedStats, lck ); 2097 __kmp_forget_lock( lck ); 2098 2099 __kmp_release_bootstrap_lock( &chain_lock ); 2100 } 2101 2102 static float 2103 percent (kmp_uint32 count, kmp_uint32 total) 2104 { 2105 return (total == 0) ? 0.0: (100.0 * count)/total; 2106 } 2107 2108 static 2109 FILE * __kmp_open_stats_file() 2110 { 2111 if (strcmp (__kmp_speculative_statsfile, "-") == 0) 2112 return stdout; 2113 2114 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20; 2115 char buffer[buffLen]; 2116 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile, 2117 (kmp_int32)getpid()); 2118 FILE * result = fopen(&buffer[0], "w"); 2119 2120 // Maybe we should issue a warning here... 2121 return result ? 
result : stdout; 2122 } 2123 2124 void 2125 __kmp_print_speculative_stats() 2126 { 2127 if (__kmp_user_lock_kind != lk_adaptive) 2128 return; 2129 2130 FILE * statsFile = __kmp_open_stats_file(); 2131 2132 kmp_adaptive_lock_statistics_t total = destroyedStats; 2133 kmp_adaptive_lock_info_t *lck; 2134 2135 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 2136 __kmp_add_stats( &total, lck ); 2137 } 2138 kmp_adaptive_lock_statistics_t *t = &total; 2139 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; 2140 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + 2141 t->softFailedSpeculations; 2142 2143 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); 2144 fprintf ( statsFile, " Lock parameters: \n" 2145 " max_soft_retries : %10d\n" 2146 " max_badness : %10d\n", 2147 __kmp_adaptive_backoff_params.max_soft_retries, 2148 __kmp_adaptive_backoff_params.max_badness); 2149 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); 2150 fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); 2151 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", 2152 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); 2153 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 2154 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); 2155 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); 2156 2157 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); 2158 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", 2159 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); 2160 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", 2161 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); 2162 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", 2163 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); 2164 2165 if (statsFile != stdout) 2166 fclose( statsFile ); 2167 } 2168 2169 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) 2170 #else 2171 # define KMP_INC_STAT(lck,stat) 2172 2173 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2174 2175 static inline bool 2176 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) 2177 { 2178 // It is enough to check that the head_id is zero. 2179 // We don't also need to check the tail. 2180 bool res = lck->lk.head_id == 0; 2181 2182 // We need a fence here, since we must ensure that no memory operations 2183 // from later in this thread float above that read. 2184 #if KMP_COMPILER_ICC 2185 _mm_mfence(); 2186 #else 2187 __sync_synchronize(); 2188 #endif 2189 2190 return res; 2191 } 2192 2193 // Functions for manipulating the badness 2194 static __inline void 2195 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) 2196 { 2197 // Reset the badness to zero so we eagerly try to speculate again 2198 lck->lk.adaptive.badness = 0; 2199 KMP_INC_STAT(lck,successfulSpeculations); 2200 } 2201 2202 // Create a bit mask with one more set bit. 
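//
// How the badness mask gates speculation: __kmp_should_speculate() below
// tests (acquire_attempts & badness) == 0.  Because badness always has the
// form 2^k - 1 (a run of k low set bits, widened by one bit each time
// speculation fails to acquire the lock and reset to zero on a successful
// speculation), speculation is retried on only one out of every 2^k
// non-speculative acquire attempts:
//
//     badness = 0      -> speculate on every attempt
//     badness = 0b1    -> every 2nd attempt
//     badness = 0b111  -> every 8th attempt, ... capped at max_badness
//
// A minimal stand-alone illustration of the same test (assumed values, not
// part of the runtime):
#if 0
static int should_speculate(unsigned attempts, unsigned badness)
{
    return (attempts & badness) == 0;   // same test as __kmp_should_speculate
}
// should_speculate(8, 0x7) == 1,  should_speculate(9, 0x7) == 0
#endif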
2203 static __inline void 2204 __kmp_step_badness( kmp_adaptive_lock_t *lck ) 2205 { 2206 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; 2207 if ( newBadness > lck->lk.adaptive.max_badness) { 2208 return; 2209 } else { 2210 lck->lk.adaptive.badness = newBadness; 2211 } 2212 } 2213 2214 // Check whether speculation should be attempted. 2215 static __inline int 2216 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2217 { 2218 kmp_uint32 badness = lck->lk.adaptive.badness; 2219 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; 2220 int res = (attempts & badness) == 0; 2221 return res; 2222 } 2223 2224 // Attempt to acquire only the speculative lock. 2225 // Does not back off to the non-speculative lock. 2226 // 2227 static int 2228 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2229 { 2230 int retries = lck->lk.adaptive.max_soft_retries; 2231 2232 // We don't explicitly count the start of speculation, rather we record 2233 // the results (success, hard fail, soft fail). The sum of all of those 2234 // is the total number of times we started speculation since all 2235 // speculations must end one of those ways. 2236 do 2237 { 2238 kmp_uint32 status = _xbegin(); 2239 // Switch this in to disable actual speculation but exercise 2240 // at least some of the rest of the code. Useful for debugging... 2241 // kmp_uint32 status = _XABORT_NESTED; 2242 2243 if (status == _XBEGIN_STARTED ) 2244 { /* We have successfully started speculation 2245 * Check that no-one acquired the lock for real between when we last looked 2246 * and now. This also gets the lock cache line into our read-set, 2247 * which we need so that we'll abort if anyone later claims it for real. 2248 */ 2249 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2250 { 2251 // Lock is now visibly acquired, so someone beat us to it. 2252 // Abort the transaction so we'll restart from _xbegin with the 2253 // failure status. 2254 _xabort(0x01); 2255 KMP_ASSERT2( 0, "should not get here" ); 2256 } 2257 return 1; // Lock has been acquired (speculatively) 2258 } else { 2259 // We have aborted, update the statistics 2260 if ( status & SOFT_ABORT_MASK) 2261 { 2262 KMP_INC_STAT(lck,softFailedSpeculations); 2263 // and loop round to retry. 2264 } 2265 else 2266 { 2267 KMP_INC_STAT(lck,hardFailedSpeculations); 2268 // Give up if we had a hard failure. 2269 break; 2270 } 2271 } 2272 } while( retries-- ); // Loop while we have retries, and didn't fail hard. 2273 2274 // Either we had a hard failure or we didn't succeed softly after 2275 // the full set of attempts, so back off the badness. 2276 __kmp_step_badness( lck ); 2277 return 0; 2278 } 2279 2280 // Attempt to acquire the speculative lock, or back off to the non-speculative one 2281 // if the speculative lock cannot be acquired. 2282 // We can succeed speculatively, non-speculatively, or fail. 2283 static int 2284 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2285 { 2286 // First try to acquire the lock speculatively 2287 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) 2288 return 1; 2289 2290 // Speculative acquisition failed, so try to acquire it non-speculatively. 2291 // Count the non-speculative acquire attempt 2292 lck->lk.adaptive.acquire_attempts++; 2293 2294 // Use base, non-speculative lock. 
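    // GET_QLK_PTR() yields the queuing lock embedded in the adaptive lock; a
    // plain (non-speculative) acquisition makes the lock visibly held, which
    // is exactly what makes any concurrent speculators abort.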
2295 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) 2296 { 2297 KMP_INC_STAT(lck,nonSpeculativeAcquires); 2298 return 1; // Lock is acquired (non-speculatively) 2299 } 2300 else 2301 { 2302 return 0; // Failed to acquire the lock, it's already visibly locked. 2303 } 2304 } 2305 2306 static int 2307 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2308 { 2309 char const * const func = "omp_test_lock"; 2310 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2311 KMP_FATAL( LockIsUninitialized, func ); 2312 } 2313 2314 int retval = __kmp_test_adaptive_lock( lck, gtid ); 2315 2316 if ( retval ) { 2317 lck->lk.qlk.owner_id = gtid + 1; 2318 } 2319 return retval; 2320 } 2321 2322 // Block until we can acquire a speculative, adaptive lock. 2323 // We check whether we should be trying to speculate. 2324 // If we should be, we check the real lock to see if it is free, 2325 // and, if not, pause without attempting to acquire it until it is. 2326 // Then we try the speculative acquire. 2327 // This means that although we suffer from lemmings a little ( 2328 // because all we can't acquire the lock speculatively until 2329 // the queue of threads waiting has cleared), we don't get into a 2330 // state where we can never acquire the lock speculatively (because we 2331 // force the queue to clear by preventing new arrivals from entering the 2332 // queue). 2333 // This does mean that when we're trying to break lemmings, the lock 2334 // is no longer fair. However OpenMP makes no guarantee that its 2335 // locks are fair, so this isn't a real problem. 2336 static void 2337 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2338 { 2339 if ( __kmp_should_speculate( lck, gtid ) ) 2340 { 2341 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2342 { 2343 if ( __kmp_test_adaptive_lock_only( lck , gtid ) ) 2344 return; 2345 // We tried speculation and failed, so give up. 2346 } 2347 else 2348 { 2349 // We can't try speculation until the lock is free, so we 2350 // pause here (without suspending on the queueing lock, 2351 // to allow it to drain, then try again. 2352 // All other threads will also see the same result for 2353 // shouldSpeculate, so will be doing the same if they 2354 // try to claim the lock from now on. 2355 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2356 { 2357 KMP_INC_STAT(lck,lemmingYields); 2358 __kmp_yield (TRUE); 2359 } 2360 2361 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2362 return; 2363 } 2364 } 2365 2366 // Speculative acquisition failed, so acquire it non-speculatively. 2367 // Count the non-speculative acquire attempt 2368 lck->lk.adaptive.acquire_attempts++; 2369 2370 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid ); 2371 // We have acquired the base lock, so count that. 
2372 KMP_INC_STAT(lck,nonSpeculativeAcquires ); 2373 } 2374 2375 static void 2376 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2377 { 2378 char const * const func = "omp_set_lock"; 2379 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2380 KMP_FATAL( LockIsUninitialized, func ); 2381 } 2382 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { 2383 KMP_FATAL( LockIsAlreadyOwned, func ); 2384 } 2385 2386 __kmp_acquire_adaptive_lock( lck, gtid ); 2387 2388 lck->lk.qlk.owner_id = gtid + 1; 2389 } 2390 2391 static int 2392 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2393 { 2394 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2395 { // If the lock doesn't look claimed we must be speculating. 2396 // (Or the user's code is buggy and they're releasing without locking; 2397 // if we had XTEST we'd be able to check that case...) 2398 _xend(); // Exit speculation 2399 __kmp_update_badness_after_success( lck ); 2400 } 2401 else 2402 { // Since the lock *is* visibly locked we're not speculating, 2403 // so should use the underlying lock's release scheme. 2404 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); 2405 } 2406 return KMP_LOCK_RELEASED; 2407 } 2408 2409 static int 2410 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2411 { 2412 char const * const func = "omp_unset_lock"; 2413 KMP_MB(); /* in case another processor initialized lock */ 2414 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2415 KMP_FATAL( LockIsUninitialized, func ); 2416 } 2417 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { 2418 KMP_FATAL( LockUnsettingFree, func ); 2419 } 2420 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { 2421 KMP_FATAL( LockUnsettingSetByAnother, func ); 2422 } 2423 lck->lk.qlk.owner_id = 0; 2424 __kmp_release_adaptive_lock( lck, gtid ); 2425 return KMP_LOCK_RELEASED; 2426 } 2427 2428 static void 2429 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) 2430 { 2431 __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); 2432 lck->lk.adaptive.badness = 0; 2433 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; 2434 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; 2435 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2436 #if KMP_DEBUG_ADAPTIVE_LOCKS 2437 __kmp_zero_speculative_stats( &lck->lk.adaptive ); 2438 #endif 2439 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2440 } 2441 2442 static void 2443 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) 2444 { 2445 __kmp_init_adaptive_lock( lck ); 2446 } 2447 2448 static void 2449 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) 2450 { 2451 #if KMP_DEBUG_ADAPTIVE_LOCKS 2452 __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); 2453 #endif 2454 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); 2455 // Nothing needed for the speculative part. 
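    // (The speculative side keeps no software state of its own: speculation
    // lives entirely in the hardware transactional machinery, and any debug
    // statistics were already folded into the global accumulator above, so
    // tearing down the embedded queuing lock is all that is required.)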
2456 } 2457 2458 static void 2459 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) 2460 { 2461 char const * const func = "omp_destroy_lock"; 2462 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2463 KMP_FATAL( LockIsUninitialized, func ); 2464 } 2465 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { 2466 KMP_FATAL( LockStillOwned, func ); 2467 } 2468 __kmp_destroy_adaptive_lock( lck ); 2469 } 2470 2471 2472 #endif // KMP_USE_ADAPTIVE_LOCKS 2473 2474 2475 /* ------------------------------------------------------------------------ */ 2476 /* DRDPA ticket locks */ 2477 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2478 2479 static kmp_int32 2480 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) 2481 { 2482 return TCR_4( lck->lk.owner_id ) - 1; 2483 } 2484 2485 static inline bool 2486 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) 2487 { 2488 return lck->lk.depth_locked != -1; 2489 } 2490 2491 __forceinline static int 2492 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2493 { 2494 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); 2495 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2496 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2497 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2498 TCR_PTR(lck->lk.polls); // volatile load 2499 2500 #ifdef USE_LOCK_PROFILE 2501 if (TCR_8(polls[ticket & mask].poll) != ticket) 2502 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2503 /* else __kmp_printf( "." );*/ 2504 #endif /* USE_LOCK_PROFILE */ 2505 2506 // 2507 // Now spin-wait, but reload the polls pointer and mask, in case the 2508 // polling area has been reconfigured. Unless it is reconfigured, the 2509 // reloads stay in L1 cache and are cheap. 2510 // 2511 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!! 2512 // 2513 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2514 // and poll to be re-read every spin iteration. 2515 // 2516 kmp_uint32 spins; 2517 2518 KMP_FSYNC_PREPARE(lck); 2519 KMP_INIT_YIELD(spins); 2520 while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load 2521 // If we are oversubscribed, 2522 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2523 // CPU Pause is in the macros for yield. 2524 // 2525 KMP_YIELD(TCR_4(__kmp_nth) 2526 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2527 KMP_YIELD_SPIN(spins); 2528 2529 // Re-read the mask and the poll pointer from the lock structure. 2530 // 2531 // Make certain that "mask" is read before "polls" !!! 2532 // 2533 // If another thread picks reconfigures the polling area and updates 2534 // their values, and we get the new value of mask and the old polls 2535 // pointer, we could access memory beyond the end of the old polling 2536 // area. 2537 // 2538 mask = TCR_8(lck->lk.mask); // volatile load 2539 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2540 TCR_PTR(lck->lk.polls); // volatile load 2541 } 2542 2543 // 2544 // Critical section starts here 2545 // 2546 KMP_FSYNC_ACQUIRED(lck); 2547 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2548 ticket, lck)); 2549 lck->lk.now_serving = ticket; // non-volatile store 2550 2551 // 2552 // Deallocate a garbage polling area if we know that we are the last 2553 // thread that could possibly access it. 2554 // 2555 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2556 // ticket. 
2557 // 2558 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2559 __kmp_free((void *)lck->lk.old_polls); 2560 lck->lk.old_polls = NULL; 2561 lck->lk.cleanup_ticket = 0; 2562 } 2563 2564 // 2565 // Check to see if we should reconfigure the polling area. 2566 // If there is still a garbage polling area to be deallocated from a 2567 // previous reconfiguration, let a later thread reconfigure it. 2568 // 2569 if (lck->lk.old_polls == NULL) { 2570 bool reconfigure = false; 2571 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2572 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2573 2574 if (TCR_4(__kmp_nth) 2575 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2576 // 2577 // We are in oversubscription mode. Contract the polling area 2578 // down to a single location, if that hasn't been done already. 2579 // 2580 if (num_polls > 1) { 2581 reconfigure = true; 2582 num_polls = TCR_4(lck->lk.num_polls); 2583 mask = 0; 2584 num_polls = 1; 2585 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2586 __kmp_allocate(num_polls * sizeof(*polls)); 2587 polls[0].poll = ticket; 2588 } 2589 } 2590 else { 2591 // 2592 // We are in under/fully subscribed mode. Check the number of 2593 // threads waiting on the lock. The size of the polling area 2594 // should be at least the number of threads waiting. 2595 // 2596 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2597 if (num_waiting > num_polls) { 2598 kmp_uint32 old_num_polls = num_polls; 2599 reconfigure = true; 2600 do { 2601 mask = (mask << 1) | 1; 2602 num_polls *= 2; 2603 } while (num_polls <= num_waiting); 2604 2605 // 2606 // Allocate the new polling area, and copy the relevant portion 2607 // of the old polling area to the new area. __kmp_allocate() 2608 // zeroes the memory it allocates, and most of the old area is 2609 // just zero padding, so we only copy the release counters. 2610 // 2611 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2612 __kmp_allocate(num_polls * sizeof(*polls)); 2613 kmp_uint32 i; 2614 for (i = 0; i < old_num_polls; i++) { 2615 polls[i].poll = old_polls[i].poll; 2616 } 2617 } 2618 } 2619 2620 if (reconfigure) { 2621 // 2622 // Now write the updated fields back to the lock structure. 2623 // 2624 // Make certain that "polls" is written before "mask" !!! 2625 // 2626 // If another thread picks up the new value of mask and the old 2627 // polls pointer , it could access memory beyond the end of the 2628 // old polling area. 2629 // 2630 // On x86, we need memory fences. 2631 // 2632 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", 2633 ticket, lck, num_polls)); 2634 2635 lck->lk.old_polls = old_polls; // non-volatile store 2636 lck->lk.polls = polls; // volatile store 2637 2638 KMP_MB(); 2639 2640 lck->lk.num_polls = num_polls; // non-volatile store 2641 lck->lk.mask = mask; // volatile store 2642 2643 KMP_MB(); 2644 2645 // 2646 // Only after the new polling area and mask have been flushed 2647 // to main memory can we update the cleanup ticket field. 
2648 // 2649 // volatile load / non-volatile store 2650 // 2651 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2652 } 2653 } 2654 return KMP_LOCK_ACQUIRED_FIRST; 2655 } 2656 2657 int 2658 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2659 { 2660 return __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2661 } 2662 2663 static int 2664 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2665 { 2666 char const * const func = "omp_set_lock"; 2667 if ( lck->lk.initialized != lck ) { 2668 KMP_FATAL( LockIsUninitialized, func ); 2669 } 2670 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2671 KMP_FATAL( LockNestableUsedAsSimple, func ); 2672 } 2673 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { 2674 KMP_FATAL( LockIsAlreadyOwned, func ); 2675 } 2676 2677 __kmp_acquire_drdpa_lock( lck, gtid ); 2678 2679 lck->lk.owner_id = gtid + 1; 2680 return KMP_LOCK_ACQUIRED_FIRST; 2681 } 2682 2683 int 2684 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2685 { 2686 // 2687 // First get a ticket, then read the polls pointer and the mask. 2688 // The polls pointer must be read before the mask!!! (See above) 2689 // 2690 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2691 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2692 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2693 TCR_PTR(lck->lk.polls); // volatile load 2694 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2695 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2696 kmp_uint64 next_ticket = ticket + 1; 2697 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, 2698 ticket, next_ticket)) { 2699 KMP_FSYNC_ACQUIRED(lck); 2700 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2701 ticket, lck)); 2702 lck->lk.now_serving = ticket; // non-volatile store 2703 2704 // 2705 // Since no threads are waiting, there is no possibility that 2706 // we would want to reconfigure the polling area. We might 2707 // have the cleanup ticket value (which says that it is now 2708 // safe to deallocate old_polls), but we'll let a later thread 2709 // which calls __kmp_acquire_lock do that - this routine 2710 // isn't supposed to block, and we would risk blocks if we 2711 // called __kmp_free() to do the deallocation. 2712 // 2713 return TRUE; 2714 } 2715 } 2716 return FALSE; 2717 } 2718 2719 static int 2720 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2721 { 2722 char const * const func = "omp_test_lock"; 2723 if ( lck->lk.initialized != lck ) { 2724 KMP_FATAL( LockIsUninitialized, func ); 2725 } 2726 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2727 KMP_FATAL( LockNestableUsedAsSimple, func ); 2728 } 2729 2730 int retval = __kmp_test_drdpa_lock( lck, gtid ); 2731 2732 if ( retval ) { 2733 lck->lk.owner_id = gtid + 1; 2734 } 2735 return retval; 2736 } 2737 2738 int 2739 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2740 { 2741 // 2742 // Read the ticket value from the lock data struct, then the polls 2743 // pointer and the mask. The polls pointer must be read before the 2744 // mask!!! 
(See above) 2745 // 2746 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2747 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2748 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2749 TCR_PTR(lck->lk.polls); // volatile load 2750 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2751 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2752 ticket - 1, lck)); 2753 KMP_FSYNC_RELEASING(lck); 2754 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2755 return KMP_LOCK_RELEASED; 2756 } 2757 2758 static int 2759 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2760 { 2761 char const * const func = "omp_unset_lock"; 2762 KMP_MB(); /* in case another processor initialized lock */ 2763 if ( lck->lk.initialized != lck ) { 2764 KMP_FATAL( LockIsUninitialized, func ); 2765 } 2766 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2767 KMP_FATAL( LockNestableUsedAsSimple, func ); 2768 } 2769 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2770 KMP_FATAL( LockUnsettingFree, func ); 2771 } 2772 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) 2773 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { 2774 KMP_FATAL( LockUnsettingSetByAnother, func ); 2775 } 2776 lck->lk.owner_id = 0; 2777 return __kmp_release_drdpa_lock( lck, gtid ); 2778 } 2779 2780 void 2781 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) 2782 { 2783 lck->lk.location = NULL; 2784 lck->lk.mask = 0; 2785 lck->lk.num_polls = 1; 2786 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2787 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2788 lck->lk.cleanup_ticket = 0; 2789 lck->lk.old_polls = NULL; 2790 lck->lk.next_ticket = 0; 2791 lck->lk.now_serving = 0; 2792 lck->lk.owner_id = 0; // no thread owns the lock. 2793 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
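    // Set lk.initialized last; the consistency-checking variants recognize a
    // usable lock by lk.initialized == lck, so every other field is filled in
    // before the lock is marked as initialized.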
2794 lck->lk.initialized = lck; 2795 2796 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2797 } 2798 2799 static void 2800 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2801 { 2802 __kmp_init_drdpa_lock( lck ); 2803 } 2804 2805 void 2806 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) 2807 { 2808 lck->lk.initialized = NULL; 2809 lck->lk.location = NULL; 2810 if (lck->lk.polls != NULL) { 2811 __kmp_free((void *)lck->lk.polls); 2812 lck->lk.polls = NULL; 2813 } 2814 if (lck->lk.old_polls != NULL) { 2815 __kmp_free((void *)lck->lk.old_polls); 2816 lck->lk.old_polls = NULL; 2817 } 2818 lck->lk.mask = 0; 2819 lck->lk.num_polls = 0; 2820 lck->lk.cleanup_ticket = 0; 2821 lck->lk.next_ticket = 0; 2822 lck->lk.now_serving = 0; 2823 lck->lk.owner_id = 0; 2824 lck->lk.depth_locked = -1; 2825 } 2826 2827 static void 2828 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2829 { 2830 char const * const func = "omp_destroy_lock"; 2831 if ( lck->lk.initialized != lck ) { 2832 KMP_FATAL( LockIsUninitialized, func ); 2833 } 2834 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2835 KMP_FATAL( LockNestableUsedAsSimple, func ); 2836 } 2837 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2838 KMP_FATAL( LockStillOwned, func ); 2839 } 2840 __kmp_destroy_drdpa_lock( lck ); 2841 } 2842 2843 2844 // 2845 // nested drdpa ticket locks 2846 // 2847 2848 int 2849 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2850 { 2851 KMP_DEBUG_ASSERT( gtid >= 0 ); 2852 2853 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2854 lck->lk.depth_locked += 1; 2855 return KMP_LOCK_ACQUIRED_NEXT; 2856 } 2857 else { 2858 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2859 KMP_MB(); 2860 lck->lk.depth_locked = 1; 2861 KMP_MB(); 2862 lck->lk.owner_id = gtid + 1; 2863 return KMP_LOCK_ACQUIRED_FIRST; 2864 } 2865 } 2866 2867 static void 2868 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2869 { 2870 char const * const func = "omp_set_nest_lock"; 2871 if ( lck->lk.initialized != lck ) { 2872 KMP_FATAL( LockIsUninitialized, func ); 2873 } 2874 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2875 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2876 } 2877 __kmp_acquire_nested_drdpa_lock( lck, gtid ); 2878 } 2879 2880 int 2881 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2882 { 2883 int retval; 2884 2885 KMP_DEBUG_ASSERT( gtid >= 0 ); 2886 2887 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2888 retval = ++lck->lk.depth_locked; 2889 } 2890 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { 2891 retval = 0; 2892 } 2893 else { 2894 KMP_MB(); 2895 retval = lck->lk.depth_locked = 1; 2896 KMP_MB(); 2897 lck->lk.owner_id = gtid + 1; 2898 } 2899 return retval; 2900 } 2901 2902 static int 2903 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2904 { 2905 char const * const func = "omp_test_nest_lock"; 2906 if ( lck->lk.initialized != lck ) { 2907 KMP_FATAL( LockIsUninitialized, func ); 2908 } 2909 if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { 2910 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2911 } 2912 return __kmp_test_nested_drdpa_lock( lck, gtid ); 2913 } 2914 2915 int 2916 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2917 { 2918 KMP_DEBUG_ASSERT( gtid >= 0 ); 2919 2920 KMP_MB(); 2921 if ( --(lck->lk.depth_locked) == 0 ) { 2922 KMP_MB(); 2923 lck->lk.owner_id = 0; 2924 __kmp_release_drdpa_lock( lck, gtid ); 2925 return KMP_LOCK_RELEASED; 2926 } 2927 return KMP_LOCK_STILL_HELD; 2928 } 2929 2930 static int 2931 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2932 { 2933 char const * const func = "omp_unset_nest_lock"; 2934 KMP_MB(); /* in case another processor initialized lock */ 2935 if ( lck->lk.initialized != lck ) { 2936 KMP_FATAL( LockIsUninitialized, func ); 2937 } 2938 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2939 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2940 } 2941 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2942 KMP_FATAL( LockUnsettingFree, func ); 2943 } 2944 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { 2945 KMP_FATAL( LockUnsettingSetByAnother, func ); 2946 } 2947 return __kmp_release_nested_drdpa_lock( lck, gtid ); 2948 } 2949 2950 void 2951 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) 2952 { 2953 __kmp_init_drdpa_lock( lck ); 2954 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2955 } 2956 2957 static void 2958 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2959 { 2960 __kmp_init_nested_drdpa_lock( lck ); 2961 } 2962 2963 void 2964 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) 2965 { 2966 __kmp_destroy_drdpa_lock( lck ); 2967 lck->lk.depth_locked = 0; 2968 } 2969 2970 static void 2971 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2972 { 2973 char const * const func = "omp_destroy_nest_lock"; 2974 if ( lck->lk.initialized != lck ) { 2975 KMP_FATAL( LockIsUninitialized, func ); 2976 } 2977 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2978 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2979 } 2980 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2981 KMP_FATAL( LockStillOwned, func ); 2982 } 2983 __kmp_destroy_nested_drdpa_lock( lck ); 2984 } 2985 2986 2987 // 2988 // access functions to fields which don't exist for all lock kinds. 
2989 // 2990 2991 static int 2992 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) 2993 { 2994 return lck == lck->lk.initialized; 2995 } 2996 2997 static const ident_t * 2998 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) 2999 { 3000 return lck->lk.location; 3001 } 3002 3003 static void 3004 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) 3005 { 3006 lck->lk.location = loc; 3007 } 3008 3009 static kmp_lock_flags_t 3010 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) 3011 { 3012 return lck->lk.flags; 3013 } 3014 3015 static void 3016 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) 3017 { 3018 lck->lk.flags = flags; 3019 } 3020 3021 // Time stamp counter 3022 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3023 # define __kmp_tsc() __kmp_hardware_timestamp() 3024 // Runtime's default backoff parameters 3025 kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 }; 3026 #else 3027 // Use nanoseconds for other platforms 3028 extern kmp_uint64 __kmp_now_nsec(); 3029 kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 }; 3030 # define __kmp_tsc() __kmp_now_nsec() 3031 #endif 3032 3033 // A useful predicate for dealing with timestamps that may wrap. 3034 // Is a before b? 3035 // Since the timestamps may wrap, this is asking whether it's 3036 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 3037 // Times where going clockwise is less distance than going anti-clockwise 3038 // are in the future, others are in the past. 3039 // e.g.) a = MAX-1, b = MAX+1 (=0), then a > b (true) does not mean a reached b 3040 // whereas signed(a) = -2, signed(b) = 0 captures the actual difference 3041 static inline bool before(kmp_uint64 a, kmp_uint64 b) 3042 { 3043 return ((kmp_int64)b - (kmp_int64)a) > 0; 3044 } 3045 3046 // Truncated binary exponential backoff function 3047 void 3048 __kmp_spin_backoff(kmp_backoff_t *boff) 3049 { 3050 // We could flatten this loop, but making it a nested loop gives better result. 3051 kmp_uint32 i; 3052 for (i = boff->step; i > 0; i--) { 3053 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 3054 do { 3055 KMP_CPU_PAUSE(); 3056 } while (before(__kmp_tsc(), goal)); 3057 } 3058 boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1); 3059 } 3060 3061 #if KMP_USE_DYNAMIC_LOCK 3062 3063 // Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word. 3064 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) 3065 { 3066 TCW_4(*lck, KMP_GET_D_TAG(seq)); 3067 KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 3068 } 3069 3070 #if KMP_USE_TSX 3071 3072 // HLE lock functions - imported from the testbed runtime. 
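//
// Background for the routines below: 0xF2 / 0xF3 are the XACQUIRE / XRELEASE
// legacy prefixes.  On HLE-capable processors a prefixed atomic exchange may
// elide the lock and run the critical section transactionally; on older
// processors the prefixes are ignored, so exactly the same code degrades to
// an ordinary test-and-set spin lock.  A generic sketch of the elided
// spin-lock idiom implemented here (illustrative only; it reuses the swap4()
// helper defined just below, and 0/1 stand in for the KMP_LOCK_FREE /
// KMP_LOCK_BUSY encodings used by the real code):
#if 0
static void hle_spin_lock(kmp_uint32 volatile *lock_word)
{
    // XACQUIRE-prefixed exchange: try to take (or elide) the lock.
    while (swap4(lock_word, 1) != 0) {
        // Spin read-only until the word looks free, then retry the exchange.
        while (*lock_word != 0)
            KMP_CPU_PAUSE();
    }
}

static void hle_spin_unlock(kmp_uint32 volatile *lock_word)
{
    *lock_word = 0;   // the real release uses an XRELEASE-prefixed movl
}
#endif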
3073 #define HLE_ACQUIRE ".byte 0xf2;" 3074 #define HLE_RELEASE ".byte 0xf3;" 3075 3076 static inline kmp_uint32 3077 swap4(kmp_uint32 volatile *p, kmp_uint32 v) 3078 { 3079 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" 3080 : "+r"(v), "+m"(*p) 3081 : 3082 : "memory"); 3083 return v; 3084 } 3085 3086 static void 3087 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) 3088 { 3089 TCW_4(*lck, 0); 3090 } 3091 3092 static void 3093 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3094 { 3095 // Use gtid for KMP_LOCK_BUSY if necessary 3096 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 3097 int delay = 1; 3098 do { 3099 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 3100 for (int i = delay; i != 0; --i) 3101 KMP_CPU_PAUSE(); 3102 delay = ((delay << 1) | 1) & 7; 3103 } 3104 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 3105 } 3106 } 3107 3108 static void 3109 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3110 { 3111 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 3112 } 3113 3114 static int 3115 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3116 { 3117 __asm__ volatile(HLE_RELEASE "movl %1,%0" 3118 : "=m"(*lck) 3119 : "r"(KMP_LOCK_FREE(hle)) 3120 : "memory"); 3121 return KMP_LOCK_RELEASED; 3122 } 3123 3124 static int 3125 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3126 { 3127 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 3128 } 3129 3130 static int 3131 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3132 { 3133 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 3134 } 3135 3136 static int 3137 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3138 { 3139 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 3140 } 3141 3142 static void 3143 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) 3144 { 3145 __kmp_init_queuing_lock(lck); 3146 } 3147 3148 static void 3149 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) 3150 { 3151 __kmp_destroy_queuing_lock(lck); 3152 } 3153 3154 static void 3155 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3156 { 3157 unsigned retries=3, status; 3158 do { 3159 status = _xbegin(); 3160 if (status == _XBEGIN_STARTED) { 3161 if (__kmp_is_unlocked_queuing_lock(lck)) 3162 return; 3163 _xabort(0xff); 3164 } 3165 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 3166 // Wait until lock becomes free 3167 while (! 
__kmp_is_unlocked_queuing_lock(lck)) 3168 __kmp_yield(TRUE); 3169 } 3170 else if (!(status & _XABORT_RETRY)) 3171 break; 3172 } while (retries--); 3173 3174 // Fall-back non-speculative lock (xchg) 3175 __kmp_acquire_queuing_lock(lck, gtid); 3176 } 3177 3178 static void 3179 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3180 { 3181 __kmp_acquire_rtm_lock(lck, gtid); 3182 } 3183 3184 static int 3185 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3186 { 3187 if (__kmp_is_unlocked_queuing_lock(lck)) { 3188 // Releasing from speculation 3189 _xend(); 3190 } 3191 else { 3192 // Releasing from a real lock 3193 __kmp_release_queuing_lock(lck, gtid); 3194 } 3195 return KMP_LOCK_RELEASED; 3196 } 3197 3198 static int 3199 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3200 { 3201 return __kmp_release_rtm_lock(lck, gtid); 3202 } 3203 3204 static int 3205 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3206 { 3207 unsigned retries=3, status; 3208 do { 3209 status = _xbegin(); 3210 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 3211 return 1; 3212 } 3213 if (!(status & _XABORT_RETRY)) 3214 break; 3215 } while (retries--); 3216 3217 return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0; 3218 } 3219 3220 static int 3221 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3222 { 3223 return __kmp_test_rtm_lock(lck, gtid); 3224 } 3225 3226 #endif // KMP_USE_TSX 3227 3228 // Entry functions for indirect locks (first element of direct lock jump tables). 3229 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); 3230 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); 3231 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3232 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3233 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3234 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3235 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3236 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3237 3238 // 3239 // Jump tables for the indirect lock functions. 3240 // Only fill in the odd entries, that avoids the need to shift out the low bit. 
3241 // 3242 3243 // init functions 3244 #define expand(l, op) 0,__kmp_init_direct_lock, 3245 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) 3246 = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) }; 3247 #undef expand 3248 3249 // destroy functions 3250 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 3251 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) 3252 = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) }; 3253 #undef expand 3254 3255 // set/acquire functions 3256 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 3257 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) 3258 = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) }; 3259 #undef expand 3260 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 3261 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) 3262 = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) }; 3263 #undef expand 3264 3265 // unset/release and test functions 3266 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 3267 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) 3268 = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) }; 3269 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) 3270 = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) }; 3271 #undef expand 3272 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 3273 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) 3274 = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) }; 3275 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) 3276 = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) }; 3277 #undef expand 3278 3279 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3280 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; 3281 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; 3282 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; 3283 3284 // 3285 // Jump tables for the indirect lock functions. 
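//
// How the dynamic dispatch is laid out: a direct lock stores its tag, an odd
// value, in the lock word itself, whereas an indirect lock stores an even
// value (a table index shifted left, or an aligned pointer).  Slot 0 of each
// direct table above therefore holds the corresponding indirect-lock entry
// function, and the real direct-lock functions occupy only the odd slots, so
// the tag taken from the lock word can index the table as-is, without
// shifting its low bit out.  The tables that follow are the per-type tables
// for the indirect locks, one dense entry per indirect lock tag.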
3286 // 3287 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, 3288 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) }; 3289 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) }; 3290 #undef expand 3291 3292 // set/acquire functions 3293 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3294 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; 3295 #undef expand 3296 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3297 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; 3298 #undef expand 3299 3300 // unset/release and test functions 3301 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3302 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; 3303 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; 3304 #undef expand 3305 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3306 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; 3307 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; 3308 #undef expand 3309 3310 // Exposes only one jump tables (*lock or *lock_with_checks). 3311 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; 3312 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; 3313 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; 3314 3315 // Lock index table. 3316 kmp_indirect_lock_table_t __kmp_i_lock_table; 3317 3318 // Size of indirect locks. 3319 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 }; 3320 3321 // Jump tables for lock accessor/modifier. 3322 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; 3323 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; 3324 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3325 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3326 3327 // Use different lock pools for different lock types. 3328 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 }; 3329 3330 // User lock allocator for dynamically dispatched indirect locks. 3331 // Every entry of the indirect lock table holds the address and type of the allocated indrect lock 3332 // (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock 3333 // object is returned to the reusable pool of locks, unique to each lock type. 
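//
// Layout sketch: the lock table is an array of row pointers, each row holding
// KMP_I_LOCK_CHUNK kmp_indirect_lock_t entries, so a flat index is resolved
// as, in essence, table[idx / KMP_I_LOCK_CHUNK] + idx % KMP_I_LOCK_CHUNK
// (which is what KMP_GET_I_LOCK() computes).  Doubling only grows the array
// of row pointers and appends fresh rows, so existing entries never move and
// previously handed-out lock pointers stay valid.  A self-contained model of
// the same growth strategy (hypothetical type; malloc/calloc stand in for
// __kmp_allocate, which also zeroes its memory):
#if 0
template <typename T, int CHUNK>
struct chunked_table {
    T **rows;       // array of row pointers
    int nrows;      // number of rows currently allocated
    int next;       // next unused flat index

    T *at(int idx) { return &rows[idx / CHUNK][idx % CHUNK]; }

    int append() {
        if (next == nrows * CHUNK) {            // full: double the row array
            T **old = rows;
            rows = (T **)malloc(2 * nrows * sizeof(T *));
            memcpy(rows, old, nrows * sizeof(T *));
            free(old);
            for (int i = nrows; i < 2 * nrows; ++i)
                rows[i] = (T *)calloc(CHUNK, sizeof(T));
            nrows *= 2;
        }
        return next++;                          // rows already in use never move
    }
};
#endif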
3334 kmp_indirect_lock_t * 3335 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) 3336 { 3337 kmp_indirect_lock_t *lck; 3338 kmp_lock_index_t idx; 3339 3340 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3341 3342 if (__kmp_indirect_lock_pool[tag] != NULL) { 3343 // Reuse the allocated and destroyed lock object 3344 lck = __kmp_indirect_lock_pool[tag]; 3345 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3346 idx = lck->lock->pool.index; 3347 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3348 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck)); 3349 } else { 3350 idx = __kmp_i_lock_table.next; 3351 // Check capacity and double the size if it is full 3352 if (idx == __kmp_i_lock_table.size) { 3353 // Double up the space for block pointers 3354 int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK; 3355 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; 3356 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *)); 3357 KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *)); 3358 __kmp_free(old_table); 3359 // Allocate new objects in the new blocks 3360 for (int i = row; i < 2*row; ++i) 3361 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *) 3362 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); 3363 __kmp_i_lock_table.size = 2*idx; 3364 } 3365 __kmp_i_lock_table.next++; 3366 lck = KMP_GET_I_LOCK(idx); 3367 // Allocate a new base lock object 3368 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3369 KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); 3370 } 3371 3372 __kmp_release_lock(&__kmp_global_lock, gtid); 3373 3374 lck->type = tag; 3375 3376 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3377 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. 3378 } else { 3379 *((kmp_indirect_lock_t **)user_lock) = lck; 3380 } 3381 3382 return lck; 3383 } 3384 3385 // User lock lookup for dynamically dispatched locks. 
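//
// What the lookup below has to undo: __kmp_allocate_indirect_lock() stored
// either (idx << 1), kept even so the dynamic dispatch can tell it apart
// from a direct lock's odd tag, when the OpenMP lock type is too small to
// hold a pointer, or the kmp_indirect_lock_t pointer itself otherwise.
// Schematically (not the literal macro definitions):
#if 0
static kmp_indirect_lock_t *decode_user_lock(void **user_lock)
{
    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
        // Small lock word: recover the table index (KMP_EXTRACT_I_INDEX)
        // and map it through the chunked table (KMP_GET_I_LOCK).
        kmp_lock_index_t idx = *((kmp_lock_index_t *)user_lock) >> 1;
        return KMP_GET_I_LOCK(idx);
    }
    // Large lock word: it simply holds the object pointer.
    return *((kmp_indirect_lock_t **)user_lock);
}
#endif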
3386 static __forceinline 3387 kmp_indirect_lock_t * 3388 __kmp_lookup_indirect_lock(void **user_lock, const char *func) 3389 { 3390 if (__kmp_env_consistency_check) { 3391 kmp_indirect_lock_t *lck = NULL; 3392 if (user_lock == NULL) { 3393 KMP_FATAL(LockIsUninitialized, func); 3394 } 3395 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3396 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3397 if (idx >= __kmp_i_lock_table.size) { 3398 KMP_FATAL(LockIsUninitialized, func); 3399 } 3400 lck = KMP_GET_I_LOCK(idx); 3401 } else { 3402 lck = *((kmp_indirect_lock_t **)user_lock); 3403 } 3404 if (lck == NULL) { 3405 KMP_FATAL(LockIsUninitialized, func); 3406 } 3407 return lck; 3408 } else { 3409 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3410 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3411 } else { 3412 return *((kmp_indirect_lock_t **)user_lock); 3413 } 3414 } 3415 } 3416 3417 static void 3418 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) 3419 { 3420 #if KMP_USE_ADAPTIVE_LOCKS 3421 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3422 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3423 seq = lockseq_queuing; 3424 } 3425 #endif 3426 #if KMP_USE_TSX 3427 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3428 seq = lockseq_queuing; 3429 } 3430 #endif 3431 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3432 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3433 KMP_I_LOCK_FUNC(l, init)(l->lock); 3434 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq)); 3435 } 3436 3437 static void 3438 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) 3439 { 3440 kmp_uint32 gtid = __kmp_entry_gtid(); 3441 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3442 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3443 kmp_indirect_locktag_t tag = l->type; 3444 3445 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3446 3447 // Use the base lock's space to keep the pool chain. 
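    // (Once destroyed, the underlying lock object is dead storage, so its
    // first words are reinterpreted as pool links: pool.next chains it onto
    // the per-type free list and pool.index remembers its table slot for
    // reuse, which lets the free list work without any allocation of its own.)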
3448 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3449 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3450 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3451 } 3452 __kmp_indirect_lock_pool[tag] = l; 3453 3454 __kmp_release_lock(&__kmp_global_lock, gtid); 3455 } 3456 3457 static void 3458 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3459 { 3460 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3461 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3462 } 3463 3464 static int 3465 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3466 { 3467 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3468 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3469 } 3470 3471 static int 3472 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3473 { 3474 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3475 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3476 } 3477 3478 static void 3479 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3480 { 3481 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3482 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3483 } 3484 3485 static int 3486 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3487 { 3488 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3489 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3490 } 3491 3492 static int 3493 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3494 { 3495 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3496 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3497 } 3498 3499 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3500 3501 // This is used only in kmp_error.c when consistency checking is on. 3502 kmp_int32 3503 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) 3504 { 3505 switch (seq) { 3506 case lockseq_tas: 3507 case lockseq_nested_tas: 3508 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3509 #if KMP_USE_FUTEX 3510 case lockseq_futex: 3511 case lockseq_nested_futex: 3512 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3513 #endif 3514 case lockseq_ticket: 3515 case lockseq_nested_ticket: 3516 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3517 case lockseq_queuing: 3518 case lockseq_nested_queuing: 3519 #if KMP_USE_ADAPTIVE_LOCKS 3520 case lockseq_adaptive: 3521 #endif 3522 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3523 case lockseq_drdpa: 3524 case lockseq_nested_drdpa: 3525 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3526 default: 3527 return 0; 3528 } 3529 } 3530 3531 // Initializes data for dynamic user locks. 
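//
// The initializer below installs one of two complete sets of entry points:
// the "*_with_checks" variants when consistency checking is enabled
// (__kmp_env_consistency_check), the unchecked variants otherwise.  The
// checked entry points turn API misuse that is otherwise unspecified into
// the fatal diagnostics used throughout this file (LockUnsettingFree,
// LockStillOwned, ...).  A user-code sketch of the kind of error they catch
// (illustrative only):
#if 0
#include <omp.h>

int main()
{
    omp_lock_t l;
    omp_init_lock(&l);
    omp_unset_lock(&l);   // never set: a checking build aborts with
                          // "LockUnsettingFree"; otherwise the effect is
                          // unspecified
    omp_destroy_lock(&l);
    return 0;
}
#endif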
3532 void 3533 __kmp_init_dynamic_user_locks() 3534 { 3535 // Initialize jump table for the lock functions 3536 if (__kmp_env_consistency_check) { 3537 __kmp_direct_set = direct_set_check; 3538 __kmp_direct_unset = direct_unset_check; 3539 __kmp_direct_test = direct_test_check; 3540 __kmp_indirect_set = indirect_set_check; 3541 __kmp_indirect_unset = indirect_unset_check; 3542 __kmp_indirect_test = indirect_test_check; 3543 } 3544 else { 3545 __kmp_direct_set = direct_set; 3546 __kmp_direct_unset = direct_unset; 3547 __kmp_direct_test = direct_test; 3548 __kmp_indirect_set = indirect_set; 3549 __kmp_indirect_unset = indirect_unset; 3550 __kmp_indirect_test = indirect_test; 3551 } 3552 3553 // Initialize lock index table 3554 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3555 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3556 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *) 3557 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); 3558 __kmp_i_lock_table.next = 0; 3559 3560 // Indirect lock size 3561 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3562 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3563 #if KMP_USE_ADAPTIVE_LOCKS 3564 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3565 #endif 3566 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3567 #if KMP_USE_TSX 3568 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3569 #endif 3570 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3571 #if KMP_USE_FUTEX 3572 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3573 #endif 3574 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3575 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3576 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3577 3578 // Initialize lock accessor/modifier 3579 #define fill_jumps(table, expand, sep) { \ 3580 table[locktag##sep##ticket] = expand(ticket); \ 3581 table[locktag##sep##queuing] = expand(queuing); \ 3582 table[locktag##sep##drdpa] = expand(drdpa); \ 3583 } 3584 3585 #if KMP_USE_ADAPTIVE_LOCKS 3586 # define fill_table(table, expand) { \ 3587 fill_jumps(table, expand, _); \ 3588 table[locktag_adaptive] = expand(queuing); \ 3589 fill_jumps(table, expand, _nested_); \ 3590 } 3591 #else 3592 # define fill_table(table, expand) { \ 3593 fill_jumps(table, expand, _); \ 3594 fill_jumps(table, expand, _nested_); \ 3595 } 3596 #endif // KMP_USE_ADAPTIVE_LOCKS 3597 3598 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location 3599 fill_table(__kmp_indirect_set_location, expand); 3600 #undef expand 3601 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags 3602 fill_table(__kmp_indirect_set_flags, expand); 3603 #undef expand 3604 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location 3605 fill_table(__kmp_indirect_get_location, expand); 3606 #undef expand 3607 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags 3608 fill_table(__kmp_indirect_get_flags, expand); 3609 #undef expand 3610 3611 __kmp_init_user_locks = TRUE; 3612 } 3613 3614 // Clean up the lock table. 

// Clean up the lock table.
void
__kmp_cleanup_indirect_user_locks()
{
    kmp_lock_index_t i;
    int k;

    // Clean up locks in the pools first (they were already destroyed before going into the pools).
    for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
        kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
        while (l != NULL) {
            kmp_indirect_lock_t *ll = l;
            l = (kmp_indirect_lock_t *)l->lock->pool.next;
            KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
            __kmp_free(ll->lock);
            ll->lock = NULL;
        }
        __kmp_indirect_lock_pool[k] = NULL;
    }
    // Clean up the remaining undestroyed locks.
    for (i = 0; i < __kmp_i_lock_table.next; i++) {
        kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
        if (l->lock != NULL) {
            // Locks not destroyed explicitly need to be destroyed here.
            KMP_I_LOCK_FUNC(l, destroy)(l->lock);
            KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
            __kmp_free(l->lock);
        }
    }
    // Free the table
    for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
        __kmp_free(__kmp_i_lock_table.table[i]);
    __kmp_free(__kmp_i_lock_table.table);

    __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

/* ------------------------------------------------------------------------ */
/* user locks
 *
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined.
 */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;

int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
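
/*
 * Added note (sketch, see kmp_lock.h for the real declarations): the
 * trailing-underscore pointers above are normally reached through thin inline
 * wrappers such as __kmp_get_user_lock_location() and __kmp_destroy_user_lock()
 * (both used by __kmp_cleanup_user_locks() later in this file).  A wrapper is
 * assumed to look roughly like:
 *
 *     // hypothetical shape of the kmp_lock.h wrapper, for illustration only
 *     static inline void __kmp_destroy_user_lock( kmp_user_lock_p lck ) {
 *         KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL );
 *         ( *__kmp_destroy_user_lock_ )( lck );
 *     }
 *
 * All of these pointers stay NULL until __kmp_set_user_lock_vptrs() below has
 * been called for the chosen lock kind.
 */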

void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
{
    switch ( user_lock_kind ) {
        case lk_default:
        default:
        KMP_ASSERT( 0 );

        case lk_tas: {
            __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
            __kmp_user_lock_size = sizeof( kmp_tas_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_tas_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
            }
            else {
                KMP_BIND_USER_LOCK(tas);
                KMP_BIND_NESTED_USER_LOCK(tas);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_tas_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#if KMP_USE_FUTEX

        case lk_futex: {
            __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
            __kmp_user_lock_size = sizeof( kmp_futex_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_futex_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
            }
            else {
                KMP_BIND_USER_LOCK(futex);
                KMP_BIND_NESTED_USER_LOCK(futex);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_futex_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#endif // KMP_USE_FUTEX
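
        /*
         * Added note: for the TAS (and futex) kinds the initialized/location/
         * flags accessors are intentionally left NULL -- these lock types keep
         * no such bookkeeping fields.  The consumers cope with that: the
         * cleanup loop in __kmp_cleanup_user_locks() only calls
         * (*__kmp_is_user_lock_initialized_)() after checking the pointer for
         * NULL, and its IS_CRITICAL() macro likewise tests
         * __kmp_get_user_lock_flags_ before dereferencing it.
         */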

        case lk_ticket: {
            __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
            __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_ticket_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
            }
            else {
                KMP_BIND_USER_LOCK(ticket);
                KMP_BIND_NESTED_USER_LOCK(ticket);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_ticket_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_ticket_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_ticket_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_ticket_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_ticket_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_ticket_lock_flags );
        }
        break;

        case lk_queuing: {
            __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
            __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
            }
            else {
                KMP_BIND_USER_LOCK(queuing);
                KMP_BIND_NESTED_USER_LOCK(queuing);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_queuing_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_queuing_lock_flags );
        }
        break;

#if KMP_USE_ADAPTIVE_LOCKS
        case lk_adaptive: {
            __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
            __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
            }
            else {
                KMP_BIND_USER_LOCK(adaptive);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_adaptive_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_queuing_lock_flags );
        }
        break;
#endif // KMP_USE_ADAPTIVE_LOCKS
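
        /*
         * Added note: the lk_adaptive case above reuses the queuing-lock
         * owner/location/flags accessors, which presumably relies on the
         * adaptive lock being layout-compatible with a queuing lock at its
         * start.  Also note that only the non-nested bindings are installed
         * for adaptive locks here (there is no KMP_BIND_NESTED_USER_LOCK call
         * in that case).
         */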

        case lk_drdpa: {
            __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
            __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_drdpa_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
            }
            else {
                KMP_BIND_USER_LOCK(drdpa);
                KMP_BIND_NESTED_USER_LOCK(drdpa);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_drdpa_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_drdpa_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_drdpa_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_drdpa_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_drdpa_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_drdpa_lock_flags );
        }
        break;
    }
}


// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks* __kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t
__kmp_lock_table_insert( kmp_user_lock_p lck )
{
    // Assume that kmp_global_lock is held upon entry/exit.
    kmp_lock_index_t index;
    if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
        kmp_lock_index_t size;
        kmp_user_lock_p *table;
        // Reallocate lock table.
        if ( __kmp_user_lock_table.allocated == 0 ) {
            size = 1024;
        }
        else {
            size = __kmp_user_lock_table.allocated * 2;
        }
        table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
        KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
        table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
        // We cannot free the previous table now, since it may be in use by other
        // threads. So save the pointer to the previous table in the first element of
        // the new table. All the tables are organized into a list and can be freed
        // when the library shuts down.
        __kmp_user_lock_table.table = table;
        __kmp_user_lock_table.allocated = size;
    }
    KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
    index = __kmp_user_lock_table.used;
    __kmp_user_lock_table.table[ index ] = lck;
    ++ __kmp_user_lock_table.used;
    return index;
}

static kmp_user_lock_p
__kmp_lock_block_allocate()
{
    // Assume that kmp_global_lock is held upon entry/exit.
    static int last_index = 0;
    if ( ( last_index >= __kmp_num_locks_in_block )
      || ( __kmp_lock_blocks == NULL ) ) {
        // Restart the index.
        last_index = 0;
        // Need to allocate a new block.
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
        char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
        // Set up the new block.
        kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
        new_block->next_block = __kmp_lock_blocks;
        new_block->locks = (void *)buffer;
        // Publish the new block.
        KMP_MB();
        __kmp_lock_blocks = new_block;
    }
    kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
      [ last_index * __kmp_user_lock_size ] ) );
    last_index++;
    return ret;
}
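
/*
 * Added layout note for __kmp_lock_block_allocate(): each block is a single
 * allocation holding __kmp_num_locks_in_block lock slots followed by the
 * kmp_block_of_locks header itself:
 *
 *     buffer:  [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
 *               ^                                   ^
 *               new_block->locks                    new_block
 *
 * Because the header lives at the tail of the same buffer, freeing
 * block_ptr->locks in __kmp_cleanup_user_locks() releases the header too.
 */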

//
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
//
kmp_user_lock_p
__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
                          kmp_lock_flags_t flags )
{
    kmp_user_lock_p lck;
    kmp_lock_index_t index;
    KMP_DEBUG_ASSERT( user_lock );

    __kmp_acquire_lock( &__kmp_global_lock, gtid );

    if ( __kmp_lock_pool == NULL ) {
        // Lock pool is empty. Allocate new memory.
        if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
            lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
        }
        else {
            lck = __kmp_lock_block_allocate();
        }

        // Insert the lock into the table so that it can be freed in __kmp_cleanup,
        // and so the debugger has info on all allocated locks.
        index = __kmp_lock_table_insert( lck );
    }
    else {
        // Pick up a lock from the pool.
        lck = __kmp_lock_pool;
        index = __kmp_lock_pool->pool.index;
        __kmp_lock_pool = __kmp_lock_pool->pool.next;
    }

    //
    // We could potentially differentiate between nested and regular locks
    // here, and do the lock table lookup for regular locks only.
    //
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        * ( (kmp_lock_index_t *) user_lock ) = index;
    }
    else {
        * ( (kmp_user_lock_p *) user_lock ) = lck;
    }

    // Mark the lock if it is a critical section lock.
    __kmp_set_user_lock_flags( lck, flags );

    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up

    return lck;
}

// Return the lock's memory to the pool for reuse.
void
__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( user_lock != NULL );
    KMP_DEBUG_ASSERT( lck != NULL );

    __kmp_acquire_lock( & __kmp_global_lock, gtid );

    lck->pool.next = __kmp_lock_pool;
    __kmp_lock_pool = lck;
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
        KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
        lck->pool.index = index;
    }

    __kmp_release_lock( & __kmp_global_lock, gtid );
}

kmp_user_lock_p
__kmp_lookup_user_lock( void **user_lock, char const *func )
{
    kmp_user_lock_p lck = NULL;

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
        if ( __kmp_env_consistency_check ) {
            if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
                KMP_FATAL( LockIsUninitialized, func );
            }
        }
        KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        lck = __kmp_user_lock_table.table[index];
    }
    else {
        lck = *( (kmp_user_lock_p *)user_lock );
    }

    if ( __kmp_env_consistency_check ) {
        if ( lck == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    return lck;
}
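
/*
 * Added note: the routines above use the user's lock variable in one of two
 * ways.  If OMP_LOCK_T_SIZE < sizeof(void *) (e.g. a 4-byte omp_lock_t on a
 * 64-bit target), the lock word holds a 1-based index and every access goes
 * through __kmp_user_lock_table; otherwise it holds the kmp_user_lock_p
 * directly.  In short:
 *
 *     if ( OMP_LOCK_T_SIZE < sizeof(void *) )
 *         lck = __kmp_user_lock_table.table[ *( (kmp_lock_index_t *)user_lock ) ];
 *     else
 *         lck = *( (kmp_user_lock_p *)user_lock );
 */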

void
__kmp_cleanup_user_locks( void )
{
    //
    // Reset the lock pool. Do not worry about the locks in the pool -- we will
    // free them when iterating through the lock table (it includes all locks,
    // dead or alive).
    //
    __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck) \
        ( ( __kmp_get_user_lock_flags_ != NULL ) && \
        ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )

    //
    // Loop through the lock table, freeing all locks.
    //
    // Do not free item [0]; it is reserved for the lock tables list.
    //
    // FIXME - we are iterating through a list of (pointers to) objects of
    // type union kmp_user_lock, but we have no way of knowing whether the
    // base type is currently "pool" or whatever the global user lock type
    // is.
    //
    // We are relying on the fact that for all of the user lock types
    // (except "tas"), the first field in the lock struct is the "initialized"
    // field, which is set to the address of the lock object itself when
    // the lock is initialized. When the union is of type "pool", the
    // first field is a pointer to the next object in the free list, which
    // will not be the same address as the object itself.
    //
    // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
    // will fail for "pool" objects on the free list. This is necessary,
    // because the "location" field of real user locks overlaps the "index"
    // field of "pool" objects.
    //
    // It would be better to run through the free list, and remove all "pool"
    // objects from the lock table before executing this loop. However,
    // "pool" objects do not always have their index field set (only on
    // lin_32e), and I don't want to search the lock table for the address
    // of every "pool" object on the free list.
    //
    while ( __kmp_user_lock_table.used > 1 ) {
        const ident *loc;

        //
        // Reduce __kmp_user_lock_table.used before freeing the lock,
        // so that the state of the table stays consistent.
        //
        kmp_user_lock_p lck = __kmp_user_lock_table.table[
            --__kmp_user_lock_table.used ];

        if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
            ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
            //
            // Issue a warning if: KMP_CONSISTENCY_CHECK AND the lock is
            // initialized AND it is NOT a critical section (the user is not
            // responsible for destroying criticals) AND we know the source
            // location to report.
            //
            if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
                ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
                ( loc->psource != NULL ) ) {
                kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
                KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
                __kmp_str_loc_free( &str_loc );
            }

#ifdef KMP_DEBUG
            if ( IS_CRITICAL( lck ) ) {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
            }
            else {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
            }
#endif // KMP_DEBUG

            //
            // Clean up the lock's internal dynamic resources
            // (for drdpa locks particularly).
            //
            __kmp_destroy_user_lock( lck );
        }

        //
        // Free the lock if block allocation of locks is not used.
        //
        if ( __kmp_lock_blocks == NULL ) {
            __kmp_free( lck );
        }
    }

#undef IS_CRITICAL
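
    /*
     * Added note: when block allocation is in use (__kmp_num_locks_in_block >
     * 1) every lock lives inside one of the buffers tracked by
     * __kmp_lock_blocks, so the loop above deliberately skips the per-lock
     * __kmp_free(); that memory is released wholesale below, after the chain
     * of lock tables has been freed.
     */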

    //
    // Delete the lock table(s).
    //
    kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
    __kmp_user_lock_table.table = NULL;
    __kmp_user_lock_table.allocated = 0;

    while ( table_ptr != NULL ) {
        //
        // In the first element we saved the pointer to the previous
        // (smaller) lock table.
        //
        kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
        __kmp_free( table_ptr );
        table_ptr = next;
    }

    //
    // Free buffers allocated for blocks of locks.
    //
    kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
    __kmp_lock_blocks = NULL;

    while ( block_ptr != NULL ) {
        kmp_block_of_locks_t *next = block_ptr->next_block;
        __kmp_free( block_ptr->locks );
        //
        // *block_ptr itself was allocated at the end of the locks vector.
        //
        block_ptr = next;
    }

    TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK