/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses.  See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
// we just define the constants here and don't include <futex.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT    0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE    1
# endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32  x, y;

    /* Check to make sure unsigned arithmetic wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}


/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//

static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static void
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
        //
        // FIXME - use exponential backoff here
        //
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
}
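//
// Illustrative sketch only (kept under "#if 0", never compiled): the same
// fast-path / spin-with-yield shape as the routine above, expressed with
// C++11 std::atomic instead of the KMP_* portability macros.  The names
// example_tas_lock and example_tas_acquire are hypothetical and are not part
// of the runtime.
//
#if 0
#include <atomic>
#include <thread>

struct example_tas_lock {
    std::atomic<int> poll;   // 0 == free, gtid+1 == owning thread
};

static void
example_tas_acquire( example_tas_lock *lck, int gtid )
{
    int expected = 0;
    // Fast path: a single CAS when the lock appears free.
    if ( lck->poll.load( std::memory_order_relaxed ) == 0
      && lck->poll.compare_exchange_strong( expected, gtid + 1,
                                            std::memory_order_acquire ) ) {
        return;
    }
    // Slow path: re-test and retry, yielding so an oversubscribed system can
    // still make progress (the real code uses KMP_YIELD / KMP_YIELD_SPIN).
    for ( ;; ) {
        expected = 0;
        if ( lck->poll.load( std::memory_order_relaxed ) == 0
          && lck->poll.compare_exchange_strong( expected, gtid + 1,
                                                std::memory_order_acquire ) ) {
            return;
        }
        std::this_thread::yield();
    }
}
#endif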

void
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_tas_lock_timed_template( lck, gtid );
}

static void
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_tas_lock( lck, gtid );
}

int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}

int
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ?
__kmp_avail_proc : 184 __kmp_xproc ) ); 185 return KMP_LOCK_RELEASED; 186 } 187 188 static int 189 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 190 { 191 char const * const func = "omp_unset_lock"; 192 KMP_MB(); /* in case another processor initialized lock */ 193 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 194 && __kmp_is_tas_lock_nestable( lck ) ) { 195 KMP_FATAL( LockNestableUsedAsSimple, func ); 196 } 197 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 198 KMP_FATAL( LockUnsettingFree, func ); 199 } 200 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 ) 201 && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) { 202 KMP_FATAL( LockUnsettingSetByAnother, func ); 203 } 204 return __kmp_release_tas_lock( lck, gtid ); 205 } 206 207 void 208 __kmp_init_tas_lock( kmp_tas_lock_t * lck ) 209 { 210 TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) ); 211 } 212 213 static void 214 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck ) 215 { 216 __kmp_init_tas_lock( lck ); 217 } 218 219 void 220 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ) 221 { 222 lck->lk.poll = 0; 223 } 224 225 static void 226 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck ) 227 { 228 char const * const func = "omp_destroy_lock"; 229 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 230 && __kmp_is_tas_lock_nestable( lck ) ) { 231 KMP_FATAL( LockNestableUsedAsSimple, func ); 232 } 233 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 234 KMP_FATAL( LockStillOwned, func ); 235 } 236 __kmp_destroy_tas_lock( lck ); 237 } 238 239 240 // 241 // nested test and set locks 242 // 243 244 void 245 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 246 { 247 KMP_DEBUG_ASSERT( gtid >= 0 ); 248 249 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 250 lck->lk.depth_locked += 1; 251 } 252 else { 253 __kmp_acquire_tas_lock_timed_template( lck, gtid ); 254 lck->lk.depth_locked = 1; 255 } 256 } 257 258 static void 259 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 260 { 261 char const * const func = "omp_set_nest_lock"; 262 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 263 KMP_FATAL( LockSimpleUsedAsNestable, func ); 264 } 265 __kmp_acquire_nested_tas_lock( lck, gtid ); 266 } 267 268 int 269 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 270 { 271 int retval; 272 273 KMP_DEBUG_ASSERT( gtid >= 0 ); 274 275 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 276 retval = ++lck->lk.depth_locked; 277 } 278 else if ( !__kmp_test_tas_lock( lck, gtid ) ) { 279 retval = 0; 280 } 281 else { 282 KMP_MB(); 283 retval = lck->lk.depth_locked = 1; 284 } 285 return retval; 286 } 287 288 static int 289 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 290 { 291 char const * const func = "omp_test_nest_lock"; 292 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 293 KMP_FATAL( LockSimpleUsedAsNestable, func ); 294 } 295 return __kmp_test_nested_tas_lock( lck, gtid ); 296 } 297 298 int 299 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 300 { 301 KMP_DEBUG_ASSERT( gtid >= 0 ); 302 303 KMP_MB(); 304 if ( --(lck->lk.depth_locked) == 0 ) { 305 __kmp_release_tas_lock( lck, gtid ); 306 return KMP_LOCK_RELEASED; 307 } 308 return KMP_LOCK_STILL_HELD; 309 } 310 311 static int 312 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 313 { 314 char const * const func = "omp_unset_nest_lock"; 315 KMP_MB(); /* in case another processor initialized lock */ 316 if ( ! 
__kmp_is_tas_lock_nestable( lck ) ) { 317 KMP_FATAL( LockSimpleUsedAsNestable, func ); 318 } 319 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 320 KMP_FATAL( LockUnsettingFree, func ); 321 } 322 if ( __kmp_get_tas_lock_owner( lck ) != gtid ) { 323 KMP_FATAL( LockUnsettingSetByAnother, func ); 324 } 325 return __kmp_release_nested_tas_lock( lck, gtid ); 326 } 327 328 void 329 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck ) 330 { 331 __kmp_init_tas_lock( lck ); 332 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 333 } 334 335 static void 336 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck ) 337 { 338 __kmp_init_nested_tas_lock( lck ); 339 } 340 341 void 342 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ) 343 { 344 __kmp_destroy_tas_lock( lck ); 345 lck->lk.depth_locked = 0; 346 } 347 348 static void 349 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) 350 { 351 char const * const func = "omp_destroy_nest_lock"; 352 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 353 KMP_FATAL( LockSimpleUsedAsNestable, func ); 354 } 355 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 356 KMP_FATAL( LockStillOwned, func ); 357 } 358 __kmp_destroy_nested_tas_lock( lck ); 359 } 360 361 362 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 363 364 /* ------------------------------------------------------------------------ */ 365 /* futex locks */ 366 367 // futex locks are really just test and set locks, with a different method 368 // of handling contention. They take the same amount of space as test and 369 // set locks, and are allocated the same way (i.e. use the area allocated by 370 // the compiler for non-nested locks / allocate nested locks on the heap). 371 372 static kmp_int32 373 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck ) 374 { 375 return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1; 376 } 377 378 static inline bool 379 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck ) 380 { 381 return lck->lk.depth_locked != -1; 382 } 383 384 __forceinline static void 385 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid ) 386 { 387 kmp_int32 gtid_code = ( gtid + 1 ) << 1; 388 389 KMP_MB(); 390 391 #ifdef USE_LOCK_PROFILE 392 kmp_uint32 curr = TCR_4( lck->lk.poll ); 393 if ( ( curr != 0 ) && ( curr != gtid_code ) ) 394 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 395 /* else __kmp_printf( "." );*/ 396 #endif /* USE_LOCK_PROFILE */ 397 398 KMP_FSYNC_PREPARE( lck ); 399 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", 400 lck, lck->lk.poll, gtid ) ); 401 402 kmp_int32 poll_val; 403 404 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), 405 DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) { 406 407 kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; 408 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", 409 lck, gtid, poll_val, cond ) ); 410 411 // 412 // NOTE: if you try to use the following condition for this branch 413 // 414 // if ( poll_val & 1 == 0 ) 415 // 416 // Then the 12.0 compiler has a bug where the following block will 417 // always be skipped, regardless of the value of the LSB of poll_val. 418 // 419 if ( ! cond ) { 420 // 421 // Try to set the lsb in the poll to indicate to the owner 422 // thread that they need to wake this thread up. 423 // 424 if ( ! 
KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) { 425 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", 426 lck, lck->lk.poll, gtid ) ); 427 continue; 428 } 429 poll_val |= DYNA_LOCK_BUSY(1, futex); 430 431 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", 432 lck, lck->lk.poll, gtid ) ); 433 } 434 435 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", 436 lck, gtid, poll_val ) ); 437 438 kmp_int32 rc; 439 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, 440 poll_val, NULL, NULL, 0 ) ) != 0 ) { 441 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n", 442 lck, gtid, poll_val, rc, errno ) ); 443 continue; 444 } 445 446 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", 447 lck, gtid, poll_val ) ); 448 // 449 // This thread has now done a successful futex wait call and was 450 // entered on the OS futex queue. We must now perform a futex 451 // wake call when releasing the lock, as we have no idea how many 452 // other threads are in the queue. 453 // 454 gtid_code |= 1; 455 } 456 457 KMP_FSYNC_ACQUIRED( lck ); 458 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", 459 lck, lck->lk.poll, gtid ) ); 460 } 461 462 void 463 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 464 { 465 __kmp_acquire_futex_lock_timed_template( lck, gtid ); 466 } 467 468 static void 469 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 470 { 471 char const * const func = "omp_set_lock"; 472 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 473 && __kmp_is_futex_lock_nestable( lck ) ) { 474 KMP_FATAL( LockNestableUsedAsSimple, func ); 475 } 476 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) { 477 KMP_FATAL( LockIsAlreadyOwned, func ); 478 } 479 __kmp_acquire_futex_lock( lck, gtid ); 480 } 481 482 int 483 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 484 { 485 if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) { 486 KMP_FSYNC_ACQUIRED( lck ); 487 return TRUE; 488 } 489 return FALSE; 490 } 491 492 static int 493 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 494 { 495 char const * const func = "omp_test_lock"; 496 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 497 && __kmp_is_futex_lock_nestable( lck ) ) { 498 KMP_FATAL( LockNestableUsedAsSimple, func ); 499 } 500 return __kmp_test_futex_lock( lck, gtid ); 501 } 502 503 int 504 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 505 { 506 KMP_MB(); /* Flush all pending memory write invalidates. */ 507 508 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", 509 lck, lck->lk.poll, gtid ) ); 510 511 KMP_FSYNC_RELEASING(lck); 512 513 kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) ); 514 515 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", 516 lck, gtid, poll_val ) ); 517 518 if ( DYNA_LOCK_STRIP(poll_val) & 1 ) { 519 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", 520 lck, gtid ) ); 521 syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 ); 522 } 523 524 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 525 526 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", 527 lck, lck->lk.poll, gtid ) ); 528 529 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 530 __kmp_xproc ) ); 531 return KMP_LOCK_RELEASED; 532 } 533 534 static int 535 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 536 { 537 char const * const func = "omp_unset_lock"; 538 KMP_MB(); /* in case another processor initialized lock */ 539 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 540 && __kmp_is_futex_lock_nestable( lck ) ) { 541 KMP_FATAL( LockNestableUsedAsSimple, func ); 542 } 543 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 544 KMP_FATAL( LockUnsettingFree, func ); 545 } 546 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 ) 547 && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) { 548 KMP_FATAL( LockUnsettingSetByAnother, func ); 549 } 550 return __kmp_release_futex_lock( lck, gtid ); 551 } 552 553 void 554 __kmp_init_futex_lock( kmp_futex_lock_t * lck ) 555 { 556 TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) ); 557 } 558 559 static void 560 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck ) 561 { 562 __kmp_init_futex_lock( lck ); 563 } 564 565 void 566 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ) 567 { 568 lck->lk.poll = 0; 569 } 570 571 static void 572 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck ) 573 { 574 char const * const func = "omp_destroy_lock"; 575 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 576 && __kmp_is_futex_lock_nestable( lck ) ) { 577 KMP_FATAL( LockNestableUsedAsSimple, func ); 578 } 579 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 580 KMP_FATAL( LockStillOwned, func ); 581 } 582 __kmp_destroy_futex_lock( lck ); 583 } 584 585 586 // 587 // nested futex locks 588 // 589 590 void 591 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 592 { 593 KMP_DEBUG_ASSERT( gtid >= 0 ); 594 595 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 596 lck->lk.depth_locked += 1; 597 } 598 else { 599 __kmp_acquire_futex_lock_timed_template( lck, gtid ); 600 lck->lk.depth_locked = 1; 601 } 602 } 603 604 static void 605 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 606 { 607 char const * const func = "omp_set_nest_lock"; 608 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 609 KMP_FATAL( LockSimpleUsedAsNestable, func ); 610 } 611 __kmp_acquire_nested_futex_lock( lck, gtid ); 612 } 613 614 int 615 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 616 { 617 int retval; 618 619 KMP_DEBUG_ASSERT( gtid >= 0 ); 620 621 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 622 retval = ++lck->lk.depth_locked; 623 } 624 else if ( !__kmp_test_futex_lock( lck, gtid ) ) { 625 retval = 0; 626 } 627 else { 628 KMP_MB(); 629 retval = lck->lk.depth_locked = 1; 630 } 631 return retval; 632 } 633 634 static int 635 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 636 { 637 char const * const func = "omp_test_nest_lock"; 638 if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { 639 KMP_FATAL( LockSimpleUsedAsNestable, func ); 640 } 641 return __kmp_test_nested_futex_lock( lck, gtid ); 642 } 643 644 int 645 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 646 { 647 KMP_DEBUG_ASSERT( gtid >= 0 ); 648 649 KMP_MB(); 650 if ( --(lck->lk.depth_locked) == 0 ) { 651 __kmp_release_futex_lock( lck, gtid ); 652 return KMP_LOCK_RELEASED; 653 } 654 return KMP_LOCK_STILL_HELD; 655 } 656 657 static int 658 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 659 { 660 char const * const func = "omp_unset_nest_lock"; 661 KMP_MB(); /* in case another processor initialized lock */ 662 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 663 KMP_FATAL( LockSimpleUsedAsNestable, func ); 664 } 665 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 666 KMP_FATAL( LockUnsettingFree, func ); 667 } 668 if ( __kmp_get_futex_lock_owner( lck ) != gtid ) { 669 KMP_FATAL( LockUnsettingSetByAnother, func ); 670 } 671 return __kmp_release_nested_futex_lock( lck, gtid ); 672 } 673 674 void 675 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck ) 676 { 677 __kmp_init_futex_lock( lck ); 678 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 679 } 680 681 static void 682 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck ) 683 { 684 __kmp_init_nested_futex_lock( lck ); 685 } 686 687 void 688 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ) 689 { 690 __kmp_destroy_futex_lock( lck ); 691 lck->lk.depth_locked = 0; 692 } 693 694 static void 695 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck ) 696 { 697 char const * const func = "omp_destroy_nest_lock"; 698 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 699 KMP_FATAL( LockSimpleUsedAsNestable, func ); 700 } 701 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 702 KMP_FATAL( LockStillOwned, func ); 703 } 704 __kmp_destroy_nested_futex_lock( lck ); 705 } 706 707 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 708 709 710 /* ------------------------------------------------------------------------ */ 711 /* ticket (bakery) locks */ 712 713 static kmp_int32 714 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck ) 715 { 716 return TCR_4( lck->lk.owner_id ) - 1; 717 } 718 719 static inline bool 720 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck ) 721 { 722 return lck->lk.depth_locked != -1; 723 } 724 725 static kmp_uint32 726 __kmp_bakery_check(kmp_uint value, kmp_uint checker) 727 { 728 register kmp_uint32 pause; 729 730 if (value == checker) { 731 return TRUE; 732 } 733 for (pause = checker - value; pause != 0; --pause); 734 return FALSE; 735 } 736 737 __forceinline static void 738 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 739 { 740 kmp_uint32 my_ticket; 741 KMP_MB(); 742 743 my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket ); 744 745 #ifdef USE_LOCK_PROFILE 746 if ( TCR_4( lck->lk.now_serving ) != my_ticket ) 747 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 748 /* else __kmp_printf( "." 
);*/ 749 #endif /* USE_LOCK_PROFILE */ 750 751 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { 752 KMP_FSYNC_ACQUIRED(lck); 753 return; 754 } 755 KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck ); 756 KMP_FSYNC_ACQUIRED(lck); 757 } 758 759 void 760 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 761 { 762 __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 763 } 764 765 static void 766 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 767 { 768 char const * const func = "omp_set_lock"; 769 if ( lck->lk.initialized != lck ) { 770 KMP_FATAL( LockIsUninitialized, func ); 771 } 772 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 773 KMP_FATAL( LockNestableUsedAsSimple, func ); 774 } 775 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) { 776 KMP_FATAL( LockIsAlreadyOwned, func ); 777 } 778 779 __kmp_acquire_ticket_lock( lck, gtid ); 780 781 lck->lk.owner_id = gtid + 1; 782 } 783 784 int 785 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 786 { 787 kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket ); 788 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { 789 kmp_uint32 next_ticket = my_ticket + 1; 790 if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket, 791 my_ticket, next_ticket ) ) { 792 KMP_FSYNC_ACQUIRED( lck ); 793 return TRUE; 794 } 795 } 796 return FALSE; 797 } 798 799 static int 800 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 801 { 802 char const * const func = "omp_test_lock"; 803 if ( lck->lk.initialized != lck ) { 804 KMP_FATAL( LockIsUninitialized, func ); 805 } 806 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 807 KMP_FATAL( LockNestableUsedAsSimple, func ); 808 } 809 810 int retval = __kmp_test_ticket_lock( lck, gtid ); 811 812 if ( retval ) { 813 lck->lk.owner_id = gtid + 1; 814 } 815 return retval; 816 } 817 818 int 819 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 820 { 821 kmp_uint32 distance; 822 823 KMP_MB(); /* Flush all pending memory write invalidates. */ 824 825 KMP_FSYNC_RELEASING(lck); 826 distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) ); 827 828 KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 ); 829 830 KMP_MB(); /* Flush all pending memory write invalidates. */ 831 832 KMP_YIELD( distance 833 > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ); 834 return KMP_LOCK_RELEASED; 835 } 836 837 static int 838 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 839 { 840 char const * const func = "omp_unset_lock"; 841 KMP_MB(); /* in case another processor initialized lock */ 842 if ( lck->lk.initialized != lck ) { 843 KMP_FATAL( LockIsUninitialized, func ); 844 } 845 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 846 KMP_FATAL( LockNestableUsedAsSimple, func ); 847 } 848 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { 849 KMP_FATAL( LockUnsettingFree, func ); 850 } 851 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 ) 852 && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) { 853 KMP_FATAL( LockUnsettingSetByAnother, func ); 854 } 855 lck->lk.owner_id = 0; 856 return __kmp_release_ticket_lock( lck, gtid ); 857 } 858 859 void 860 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck ) 861 { 862 lck->lk.location = NULL; 863 TCW_4( lck->lk.next_ticket, 0 ); 864 TCW_4( lck->lk.now_serving, 0 ); 865 lck->lk.owner_id = 0; // no thread owns the lock. 
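    // depth_locked stays -1 for a simple (non-nested) ticket lock, and the
    // initialized field set below is what the *_with_checks routines compare
    // against the lock's own address to detect an uninitialized lock.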
866 lck->lk.depth_locked = -1; // -1 => not a nested lock. 867 lck->lk.initialized = (kmp_ticket_lock *)lck; 868 } 869 870 static void 871 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) 872 { 873 __kmp_init_ticket_lock( lck ); 874 } 875 876 void 877 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ) 878 { 879 lck->lk.initialized = NULL; 880 lck->lk.location = NULL; 881 lck->lk.next_ticket = 0; 882 lck->lk.now_serving = 0; 883 lck->lk.owner_id = 0; 884 lck->lk.depth_locked = -1; 885 } 886 887 static void 888 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) 889 { 890 char const * const func = "omp_destroy_lock"; 891 if ( lck->lk.initialized != lck ) { 892 KMP_FATAL( LockIsUninitialized, func ); 893 } 894 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 895 KMP_FATAL( LockNestableUsedAsSimple, func ); 896 } 897 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { 898 KMP_FATAL( LockStillOwned, func ); 899 } 900 __kmp_destroy_ticket_lock( lck ); 901 } 902 903 904 // 905 // nested ticket locks 906 // 907 908 void 909 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 910 { 911 KMP_DEBUG_ASSERT( gtid >= 0 ); 912 913 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 914 lck->lk.depth_locked += 1; 915 } 916 else { 917 __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 918 KMP_MB(); 919 lck->lk.depth_locked = 1; 920 KMP_MB(); 921 lck->lk.owner_id = gtid + 1; 922 } 923 } 924 925 static void 926 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 927 { 928 char const * const func = "omp_set_nest_lock"; 929 if ( lck->lk.initialized != lck ) { 930 KMP_FATAL( LockIsUninitialized, func ); 931 } 932 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 933 KMP_FATAL( LockSimpleUsedAsNestable, func ); 934 } 935 __kmp_acquire_nested_ticket_lock( lck, gtid ); 936 } 937 938 int 939 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 940 { 941 int retval; 942 943 KMP_DEBUG_ASSERT( gtid >= 0 ); 944 945 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 946 retval = ++lck->lk.depth_locked; 947 } 948 else if ( !__kmp_test_ticket_lock( lck, gtid ) ) { 949 retval = 0; 950 } 951 else { 952 KMP_MB(); 953 retval = lck->lk.depth_locked = 1; 954 KMP_MB(); 955 lck->lk.owner_id = gtid + 1; 956 } 957 return retval; 958 } 959 960 static int 961 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, 962 kmp_int32 gtid ) 963 { 964 char const * const func = "omp_test_nest_lock"; 965 if ( lck->lk.initialized != lck ) { 966 KMP_FATAL( LockIsUninitialized, func ); 967 } 968 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 969 KMP_FATAL( LockSimpleUsedAsNestable, func ); 970 } 971 return __kmp_test_nested_ticket_lock( lck, gtid ); 972 } 973 974 int 975 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 976 { 977 KMP_DEBUG_ASSERT( gtid >= 0 ); 978 979 KMP_MB(); 980 if ( --(lck->lk.depth_locked) == 0 ) { 981 KMP_MB(); 982 lck->lk.owner_id = 0; 983 __kmp_release_ticket_lock( lck, gtid ); 984 return KMP_LOCK_RELEASED; 985 } 986 return KMP_LOCK_STILL_HELD; 987 } 988 989 static int 990 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 991 { 992 char const * const func = "omp_unset_nest_lock"; 993 KMP_MB(); /* in case another processor initialized lock */ 994 if ( lck->lk.initialized != lck ) { 995 KMP_FATAL( LockIsUninitialized, func ); 996 } 997 if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { 998 KMP_FATAL( LockSimpleUsedAsNestable, func ); 999 } 1000 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { 1001 KMP_FATAL( LockUnsettingFree, func ); 1002 } 1003 if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) { 1004 KMP_FATAL( LockUnsettingSetByAnother, func ); 1005 } 1006 return __kmp_release_nested_ticket_lock( lck, gtid ); 1007 } 1008 1009 void 1010 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck ) 1011 { 1012 __kmp_init_ticket_lock( lck ); 1013 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1014 } 1015 1016 static void 1017 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) 1018 { 1019 __kmp_init_nested_ticket_lock( lck ); 1020 } 1021 1022 void 1023 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ) 1024 { 1025 __kmp_destroy_ticket_lock( lck ); 1026 lck->lk.depth_locked = 0; 1027 } 1028 1029 static void 1030 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) 1031 { 1032 char const * const func = "omp_destroy_nest_lock"; 1033 if ( lck->lk.initialized != lck ) { 1034 KMP_FATAL( LockIsUninitialized, func ); 1035 } 1036 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 1037 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1038 } 1039 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { 1040 KMP_FATAL( LockStillOwned, func ); 1041 } 1042 __kmp_destroy_nested_ticket_lock( lck ); 1043 } 1044 1045 1046 // 1047 // access functions to fields which don't exist for all lock kinds. 1048 // 1049 1050 static int 1051 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck ) 1052 { 1053 return lck == lck->lk.initialized; 1054 } 1055 1056 static const ident_t * 1057 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck ) 1058 { 1059 return lck->lk.location; 1060 } 1061 1062 static void 1063 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc ) 1064 { 1065 lck->lk.location = loc; 1066 } 1067 1068 static kmp_lock_flags_t 1069 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck ) 1070 { 1071 return lck->lk.flags; 1072 } 1073 1074 static void 1075 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags ) 1076 { 1077 lck->lk.flags = flags; 1078 } 1079 1080 /* ------------------------------------------------------------------------ */ 1081 /* queuing locks */ 1082 1083 /* 1084 * First the states 1085 * (head,tail) = 0, 0 means lock is unheld, nobody on queue 1086 * UINT_MAX or -1, 0 means lock is held, nobody on queue 1087 * h, h means lock is held or about to transition, 1 element on queue 1088 * h, t h <> t, means lock is held or about to transition, >1 elements on queue 1089 * 1090 * Now the transitions 1091 * Acquire(0,0) = -1 ,0 1092 * Release(0,0) = Error 1093 * Acquire(-1,0) = h ,h h > 0 1094 * Release(-1,0) = 0 ,0 1095 * Acquire(h,h) = h ,t h > 0, t > 0, h <> t 1096 * Release(h,h) = -1 ,0 h > 0 1097 * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' 1098 * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t 1099 * 1100 * And pictorially 1101 * 1102 * 1103 * +-----+ 1104 * | 0, 0|------- release -------> Error 1105 * +-----+ 1106 * | ^ 1107 * acquire| |release 1108 * | | 1109 * | | 1110 * v | 1111 * +-----+ 1112 * |-1, 0| 1113 * +-----+ 1114 * | ^ 1115 * acquire| |release 1116 * | | 1117 * | | 1118 * v | 1119 * +-----+ 1120 * | h, h| 1121 * +-----+ 1122 * | ^ 1123 * acquire| |release 1124 * | | 1125 * | | 1126 * v | 1127 * +-----+ 1128 * | h, t|----- acquire, release loopback ---+ 1129 * +-----+ | 1130 * ^ | 
 *           |                                   |
 *           +-----------------------------------+
 *
 */

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE   1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
static int tc = 0;
#define TRACE_LOCK(X,Y)          KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128,  "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z)      KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
#define TRACE_LOCK_HT(X,Y,Z,Q)   KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );

static void
__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
{
    kmp_int32 t, i;

    __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );

    i = tc % TRACE_BUF_ELE;
    __kmp_printf_no_lock( "%s\n", traces[i] );
    i = (i+1) % TRACE_BUF_ELE;
    while ( i != (tc % TRACE_BUF_ELE) ) {
        __kmp_printf_no_lock( "%s", traces[i] );
        i = (i+1) % TRACE_BUF_ELE;
    }
    __kmp_printf_no_lock( "\n" );

    __kmp_printf_no_lock(
             "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
             gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
             head_id, tail_id );

    __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );

    if ( lck->lk.head_id >= 1 ) {
        t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
        while (t > 0) {
            __kmp_printf_no_lock( "-> %d ", t );
            t = __kmp_threads[t-1]->th.th_next_waiting;
        }
    }
    __kmp_printf_no_lock( ";  tail: %d ", lck->lk.tail_id );
    __kmp_printf_no_lock( "\n\n" );
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32
__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline.
*/ 1199 __forceinline static void 1200 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck, 1201 kmp_int32 gtid ) 1202 { 1203 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid ); 1204 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1205 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1206 volatile kmp_uint32 *spin_here_p; 1207 kmp_int32 need_mf = 1; 1208 1209 #if OMPT_SUPPORT 1210 ompt_state_t prev_state = ompt_state_undefined; 1211 #endif 1212 1213 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1214 1215 KMP_FSYNC_PREPARE( lck ); 1216 KMP_DEBUG_ASSERT( this_thr != NULL ); 1217 spin_here_p = & this_thr->th.th_spin_here; 1218 1219 #ifdef DEBUG_QUEUING_LOCKS 1220 TRACE_LOCK( gtid+1, "acq ent" ); 1221 if ( *spin_here_p ) 1222 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1223 if ( this_thr->th.th_next_waiting != 0 ) 1224 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1225 #endif 1226 KMP_DEBUG_ASSERT( !*spin_here_p ); 1227 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1228 1229 1230 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p 1231 that may follow, not just in execution order, but also in visibility order. This way, 1232 when a releasing thread observes the changes to the queue by this thread, it can 1233 rightly assume that spin_here_p has already been set to TRUE, so that when it sets 1234 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p 1235 to FALSE before this thread sets it to TRUE, this thread will hang. 1236 */ 1237 *spin_here_p = TRUE; /* before enqueuing to prevent race */ 1238 1239 while( 1 ) { 1240 kmp_int32 enqueued; 1241 kmp_int32 head; 1242 kmp_int32 tail; 1243 1244 head = *head_id_p; 1245 1246 switch ( head ) { 1247 1248 case -1: 1249 { 1250 #ifdef DEBUG_QUEUING_LOCKS 1251 tail = *tail_id_p; 1252 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1253 #endif 1254 tail = 0; /* to make sure next link asynchronously read is not set accidentally; 1255 this assignment prevents us from entering the if ( t > 0 ) 1256 condition in the enqueued case below, which is not necessary for 1257 this state transition */ 1258 1259 need_mf = 0; 1260 /* try (-1,0)->(tid,tid) */ 1261 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p, 1262 KMP_PACK_64( -1, 0 ), 1263 KMP_PACK_64( gtid+1, gtid+1 ) ); 1264 #ifdef DEBUG_QUEUING_LOCKS 1265 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" ); 1266 #endif 1267 } 1268 break; 1269 1270 default: 1271 { 1272 tail = *tail_id_p; 1273 KMP_DEBUG_ASSERT( tail != gtid + 1 ); 1274 1275 #ifdef DEBUG_QUEUING_LOCKS 1276 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1277 #endif 1278 1279 if ( tail == 0 ) { 1280 enqueued = FALSE; 1281 } 1282 else { 1283 need_mf = 0; 1284 /* try (h,t) or (h,h)->(h,tid) */ 1285 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 ); 1286 1287 #ifdef DEBUG_QUEUING_LOCKS 1288 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" ); 1289 #endif 1290 } 1291 } 1292 break; 1293 1294 case 0: /* empty queue */ 1295 { 1296 kmp_int32 grabbed_lock; 1297 1298 #ifdef DEBUG_QUEUING_LOCKS 1299 tail = *tail_id_p; 1300 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1301 #endif 1302 /* try (0,0)->(-1,0) */ 1303 1304 /* only legal transition out of head = 0 is head = -1 with no change to tail */ 1305 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ); 1306 1307 if ( grabbed_lock 
) { 1308 1309 *spin_here_p = FALSE; 1310 1311 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", 1312 lck, gtid )); 1313 #ifdef DEBUG_QUEUING_LOCKS 1314 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 ); 1315 #endif 1316 1317 #if OMPT_SUPPORT 1318 if (ompt_enabled && prev_state != ompt_state_undefined) { 1319 /* change the state before clearing wait_id */ 1320 this_thr->th.ompt_thread_info.state = prev_state; 1321 this_thr->th.ompt_thread_info.wait_id = 0; 1322 } 1323 #endif 1324 1325 KMP_FSYNC_ACQUIRED( lck ); 1326 return; /* lock holder cannot be on queue */ 1327 } 1328 enqueued = FALSE; 1329 } 1330 break; 1331 } 1332 1333 #if OMPT_SUPPORT 1334 if (ompt_enabled && prev_state == ompt_state_undefined) { 1335 /* this thread will spin; set wait_id before entering wait state */ 1336 prev_state = this_thr->th.ompt_thread_info.state; 1337 this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck; 1338 this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; 1339 } 1340 #endif 1341 1342 if ( enqueued ) { 1343 if ( tail > 0 ) { 1344 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 ); 1345 KMP_ASSERT( tail_thr != NULL ); 1346 tail_thr->th.th_next_waiting = gtid+1; 1347 /* corresponding wait for this write in release code */ 1348 } 1349 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid )); 1350 1351 1352 /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for 1353 * throughput only here. 1354 */ 1355 KMP_MB(); 1356 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); 1357 1358 #ifdef DEBUG_QUEUING_LOCKS 1359 TRACE_LOCK( gtid+1, "acq spin" ); 1360 1361 if ( this_thr->th.th_next_waiting != 0 ) 1362 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1363 #endif 1364 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1365 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n", 1366 lck, gtid )); 1367 1368 #ifdef DEBUG_QUEUING_LOCKS 1369 TRACE_LOCK( gtid+1, "acq exit 2" ); 1370 #endif 1371 1372 #if OMPT_SUPPORT 1373 /* change the state before clearing wait_id */ 1374 this_thr->th.ompt_thread_info.state = prev_state; 1375 this_thr->th.ompt_thread_info.wait_id = 0; 1376 #endif 1377 1378 /* got lock, we were dequeued by the thread that released lock */ 1379 return; 1380 } 1381 1382 /* Yield if number of threads > number of logical processors */ 1383 /* ToDo: Not sure why this should only be in oversubscription case, 1384 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ 1385 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? 
__kmp_avail_proc : 1386 __kmp_xproc ) ); 1387 #ifdef DEBUG_QUEUING_LOCKS 1388 TRACE_LOCK( gtid+1, "acq retry" ); 1389 #endif 1390 1391 } 1392 KMP_ASSERT2( 0, "should not get here" ); 1393 } 1394 1395 void 1396 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1397 { 1398 KMP_DEBUG_ASSERT( gtid >= 0 ); 1399 1400 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1401 } 1402 1403 static void 1404 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1405 kmp_int32 gtid ) 1406 { 1407 char const * const func = "omp_set_lock"; 1408 if ( lck->lk.initialized != lck ) { 1409 KMP_FATAL( LockIsUninitialized, func ); 1410 } 1411 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1412 KMP_FATAL( LockNestableUsedAsSimple, func ); 1413 } 1414 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1415 KMP_FATAL( LockIsAlreadyOwned, func ); 1416 } 1417 1418 __kmp_acquire_queuing_lock( lck, gtid ); 1419 1420 lck->lk.owner_id = gtid + 1; 1421 } 1422 1423 int 1424 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1425 { 1426 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1427 kmp_int32 head; 1428 #ifdef KMP_DEBUG 1429 kmp_info_t *this_thr; 1430 #endif 1431 1432 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid )); 1433 KMP_DEBUG_ASSERT( gtid >= 0 ); 1434 #ifdef KMP_DEBUG 1435 this_thr = __kmp_thread_from_gtid( gtid ); 1436 KMP_DEBUG_ASSERT( this_thr != NULL ); 1437 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1438 #endif 1439 1440 head = *head_id_p; 1441 1442 if ( head == 0 ) { /* nobody on queue, nobody holding */ 1443 1444 /* try (0,0)->(-1,0) */ 1445 1446 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) { 1447 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid )); 1448 KMP_FSYNC_ACQUIRED(lck); 1449 return TRUE; 1450 } 1451 } 1452 1453 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid )); 1454 return FALSE; 1455 } 1456 1457 static int 1458 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1459 { 1460 char const * const func = "omp_test_lock"; 1461 if ( lck->lk.initialized != lck ) { 1462 KMP_FATAL( LockIsUninitialized, func ); 1463 } 1464 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1465 KMP_FATAL( LockNestableUsedAsSimple, func ); 1466 } 1467 1468 int retval = __kmp_test_queuing_lock( lck, gtid ); 1469 1470 if ( retval ) { 1471 lck->lk.owner_id = gtid + 1; 1472 } 1473 return retval; 1474 } 1475 1476 int 1477 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1478 { 1479 register kmp_info_t *this_thr; 1480 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1481 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1482 1483 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1484 KMP_DEBUG_ASSERT( gtid >= 0 ); 1485 this_thr = __kmp_thread_from_gtid( gtid ); 1486 KMP_DEBUG_ASSERT( this_thr != NULL ); 1487 #ifdef DEBUG_QUEUING_LOCKS 1488 TRACE_LOCK( gtid+1, "rel ent" ); 1489 1490 if ( this_thr->th.th_spin_here ) 1491 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1492 if ( this_thr->th.th_next_waiting != 0 ) 1493 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1494 #endif 1495 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1496 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1497 1498 KMP_FSYNC_RELEASING(lck); 1499 1500 while( 1 ) { 1501 kmp_int32 dequeued; 1502 kmp_int32 head; 1503 kmp_int32 tail; 1504 1505 head = *head_id_p; 1506 
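        // head is re-read on every pass: the CAS attempts below can fail if a
        // waiter enqueues (or the queue otherwise changes) between this read
        // and the update, in which case we simply retry.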
1507 #ifdef DEBUG_QUEUING_LOCKS 1508 tail = *tail_id_p; 1509 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail ); 1510 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1511 #endif 1512 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */ 1513 1514 if ( head == -1 ) { /* nobody on queue */ 1515 1516 /* try (-1,0)->(0,0) */ 1517 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) { 1518 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1519 lck, gtid )); 1520 #ifdef DEBUG_QUEUING_LOCKS 1521 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 ); 1522 #endif 1523 1524 #if OMPT_SUPPORT 1525 /* nothing to do - no other thread is trying to shift blame */ 1526 #endif 1527 1528 return KMP_LOCK_RELEASED; 1529 } 1530 dequeued = FALSE; 1531 1532 } 1533 else { 1534 1535 tail = *tail_id_p; 1536 if ( head == tail ) { /* only one thread on the queue */ 1537 1538 #ifdef DEBUG_QUEUING_LOCKS 1539 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1540 #endif 1541 KMP_DEBUG_ASSERT( head > 0 ); 1542 1543 /* try (h,h)->(-1,0) */ 1544 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p, 1545 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) ); 1546 #ifdef DEBUG_QUEUING_LOCKS 1547 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" ); 1548 #endif 1549 1550 } 1551 else { 1552 volatile kmp_int32 *waiting_id_p; 1553 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1554 KMP_DEBUG_ASSERT( head_thr != NULL ); 1555 waiting_id_p = & head_thr->th.th_next_waiting; 1556 1557 /* Does this require synchronous reads? */ 1558 #ifdef DEBUG_QUEUING_LOCKS 1559 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1560 #endif 1561 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1562 1563 /* try (h,t)->(h',t) or (t,t) */ 1564 1565 KMP_MB(); 1566 /* make sure enqueuing thread has time to update next waiting thread field */ 1567 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL); 1568 #ifdef DEBUG_QUEUING_LOCKS 1569 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" ); 1570 #endif 1571 dequeued = TRUE; 1572 } 1573 } 1574 1575 if ( dequeued ) { 1576 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1577 KMP_DEBUG_ASSERT( head_thr != NULL ); 1578 1579 /* Does this require synchronous reads? */ 1580 #ifdef DEBUG_QUEUING_LOCKS 1581 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1582 #endif 1583 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1584 1585 /* For clean code only. 1586 * Thread not released until next statement prevents race with acquire code. 
1587 */ 1588 head_thr->th.th_next_waiting = 0; 1589 #ifdef DEBUG_QUEUING_LOCKS 1590 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head ); 1591 #endif 1592 1593 KMP_MB(); 1594 /* reset spin value */ 1595 head_thr->th.th_spin_here = FALSE; 1596 1597 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n", 1598 lck, gtid )); 1599 #ifdef DEBUG_QUEUING_LOCKS 1600 TRACE_LOCK( gtid+1, "rel exit 2" ); 1601 #endif 1602 return KMP_LOCK_RELEASED; 1603 } 1604 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */ 1605 1606 #ifdef DEBUG_QUEUING_LOCKS 1607 TRACE_LOCK( gtid+1, "rel retry" ); 1608 #endif 1609 1610 } /* while */ 1611 KMP_ASSERT2( 0, "should not get here" ); 1612 return KMP_LOCK_RELEASED; 1613 } 1614 1615 static int 1616 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1617 kmp_int32 gtid ) 1618 { 1619 char const * const func = "omp_unset_lock"; 1620 KMP_MB(); /* in case another processor initialized lock */ 1621 if ( lck->lk.initialized != lck ) { 1622 KMP_FATAL( LockIsUninitialized, func ); 1623 } 1624 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1625 KMP_FATAL( LockNestableUsedAsSimple, func ); 1626 } 1627 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1628 KMP_FATAL( LockUnsettingFree, func ); 1629 } 1630 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1631 KMP_FATAL( LockUnsettingSetByAnother, func ); 1632 } 1633 lck->lk.owner_id = 0; 1634 return __kmp_release_queuing_lock( lck, gtid ); 1635 } 1636 1637 void 1638 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ) 1639 { 1640 lck->lk.location = NULL; 1641 lck->lk.head_id = 0; 1642 lck->lk.tail_id = 0; 1643 lck->lk.next_ticket = 0; 1644 lck->lk.now_serving = 0; 1645 lck->lk.owner_id = 0; // no thread owns the lock. 1646 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
1647 lck->lk.initialized = lck; 1648 1649 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1650 } 1651 1652 static void 1653 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1654 { 1655 __kmp_init_queuing_lock( lck ); 1656 } 1657 1658 void 1659 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ) 1660 { 1661 lck->lk.initialized = NULL; 1662 lck->lk.location = NULL; 1663 lck->lk.head_id = 0; 1664 lck->lk.tail_id = 0; 1665 lck->lk.next_ticket = 0; 1666 lck->lk.now_serving = 0; 1667 lck->lk.owner_id = 0; 1668 lck->lk.depth_locked = -1; 1669 } 1670 1671 static void 1672 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1673 { 1674 char const * const func = "omp_destroy_lock"; 1675 if ( lck->lk.initialized != lck ) { 1676 KMP_FATAL( LockIsUninitialized, func ); 1677 } 1678 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1679 KMP_FATAL( LockNestableUsedAsSimple, func ); 1680 } 1681 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1682 KMP_FATAL( LockStillOwned, func ); 1683 } 1684 __kmp_destroy_queuing_lock( lck ); 1685 } 1686 1687 1688 // 1689 // nested queuing locks 1690 // 1691 1692 void 1693 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1694 { 1695 KMP_DEBUG_ASSERT( gtid >= 0 ); 1696 1697 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1698 lck->lk.depth_locked += 1; 1699 } 1700 else { 1701 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1702 KMP_MB(); 1703 lck->lk.depth_locked = 1; 1704 KMP_MB(); 1705 lck->lk.owner_id = gtid + 1; 1706 } 1707 } 1708 1709 static void 1710 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1711 { 1712 char const * const func = "omp_set_nest_lock"; 1713 if ( lck->lk.initialized != lck ) { 1714 KMP_FATAL( LockIsUninitialized, func ); 1715 } 1716 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1717 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1718 } 1719 __kmp_acquire_nested_queuing_lock( lck, gtid ); 1720 } 1721 1722 int 1723 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1724 { 1725 int retval; 1726 1727 KMP_DEBUG_ASSERT( gtid >= 0 ); 1728 1729 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1730 retval = ++lck->lk.depth_locked; 1731 } 1732 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) { 1733 retval = 0; 1734 } 1735 else { 1736 KMP_MB(); 1737 retval = lck->lk.depth_locked = 1; 1738 KMP_MB(); 1739 lck->lk.owner_id = gtid + 1; 1740 } 1741 return retval; 1742 } 1743 1744 static int 1745 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1746 kmp_int32 gtid ) 1747 { 1748 char const * const func = "omp_test_nest_lock"; 1749 if ( lck->lk.initialized != lck ) { 1750 KMP_FATAL( LockIsUninitialized, func ); 1751 } 1752 if ( ! 
__kmp_is_queuing_lock_nestable( lck ) ) { 1753 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1754 } 1755 return __kmp_test_nested_queuing_lock( lck, gtid ); 1756 } 1757 1758 int 1759 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1760 { 1761 KMP_DEBUG_ASSERT( gtid >= 0 ); 1762 1763 KMP_MB(); 1764 if ( --(lck->lk.depth_locked) == 0 ) { 1765 KMP_MB(); 1766 lck->lk.owner_id = 0; 1767 __kmp_release_queuing_lock( lck, gtid ); 1768 return KMP_LOCK_RELEASED; 1769 } 1770 return KMP_LOCK_STILL_HELD; 1771 } 1772 1773 static int 1774 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1775 { 1776 char const * const func = "omp_unset_nest_lock"; 1777 KMP_MB(); /* in case another processor initialized lock */ 1778 if ( lck->lk.initialized != lck ) { 1779 KMP_FATAL( LockIsUninitialized, func ); 1780 } 1781 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1782 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1783 } 1784 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1785 KMP_FATAL( LockUnsettingFree, func ); 1786 } 1787 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1788 KMP_FATAL( LockUnsettingSetByAnother, func ); 1789 } 1790 return __kmp_release_nested_queuing_lock( lck, gtid ); 1791 } 1792 1793 void 1794 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck ) 1795 { 1796 __kmp_init_queuing_lock( lck ); 1797 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1798 } 1799 1800 static void 1801 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1802 { 1803 __kmp_init_nested_queuing_lock( lck ); 1804 } 1805 1806 void 1807 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ) 1808 { 1809 __kmp_destroy_queuing_lock( lck ); 1810 lck->lk.depth_locked = 0; 1811 } 1812 1813 static void 1814 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1815 { 1816 char const * const func = "omp_destroy_nest_lock"; 1817 if ( lck->lk.initialized != lck ) { 1818 KMP_FATAL( LockIsUninitialized, func ); 1819 } 1820 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1821 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1822 } 1823 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1824 KMP_FATAL( LockStillOwned, func ); 1825 } 1826 __kmp_destroy_nested_queuing_lock( lck ); 1827 } 1828 1829 1830 // 1831 // access functions to fields which don't exist for all lock kinds. 1832 // 1833 1834 static int 1835 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck ) 1836 { 1837 return lck == lck->lk.initialized; 1838 } 1839 1840 static const ident_t * 1841 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck ) 1842 { 1843 return lck->lk.location; 1844 } 1845 1846 static void 1847 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc ) 1848 { 1849 lck->lk.location = loc; 1850 } 1851 1852 static kmp_lock_flags_t 1853 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck ) 1854 { 1855 return lck->lk.flags; 1856 } 1857 1858 static void 1859 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags ) 1860 { 1861 lck->lk.flags = flags; 1862 } 1863 1864 #if KMP_USE_ADAPTIVE_LOCKS 1865 1866 /* 1867 RTM Adaptive locks 1868 */ 1869 1870 // TODO: Use the header for intrinsics below with the compiler 13.0 1871 //#include <immintrin.h> 1872 1873 // Values from the status register after failed speculation. 
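// A minimal sketch (illustration only, kept under "#if 0" and never compiled)
// of how the status values and the _xbegin()/_xend() helpers defined below are
// typically used: attempt a transaction, retry only on "soft" aborts, and fall
// back otherwise.  The names try_speculation / max_retries are hypothetical.
#if 0
static int
try_speculation( int max_retries )
{
    for ( int retries = 0; retries < max_retries; ++retries ) {
        unsigned int status = _xbegin();
        if ( status == _XBEGIN_STARTED ) {
            // ... speculatively execute the critical section, then commit.
            _xend();
            return 1;
        }
        if ( !( status & SOFT_ABORT_MASK ) )
            break;          // hard failure: retrying immediately is pointless
    }
    return 0;               // caller acquires the real lock instead
}
#endif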
1874 #define _XBEGIN_STARTED (~0u) 1875 #define _XABORT_EXPLICIT (1 << 0) 1876 #define _XABORT_RETRY (1 << 1) 1877 #define _XABORT_CONFLICT (1 << 2) 1878 #define _XABORT_CAPACITY (1 << 3) 1879 #define _XABORT_DEBUG (1 << 4) 1880 #define _XABORT_NESTED (1 << 5) 1881 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1882 1883 // Aborts for which it's worth trying again immediately 1884 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1885 1886 #define STRINGIZE_INTERNAL(arg) #arg 1887 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1888 1889 // Access to RTM instructions 1890 1891 /* 1892 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. 1893 This is the same definition as the compiler intrinsic that will be supported at some point. 1894 */ 1895 static __inline int _xbegin() 1896 { 1897 int res = -1; 1898 1899 #if KMP_OS_WINDOWS 1900 #if KMP_ARCH_X86_64 1901 _asm { 1902 _emit 0xC7 1903 _emit 0xF8 1904 _emit 2 1905 _emit 0 1906 _emit 0 1907 _emit 0 1908 jmp L2 1909 mov res, eax 1910 L2: 1911 } 1912 #else /* IA32 */ 1913 _asm { 1914 _emit 0xC7 1915 _emit 0xF8 1916 _emit 2 1917 _emit 0 1918 _emit 0 1919 _emit 0 1920 jmp L2 1921 mov res, eax 1922 L2: 1923 } 1924 #endif // KMP_ARCH_X86_64 1925 #else 1926 /* Note that %eax must be noted as killed (clobbered), because 1927 * the XSR is returned in %eax(%rax) on abort. Other register 1928 * values are restored, so don't need to be killed. 1929 * 1930 * We must also mark 'res' as an input and an output, since otherwise 1931 * 'res=-1' may be dropped as being dead, whereas we do need the 1932 * assignment on the successful (i.e., non-abort) path. 1933 */ 1934 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" 1935 " .long 1f-1b-6\n" 1936 " jmp 2f\n" 1937 "1: movl %%eax,%0\n" 1938 "2:" 1939 :"+r"(res)::"memory","%eax"); 1940 #endif // KMP_OS_WINDOWS 1941 return res; 1942 } 1943 1944 /* 1945 Transaction end 1946 */ 1947 static __inline void _xend() 1948 { 1949 #if KMP_OS_WINDOWS 1950 __asm { 1951 _emit 0x0f 1952 _emit 0x01 1953 _emit 0xd5 1954 } 1955 #else 1956 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory"); 1957 #endif 1958 } 1959 1960 /* 1961 This is a macro, the argument must be a single byte constant which 1962 can be evaluated by the inline assembler, since it is emitted as a 1963 byte into the assembly code. 1964 */ 1965 #if KMP_OS_WINDOWS 1966 #define _xabort(ARG) \ 1967 _asm _emit 0xc6 \ 1968 _asm _emit 0xf8 \ 1969 _asm _emit ARG 1970 #else 1971 #define _xabort(ARG) \ 1972 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory"); 1973 #endif 1974 1975 // 1976 // Statistics is collected for testing purpose 1977 // 1978 #if KMP_DEBUG_ADAPTIVE_LOCKS 1979 1980 // We accumulate speculative lock statistics when the lock is destroyed. 1981 // We keep locks that haven't been destroyed in the liveLocks list 1982 // so that we can grab their statistics too. 1983 static kmp_adaptive_lock_statistics_t destroyedStats; 1984 1985 // To hold the list of live locks. 1986 static kmp_adaptive_lock_info_t liveLocks; 1987 1988 // A lock so we can safely update the list of locks. 1989 static kmp_bootstrap_lock_t chain_lock; 1990 1991 // Initialize the list of stats. 
1992 void 1993 __kmp_init_speculative_stats() 1994 { 1995 kmp_adaptive_lock_info_t *lck = &liveLocks; 1996 1997 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); 1998 lck->stats.next = lck; 1999 lck->stats.prev = lck; 2000 2001 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2002 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2003 2004 __kmp_init_bootstrap_lock( &chain_lock ); 2005 2006 } 2007 2008 // Insert the lock into the circular list 2009 static void 2010 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) 2011 { 2012 __kmp_acquire_bootstrap_lock( &chain_lock ); 2013 2014 lck->stats.next = liveLocks.stats.next; 2015 lck->stats.prev = &liveLocks; 2016 2017 liveLocks.stats.next = lck; 2018 lck->stats.next->stats.prev = lck; 2019 2020 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2021 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2022 2023 __kmp_release_bootstrap_lock( &chain_lock ); 2024 } 2025 2026 static void 2027 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) 2028 { 2029 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2030 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2031 2032 kmp_adaptive_lock_info_t * n = lck->stats.next; 2033 kmp_adaptive_lock_info_t * p = lck->stats.prev; 2034 2035 n->stats.prev = p; 2036 p->stats.next = n; 2037 } 2038 2039 static void 2040 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) 2041 { 2042 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); 2043 __kmp_remember_lock( lck ); 2044 } 2045 2046 static void 2047 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) 2048 { 2049 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 2050 2051 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 2052 t->successfulSpeculations += s->successfulSpeculations; 2053 t->hardFailedSpeculations += s->hardFailedSpeculations; 2054 t->softFailedSpeculations += s->softFailedSpeculations; 2055 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 2056 t->lemmingYields += s->lemmingYields; 2057 } 2058 2059 static void 2060 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) 2061 { 2062 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 2063 2064 __kmp_acquire_bootstrap_lock( &chain_lock ); 2065 2066 __kmp_add_stats( &destroyedStats, lck ); 2067 __kmp_forget_lock( lck ); 2068 2069 __kmp_release_bootstrap_lock( &chain_lock ); 2070 } 2071 2072 static float 2073 percent (kmp_uint32 count, kmp_uint32 total) 2074 { 2075 return (total == 0) ? 0.0: (100.0 * count)/total; 2076 } 2077 2078 static 2079 FILE * __kmp_open_stats_file() 2080 { 2081 if (strcmp (__kmp_speculative_statsfile, "-") == 0) 2082 return stdout; 2083 2084 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20; 2085 char buffer[buffLen]; 2086 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile, 2087 (kmp_int32)getpid()); 2088 FILE * result = fopen(&buffer[0], "w"); 2089 2090 // Maybe we should issue a warning here... 2091 return result ? 
result : stdout; 2092 } 2093 2094 void 2095 __kmp_print_speculative_stats() 2096 { 2097 if (__kmp_user_lock_kind != lk_adaptive) 2098 return; 2099 2100 FILE * statsFile = __kmp_open_stats_file(); 2101 2102 kmp_adaptive_lock_statistics_t total = destroyedStats; 2103 kmp_adaptive_lock_info_t *lck; 2104 2105 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 2106 __kmp_add_stats( &total, lck ); 2107 } 2108 kmp_adaptive_lock_statistics_t *t = &total; 2109 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; 2110 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + 2111 t->softFailedSpeculations; 2112 2113 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); 2114 fprintf ( statsFile, " Lock parameters: \n" 2115 " max_soft_retries : %10d\n" 2116 " max_badness : %10d\n", 2117 __kmp_adaptive_backoff_params.max_soft_retries, 2118 __kmp_adaptive_backoff_params.max_badness); 2119 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); 2120 fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); 2121 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", 2122 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); 2123 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 2124 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); 2125 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); 2126 2127 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); 2128 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", 2129 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); 2130 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", 2131 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); 2132 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", 2133 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); 2134 2135 if (statsFile != stdout) 2136 fclose( statsFile ); 2137 } 2138 2139 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) 2140 #else 2141 # define KMP_INC_STAT(lck,stat) 2142 2143 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2144 2145 static inline bool 2146 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) 2147 { 2148 // It is enough to check that the head_id is zero. 2149 // We don't also need to check the tail. 2150 bool res = lck->lk.head_id == 0; 2151 2152 // We need a fence here, since we must ensure that no memory operations 2153 // from later in this thread float above that read. 2154 #if KMP_COMPILER_ICC 2155 _mm_mfence(); 2156 #else 2157 __sync_synchronize(); 2158 #endif 2159 2160 return res; 2161 } 2162 2163 // Functions for manipulating the badness 2164 static __inline void 2165 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) 2166 { 2167 // Reset the badness to zero so we eagerly try to speculate again 2168 lck->lk.adaptive.badness = 0; 2169 KMP_INC_STAT(lck,successfulSpeculations); 2170 } 2171 2172 // Create a bit mask with one more set bit. 
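// As speculation keeps failing, badness therefore walks through 0, 1, 3, 7, ...
// up to max_badness. __kmp_should_speculate() below tests
// (acquire_attempts & badness) == 0, so with badness == 7 roughly one in eight
// non-speculative acquire attempts is allowed to retry speculation, while a
// single success resets badness to 0 and re-enables speculation on every
// acquire.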
2173 static __inline void 2174 __kmp_step_badness( kmp_adaptive_lock_t *lck ) 2175 { 2176 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; 2177 if ( newBadness > lck->lk.adaptive.max_badness) { 2178 return; 2179 } else { 2180 lck->lk.adaptive.badness = newBadness; 2181 } 2182 } 2183 2184 // Check whether speculation should be attempted. 2185 static __inline int 2186 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2187 { 2188 kmp_uint32 badness = lck->lk.adaptive.badness; 2189 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; 2190 int res = (attempts & badness) == 0; 2191 return res; 2192 } 2193 2194 // Attempt to acquire only the speculative lock. 2195 // Does not back off to the non-speculative lock. 2196 // 2197 static int 2198 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2199 { 2200 int retries = lck->lk.adaptive.max_soft_retries; 2201 2202 // We don't explicitly count the start of speculation, rather we record 2203 // the results (success, hard fail, soft fail). The sum of all of those 2204 // is the total number of times we started speculation since all 2205 // speculations must end one of those ways. 2206 do 2207 { 2208 kmp_uint32 status = _xbegin(); 2209 // Switch this in to disable actual speculation but exercise 2210 // at least some of the rest of the code. Useful for debugging... 2211 // kmp_uint32 status = _XABORT_NESTED; 2212 2213 if (status == _XBEGIN_STARTED ) 2214 { /* We have successfully started speculation 2215 * Check that no-one acquired the lock for real between when we last looked 2216 * and now. This also gets the lock cache line into our read-set, 2217 * which we need so that we'll abort if anyone later claims it for real. 2218 */ 2219 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2220 { 2221 // Lock is now visibly acquired, so someone beat us to it. 2222 // Abort the transaction so we'll restart from _xbegin with the 2223 // failure status. 2224 _xabort(0x01) 2225 KMP_ASSERT2( 0, "should not get here" ); 2226 } 2227 return 1; // Lock has been acquired (speculatively) 2228 } else { 2229 // We have aborted, update the statistics 2230 if ( status & SOFT_ABORT_MASK) 2231 { 2232 KMP_INC_STAT(lck,softFailedSpeculations); 2233 // and loop round to retry. 2234 } 2235 else 2236 { 2237 KMP_INC_STAT(lck,hardFailedSpeculations); 2238 // Give up if we had a hard failure. 2239 break; 2240 } 2241 } 2242 } while( retries-- ); // Loop while we have retries, and didn't fail hard. 2243 2244 // Either we had a hard failure or we didn't succeed softly after 2245 // the full set of attempts, so back off the badness. 2246 __kmp_step_badness( lck ); 2247 return 0; 2248 } 2249 2250 // Attempt to acquire the speculative lock, or back off to the non-speculative one 2251 // if the speculative lock cannot be acquired. 2252 // We can succeed speculatively, non-speculatively, or fail. 2253 static int 2254 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2255 { 2256 // First try to acquire the lock speculatively 2257 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) 2258 return 1; 2259 2260 // Speculative acquisition failed, so try to acquire it non-speculatively. 2261 // Count the non-speculative acquire attempt 2262 lck->lk.adaptive.acquire_attempts++; 2263 2264 // Use base, non-speculative lock. 
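// A failure here simply reports the lock as busy to the caller; omp_test_lock()
// must not block, so this path never waits on the queuing lock.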
2265 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) 2266 { 2267 KMP_INC_STAT(lck,nonSpeculativeAcquires); 2268 return 1; // Lock is acquired (non-speculatively) 2269 } 2270 else 2271 { 2272 return 0; // Failed to acquire the lock, it's already visibly locked. 2273 } 2274 } 2275 2276 static int 2277 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2278 { 2279 char const * const func = "omp_test_lock"; 2280 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2281 KMP_FATAL( LockIsUninitialized, func ); 2282 } 2283 2284 int retval = __kmp_test_adaptive_lock( lck, gtid ); 2285 2286 if ( retval ) { 2287 lck->lk.qlk.owner_id = gtid + 1; 2288 } 2289 return retval; 2290 } 2291 2292 // Block until we can acquire a speculative, adaptive lock. 2293 // We check whether we should be trying to speculate. 2294 // If we should be, we check the real lock to see if it is free, 2295 // and, if not, pause without attempting to acquire it until it is. 2296 // Then we try the speculative acquire. 2297 // This means that although we suffer from lemmings a little ( 2298 // because we can't acquire the lock speculatively until 2299 // the queue of threads waiting has cleared), we don't get into a 2300 // state where we can never acquire the lock speculatively (because we 2301 // force the queue to clear by preventing new arrivals from entering the 2302 // queue). 2303 // This does mean that when we're trying to break lemmings, the lock 2304 // is no longer fair. However, OpenMP makes no guarantee that its 2305 // locks are fair, so this isn't a real problem. 2306 static void 2307 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2308 { 2309 if ( __kmp_should_speculate( lck, gtid ) ) 2310 { 2311 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2312 { 2313 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2314 return; 2315 // We tried speculation and failed, so give up. 2316 } 2317 else 2318 { 2319 // We can't try speculation until the lock is free, so we 2320 // pause here (without suspending on the queuing lock) 2321 // to allow it to drain, then try again. 2322 // All other threads will also see the same result for 2323 // __kmp_should_speculate, so will be doing the same if they 2324 // try to claim the lock from now on. 2325 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2326 { 2327 KMP_INC_STAT(lck,lemmingYields); 2328 __kmp_yield (TRUE); 2329 } 2330 2331 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2332 return; 2333 } 2334 } 2335 2336 // Speculative acquisition failed, so acquire it non-speculatively. 2337 // Count the non-speculative acquire attempt 2338 lck->lk.adaptive.acquire_attempts++; 2339 2340 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid ); 2341 // We have acquired the base lock, so count that.
2342 KMP_INC_STAT(lck,nonSpeculativeAcquires ); 2343 } 2344 2345 static void 2346 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2347 { 2348 char const * const func = "omp_set_lock"; 2349 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2350 KMP_FATAL( LockIsUninitialized, func ); 2351 } 2352 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { 2353 KMP_FATAL( LockIsAlreadyOwned, func ); 2354 } 2355 2356 __kmp_acquire_adaptive_lock( lck, gtid ); 2357 2358 lck->lk.qlk.owner_id = gtid + 1; 2359 } 2360 2361 static int 2362 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2363 { 2364 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2365 { // If the lock doesn't look claimed we must be speculating. 2366 // (Or the user's code is buggy and they're releasing without locking; 2367 // if we had XTEST we'd be able to check that case...) 2368 _xend(); // Exit speculation 2369 __kmp_update_badness_after_success( lck ); 2370 } 2371 else 2372 { // Since the lock *is* visibly locked we're not speculating, 2373 // so should use the underlying lock's release scheme. 2374 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); 2375 } 2376 return KMP_LOCK_RELEASED; 2377 } 2378 2379 static int 2380 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2381 { 2382 char const * const func = "omp_unset_lock"; 2383 KMP_MB(); /* in case another processor initialized lock */ 2384 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2385 KMP_FATAL( LockIsUninitialized, func ); 2386 } 2387 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { 2388 KMP_FATAL( LockUnsettingFree, func ); 2389 } 2390 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { 2391 KMP_FATAL( LockUnsettingSetByAnother, func ); 2392 } 2393 lck->lk.qlk.owner_id = 0; 2394 __kmp_release_adaptive_lock( lck, gtid ); 2395 return KMP_LOCK_RELEASED; 2396 } 2397 2398 static void 2399 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) 2400 { 2401 __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); 2402 lck->lk.adaptive.badness = 0; 2403 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; 2404 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; 2405 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2406 #if KMP_DEBUG_ADAPTIVE_LOCKS 2407 __kmp_zero_speculative_stats( &lck->lk.adaptive ); 2408 #endif 2409 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2410 } 2411 2412 static void 2413 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) 2414 { 2415 __kmp_init_adaptive_lock( lck ); 2416 } 2417 2418 static void 2419 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) 2420 { 2421 #if KMP_DEBUG_ADAPTIVE_LOCKS 2422 __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); 2423 #endif 2424 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); 2425 // Nothing needed for the speculative part. 
2426 } 2427 2428 static void 2429 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) 2430 { 2431 char const * const func = "omp_destroy_lock"; 2432 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2433 KMP_FATAL( LockIsUninitialized, func ); 2434 } 2435 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { 2436 KMP_FATAL( LockStillOwned, func ); 2437 } 2438 __kmp_destroy_adaptive_lock( lck ); 2439 } 2440 2441 2442 #endif // KMP_USE_ADAPTIVE_LOCKS 2443 2444 2445 /* ------------------------------------------------------------------------ */ 2446 /* DRDPA ticket locks */ 2447 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2448 2449 static kmp_int32 2450 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) 2451 { 2452 return TCR_4( lck->lk.owner_id ) - 1; 2453 } 2454 2455 static inline bool 2456 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) 2457 { 2458 return lck->lk.depth_locked != -1; 2459 } 2460 2461 __forceinline static void 2462 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2463 { 2464 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); 2465 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2466 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2467 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2468 TCR_PTR(lck->lk.polls); // volatile load 2469 2470 #ifdef USE_LOCK_PROFILE 2471 if (TCR_8(polls[ticket & mask].poll) != ticket) 2472 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2473 /* else __kmp_printf( "." );*/ 2474 #endif /* USE_LOCK_PROFILE */ 2475 2476 // 2477 // Now spin-wait, but reload the polls pointer and mask, in case the 2478 // polling area has been reconfigured. Unless it is reconfigured, the 2479 // reloads stay in L1 cache and are cheap. 2480 // 2481 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!! 2482 // 2483 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2484 // and poll to be re-read every spin iteration. 2485 // 2486 kmp_uint32 spins; 2487 2488 KMP_FSYNC_PREPARE(lck); 2489 KMP_INIT_YIELD(spins); 2490 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load 2491 // If we are oversubscribed, 2492 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2493 // CPU Pause is in the macros for yield. 2494 // 2495 KMP_YIELD(TCR_4(__kmp_nth) 2496 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2497 KMP_YIELD_SPIN(spins); 2498 2499 // Re-read the mask and the poll pointer from the lock structure. 2500 // 2501 // Make certain that "mask" is read before "polls" !!! 2502 // 2503 // If another thread picks reconfigures the polling area and updates 2504 // their values, and we get the new value of mask and the old polls 2505 // pointer, we could access memory beyond the end of the old polling 2506 // area. 2507 // 2508 mask = TCR_8(lck->lk.mask); // volatile load 2509 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2510 TCR_PTR(lck->lk.polls); // volatile load 2511 } 2512 2513 // 2514 // Critical section starts here 2515 // 2516 KMP_FSYNC_ACQUIRED(lck); 2517 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2518 ticket, lck)); 2519 lck->lk.now_serving = ticket; // non-volatile store 2520 2521 // 2522 // Deallocate a garbage polling area if we know that we are the last 2523 // thread that could possibly access it. 2524 // 2525 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2526 // ticket. 
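// (The idea is that once our ticket is at or past cleanup_ticket, every thread
// that could still have been spinning on the old area has already been served,
// so freeing old_polls cannot race with a late reader.)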
2527 // 2528 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2529 __kmp_free((void *)lck->lk.old_polls); 2530 lck->lk.old_polls = NULL; 2531 lck->lk.cleanup_ticket = 0; 2532 } 2533 2534 // 2535 // Check to see if we should reconfigure the polling area. 2536 // If there is still a garbage polling area to be deallocated from a 2537 // previous reconfiguration, let a later thread reconfigure it. 2538 // 2539 if (lck->lk.old_polls == NULL) { 2540 bool reconfigure = false; 2541 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2542 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2543 2544 if (TCR_4(__kmp_nth) 2545 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2546 // 2547 // We are in oversubscription mode. Contract the polling area 2548 // down to a single location, if that hasn't been done already. 2549 // 2550 if (num_polls > 1) { 2551 reconfigure = true; 2552 num_polls = TCR_4(lck->lk.num_polls); 2553 mask = 0; 2554 num_polls = 1; 2555 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2556 __kmp_allocate(num_polls * sizeof(*polls)); 2557 polls[0].poll = ticket; 2558 } 2559 } 2560 else { 2561 // 2562 // We are in under/fully subscribed mode. Check the number of 2563 // threads waiting on the lock. The size of the polling area 2564 // should be at least the number of threads waiting. 2565 // 2566 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2567 if (num_waiting > num_polls) { 2568 kmp_uint32 old_num_polls = num_polls; 2569 reconfigure = true; 2570 do { 2571 mask = (mask << 1) | 1; 2572 num_polls *= 2; 2573 } while (num_polls <= num_waiting); 2574 2575 // 2576 // Allocate the new polling area, and copy the relevant portion 2577 // of the old polling area to the new area. __kmp_allocate() 2578 // zeroes the memory it allocates, and most of the old area is 2579 // just zero padding, so we only copy the release counters. 2580 // 2581 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2582 __kmp_allocate(num_polls * sizeof(*polls)); 2583 kmp_uint32 i; 2584 for (i = 0; i < old_num_polls; i++) { 2585 polls[i].poll = old_polls[i].poll; 2586 } 2587 } 2588 } 2589 2590 if (reconfigure) { 2591 // 2592 // Now write the updated fields back to the lock structure. 2593 // 2594 // Make certain that "polls" is written before "mask" !!! 2595 // 2596 // If another thread picks up the new value of mask and the old 2597 // polls pointer , it could access memory beyond the end of the 2598 // old polling area. 2599 // 2600 // On x86, we need memory fences. 2601 // 2602 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", 2603 ticket, lck, num_polls)); 2604 2605 lck->lk.old_polls = old_polls; // non-volatile store 2606 lck->lk.polls = polls; // volatile store 2607 2608 KMP_MB(); 2609 2610 lck->lk.num_polls = num_polls; // non-volatile store 2611 lck->lk.mask = mask; // volatile store 2612 2613 KMP_MB(); 2614 2615 // 2616 // Only after the new polling area and mask have been flushed 2617 // to main memory can we update the cleanup ticket field. 
2618 // 2619 // volatile load / non-volatile store 2620 // 2621 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2622 } 2623 } 2624 } 2625 2626 void 2627 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2628 { 2629 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2630 } 2631 2632 static void 2633 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2634 { 2635 char const * const func = "omp_set_lock"; 2636 if ( lck->lk.initialized != lck ) { 2637 KMP_FATAL( LockIsUninitialized, func ); 2638 } 2639 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2640 KMP_FATAL( LockNestableUsedAsSimple, func ); 2641 } 2642 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { 2643 KMP_FATAL( LockIsAlreadyOwned, func ); 2644 } 2645 2646 __kmp_acquire_drdpa_lock( lck, gtid ); 2647 2648 lck->lk.owner_id = gtid + 1; 2649 } 2650 2651 int 2652 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2653 { 2654 // 2655 // First get a ticket, then read the polls pointer and the mask. 2656 // The polls pointer must be read before the mask!!! (See above) 2657 // 2658 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2659 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2660 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2661 TCR_PTR(lck->lk.polls); // volatile load 2662 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2663 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2664 kmp_uint64 next_ticket = ticket + 1; 2665 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, 2666 ticket, next_ticket)) { 2667 KMP_FSYNC_ACQUIRED(lck); 2668 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2669 ticket, lck)); 2670 lck->lk.now_serving = ticket; // non-volatile store 2671 2672 // 2673 // Since no threads are waiting, there is no possibility that 2674 // we would want to reconfigure the polling area. We might 2675 // have the cleanup ticket value (which says that it is now 2676 // safe to deallocate old_polls), but we'll let a later thread 2677 // which calls __kmp_acquire_lock do that - this routine 2678 // isn't supposed to block, and we would risk blocks if we 2679 // called __kmp_free() to do the deallocation. 2680 // 2681 return TRUE; 2682 } 2683 } 2684 return FALSE; 2685 } 2686 2687 static int 2688 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2689 { 2690 char const * const func = "omp_test_lock"; 2691 if ( lck->lk.initialized != lck ) { 2692 KMP_FATAL( LockIsUninitialized, func ); 2693 } 2694 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2695 KMP_FATAL( LockNestableUsedAsSimple, func ); 2696 } 2697 2698 int retval = __kmp_test_drdpa_lock( lck, gtid ); 2699 2700 if ( retval ) { 2701 lck->lk.owner_id = gtid + 1; 2702 } 2703 return retval; 2704 } 2705 2706 int 2707 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2708 { 2709 // 2710 // Read the ticket value from the lock data struct, then the polls 2711 // pointer and the mask. The polls pointer must be read before the 2712 // mask!!! 
(See above) 2713 // 2714 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2715 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2716 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2717 TCR_PTR(lck->lk.polls); // volatile load 2718 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2719 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2720 ticket - 1, lck)); 2721 KMP_FSYNC_RELEASING(lck); 2722 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2723 return KMP_LOCK_RELEASED; 2724 } 2725 2726 static int 2727 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2728 { 2729 char const * const func = "omp_unset_lock"; 2730 KMP_MB(); /* in case another processor initialized lock */ 2731 if ( lck->lk.initialized != lck ) { 2732 KMP_FATAL( LockIsUninitialized, func ); 2733 } 2734 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2735 KMP_FATAL( LockNestableUsedAsSimple, func ); 2736 } 2737 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2738 KMP_FATAL( LockUnsettingFree, func ); 2739 } 2740 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) 2741 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { 2742 KMP_FATAL( LockUnsettingSetByAnother, func ); 2743 } 2744 lck->lk.owner_id = 0; 2745 return __kmp_release_drdpa_lock( lck, gtid ); 2746 } 2747 2748 void 2749 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) 2750 { 2751 lck->lk.location = NULL; 2752 lck->lk.mask = 0; 2753 lck->lk.num_polls = 1; 2754 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2755 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2756 lck->lk.cleanup_ticket = 0; 2757 lck->lk.old_polls = NULL; 2758 lck->lk.next_ticket = 0; 2759 lck->lk.now_serving = 0; 2760 lck->lk.owner_id = 0; // no thread owns the lock. 2761 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
2762 lck->lk.initialized = lck; 2763 2764 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2765 } 2766 2767 static void 2768 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2769 { 2770 __kmp_init_drdpa_lock( lck ); 2771 } 2772 2773 void 2774 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) 2775 { 2776 lck->lk.initialized = NULL; 2777 lck->lk.location = NULL; 2778 if (lck->lk.polls != NULL) { 2779 __kmp_free((void *)lck->lk.polls); 2780 lck->lk.polls = NULL; 2781 } 2782 if (lck->lk.old_polls != NULL) { 2783 __kmp_free((void *)lck->lk.old_polls); 2784 lck->lk.old_polls = NULL; 2785 } 2786 lck->lk.mask = 0; 2787 lck->lk.num_polls = 0; 2788 lck->lk.cleanup_ticket = 0; 2789 lck->lk.next_ticket = 0; 2790 lck->lk.now_serving = 0; 2791 lck->lk.owner_id = 0; 2792 lck->lk.depth_locked = -1; 2793 } 2794 2795 static void 2796 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2797 { 2798 char const * const func = "omp_destroy_lock"; 2799 if ( lck->lk.initialized != lck ) { 2800 KMP_FATAL( LockIsUninitialized, func ); 2801 } 2802 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2803 KMP_FATAL( LockNestableUsedAsSimple, func ); 2804 } 2805 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2806 KMP_FATAL( LockStillOwned, func ); 2807 } 2808 __kmp_destroy_drdpa_lock( lck ); 2809 } 2810 2811 2812 // 2813 // nested drdpa ticket locks 2814 // 2815 2816 void 2817 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2818 { 2819 KMP_DEBUG_ASSERT( gtid >= 0 ); 2820 2821 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2822 lck->lk.depth_locked += 1; 2823 } 2824 else { 2825 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2826 KMP_MB(); 2827 lck->lk.depth_locked = 1; 2828 KMP_MB(); 2829 lck->lk.owner_id = gtid + 1; 2830 } 2831 } 2832 2833 static void 2834 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2835 { 2836 char const * const func = "omp_set_nest_lock"; 2837 if ( lck->lk.initialized != lck ) { 2838 KMP_FATAL( LockIsUninitialized, func ); 2839 } 2840 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2841 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2842 } 2843 __kmp_acquire_nested_drdpa_lock( lck, gtid ); 2844 } 2845 2846 int 2847 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2848 { 2849 int retval; 2850 2851 KMP_DEBUG_ASSERT( gtid >= 0 ); 2852 2853 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2854 retval = ++lck->lk.depth_locked; 2855 } 2856 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { 2857 retval = 0; 2858 } 2859 else { 2860 KMP_MB(); 2861 retval = lck->lk.depth_locked = 1; 2862 KMP_MB(); 2863 lck->lk.owner_id = gtid + 1; 2864 } 2865 return retval; 2866 } 2867 2868 static int 2869 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2870 { 2871 char const * const func = "omp_test_nest_lock"; 2872 if ( lck->lk.initialized != lck ) { 2873 KMP_FATAL( LockIsUninitialized, func ); 2874 } 2875 if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { 2876 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2877 } 2878 return __kmp_test_nested_drdpa_lock( lck, gtid ); 2879 } 2880 2881 int 2882 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2883 { 2884 KMP_DEBUG_ASSERT( gtid >= 0 ); 2885 2886 KMP_MB(); 2887 if ( --(lck->lk.depth_locked) == 0 ) { 2888 KMP_MB(); 2889 lck->lk.owner_id = 0; 2890 __kmp_release_drdpa_lock( lck, gtid ); 2891 return KMP_LOCK_RELEASED; 2892 } 2893 return KMP_LOCK_STILL_HELD; 2894 } 2895 2896 static int 2897 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2898 { 2899 char const * const func = "omp_unset_nest_lock"; 2900 KMP_MB(); /* in case another processor initialized lock */ 2901 if ( lck->lk.initialized != lck ) { 2902 KMP_FATAL( LockIsUninitialized, func ); 2903 } 2904 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2905 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2906 } 2907 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2908 KMP_FATAL( LockUnsettingFree, func ); 2909 } 2910 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { 2911 KMP_FATAL( LockUnsettingSetByAnother, func ); 2912 } 2913 return __kmp_release_nested_drdpa_lock( lck, gtid ); 2914 } 2915 2916 void 2917 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) 2918 { 2919 __kmp_init_drdpa_lock( lck ); 2920 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2921 } 2922 2923 static void 2924 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2925 { 2926 __kmp_init_nested_drdpa_lock( lck ); 2927 } 2928 2929 void 2930 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) 2931 { 2932 __kmp_destroy_drdpa_lock( lck ); 2933 lck->lk.depth_locked = 0; 2934 } 2935 2936 static void 2937 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2938 { 2939 char const * const func = "omp_destroy_nest_lock"; 2940 if ( lck->lk.initialized != lck ) { 2941 KMP_FATAL( LockIsUninitialized, func ); 2942 } 2943 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2944 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2945 } 2946 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2947 KMP_FATAL( LockStillOwned, func ); 2948 } 2949 __kmp_destroy_nested_drdpa_lock( lck ); 2950 } 2951 2952 2953 // 2954 // access functions to fields which don't exist for all lock kinds. 2955 // 2956 2957 static int 2958 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) 2959 { 2960 return lck == lck->lk.initialized; 2961 } 2962 2963 static const ident_t * 2964 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) 2965 { 2966 return lck->lk.location; 2967 } 2968 2969 static void 2970 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) 2971 { 2972 lck->lk.location = loc; 2973 } 2974 2975 static kmp_lock_flags_t 2976 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) 2977 { 2978 return lck->lk.flags; 2979 } 2980 2981 static void 2982 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) 2983 { 2984 lck->lk.flags = flags; 2985 } 2986 2987 #if KMP_USE_DYNAMIC_LOCK 2988 2989 // Definitions of lock hints. 2990 # ifndef __OMP_H 2991 typedef enum kmp_lock_hint_t { 2992 kmp_lock_hint_none = 0, 2993 kmp_lock_hint_contended, 2994 kmp_lock_hint_uncontended, 2995 kmp_lock_hint_nonspeculative, 2996 kmp_lock_hint_speculative, 2997 kmp_lock_hint_adaptive, 2998 } kmp_lock_hint_t; 2999 # endif 3000 3001 // Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word. 
3002 #define expand_init_lock(l, a) \ 3003 static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \ 3004 *lck = DYNA_LOCK_FREE(l); \ 3005 KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \ 3006 } 3007 FOREACH_D_LOCK(expand_init_lock, 0) 3008 #undef expand_init_lock 3009 3010 #if DYNA_HAS_HLE 3011 3012 // HLE lock functions - imported from the testbed runtime. 3013 #if KMP_MIC 3014 # define machine_pause() _mm_delay_32(10) // TODO: find the right argument 3015 #else 3016 # define machine_pause() _mm_pause() 3017 #endif 3018 #define HLE_ACQUIRE ".byte 0xf2;" 3019 #define HLE_RELEASE ".byte 0xf3;" 3020 3021 static inline kmp_uint32 3022 swap4(kmp_uint32 volatile *p, kmp_uint32 v) 3023 { 3024 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" 3025 : "+r"(v), "+m"(*p) 3026 : 3027 : "memory"); 3028 return v; 3029 } 3030 3031 static void 3032 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) 3033 { 3034 *lck = 0; 3035 } 3036 3037 static void 3038 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3039 { 3040 // Use gtid for DYNA_LOCK_BUSY if necessary 3041 if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) { 3042 int delay = 1; 3043 do { 3044 while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) { 3045 for (int i = delay; i != 0; --i) 3046 machine_pause(); 3047 delay = ((delay << 1) | 1) & 7; 3048 } 3049 } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)); 3050 } 3051 } 3052 3053 static void 3054 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3055 { 3056 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 3057 } 3058 3059 static void 3060 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3061 { 3062 __asm__ volatile(HLE_RELEASE "movl %1,%0" 3063 : "=m"(*lck) 3064 : "r"(DYNA_LOCK_FREE(hle)) 3065 : "memory"); 3066 } 3067 3068 static void 3069 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3070 { 3071 __kmp_release_hle_lock(lck, gtid); // TODO: add checks 3072 } 3073 3074 static int 3075 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3076 { 3077 return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle); 3078 } 3079 3080 static int 3081 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3082 { 3083 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 3084 } 3085 3086 #endif // DYNA_HAS_HLE 3087 3088 // Entry functions for indirect locks (first element of direct_*_ops[]). 3089 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); 3090 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); 3091 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3092 static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3093 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3094 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3095 static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3096 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3097 3098 // 3099 // Jump tables for the indirect lock functions. 3100 // Only fill in the odd entries, that avoids the need to shift out the low bit. 
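// These first tables dispatch the direct locks: a direct lock word keeps an odd
// tag in its low bits, whereas an indirect lock handle stores an even value
// (its table index shifted left by one, see __kmp_allocate_indirect_lock), so
// the tag can index the table directly and even words can be routed to the
// indirect dispatcher in entry 0.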
3101 // 3102 #define expand_func0(l, op) 0,op##_##l##_##lock, 3103 void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) 3104 = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) }; 3105 3106 #define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock, 3107 void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *) 3108 = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) }; 3109 3110 // Differentiates *lock and *lock_with_checks. 3111 #define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, 3112 #define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3113 static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3114 = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) }, 3115 { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } }; 3116 static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3117 = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) }, 3118 { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } }; 3119 3120 #define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, 3121 #define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3122 static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3123 = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) }, 3124 { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } }; 3125 3126 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3127 void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3128 void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3129 int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3130 3131 // 3132 // Jump tables for the indirect lock functions. 3133 // 3134 #define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, 3135 void (*__kmp_indirect_init_ops[])(kmp_user_lock_p) 3136 = { FOREACH_I_LOCK(expand_func4, init) }; 3137 void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p) 3138 = { FOREACH_I_LOCK(expand_func4, destroy) }; 3139 3140 // Differentiates *lock and *lock_with_checks. 3141 #define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3142 #define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3143 static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3144 = { { FOREACH_I_LOCK(expand_func5, acquire) }, 3145 { FOREACH_I_LOCK(expand_func5c, acquire) } }; 3146 static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3147 = { { FOREACH_I_LOCK(expand_func5, release) }, 3148 { FOREACH_I_LOCK(expand_func5c, release) } }; 3149 3150 #define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3151 #define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3152 static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3153 = { { FOREACH_I_LOCK(expand_func6, test) }, 3154 { FOREACH_I_LOCK(expand_func6c, test) } }; 3155 3156 // Exposes only one set of jump tables (*lock or *lock_with_checks). 
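// __kmp_init_dynamic_user_locks() points these at row 0 or row 1 of the *_tab
// arrays above, depending on whether __kmp_env_consistency_check is set.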
3157 void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0; 3158 void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0; 3159 int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0; 3160 3161 // Lock index table. 3162 kmp_indirect_lock_t **__kmp_indirect_lock_table; 3163 kmp_lock_index_t __kmp_indirect_lock_table_size; 3164 kmp_lock_index_t __kmp_indirect_lock_table_next; 3165 3166 // Size of indirect locks. 3167 static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = { 3168 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), 3169 #if KMP_USE_ADAPTIVE_LOCKS 3170 sizeof(kmp_adaptive_lock_t), 3171 #endif 3172 sizeof(kmp_drdpa_lock_t), 3173 sizeof(kmp_tas_lock_t), 3174 #if DYNA_HAS_FUTEX 3175 sizeof(kmp_futex_lock_t), 3176 #endif 3177 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), 3178 sizeof(kmp_drdpa_lock_t) 3179 }; 3180 3181 // Jump tables for lock accessor/modifier. 3182 void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; 3183 void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; 3184 const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3185 kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3186 3187 // Use different lock pools for different lock types. 3188 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 }; 3189 3190 // Inserts the given lock ptr to the lock table. 3191 kmp_lock_index_t 3192 __kmp_insert_indirect_lock(kmp_indirect_lock_t *lck) 3193 { 3194 kmp_lock_index_t next = __kmp_indirect_lock_table_next; 3195 // Check capacity and double the size if required 3196 if (next >= __kmp_indirect_lock_table_size) { 3197 kmp_lock_index_t i; 3198 kmp_lock_index_t size = __kmp_indirect_lock_table_size; 3199 kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table; 3200 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *)); 3201 KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *)); 3202 __kmp_free(old_table); 3203 __kmp_indirect_lock_table_size = 2*next; 3204 } 3205 // Insert lck to the table and return the index. 3206 __kmp_indirect_lock_table[next] = lck; 3207 __kmp_indirect_lock_table_next++; 3208 return next; 3209 } 3210 3211 // User lock allocator for dynamically dispatched locks. 3212 kmp_indirect_lock_t * 3213 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) 3214 { 3215 kmp_indirect_lock_t *lck; 3216 kmp_lock_index_t idx; 3217 3218 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3219 3220 if (__kmp_indirect_lock_pool[tag] != NULL) { 3221 lck = __kmp_indirect_lock_pool[tag]; 3222 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3223 idx = lck->lock->pool.index; 3224 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3225 } else { 3226 lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t)); 3227 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3228 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3229 idx = __kmp_insert_indirect_lock(lck); 3230 } 3231 3232 __kmp_release_lock(&__kmp_global_lock, gtid); 3233 3234 lck->type = tag; 3235 3236 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3237 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. 
3238 } else { 3239 *((kmp_indirect_lock_t **)user_lock) = lck; 3240 } 3241 3242 return lck; 3243 } 3244 3245 // User lock lookup for dynamically dispatched locks. 3246 static __forceinline 3247 kmp_indirect_lock_t * 3248 __kmp_lookup_indirect_lock(void **user_lock, const char *func) 3249 { 3250 if (__kmp_env_consistency_check) { 3251 kmp_indirect_lock_t *lck = NULL; 3252 if (user_lock == NULL) { 3253 KMP_FATAL(LockIsUninitialized, func); 3254 } 3255 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3256 kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock); 3257 if (idx < 0 || idx >= __kmp_indirect_lock_table_size) { 3258 KMP_FATAL(LockIsUninitialized, func); 3259 } 3260 lck = __kmp_indirect_lock_table[idx]; 3261 } else { 3262 lck = *((kmp_indirect_lock_t **)user_lock); 3263 } 3264 if (lck == NULL) { 3265 KMP_FATAL(LockIsUninitialized, func); 3266 } 3267 return lck; 3268 } else { 3269 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3270 return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)]; 3271 } else { 3272 return *((kmp_indirect_lock_t **)user_lock); 3273 } 3274 } 3275 } 3276 3277 static void 3278 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) 3279 { 3280 #if KMP_USE_ADAPTIVE_LOCKS 3281 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3282 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3283 seq = lockseq_queuing; 3284 } 3285 #endif 3286 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq); 3287 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3288 DYNA_I_LOCK_FUNC(l, init)(l->lock); 3289 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type)); 3290 } 3291 3292 static void 3293 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) 3294 { 3295 kmp_uint32 gtid = __kmp_entry_gtid(); 3296 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3297 DYNA_I_LOCK_FUNC(l, destroy)(l->lock); 3298 kmp_indirect_locktag_t tag = l->type; 3299 3300 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3301 3302 // Use the base lock's space to keep the pool chain. 
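// (The lock object was destroyed just above, so reusing its storage for the
// free-list link and, when OMP_LOCK_T_SIZE < sizeof(void *), for the table
// index is safe.)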
3303 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3304 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3305 l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock); 3306 } 3307 __kmp_indirect_lock_pool[tag] = l; 3308 3309 __kmp_release_lock(&__kmp_global_lock, gtid); 3310 } 3311 3312 static void 3313 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3314 { 3315 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3316 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); 3317 } 3318 3319 static void 3320 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3321 { 3322 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3323 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3324 } 3325 3326 static int 3327 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3328 { 3329 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3330 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); 3331 } 3332 3333 static void 3334 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3335 { 3336 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3337 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); 3338 } 3339 3340 static void 3341 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3342 { 3343 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3344 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3345 } 3346 3347 static int 3348 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3349 { 3350 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3351 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); 3352 } 3353 3354 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3355 3356 // Initialize a hinted lock. 3357 void 3358 __kmp_init_lock_hinted(void **lock, int hint) 3359 { 3360 kmp_dyna_lockseq_t seq; 3361 switch (hint) { 3362 case kmp_lock_hint_uncontended: 3363 seq = lockseq_tas; 3364 break; 3365 case kmp_lock_hint_speculative: 3366 #if DYNA_HAS_HLE 3367 seq = lockseq_hle; 3368 #else 3369 seq = lockseq_tas; 3370 #endif 3371 break; 3372 case kmp_lock_hint_adaptive: 3373 #if KMP_USE_ADAPTIVE_LOCKS 3374 seq = lockseq_adaptive; 3375 #else 3376 seq = lockseq_queuing; 3377 #endif 3378 break; 3379 // Defaults to queuing locks. 3380 case kmp_lock_hint_contended: 3381 case kmp_lock_hint_nonspeculative: 3382 default: 3383 seq = lockseq_queuing; 3384 break; 3385 } 3386 if (DYNA_IS_D_LOCK(seq)) { 3387 DYNA_INIT_D_LOCK(lock, seq); 3388 #if USE_ITT_BUILD 3389 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 3390 #endif 3391 } else { 3392 DYNA_INIT_I_LOCK(lock, seq); 3393 #if USE_ITT_BUILD 3394 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); 3395 __kmp_itt_lock_creating(ilk->lock, NULL); 3396 #endif 3397 } 3398 } 3399 3400 // This is used only in kmp_error.c when consistency checking is on. 
3401 kmp_int32 3402 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) 3403 { 3404 switch (seq) { 3405 case lockseq_tas: 3406 case lockseq_nested_tas: 3407 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3408 #if DYNA_HAS_FUTEX 3409 case lockseq_futex: 3410 case lockseq_nested_futex: 3411 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3412 #endif 3413 case lockseq_ticket: 3414 case lockseq_nested_ticket: 3415 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3416 case lockseq_queuing: 3417 case lockseq_nested_queuing: 3418 #if KMP_USE_ADAPTIVE_LOCKS 3419 case lockseq_adaptive: 3420 #endif 3421 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3422 case lockseq_drdpa: 3423 case lockseq_nested_drdpa: 3424 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3425 default: 3426 return 0; 3427 } 3428 } 3429 3430 // The value initialized from KMP_LOCK_KIND needs to be translated to its 3431 // nested version. 3432 void 3433 __kmp_init_nest_lock_hinted(void **lock, int hint) 3434 { 3435 kmp_dyna_lockseq_t seq; 3436 switch (hint) { 3437 case kmp_lock_hint_uncontended: 3438 seq = lockseq_nested_tas; 3439 break; 3440 // Defaults to queuing locks. 3441 case kmp_lock_hint_contended: 3442 case kmp_lock_hint_nonspeculative: 3443 default: 3444 seq = lockseq_nested_queuing; 3445 break; 3446 } 3447 DYNA_INIT_I_LOCK(lock, seq); 3448 #if USE_ITT_BUILD 3449 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); 3450 __kmp_itt_lock_creating(ilk->lock, NULL); 3451 #endif 3452 } 3453 3454 // Initializes the lock table for indirect locks. 3455 static void 3456 __kmp_init_indirect_lock_table() 3457 { 3458 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024); 3459 __kmp_indirect_lock_table_size = 1024; 3460 __kmp_indirect_lock_table_next = 0; 3461 } 3462 3463 #if KMP_USE_ADAPTIVE_LOCKS 3464 # define init_lock_func(table, expand) { \ 3465 table[locktag_ticket] = expand(ticket); \ 3466 table[locktag_queuing] = expand(queuing); \ 3467 table[locktag_adaptive] = expand(queuing); \ 3468 table[locktag_drdpa] = expand(drdpa); \ 3469 table[locktag_nested_ticket] = expand(ticket); \ 3470 table[locktag_nested_queuing] = expand(queuing); \ 3471 table[locktag_nested_drdpa] = expand(drdpa); \ 3472 } 3473 #else 3474 # define init_lock_func(table, expand) { \ 3475 table[locktag_ticket] = expand(ticket); \ 3476 table[locktag_queuing] = expand(queuing); \ 3477 table[locktag_drdpa] = expand(drdpa); \ 3478 table[locktag_nested_ticket] = expand(ticket); \ 3479 table[locktag_nested_queuing] = expand(queuing); \ 3480 table[locktag_nested_drdpa] = expand(drdpa); \ 3481 } 3482 #endif // KMP_USE_ADAPTIVE_LOCKS 3483 3484 // Initializes data for dynamic user locks. 3485 void 3486 __kmp_init_dynamic_user_locks() 3487 { 3488 // Initialize jump table location 3489 int offset = (__kmp_env_consistency_check)? 1: 0; 3490 __kmp_direct_set_ops = direct_set_tab[offset]; 3491 __kmp_direct_unset_ops = direct_unset_tab[offset]; 3492 __kmp_direct_test_ops = direct_test_tab[offset]; 3493 __kmp_indirect_set_ops = indirect_set_tab[offset]; 3494 __kmp_indirect_unset_ops = indirect_unset_tab[offset]; 3495 __kmp_indirect_test_ops = indirect_test_tab[offset]; 3496 __kmp_init_indirect_lock_table(); 3497 3498 // Initialize lock accessor/modifier 3499 // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe.
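// For example, init_lock_func(__kmp_indirect_set_location, expand_func) below
// expands to one assignment per lock tag, such as
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
// with locktag_adaptive mapped to the queuing accessors when adaptive locks are
// enabled.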
3500 #define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location 3501 init_lock_func(__kmp_indirect_set_location, expand_func); 3502 #undef expand_func 3503 #define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags 3504 init_lock_func(__kmp_indirect_set_flags, expand_func); 3505 #undef expand_func 3506 #define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location 3507 init_lock_func(__kmp_indirect_get_location, expand_func); 3508 #undef expand_func 3509 #define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags 3510 init_lock_func(__kmp_indirect_get_flags, expand_func); 3511 #undef expand_func 3512 3513 __kmp_init_user_locks = TRUE; 3514 } 3515 3516 // Clean up the lock table. 3517 void 3518 __kmp_cleanup_indirect_user_locks() 3519 { 3520 kmp_lock_index_t i; 3521 int k; 3522 3523 // Clean up locks in the pools first (they were already destroyed before going into the pools). 3524 for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) { 3525 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; 3526 while (l != NULL) { 3527 kmp_indirect_lock_t *ll = l; 3528 l = (kmp_indirect_lock_t *)l->lock->pool.next; 3529 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3530 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL; 3531 } 3532 __kmp_free(ll->lock); 3533 __kmp_free(ll); 3534 } 3535 } 3536 // Clean up the remaining undestroyed locks. 3537 for (i = 0; i < __kmp_indirect_lock_table_next; i++) { 3538 kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i]; 3539 if (l != NULL) { 3540 // Locks not destroyed explicitly need to be destroyed here. 3541 DYNA_I_LOCK_FUNC(l, destroy)(l->lock); 3542 __kmp_free(l->lock); 3543 __kmp_free(l); 3544 } 3545 } 3546 // Free the table 3547 __kmp_free(__kmp_indirect_lock_table); 3548 3549 __kmp_init_user_locks = FALSE; 3550 } 3551 3552 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3553 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3554 3555 #else // KMP_USE_DYNAMIC_LOCK 3556 3557 /* ------------------------------------------------------------------------ */ 3558 /* user locks 3559 * 3560 * They are implemented as a table of function pointers which are set to the 3561 * lock functions of the appropriate kind, once that has been determined. 
3562 */ 3563 3564 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3565 3566 size_t __kmp_base_user_lock_size = 0; 3567 size_t __kmp_user_lock_size = 0; 3568 3569 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL; 3570 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3571 3572 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3573 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3574 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3575 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL; 3576 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3577 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3578 3579 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3580 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3581 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3582 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3583 3584 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL; 3585 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL; 3586 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL; 3587 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL; 3588 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL; 3589 3590 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) 3591 { 3592 switch ( user_lock_kind ) { 3593 case lk_default: 3594 default: 3595 KMP_ASSERT( 0 ); 3596 3597 case lk_tas: { 3598 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t ); 3599 __kmp_user_lock_size = sizeof( kmp_tas_lock_t ); 3600 3601 __kmp_get_user_lock_owner_ = 3602 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3603 ( &__kmp_get_tas_lock_owner ); 3604 3605 if ( __kmp_env_consistency_check ) { 3606 KMP_BIND_USER_LOCK_WITH_CHECKS(tas); 3607 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); 3608 } 3609 else { 3610 KMP_BIND_USER_LOCK(tas); 3611 KMP_BIND_NESTED_USER_LOCK(tas); 3612 } 3613 3614 __kmp_destroy_user_lock_ = 3615 ( void ( * )( kmp_user_lock_p ) ) 3616 ( &__kmp_destroy_tas_lock ); 3617 3618 __kmp_is_user_lock_initialized_ = 3619 ( int ( * )( kmp_user_lock_p ) ) NULL; 3620 3621 __kmp_get_user_lock_location_ = 3622 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; 3623 3624 __kmp_set_user_lock_location_ = 3625 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; 3626 3627 __kmp_get_user_lock_flags_ = 3628 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; 3629 3630 __kmp_set_user_lock_flags_ = 3631 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; 3632 } 3633 break; 3634 3635 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 3636 3637 case lk_futex: { 3638 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t ); 3639 __kmp_user_lock_size = sizeof( kmp_futex_lock_t ); 3640 3641 __kmp_get_user_lock_owner_ = 3642 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3643 ( &__kmp_get_futex_lock_owner ); 3644 3645 if ( __kmp_env_consistency_check ) { 3646 KMP_BIND_USER_LOCK_WITH_CHECKS(futex); 3647 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); 3648 } 3649 else { 3650 KMP_BIND_USER_LOCK(futex); 3651 
void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
{
    switch ( user_lock_kind ) {
        case lk_default:
        default:
        KMP_ASSERT( 0 );

        case lk_tas: {
            __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
            __kmp_user_lock_size = sizeof( kmp_tas_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_tas_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
            }
            else {
                KMP_BIND_USER_LOCK(tas);
                KMP_BIND_NESTED_USER_LOCK(tas);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_tas_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)

        case lk_futex: {
            __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
            __kmp_user_lock_size = sizeof( kmp_futex_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_futex_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
            }
            else {
                KMP_BIND_USER_LOCK(futex);
                KMP_BIND_NESTED_USER_LOCK(futex);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_futex_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)

        case lk_ticket: {
            __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
            __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_ticket_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
            }
            else {
                KMP_BIND_USER_LOCK(ticket);
                KMP_BIND_NESTED_USER_LOCK(ticket);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_ticket_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_ticket_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_ticket_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_ticket_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_ticket_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_ticket_lock_flags );
        }
        break;

        case lk_queuing: {
            __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
            __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
            }
            else {
                KMP_BIND_USER_LOCK(queuing);
                KMP_BIND_NESTED_USER_LOCK(queuing);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_queuing_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_queuing_lock_flags );
        }
        break;

#if KMP_USE_ADAPTIVE_LOCKS
        case lk_adaptive: {
            __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
            __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
            }
            else {
                KMP_BIND_USER_LOCK(adaptive);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_adaptive_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_queuing_lock_flags );

        }
        break;
#endif // KMP_USE_ADAPTIVE_LOCKS

        case lk_drdpa: {
            __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
            __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_drdpa_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
            }
            else {
                KMP_BIND_USER_LOCK(drdpa);
                KMP_BIND_NESTED_USER_LOCK(drdpa);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_drdpa_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_drdpa_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_drdpa_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_drdpa_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_drdpa_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_drdpa_lock_flags );
        }
        break;
    }
}

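/*
 * Illustrative sketch only, not part of the library: the runtime calls
 * __kmp_set_user_lock_vptrs() once, after the user lock kind has been
 * determined (for example while processing the lock-kind setting), and every
 * user-lock entry point then goes through the pointers bound above.  The
 * helper below merely shows the intended usage; lk_queuing is an arbitrary
 * choice here.
 */
#if 0
static void
__example_bind_queuing_locks( void )
{
    __kmp_user_lock_kind = lk_queuing;
    __kmp_set_user_lock_vptrs( lk_queuing );
}
#endif
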
// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks* __kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t
__kmp_lock_table_insert( kmp_user_lock_p lck )
{
    // Assume that kmp_global_lock is held upon entry/exit.
    kmp_lock_index_t index;
    if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
        kmp_lock_index_t size;
        kmp_user_lock_p *table;
        // Reallocate lock table.
        if ( __kmp_user_lock_table.allocated == 0 ) {
            size = 1024;
        }
        else {
            size = __kmp_user_lock_table.allocated * 2;
        }
        table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
        KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
        table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
        // We cannot free the previous table now, since it may be in use by other
        // threads. So save the pointer to the previous table in the first element
        // of the new table. All the tables are organized into a list, and can be
        // freed when the library is shutting down.
        __kmp_user_lock_table.table = table;
        __kmp_user_lock_table.allocated = size;
    }
    KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
    index = __kmp_user_lock_table.used;
    __kmp_user_lock_table.table[ index ] = lck;
    ++ __kmp_user_lock_table.used;
    return index;
}

static kmp_user_lock_p
__kmp_lock_block_allocate()
{
    // Assume that kmp_global_lock is held upon entry/exit.
    static int last_index = 0;
    if ( ( last_index >= __kmp_num_locks_in_block )
      || ( __kmp_lock_blocks == NULL ) ) {
        // Restart the index.
        last_index = 0;
        // Need to allocate a new block.
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
        char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
        // Set up the new block.
        kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
        new_block->next_block = __kmp_lock_blocks;
        new_block->locks = (void *)buffer;
        // Publish the new block.
        KMP_MB();
        __kmp_lock_blocks = new_block;
    }
    kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
      [ last_index * __kmp_user_lock_size ] ) );
    last_index++;
    return ret;
}

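/*
 * Illustrative sketch only, not part of the library: a block allocated above
 * is a single buffer holding __kmp_num_locks_in_block lock objects followed
 * by the kmp_block_of_locks header, i.e.
 *
 *   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
 *
 * The hypothetical helper below recomputes the address of lock i in the
 * current block the same way __kmp_lock_block_allocate() does.
 */
#if 0
static kmp_user_lock_p
__example_lock_in_current_block( int i )
{
    KMP_DEBUG_ASSERT( __kmp_lock_blocks != NULL );
    KMP_DEBUG_ASSERT( 0 <= i && i < __kmp_num_locks_in_block );
    // The locks area starts at the beginning of the block's buffer; each
    // lock occupies __kmp_user_lock_size bytes.
    char *base = (char *)( __kmp_lock_blocks->locks );
    return (kmp_user_lock_p)( &base[ i * __kmp_user_lock_size ] );
}
#endif
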
//
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
//
kmp_user_lock_p
__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
  kmp_lock_flags_t flags )
{
    kmp_user_lock_p lck;
    kmp_lock_index_t index;
    KMP_DEBUG_ASSERT( user_lock );

    __kmp_acquire_lock( &__kmp_global_lock, gtid );

    if ( __kmp_lock_pool == NULL ) {
        // Lock pool is empty. Allocate new memory.
        if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
            lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
        }
        else {
            lck = __kmp_lock_block_allocate();
        }

        // Insert lock in the table so that it can be freed in __kmp_cleanup,
        // and the debugger has info on all allocated locks.
        index = __kmp_lock_table_insert( lck );
    }
    else {
        // Pick up a lock from the pool.
        lck = __kmp_lock_pool;
        index = __kmp_lock_pool->pool.index;
        __kmp_lock_pool = __kmp_lock_pool->pool.next;
    }

    //
    // We could potentially differentiate between nested and regular locks
    // here, and do the lock table lookup for regular locks only.
    //
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        * ( (kmp_lock_index_t *) user_lock ) = index;
    }
    else {
        * ( (kmp_user_lock_p *) user_lock ) = lck;
    }

    // Mark the lock if it is a critical section lock.
    __kmp_set_user_lock_flags( lck, flags );

    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up

    return lck;
}

// Return the lock's memory to the pool for reuse.
void
__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( user_lock != NULL );
    KMP_DEBUG_ASSERT( lck != NULL );

    __kmp_acquire_lock( & __kmp_global_lock, gtid );

    lck->pool.next = __kmp_lock_pool;
    __kmp_lock_pool = lck;
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
        KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
        lck->pool.index = index;
    }

    __kmp_release_lock( & __kmp_global_lock, gtid );
}

kmp_user_lock_p
__kmp_lookup_user_lock( void **user_lock, char const *func )
{
    kmp_user_lock_p lck = NULL;

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
        if ( __kmp_env_consistency_check ) {
            if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
                KMP_FATAL( LockIsUninitialized, func );
            }
        }
        KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        lck = __kmp_user_lock_table.table[index];
    }
    else {
        lck = *( (kmp_user_lock_p *)user_lock );
    }

    if ( __kmp_env_consistency_check ) {
        if ( lck == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    return lck;
}

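/*
 * Illustrative sketch only, not part of the library: together,
 * __kmp_user_lock_allocate() and __kmp_lookup_user_lock() above hide how an
 * omp_lock_t handle is represented.  When omp_lock_t is too small to hold a
 * pointer (OMP_LOCK_T_SIZE < sizeof(void *)), the handle stores an index
 * into __kmp_user_lock_table; otherwise it stores the lock pointer directly.
 * A hypothetical entry point would therefore look roughly like this.
 */
#if 0
static void
__example_set_lock( void **user_lock, kmp_int32 gtid )
{
    // Resolve the handle (index or pointer) to the actual lock object ...
    kmp_user_lock_p lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
    // ... then dispatch through the bound function-pointer table.
    ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );
}
#endif
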
void
__kmp_cleanup_user_locks( void )
{
    //
    // Reset lock pool. Do not worry about locks in the pool -- we will free
    // them when iterating through the lock table (it includes all the locks,
    // dead or alive).
    //
    __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck) \
        ( ( __kmp_get_user_lock_flags_ != NULL ) && \
        ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )

    //
    // Loop through lock table, free all locks.
    //
    // Do not free item [0], it is reserved for lock tables list.
    //
    // FIXME - we are iterating through a list of (pointers to) objects of
    // type union kmp_user_lock, but we have no way of knowing whether the
    // base type is currently "pool" or whatever the global user lock type
    // is.
    //
    // We are relying on the fact that for all of the user lock types
    // (except "tas"), the first field in the lock struct is the "initialized"
    // field, which is set to the address of the lock object itself when
    // the lock is initialized. When the union is of type "pool", the
    // first field is a pointer to the next object in the free list, which
    // will not be the same address as the object itself.
    //
    // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
    // will fail for "pool" objects on the free list. This must happen as
    // the "location" field of real user locks overlaps the "index" field
    // of "pool" objects.
    //
    // It would be better to run through the free list, and remove all "pool"
    // objects from the lock table before executing this loop. However,
    // "pool" objects do not always have their index field set (only on
    // lin_32e), and I don't want to search the lock table for the address
    // of every "pool" object on the free list.
    //
    while ( __kmp_user_lock_table.used > 1 ) {
        const ident *loc;

        //
        // Reduce __kmp_user_lock_table.used before freeing the lock,
        // so that the state of the lock table stays consistent.
        //
        kmp_user_lock_p lck = __kmp_user_lock_table.table[
          --__kmp_user_lock_table.used ];

        if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
          ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
            //
            // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
            // initialized AND it is NOT a critical section (user is not
            // responsible for destroying criticals) AND we know source
            // location to report.
            //
            if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
              ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
              ( loc->psource != NULL ) ) {
                kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
                KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
                __kmp_str_loc_free( &str_loc );
            }

#ifdef KMP_DEBUG
            if ( IS_CRITICAL( lck ) ) {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
            }
            else {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
            }
#endif // KMP_DEBUG

            //
            // Cleanup internal lock dynamic resources
            // (for drdpa locks particularly).
            //
            __kmp_destroy_user_lock( lck );
        }

        //
        // Free the lock if block allocation of locks is not used.
        //
        if ( __kmp_lock_blocks == NULL ) {
            __kmp_free( lck );
        }
    }

#undef IS_CRITICAL

    //
    // Delete lock table(s).
    //
    kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
    __kmp_user_lock_table.table = NULL;
    __kmp_user_lock_table.allocated = 0;

    while ( table_ptr != NULL ) {
        //
        // In the first element we saved the pointer to the previous
        // (smaller) lock table.
        //
        kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
        __kmp_free( table_ptr );
        table_ptr = next;
    }

    //
    // Free buffers allocated for blocks of locks.
    //
    kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
    __kmp_lock_blocks = NULL;

    while ( block_ptr != NULL ) {
        kmp_block_of_locks_t *next = block_ptr->next_block;
        __kmp_free( block_ptr->locks );
        //
        // *block_ptr itself was allocated at the end of the locks vector.
        //
        block_ptr = next;
    }

    TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK