1 /* 2 * kmp_lock.cpp -- lock-related functions 3 */ 4 5 6 //===----------------------------------------------------------------------===// 7 // 8 // The LLVM Compiler Infrastructure 9 // 10 // This file is dual licensed under the MIT and the University of Illinois Open 11 // Source Licenses. See LICENSE.txt for details. 12 // 13 //===----------------------------------------------------------------------===// 14 15 16 #include <stddef.h> 17 18 #include "kmp.h" 19 #include "kmp_itt.h" 20 #include "kmp_i18n.h" 21 #include "kmp_lock.h" 22 #include "kmp_io.h" 23 24 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 25 # include <unistd.h> 26 # include <sys/syscall.h> 27 // We should really include <futex.h>, but that causes compatibility problems on different 28 // Linux* OS distributions that either require that you include (or break when you try to include) 29 // <pci/types.h>. 30 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change) 31 // we just define the constants here and don't include <futex.h> 32 # ifndef FUTEX_WAIT 33 # define FUTEX_WAIT 0 34 # endif 35 # ifndef FUTEX_WAKE 36 # define FUTEX_WAKE 1 37 # endif 38 #endif 39 40 /* Implement spin locks for internal library use. */ 41 /* The algorithm implemented is Lamport's bakery lock [1974]. */ 42 43 void 44 __kmp_validate_locks( void ) 45 { 46 int i; 47 kmp_uint32 x, y; 48 49 /* Check to make sure unsigned arithmetic does wraps properly */ 50 x = ~((kmp_uint32) 0) - 2; 51 y = x - 2; 52 53 for (i = 0; i < 8; ++i, ++x, ++y) { 54 kmp_uint32 z = (x - y); 55 KMP_ASSERT( z == 2 ); 56 } 57 58 KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 ); 59 } 60 61 62 /* ------------------------------------------------------------------------ */ 63 /* test and set locks */ 64 65 // 66 // For the non-nested locks, we can only assume that the first 4 bytes were 67 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel 68 // compiler only allocates a 4 byte pointer on IA-32 architecture. On 69 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated. 70 // 71 // gcc reserves >= 8 bytes for nested locks, so we can assume that the 72 // entire 8 bytes were allocated for nested locks on all 64-bit platforms. 73 // 74 75 static kmp_int32 76 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck ) 77 { 78 return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1; 79 } 80 81 static inline bool 82 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck ) 83 { 84 return lck->lk.depth_locked != -1; 85 } 86 87 __forceinline static void 88 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid ) 89 { 90 KMP_MB(); 91 92 #ifdef USE_LOCK_PROFILE 93 kmp_uint32 curr = TCR_4( lck->lk.poll ); 94 if ( ( curr != 0 ) && ( curr != gtid + 1 ) ) 95 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 96 /* else __kmp_printf( "." );*/ 97 #endif /* USE_LOCK_PROFILE */ 98 99 if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) ) 100 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) { 101 KMP_FSYNC_ACQUIRED(lck); 102 return; 103 } 104 105 kmp_uint32 spins; 106 KMP_FSYNC_PREPARE( lck ); 107 KMP_INIT_YIELD( spins ); 108 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 109 __kmp_xproc ) ) { 110 KMP_YIELD( TRUE ); 111 } 112 else { 113 KMP_YIELD_SPIN( spins ); 114 } 115 116 while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) || 117 ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) { 118 // 119 // FIXME - use exponential backoff here 120 // 121 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 122 __kmp_xproc ) ) { 123 KMP_YIELD( TRUE ); 124 } 125 else { 126 KMP_YIELD_SPIN( spins ); 127 } 128 } 129 KMP_FSYNC_ACQUIRED( lck ); 130 } 131 132 void 133 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 134 { 135 __kmp_acquire_tas_lock_timed_template( lck, gtid ); 136 } 137 138 static void 139 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 140 { 141 char const * const func = "omp_set_lock"; 142 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 143 && __kmp_is_tas_lock_nestable( lck ) ) { 144 KMP_FATAL( LockNestableUsedAsSimple, func ); 145 } 146 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) { 147 KMP_FATAL( LockIsAlreadyOwned, func ); 148 } 149 __kmp_acquire_tas_lock( lck, gtid ); 150 } 151 152 int 153 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 154 { 155 if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) ) 156 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) { 157 KMP_FSYNC_ACQUIRED( lck ); 158 return TRUE; 159 } 160 return FALSE; 161 } 162 163 static int 164 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 165 { 166 char const * const func = "omp_test_lock"; 167 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 168 && __kmp_is_tas_lock_nestable( lck ) ) { 169 KMP_FATAL( LockNestableUsedAsSimple, func ); 170 } 171 return __kmp_test_tas_lock( lck, gtid ); 172 } 173 174 int 175 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 176 { 177 KMP_MB(); /* Flush all pending memory write invalidates. */ 178 179 KMP_FSYNC_RELEASING(lck); 180 KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) ); 181 KMP_MB(); /* Flush all pending memory write invalidates. */ 182 183 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 184 __kmp_xproc ) ); 185 return KMP_LOCK_RELEASED; 186 } 187 188 static int 189 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 190 { 191 char const * const func = "omp_unset_lock"; 192 KMP_MB(); /* in case another processor initialized lock */ 193 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 194 && __kmp_is_tas_lock_nestable( lck ) ) { 195 KMP_FATAL( LockNestableUsedAsSimple, func ); 196 } 197 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 198 KMP_FATAL( LockUnsettingFree, func ); 199 } 200 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 ) 201 && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) { 202 KMP_FATAL( LockUnsettingSetByAnother, func ); 203 } 204 return __kmp_release_tas_lock( lck, gtid ); 205 } 206 207 void 208 __kmp_init_tas_lock( kmp_tas_lock_t * lck ) 209 { 210 TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) ); 211 } 212 213 static void 214 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck ) 215 { 216 __kmp_init_tas_lock( lck ); 217 } 218 219 void 220 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ) 221 { 222 lck->lk.poll = 0; 223 } 224 225 static void 226 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck ) 227 { 228 char const * const func = "omp_destroy_lock"; 229 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 230 && __kmp_is_tas_lock_nestable( lck ) ) { 231 KMP_FATAL( LockNestableUsedAsSimple, func ); 232 } 233 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 234 KMP_FATAL( LockStillOwned, func ); 235 } 236 __kmp_destroy_tas_lock( lck ); 237 } 238 239 240 // 241 // nested test and set locks 242 // 243 244 void 245 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 246 { 247 KMP_DEBUG_ASSERT( gtid >= 0 ); 248 249 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 250 lck->lk.depth_locked += 1; 251 } 252 else { 253 __kmp_acquire_tas_lock_timed_template( lck, gtid ); 254 lck->lk.depth_locked = 1; 255 } 256 } 257 258 static void 259 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 260 { 261 char const * const func = "omp_set_nest_lock"; 262 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 263 KMP_FATAL( LockSimpleUsedAsNestable, func ); 264 } 265 __kmp_acquire_nested_tas_lock( lck, gtid ); 266 } 267 268 int 269 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 270 { 271 int retval; 272 273 KMP_DEBUG_ASSERT( gtid >= 0 ); 274 275 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 276 retval = ++lck->lk.depth_locked; 277 } 278 else if ( !__kmp_test_tas_lock( lck, gtid ) ) { 279 retval = 0; 280 } 281 else { 282 KMP_MB(); 283 retval = lck->lk.depth_locked = 1; 284 } 285 return retval; 286 } 287 288 static int 289 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 290 { 291 char const * const func = "omp_test_nest_lock"; 292 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 293 KMP_FATAL( LockSimpleUsedAsNestable, func ); 294 } 295 return __kmp_test_nested_tas_lock( lck, gtid ); 296 } 297 298 int 299 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 300 { 301 KMP_DEBUG_ASSERT( gtid >= 0 ); 302 303 KMP_MB(); 304 if ( --(lck->lk.depth_locked) == 0 ) { 305 __kmp_release_tas_lock( lck, gtid ); 306 return KMP_LOCK_RELEASED; 307 } 308 return KMP_LOCK_STILL_HELD; 309 } 310 311 static int 312 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 313 { 314 char const * const func = "omp_unset_nest_lock"; 315 KMP_MB(); /* in case another processor initialized lock */ 316 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 317 KMP_FATAL( LockSimpleUsedAsNestable, func ); 318 } 319 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 320 KMP_FATAL( LockUnsettingFree, func ); 321 } 322 if ( __kmp_get_tas_lock_owner( lck ) != gtid ) { 323 KMP_FATAL( LockUnsettingSetByAnother, func ); 324 } 325 return __kmp_release_nested_tas_lock( lck, gtid ); 326 } 327 328 void 329 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck ) 330 { 331 __kmp_init_tas_lock( lck ); 332 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 333 } 334 335 static void 336 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck ) 337 { 338 __kmp_init_nested_tas_lock( lck ); 339 } 340 341 void 342 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ) 343 { 344 __kmp_destroy_tas_lock( lck ); 345 lck->lk.depth_locked = 0; 346 } 347 348 static void 349 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) 350 { 351 char const * const func = "omp_destroy_nest_lock"; 352 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 353 KMP_FATAL( LockSimpleUsedAsNestable, func ); 354 } 355 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 356 KMP_FATAL( LockStillOwned, func ); 357 } 358 __kmp_destroy_nested_tas_lock( lck ); 359 } 360 361 362 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 363 364 /* ------------------------------------------------------------------------ */ 365 /* futex locks */ 366 367 // futex locks are really just test and set locks, with a different method 368 // of handling contention. They take the same amount of space as test and 369 // set locks, and are allocated the same way (i.e. use the area allocated by 370 // the compiler for non-nested locks / allocate nested locks on the heap). 371 372 static kmp_int32 373 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck ) 374 { 375 return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1; 376 } 377 378 static inline bool 379 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck ) 380 { 381 return lck->lk.depth_locked != -1; 382 } 383 384 __forceinline static void 385 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid ) 386 { 387 kmp_int32 gtid_code = ( gtid + 1 ) << 1; 388 389 KMP_MB(); 390 391 #ifdef USE_LOCK_PROFILE 392 kmp_uint32 curr = TCR_4( lck->lk.poll ); 393 if ( ( curr != 0 ) && ( curr != gtid_code ) ) 394 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 395 /* else __kmp_printf( "." );*/ 396 #endif /* USE_LOCK_PROFILE */ 397 398 KMP_FSYNC_PREPARE( lck ); 399 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", 400 lck, lck->lk.poll, gtid ) ); 401 402 kmp_int32 poll_val; 403 404 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), 405 DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) { 406 407 kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; 408 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", 409 lck, gtid, poll_val, cond ) ); 410 411 // 412 // NOTE: if you try to use the following condition for this branch 413 // 414 // if ( poll_val & 1 == 0 ) 415 // 416 // Then the 12.0 compiler has a bug where the following block will 417 // always be skipped, regardless of the value of the LSB of poll_val. 418 // 419 if ( ! cond ) { 420 // 421 // Try to set the lsb in the poll to indicate to the owner 422 // thread that they need to wake this thread up. 423 // 424 if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) { 425 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", 426 lck, lck->lk.poll, gtid ) ); 427 continue; 428 } 429 poll_val |= DYNA_LOCK_BUSY(1, futex); 430 431 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", 432 lck, lck->lk.poll, gtid ) ); 433 } 434 435 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", 436 lck, gtid, poll_val ) ); 437 438 kmp_int32 rc; 439 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, 440 poll_val, NULL, NULL, 0 ) ) != 0 ) { 441 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n", 442 lck, gtid, poll_val, rc, errno ) ); 443 continue; 444 } 445 446 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", 447 lck, gtid, poll_val ) ); 448 // 449 // This thread has now done a successful futex wait call and was 450 // entered on the OS futex queue. We must now perform a futex 451 // wake call when releasing the lock, as we have no idea how many 452 // other threads are in the queue. 453 // 454 gtid_code |= 1; 455 } 456 457 KMP_FSYNC_ACQUIRED( lck ); 458 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", 459 lck, lck->lk.poll, gtid ) ); 460 } 461 462 void 463 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 464 { 465 __kmp_acquire_futex_lock_timed_template( lck, gtid ); 466 } 467 468 static void 469 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 470 { 471 char const * const func = "omp_set_lock"; 472 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 473 && __kmp_is_futex_lock_nestable( lck ) ) { 474 KMP_FATAL( LockNestableUsedAsSimple, func ); 475 } 476 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) { 477 KMP_FATAL( LockIsAlreadyOwned, func ); 478 } 479 __kmp_acquire_futex_lock( lck, gtid ); 480 } 481 482 int 483 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 484 { 485 if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) { 486 KMP_FSYNC_ACQUIRED( lck ); 487 return TRUE; 488 } 489 return FALSE; 490 } 491 492 static int 493 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 494 { 495 char const * const func = "omp_test_lock"; 496 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 497 && __kmp_is_futex_lock_nestable( lck ) ) { 498 KMP_FATAL( LockNestableUsedAsSimple, func ); 499 } 500 return __kmp_test_futex_lock( lck, gtid ); 501 } 502 503 int 504 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 505 { 506 KMP_MB(); /* Flush all pending memory write invalidates. */ 507 508 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", 509 lck, lck->lk.poll, gtid ) ); 510 511 KMP_FSYNC_RELEASING(lck); 512 513 kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) ); 514 515 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", 516 lck, gtid, poll_val ) ); 517 518 if ( DYNA_LOCK_STRIP(poll_val) & 1 ) { 519 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", 520 lck, gtid ) ); 521 syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 ); 522 } 523 524 KMP_MB(); /* Flush all pending memory write invalidates. */ 525 526 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", 527 lck, lck->lk.poll, gtid ) ); 528 529 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 530 __kmp_xproc ) ); 531 return KMP_LOCK_RELEASED; 532 } 533 534 static int 535 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 536 { 537 char const * const func = "omp_unset_lock"; 538 KMP_MB(); /* in case another processor initialized lock */ 539 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 540 && __kmp_is_futex_lock_nestable( lck ) ) { 541 KMP_FATAL( LockNestableUsedAsSimple, func ); 542 } 543 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 544 KMP_FATAL( LockUnsettingFree, func ); 545 } 546 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 ) 547 && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) { 548 KMP_FATAL( LockUnsettingSetByAnother, func ); 549 } 550 return __kmp_release_futex_lock( lck, gtid ); 551 } 552 553 void 554 __kmp_init_futex_lock( kmp_futex_lock_t * lck ) 555 { 556 TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) ); 557 } 558 559 static void 560 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck ) 561 { 562 __kmp_init_futex_lock( lck ); 563 } 564 565 void 566 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ) 567 { 568 lck->lk.poll = 0; 569 } 570 571 static void 572 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck ) 573 { 574 char const * const func = "omp_destroy_lock"; 575 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 576 && __kmp_is_futex_lock_nestable( lck ) ) { 577 KMP_FATAL( LockNestableUsedAsSimple, func ); 578 } 579 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 580 KMP_FATAL( LockStillOwned, func ); 581 } 582 __kmp_destroy_futex_lock( lck ); 583 } 584 585 586 // 587 // nested futex locks 588 // 589 590 void 591 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 592 { 593 KMP_DEBUG_ASSERT( gtid >= 0 ); 594 595 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 596 lck->lk.depth_locked += 1; 597 } 598 else { 599 __kmp_acquire_futex_lock_timed_template( lck, gtid ); 600 lck->lk.depth_locked = 1; 601 } 602 } 603 604 static void 605 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 606 { 607 char const * const func = "omp_set_nest_lock"; 608 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 609 KMP_FATAL( LockSimpleUsedAsNestable, func ); 610 } 611 __kmp_acquire_nested_futex_lock( lck, gtid ); 612 } 613 614 int 615 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 616 { 617 int retval; 618 619 KMP_DEBUG_ASSERT( gtid >= 0 ); 620 621 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 622 retval = ++lck->lk.depth_locked; 623 } 624 else if ( !__kmp_test_futex_lock( lck, gtid ) ) { 625 retval = 0; 626 } 627 else { 628 KMP_MB(); 629 retval = lck->lk.depth_locked = 1; 630 } 631 return retval; 632 } 633 634 static int 635 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 636 { 637 char const * const func = "omp_test_nest_lock"; 638 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 639 KMP_FATAL( LockSimpleUsedAsNestable, func ); 640 } 641 return __kmp_test_nested_futex_lock( lck, gtid ); 642 } 643 644 int 645 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 646 { 647 KMP_DEBUG_ASSERT( gtid >= 0 ); 648 649 KMP_MB(); 650 if ( --(lck->lk.depth_locked) == 0 ) { 651 __kmp_release_futex_lock( lck, gtid ); 652 return KMP_LOCK_RELEASED; 653 } 654 return KMP_LOCK_STILL_HELD; 655 } 656 657 static int 658 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 659 { 660 char const * const func = "omp_unset_nest_lock"; 661 KMP_MB(); /* in case another processor initialized lock */ 662 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 663 KMP_FATAL( LockSimpleUsedAsNestable, func ); 664 } 665 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 666 KMP_FATAL( LockUnsettingFree, func ); 667 } 668 if ( __kmp_get_futex_lock_owner( lck ) != gtid ) { 669 KMP_FATAL( LockUnsettingSetByAnother, func ); 670 } 671 return __kmp_release_nested_futex_lock( lck, gtid ); 672 } 673 674 void 675 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck ) 676 { 677 __kmp_init_futex_lock( lck ); 678 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 679 } 680 681 static void 682 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck ) 683 { 684 __kmp_init_nested_futex_lock( lck ); 685 } 686 687 void 688 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ) 689 { 690 __kmp_destroy_futex_lock( lck ); 691 lck->lk.depth_locked = 0; 692 } 693 694 static void 695 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck ) 696 { 697 char const * const func = "omp_destroy_nest_lock"; 698 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 699 KMP_FATAL( LockSimpleUsedAsNestable, func ); 700 } 701 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 702 KMP_FATAL( LockStillOwned, func ); 703 } 704 __kmp_destroy_nested_futex_lock( lck ); 705 } 706 707 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 708 709 710 /* ------------------------------------------------------------------------ */ 711 /* ticket (bakery) locks */ 712 713 static kmp_int32 714 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck ) 715 { 716 return TCR_4( lck->lk.owner_id ) - 1; 717 } 718 719 static inline bool 720 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck ) 721 { 722 return lck->lk.depth_locked != -1; 723 } 724 725 static kmp_uint32 726 __kmp_bakery_check(kmp_uint value, kmp_uint checker) 727 { 728 register kmp_uint32 pause; 729 730 if (value == checker) { 731 return TRUE; 732 } 733 for (pause = checker - value; pause != 0; --pause); 734 return FALSE; 735 } 736 737 __forceinline static void 738 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 739 { 740 kmp_uint32 my_ticket; 741 KMP_MB(); 742 743 my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket ); 744 745 #ifdef USE_LOCK_PROFILE 746 if ( TCR_4( lck->lk.now_serving ) != my_ticket ) 747 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 748 /* else __kmp_printf( "." );*/ 749 #endif /* USE_LOCK_PROFILE */ 750 751 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { 752 KMP_FSYNC_ACQUIRED(lck); 753 return; 754 } 755 KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck ); 756 KMP_FSYNC_ACQUIRED(lck); 757 } 758 759 void 760 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 761 { 762 __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 763 } 764 765 static void 766 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 767 { 768 char const * const func = "omp_set_lock"; 769 if ( lck->lk.initialized != lck ) { 770 KMP_FATAL( LockIsUninitialized, func ); 771 } 772 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 773 KMP_FATAL( LockNestableUsedAsSimple, func ); 774 } 775 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) { 776 KMP_FATAL( LockIsAlreadyOwned, func ); 777 } 778 779 __kmp_acquire_ticket_lock( lck, gtid ); 780 781 lck->lk.owner_id = gtid + 1; 782 } 783 784 int 785 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 786 { 787 kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket ); 788 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { 789 kmp_uint32 next_ticket = my_ticket + 1; 790 if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket, 791 my_ticket, next_ticket ) ) { 792 KMP_FSYNC_ACQUIRED( lck ); 793 return TRUE; 794 } 795 } 796 return FALSE; 797 } 798 799 static int 800 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 801 { 802 char const * const func = "omp_test_lock"; 803 if ( lck->lk.initialized != lck ) { 804 KMP_FATAL( LockIsUninitialized, func ); 805 } 806 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 807 KMP_FATAL( LockNestableUsedAsSimple, func ); 808 } 809 810 int retval = __kmp_test_ticket_lock( lck, gtid ); 811 812 if ( retval ) { 813 lck->lk.owner_id = gtid + 1; 814 } 815 return retval; 816 } 817 818 int 819 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 820 { 821 kmp_uint32 distance; 822 823 KMP_MB(); /* Flush all pending memory write invalidates. */ 824 825 KMP_FSYNC_RELEASING(lck); 826 distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) ); 827 828 KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 ); 829 830 KMP_MB(); /* Flush all pending memory write invalidates. */ 831 832 KMP_YIELD( distance 833 > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ); 834 return KMP_LOCK_RELEASED; 835 } 836 837 static int 838 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 839 { 840 char const * const func = "omp_unset_lock"; 841 KMP_MB(); /* in case another processor initialized lock */ 842 if ( lck->lk.initialized != lck ) { 843 KMP_FATAL( LockIsUninitialized, func ); 844 } 845 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 846 KMP_FATAL( LockNestableUsedAsSimple, func ); 847 } 848 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { 849 KMP_FATAL( LockUnsettingFree, func ); 850 } 851 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 ) 852 && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) { 853 KMP_FATAL( LockUnsettingSetByAnother, func ); 854 } 855 lck->lk.owner_id = 0; 856 return __kmp_release_ticket_lock( lck, gtid ); 857 } 858 859 void 860 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck ) 861 { 862 lck->lk.location = NULL; 863 TCW_4( lck->lk.next_ticket, 0 ); 864 TCW_4( lck->lk.now_serving, 0 ); 865 lck->lk.owner_id = 0; // no thread owns the lock. 866 lck->lk.depth_locked = -1; // -1 => not a nested lock. 867 lck->lk.initialized = (kmp_ticket_lock *)lck; 868 } 869 870 static void 871 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) 872 { 873 __kmp_init_ticket_lock( lck ); 874 } 875 876 void 877 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ) 878 { 879 lck->lk.initialized = NULL; 880 lck->lk.location = NULL; 881 lck->lk.next_ticket = 0; 882 lck->lk.now_serving = 0; 883 lck->lk.owner_id = 0; 884 lck->lk.depth_locked = -1; 885 } 886 887 static void 888 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) 889 { 890 char const * const func = "omp_destroy_lock"; 891 if ( lck->lk.initialized != lck ) { 892 KMP_FATAL( LockIsUninitialized, func ); 893 } 894 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 895 KMP_FATAL( LockNestableUsedAsSimple, func ); 896 } 897 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { 898 KMP_FATAL( LockStillOwned, func ); 899 } 900 __kmp_destroy_ticket_lock( lck ); 901 } 902 903 904 // 905 // nested ticket locks 906 // 907 908 void 909 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 910 { 911 KMP_DEBUG_ASSERT( gtid >= 0 ); 912 913 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 914 lck->lk.depth_locked += 1; 915 } 916 else { 917 __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 918 KMP_MB(); 919 lck->lk.depth_locked = 1; 920 KMP_MB(); 921 lck->lk.owner_id = gtid + 1; 922 } 923 } 924 925 static void 926 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 927 { 928 char const * const func = "omp_set_nest_lock"; 929 if ( lck->lk.initialized != lck ) { 930 KMP_FATAL( LockIsUninitialized, func ); 931 } 932 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 933 KMP_FATAL( LockSimpleUsedAsNestable, func ); 934 } 935 __kmp_acquire_nested_ticket_lock( lck, gtid ); 936 } 937 938 int 939 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 940 { 941 int retval; 942 943 KMP_DEBUG_ASSERT( gtid >= 0 ); 944 945 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 946 retval = ++lck->lk.depth_locked; 947 } 948 else if ( !__kmp_test_ticket_lock( lck, gtid ) ) { 949 retval = 0; 950 } 951 else { 952 KMP_MB(); 953 retval = lck->lk.depth_locked = 1; 954 KMP_MB(); 955 lck->lk.owner_id = gtid + 1; 956 } 957 return retval; 958 } 959 960 static int 961 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, 962 kmp_int32 gtid ) 963 { 964 char const * const func = "omp_test_nest_lock"; 965 if ( lck->lk.initialized != lck ) { 966 KMP_FATAL( LockIsUninitialized, func ); 967 } 968 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 969 KMP_FATAL( LockSimpleUsedAsNestable, func ); 970 } 971 return __kmp_test_nested_ticket_lock( lck, gtid ); 972 } 973 974 int 975 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 976 { 977 KMP_DEBUG_ASSERT( gtid >= 0 ); 978 979 KMP_MB(); 980 if ( --(lck->lk.depth_locked) == 0 ) { 981 KMP_MB(); 982 lck->lk.owner_id = 0; 983 __kmp_release_ticket_lock( lck, gtid ); 984 return KMP_LOCK_RELEASED; 985 } 986 return KMP_LOCK_STILL_HELD; 987 } 988 989 static int 990 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 991 { 992 char const * const func = "omp_unset_nest_lock"; 993 KMP_MB(); /* in case another processor initialized lock */ 994 if ( lck->lk.initialized != lck ) { 995 KMP_FATAL( LockIsUninitialized, func ); 996 } 997 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 998 KMP_FATAL( LockSimpleUsedAsNestable, func ); 999 } 1000 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { 1001 KMP_FATAL( LockUnsettingFree, func ); 1002 } 1003 if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) { 1004 KMP_FATAL( LockUnsettingSetByAnother, func ); 1005 } 1006 return __kmp_release_nested_ticket_lock( lck, gtid ); 1007 } 1008 1009 void 1010 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck ) 1011 { 1012 __kmp_init_ticket_lock( lck ); 1013 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1014 } 1015 1016 static void 1017 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) 1018 { 1019 __kmp_init_nested_ticket_lock( lck ); 1020 } 1021 1022 void 1023 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ) 1024 { 1025 __kmp_destroy_ticket_lock( lck ); 1026 lck->lk.depth_locked = 0; 1027 } 1028 1029 static void 1030 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) 1031 { 1032 char const * const func = "omp_destroy_nest_lock"; 1033 if ( lck->lk.initialized != lck ) { 1034 KMP_FATAL( LockIsUninitialized, func ); 1035 } 1036 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 1037 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1038 } 1039 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { 1040 KMP_FATAL( LockStillOwned, func ); 1041 } 1042 __kmp_destroy_nested_ticket_lock( lck ); 1043 } 1044 1045 1046 // 1047 // access functions to fields which don't exist for all lock kinds. 1048 // 1049 1050 static int 1051 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck ) 1052 { 1053 return lck == lck->lk.initialized; 1054 } 1055 1056 static const ident_t * 1057 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck ) 1058 { 1059 return lck->lk.location; 1060 } 1061 1062 static void 1063 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc ) 1064 { 1065 lck->lk.location = loc; 1066 } 1067 1068 static kmp_lock_flags_t 1069 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck ) 1070 { 1071 return lck->lk.flags; 1072 } 1073 1074 static void 1075 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags ) 1076 { 1077 lck->lk.flags = flags; 1078 } 1079 1080 /* ------------------------------------------------------------------------ */ 1081 /* queuing locks */ 1082 1083 /* 1084 * First the states 1085 * (head,tail) = 0, 0 means lock is unheld, nobody on queue 1086 * UINT_MAX or -1, 0 means lock is held, nobody on queue 1087 * h, h means lock is held or about to transition, 1 element on queue 1088 * h, t h <> t, means lock is held or about to transition, >1 elements on queue 1089 * 1090 * Now the transitions 1091 * Acquire(0,0) = -1 ,0 1092 * Release(0,0) = Error 1093 * Acquire(-1,0) = h ,h h > 0 1094 * Release(-1,0) = 0 ,0 1095 * Acquire(h,h) = h ,t h > 0, t > 0, h <> t 1096 * Release(h,h) = -1 ,0 h > 0 1097 * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' 1098 * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t 1099 * 1100 * And pictorially 1101 * 1102 * 1103 * +-----+ 1104 * | 0, 0|------- release -------> Error 1105 * +-----+ 1106 * | ^ 1107 * acquire| |release 1108 * | | 1109 * | | 1110 * v | 1111 * +-----+ 1112 * |-1, 0| 1113 * +-----+ 1114 * | ^ 1115 * acquire| |release 1116 * | | 1117 * | | 1118 * v | 1119 * +-----+ 1120 * | h, h| 1121 * +-----+ 1122 * | ^ 1123 * acquire| |release 1124 * | | 1125 * | | 1126 * v | 1127 * +-----+ 1128 * | h, t|----- acquire, release loopback ---+ 1129 * +-----+ | 1130 * ^ | 1131 * | | 1132 * +------------------------------------+ 1133 * 1134 */ 1135 1136 #ifdef DEBUG_QUEUING_LOCKS 1137 1138 /* Stuff for circular trace buffer */ 1139 #define TRACE_BUF_ELE 1024 1140 static char traces[TRACE_BUF_ELE][128] = { 0 } 1141 static int tc = 0; 1142 #define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y ); 1143 #define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z ); 1144 #define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q ); 1145 1146 static void 1147 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid, 1148 kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id ) 1149 { 1150 kmp_int32 t, i; 1151 1152 __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" ); 1153 1154 i = tc % TRACE_BUF_ELE; 1155 __kmp_printf_no_lock( "%s\n", traces[i] ); 1156 i = (i+1) % TRACE_BUF_ELE; 1157 while ( i != (tc % TRACE_BUF_ELE) ) { 1158 __kmp_printf_no_lock( "%s", traces[i] ); 1159 i = (i+1) % TRACE_BUF_ELE; 1160 } 1161 __kmp_printf_no_lock( "\n" ); 1162 1163 __kmp_printf_no_lock( 1164 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n", 1165 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting, 1166 head_id, tail_id ); 1167 1168 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id ); 1169 1170 if ( lck->lk.head_id >= 1 ) { 1171 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting; 1172 while (t > 0) { 1173 __kmp_printf_no_lock( "-> %d ", t ); 1174 t = __kmp_threads[t-1]->th.th_next_waiting; 1175 } 1176 } 1177 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id ); 1178 __kmp_printf_no_lock( "\n\n" ); 1179 } 1180 1181 #endif /* DEBUG_QUEUING_LOCKS */ 1182 1183 static kmp_int32 1184 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck ) 1185 { 1186 return TCR_4( lck->lk.owner_id ) - 1; 1187 } 1188 1189 static inline bool 1190 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck ) 1191 { 1192 return lck->lk.depth_locked != -1; 1193 } 1194 1195 /* Acquire a lock using a the queuing lock implementation */ 1196 template <bool takeTime> 1197 /* [TLW] The unused template above is left behind because of what BEB believes is a 1198 potential compiler problem with __forceinline. */ 1199 __forceinline static void 1200 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck, 1201 kmp_int32 gtid ) 1202 { 1203 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid ); 1204 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1205 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1206 volatile kmp_uint32 *spin_here_p; 1207 kmp_int32 need_mf = 1; 1208 1209 #if OMPT_SUPPORT 1210 ompt_state_t prev_state = ompt_state_undefined; 1211 #endif 1212 1213 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1214 1215 KMP_FSYNC_PREPARE( lck ); 1216 KMP_DEBUG_ASSERT( this_thr != NULL ); 1217 spin_here_p = & this_thr->th.th_spin_here; 1218 1219 #ifdef DEBUG_QUEUING_LOCKS 1220 TRACE_LOCK( gtid+1, "acq ent" ); 1221 if ( *spin_here_p ) 1222 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1223 if ( this_thr->th.th_next_waiting != 0 ) 1224 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1225 #endif 1226 KMP_DEBUG_ASSERT( !*spin_here_p ); 1227 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1228 1229 1230 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p 1231 that may follow, not just in execution order, but also in visibility order. This way, 1232 when a releasing thread observes the changes to the queue by this thread, it can 1233 rightly assume that spin_here_p has already been set to TRUE, so that when it sets 1234 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p 1235 to FALSE before this thread sets it to TRUE, this thread will hang. 1236 */ 1237 *spin_here_p = TRUE; /* before enqueuing to prevent race */ 1238 1239 while( 1 ) { 1240 kmp_int32 enqueued; 1241 kmp_int32 head; 1242 kmp_int32 tail; 1243 1244 head = *head_id_p; 1245 1246 switch ( head ) { 1247 1248 case -1: 1249 { 1250 #ifdef DEBUG_QUEUING_LOCKS 1251 tail = *tail_id_p; 1252 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1253 #endif 1254 tail = 0; /* to make sure next link asynchronously read is not set accidentally; 1255 this assignment prevents us from entering the if ( t > 0 ) 1256 condition in the enqueued case below, which is not necessary for 1257 this state transition */ 1258 1259 need_mf = 0; 1260 /* try (-1,0)->(tid,tid) */ 1261 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p, 1262 KMP_PACK_64( -1, 0 ), 1263 KMP_PACK_64( gtid+1, gtid+1 ) ); 1264 #ifdef DEBUG_QUEUING_LOCKS 1265 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" ); 1266 #endif 1267 } 1268 break; 1269 1270 default: 1271 { 1272 tail = *tail_id_p; 1273 KMP_DEBUG_ASSERT( tail != gtid + 1 ); 1274 1275 #ifdef DEBUG_QUEUING_LOCKS 1276 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1277 #endif 1278 1279 if ( tail == 0 ) { 1280 enqueued = FALSE; 1281 } 1282 else { 1283 need_mf = 0; 1284 /* try (h,t) or (h,h)->(h,tid) */ 1285 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 ); 1286 1287 #ifdef DEBUG_QUEUING_LOCKS 1288 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" ); 1289 #endif 1290 } 1291 } 1292 break; 1293 1294 case 0: /* empty queue */ 1295 { 1296 kmp_int32 grabbed_lock; 1297 1298 #ifdef DEBUG_QUEUING_LOCKS 1299 tail = *tail_id_p; 1300 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1301 #endif 1302 /* try (0,0)->(-1,0) */ 1303 1304 /* only legal transition out of head = 0 is head = -1 with no change to tail */ 1305 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ); 1306 1307 if ( grabbed_lock ) { 1308 1309 *spin_here_p = FALSE; 1310 1311 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", 1312 lck, gtid )); 1313 #ifdef DEBUG_QUEUING_LOCKS 1314 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 ); 1315 #endif 1316 1317 #if OMPT_SUPPORT 1318 if ((ompt_status & ompt_status_track) && 1319 prev_state != ompt_state_undefined) { 1320 /* change the state before clearing wait_id */ 1321 this_thr->th.ompt_thread_info.state = prev_state; 1322 this_thr->th.ompt_thread_info.wait_id = 0; 1323 } 1324 #endif 1325 1326 KMP_FSYNC_ACQUIRED( lck ); 1327 return; /* lock holder cannot be on queue */ 1328 } 1329 enqueued = FALSE; 1330 } 1331 break; 1332 } 1333 1334 #if OMPT_SUPPORT 1335 if ((ompt_status & ompt_status_track) && 1336 prev_state == ompt_state_undefined) { 1337 /* this thread will spin; set wait_id before entering wait state */ 1338 prev_state = this_thr->th.ompt_thread_info.state; 1339 this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck; 1340 this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; 1341 } 1342 #endif 1343 1344 if ( enqueued ) { 1345 if ( tail > 0 ) { 1346 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 ); 1347 KMP_ASSERT( tail_thr != NULL ); 1348 tail_thr->th.th_next_waiting = gtid+1; 1349 /* corresponding wait for this write in release code */ 1350 } 1351 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid )); 1352 1353 1354 /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for 1355 * throughput only here. 1356 */ 1357 KMP_MB(); 1358 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); 1359 1360 #ifdef DEBUG_QUEUING_LOCKS 1361 TRACE_LOCK( gtid+1, "acq spin" ); 1362 1363 if ( this_thr->th.th_next_waiting != 0 ) 1364 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1365 #endif 1366 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1367 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n", 1368 lck, gtid )); 1369 1370 #ifdef DEBUG_QUEUING_LOCKS 1371 TRACE_LOCK( gtid+1, "acq exit 2" ); 1372 #endif 1373 1374 #if OMPT_SUPPORT 1375 /* change the state before clearing wait_id */ 1376 this_thr->th.ompt_thread_info.state = prev_state; 1377 this_thr->th.ompt_thread_info.wait_id = 0; 1378 #endif 1379 1380 /* got lock, we were dequeued by the thread that released lock */ 1381 return; 1382 } 1383 1384 /* Yield if number of threads > number of logical processors */ 1385 /* ToDo: Not sure why this should only be in oversubscription case, 1386 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ 1387 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc : 1388 __kmp_xproc ) ); 1389 #ifdef DEBUG_QUEUING_LOCKS 1390 TRACE_LOCK( gtid+1, "acq retry" ); 1391 #endif 1392 1393 } 1394 KMP_ASSERT2( 0, "should not get here" ); 1395 } 1396 1397 void 1398 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1399 { 1400 KMP_DEBUG_ASSERT( gtid >= 0 ); 1401 1402 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1403 } 1404 1405 static void 1406 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1407 kmp_int32 gtid ) 1408 { 1409 char const * const func = "omp_set_lock"; 1410 if ( lck->lk.initialized != lck ) { 1411 KMP_FATAL( LockIsUninitialized, func ); 1412 } 1413 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1414 KMP_FATAL( LockNestableUsedAsSimple, func ); 1415 } 1416 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1417 KMP_FATAL( LockIsAlreadyOwned, func ); 1418 } 1419 1420 __kmp_acquire_queuing_lock( lck, gtid ); 1421 1422 lck->lk.owner_id = gtid + 1; 1423 } 1424 1425 int 1426 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1427 { 1428 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1429 kmp_int32 head; 1430 #ifdef KMP_DEBUG 1431 kmp_info_t *this_thr; 1432 #endif 1433 1434 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid )); 1435 KMP_DEBUG_ASSERT( gtid >= 0 ); 1436 #ifdef KMP_DEBUG 1437 this_thr = __kmp_thread_from_gtid( gtid ); 1438 KMP_DEBUG_ASSERT( this_thr != NULL ); 1439 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1440 #endif 1441 1442 head = *head_id_p; 1443 1444 if ( head == 0 ) { /* nobody on queue, nobody holding */ 1445 1446 /* try (0,0)->(-1,0) */ 1447 1448 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) { 1449 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid )); 1450 KMP_FSYNC_ACQUIRED(lck); 1451 return TRUE; 1452 } 1453 } 1454 1455 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid )); 1456 return FALSE; 1457 } 1458 1459 static int 1460 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1461 { 1462 char const * const func = "omp_test_lock"; 1463 if ( lck->lk.initialized != lck ) { 1464 KMP_FATAL( LockIsUninitialized, func ); 1465 } 1466 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1467 KMP_FATAL( LockNestableUsedAsSimple, func ); 1468 } 1469 1470 int retval = __kmp_test_queuing_lock( lck, gtid ); 1471 1472 if ( retval ) { 1473 lck->lk.owner_id = gtid + 1; 1474 } 1475 return retval; 1476 } 1477 1478 int 1479 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1480 { 1481 register kmp_info_t *this_thr; 1482 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1483 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1484 1485 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1486 KMP_DEBUG_ASSERT( gtid >= 0 ); 1487 this_thr = __kmp_thread_from_gtid( gtid ); 1488 KMP_DEBUG_ASSERT( this_thr != NULL ); 1489 #ifdef DEBUG_QUEUING_LOCKS 1490 TRACE_LOCK( gtid+1, "rel ent" ); 1491 1492 if ( this_thr->th.th_spin_here ) 1493 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1494 if ( this_thr->th.th_next_waiting != 0 ) 1495 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1496 #endif 1497 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1498 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1499 1500 KMP_FSYNC_RELEASING(lck); 1501 1502 while( 1 ) { 1503 kmp_int32 dequeued; 1504 kmp_int32 head; 1505 kmp_int32 tail; 1506 1507 head = *head_id_p; 1508 1509 #ifdef DEBUG_QUEUING_LOCKS 1510 tail = *tail_id_p; 1511 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail ); 1512 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1513 #endif 1514 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */ 1515 1516 if ( head == -1 ) { /* nobody on queue */ 1517 1518 /* try (-1,0)->(0,0) */ 1519 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) { 1520 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1521 lck, gtid )); 1522 #ifdef DEBUG_QUEUING_LOCKS 1523 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 ); 1524 #endif 1525 1526 #if OMPT_SUPPORT 1527 /* nothing to do - no other thread is trying to shift blame */ 1528 #endif 1529 1530 return KMP_LOCK_RELEASED; 1531 } 1532 dequeued = FALSE; 1533 1534 } 1535 else { 1536 1537 tail = *tail_id_p; 1538 if ( head == tail ) { /* only one thread on the queue */ 1539 1540 #ifdef DEBUG_QUEUING_LOCKS 1541 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1542 #endif 1543 KMP_DEBUG_ASSERT( head > 0 ); 1544 1545 /* try (h,h)->(-1,0) */ 1546 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p, 1547 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) ); 1548 #ifdef DEBUG_QUEUING_LOCKS 1549 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" ); 1550 #endif 1551 1552 } 1553 else { 1554 volatile kmp_int32 *waiting_id_p; 1555 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1556 KMP_DEBUG_ASSERT( head_thr != NULL ); 1557 waiting_id_p = & head_thr->th.th_next_waiting; 1558 1559 /* Does this require synchronous reads? */ 1560 #ifdef DEBUG_QUEUING_LOCKS 1561 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1562 #endif 1563 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1564 1565 /* try (h,t)->(h',t) or (t,t) */ 1566 1567 KMP_MB(); 1568 /* make sure enqueuing thread has time to update next waiting thread field */ 1569 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL); 1570 #ifdef DEBUG_QUEUING_LOCKS 1571 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" ); 1572 #endif 1573 dequeued = TRUE; 1574 } 1575 } 1576 1577 if ( dequeued ) { 1578 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1579 KMP_DEBUG_ASSERT( head_thr != NULL ); 1580 1581 /* Does this require synchronous reads? */ 1582 #ifdef DEBUG_QUEUING_LOCKS 1583 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1584 #endif 1585 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1586 1587 /* For clean code only. 1588 * Thread not released until next statement prevents race with acquire code. 1589 */ 1590 head_thr->th.th_next_waiting = 0; 1591 #ifdef DEBUG_QUEUING_LOCKS 1592 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head ); 1593 #endif 1594 1595 KMP_MB(); 1596 /* reset spin value */ 1597 head_thr->th.th_spin_here = FALSE; 1598 1599 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n", 1600 lck, gtid )); 1601 #ifdef DEBUG_QUEUING_LOCKS 1602 TRACE_LOCK( gtid+1, "rel exit 2" ); 1603 #endif 1604 return KMP_LOCK_RELEASED; 1605 } 1606 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */ 1607 1608 #ifdef DEBUG_QUEUING_LOCKS 1609 TRACE_LOCK( gtid+1, "rel retry" ); 1610 #endif 1611 1612 } /* while */ 1613 KMP_ASSERT2( 0, "should not get here" ); 1614 return KMP_LOCK_RELEASED; 1615 } 1616 1617 static int 1618 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1619 kmp_int32 gtid ) 1620 { 1621 char const * const func = "omp_unset_lock"; 1622 KMP_MB(); /* in case another processor initialized lock */ 1623 if ( lck->lk.initialized != lck ) { 1624 KMP_FATAL( LockIsUninitialized, func ); 1625 } 1626 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1627 KMP_FATAL( LockNestableUsedAsSimple, func ); 1628 } 1629 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1630 KMP_FATAL( LockUnsettingFree, func ); 1631 } 1632 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1633 KMP_FATAL( LockUnsettingSetByAnother, func ); 1634 } 1635 lck->lk.owner_id = 0; 1636 return __kmp_release_queuing_lock( lck, gtid ); 1637 } 1638 1639 void 1640 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ) 1641 { 1642 lck->lk.location = NULL; 1643 lck->lk.head_id = 0; 1644 lck->lk.tail_id = 0; 1645 lck->lk.next_ticket = 0; 1646 lck->lk.now_serving = 0; 1647 lck->lk.owner_id = 0; // no thread owns the lock. 1648 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 1649 lck->lk.initialized = lck; 1650 1651 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1652 } 1653 1654 static void 1655 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1656 { 1657 __kmp_init_queuing_lock( lck ); 1658 } 1659 1660 void 1661 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ) 1662 { 1663 lck->lk.initialized = NULL; 1664 lck->lk.location = NULL; 1665 lck->lk.head_id = 0; 1666 lck->lk.tail_id = 0; 1667 lck->lk.next_ticket = 0; 1668 lck->lk.now_serving = 0; 1669 lck->lk.owner_id = 0; 1670 lck->lk.depth_locked = -1; 1671 } 1672 1673 static void 1674 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1675 { 1676 char const * const func = "omp_destroy_lock"; 1677 if ( lck->lk.initialized != lck ) { 1678 KMP_FATAL( LockIsUninitialized, func ); 1679 } 1680 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1681 KMP_FATAL( LockNestableUsedAsSimple, func ); 1682 } 1683 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1684 KMP_FATAL( LockStillOwned, func ); 1685 } 1686 __kmp_destroy_queuing_lock( lck ); 1687 } 1688 1689 1690 // 1691 // nested queuing locks 1692 // 1693 1694 void 1695 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1696 { 1697 KMP_DEBUG_ASSERT( gtid >= 0 ); 1698 1699 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1700 lck->lk.depth_locked += 1; 1701 } 1702 else { 1703 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1704 KMP_MB(); 1705 lck->lk.depth_locked = 1; 1706 KMP_MB(); 1707 lck->lk.owner_id = gtid + 1; 1708 } 1709 } 1710 1711 static void 1712 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1713 { 1714 char const * const func = "omp_set_nest_lock"; 1715 if ( lck->lk.initialized != lck ) { 1716 KMP_FATAL( LockIsUninitialized, func ); 1717 } 1718 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1719 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1720 } 1721 __kmp_acquire_nested_queuing_lock( lck, gtid ); 1722 } 1723 1724 int 1725 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1726 { 1727 int retval; 1728 1729 KMP_DEBUG_ASSERT( gtid >= 0 ); 1730 1731 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1732 retval = ++lck->lk.depth_locked; 1733 } 1734 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) { 1735 retval = 0; 1736 } 1737 else { 1738 KMP_MB(); 1739 retval = lck->lk.depth_locked = 1; 1740 KMP_MB(); 1741 lck->lk.owner_id = gtid + 1; 1742 } 1743 return retval; 1744 } 1745 1746 static int 1747 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1748 kmp_int32 gtid ) 1749 { 1750 char const * const func = "omp_test_nest_lock"; 1751 if ( lck->lk.initialized != lck ) { 1752 KMP_FATAL( LockIsUninitialized, func ); 1753 } 1754 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1755 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1756 } 1757 return __kmp_test_nested_queuing_lock( lck, gtid ); 1758 } 1759 1760 int 1761 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1762 { 1763 KMP_DEBUG_ASSERT( gtid >= 0 ); 1764 1765 KMP_MB(); 1766 if ( --(lck->lk.depth_locked) == 0 ) { 1767 KMP_MB(); 1768 lck->lk.owner_id = 0; 1769 __kmp_release_queuing_lock( lck, gtid ); 1770 return KMP_LOCK_RELEASED; 1771 } 1772 return KMP_LOCK_STILL_HELD; 1773 } 1774 1775 static int 1776 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1777 { 1778 char const * const func = "omp_unset_nest_lock"; 1779 KMP_MB(); /* in case another processor initialized lock */ 1780 if ( lck->lk.initialized != lck ) { 1781 KMP_FATAL( LockIsUninitialized, func ); 1782 } 1783 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1784 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1785 } 1786 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1787 KMP_FATAL( LockUnsettingFree, func ); 1788 } 1789 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1790 KMP_FATAL( LockUnsettingSetByAnother, func ); 1791 } 1792 return __kmp_release_nested_queuing_lock( lck, gtid ); 1793 } 1794 1795 void 1796 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck ) 1797 { 1798 __kmp_init_queuing_lock( lck ); 1799 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1800 } 1801 1802 static void 1803 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1804 { 1805 __kmp_init_nested_queuing_lock( lck ); 1806 } 1807 1808 void 1809 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ) 1810 { 1811 __kmp_destroy_queuing_lock( lck ); 1812 lck->lk.depth_locked = 0; 1813 } 1814 1815 static void 1816 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1817 { 1818 char const * const func = "omp_destroy_nest_lock"; 1819 if ( lck->lk.initialized != lck ) { 1820 KMP_FATAL( LockIsUninitialized, func ); 1821 } 1822 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1823 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1824 } 1825 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1826 KMP_FATAL( LockStillOwned, func ); 1827 } 1828 __kmp_destroy_nested_queuing_lock( lck ); 1829 } 1830 1831 1832 // 1833 // access functions to fields which don't exist for all lock kinds. 1834 // 1835 1836 static int 1837 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck ) 1838 { 1839 return lck == lck->lk.initialized; 1840 } 1841 1842 static const ident_t * 1843 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck ) 1844 { 1845 return lck->lk.location; 1846 } 1847 1848 static void 1849 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc ) 1850 { 1851 lck->lk.location = loc; 1852 } 1853 1854 static kmp_lock_flags_t 1855 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck ) 1856 { 1857 return lck->lk.flags; 1858 } 1859 1860 static void 1861 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags ) 1862 { 1863 lck->lk.flags = flags; 1864 } 1865 1866 #if KMP_USE_ADAPTIVE_LOCKS 1867 1868 /* 1869 RTM Adaptive locks 1870 */ 1871 1872 // TODO: Use the header for intrinsics below with the compiler 13.0 1873 //#include <immintrin.h> 1874 1875 // Values from the status register after failed speculation. 1876 #define _XBEGIN_STARTED (~0u) 1877 #define _XABORT_EXPLICIT (1 << 0) 1878 #define _XABORT_RETRY (1 << 1) 1879 #define _XABORT_CONFLICT (1 << 2) 1880 #define _XABORT_CAPACITY (1 << 3) 1881 #define _XABORT_DEBUG (1 << 4) 1882 #define _XABORT_NESTED (1 << 5) 1883 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1884 1885 // Aborts for which it's worth trying again immediately 1886 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1887 1888 #define STRINGIZE_INTERNAL(arg) #arg 1889 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1890 1891 // Access to RTM instructions 1892 1893 /* 1894 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. 1895 This is the same definition as the compiler intrinsic that will be supported at some point. 1896 */ 1897 static __inline int _xbegin() 1898 { 1899 int res = -1; 1900 1901 #if KMP_OS_WINDOWS 1902 #if KMP_ARCH_X86_64 1903 _asm { 1904 _emit 0xC7 1905 _emit 0xF8 1906 _emit 2 1907 _emit 0 1908 _emit 0 1909 _emit 0 1910 jmp L2 1911 mov res, eax 1912 L2: 1913 } 1914 #else /* IA32 */ 1915 _asm { 1916 _emit 0xC7 1917 _emit 0xF8 1918 _emit 2 1919 _emit 0 1920 _emit 0 1921 _emit 0 1922 jmp L2 1923 mov res, eax 1924 L2: 1925 } 1926 #endif // KMP_ARCH_X86_64 1927 #else 1928 /* Note that %eax must be noted as killed (clobbered), because 1929 * the XSR is returned in %eax(%rax) on abort. Other register 1930 * values are restored, so don't need to be killed. 1931 * 1932 * We must also mark 'res' as an input and an output, since otherwise 1933 * 'res=-1' may be dropped as being dead, whereas we do need the 1934 * assignment on the successful (i.e., non-abort) path. 1935 */ 1936 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" 1937 " .long 1f-1b-6\n" 1938 " jmp 2f\n" 1939 "1: movl %%eax,%0\n" 1940 "2:" 1941 :"+r"(res)::"memory","%eax"); 1942 #endif // KMP_OS_WINDOWS 1943 return res; 1944 } 1945 1946 /* 1947 Transaction end 1948 */ 1949 static __inline void _xend() 1950 { 1951 #if KMP_OS_WINDOWS 1952 __asm { 1953 _emit 0x0f 1954 _emit 0x01 1955 _emit 0xd5 1956 } 1957 #else 1958 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory"); 1959 #endif 1960 } 1961 1962 /* 1963 This is a macro, the argument must be a single byte constant which 1964 can be evaluated by the inline assembler, since it is emitted as a 1965 byte into the assembly code. 1966 */ 1967 #if KMP_OS_WINDOWS 1968 #define _xabort(ARG) \ 1969 _asm _emit 0xc6 \ 1970 _asm _emit 0xf8 \ 1971 _asm _emit ARG 1972 #else 1973 #define _xabort(ARG) \ 1974 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory"); 1975 #endif 1976 1977 // 1978 // Statistics is collected for testing purpose 1979 // 1980 #if KMP_DEBUG_ADAPTIVE_LOCKS 1981 1982 // We accumulate speculative lock statistics when the lock is destroyed. 1983 // We keep locks that haven't been destroyed in the liveLocks list 1984 // so that we can grab their statistics too. 1985 static kmp_adaptive_lock_statistics_t destroyedStats; 1986 1987 // To hold the list of live locks. 1988 static kmp_adaptive_lock_info_t liveLocks; 1989 1990 // A lock so we can safely update the list of locks. 1991 static kmp_bootstrap_lock_t chain_lock; 1992 1993 // Initialize the list of stats. 1994 void 1995 __kmp_init_speculative_stats() 1996 { 1997 kmp_adaptive_lock_info_t *lck = &liveLocks; 1998 1999 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); 2000 lck->stats.next = lck; 2001 lck->stats.prev = lck; 2002 2003 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2004 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2005 2006 __kmp_init_bootstrap_lock( &chain_lock ); 2007 2008 } 2009 2010 // Insert the lock into the circular list 2011 static void 2012 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) 2013 { 2014 __kmp_acquire_bootstrap_lock( &chain_lock ); 2015 2016 lck->stats.next = liveLocks.stats.next; 2017 lck->stats.prev = &liveLocks; 2018 2019 liveLocks.stats.next = lck; 2020 lck->stats.next->stats.prev = lck; 2021 2022 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2023 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2024 2025 __kmp_release_bootstrap_lock( &chain_lock ); 2026 } 2027 2028 static void 2029 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) 2030 { 2031 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2032 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2033 2034 kmp_adaptive_lock_info_t * n = lck->stats.next; 2035 kmp_adaptive_lock_info_t * p = lck->stats.prev; 2036 2037 n->stats.prev = p; 2038 p->stats.next = n; 2039 } 2040 2041 static void 2042 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) 2043 { 2044 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); 2045 __kmp_remember_lock( lck ); 2046 } 2047 2048 static void 2049 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) 2050 { 2051 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 2052 2053 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 2054 t->successfulSpeculations += s->successfulSpeculations; 2055 t->hardFailedSpeculations += s->hardFailedSpeculations; 2056 t->softFailedSpeculations += s->softFailedSpeculations; 2057 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 2058 t->lemmingYields += s->lemmingYields; 2059 } 2060 2061 static void 2062 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) 2063 { 2064 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 2065 2066 __kmp_acquire_bootstrap_lock( &chain_lock ); 2067 2068 __kmp_add_stats( &destroyedStats, lck ); 2069 __kmp_forget_lock( lck ); 2070 2071 __kmp_release_bootstrap_lock( &chain_lock ); 2072 } 2073 2074 static float 2075 percent (kmp_uint32 count, kmp_uint32 total) 2076 { 2077 return (total == 0) ? 0.0: (100.0 * count)/total; 2078 } 2079 2080 static 2081 FILE * __kmp_open_stats_file() 2082 { 2083 if (strcmp (__kmp_speculative_statsfile, "-") == 0) 2084 return stdout; 2085 2086 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20; 2087 char buffer[buffLen]; 2088 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile, 2089 (kmp_int32)getpid()); 2090 FILE * result = fopen(&buffer[0], "w"); 2091 2092 // Maybe we should issue a warning here... 2093 return result ? result : stdout; 2094 } 2095 2096 void 2097 __kmp_print_speculative_stats() 2098 { 2099 if (__kmp_user_lock_kind != lk_adaptive) 2100 return; 2101 2102 FILE * statsFile = __kmp_open_stats_file(); 2103 2104 kmp_adaptive_lock_statistics_t total = destroyedStats; 2105 kmp_adaptive_lock_info_t *lck; 2106 2107 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 2108 __kmp_add_stats( &total, lck ); 2109 } 2110 kmp_adaptive_lock_statistics_t *t = &total; 2111 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; 2112 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + 2113 t->softFailedSpeculations; 2114 2115 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); 2116 fprintf ( statsFile, " Lock parameters: \n" 2117 " max_soft_retries : %10d\n" 2118 " max_badness : %10d\n", 2119 __kmp_adaptive_backoff_params.max_soft_retries, 2120 __kmp_adaptive_backoff_params.max_badness); 2121 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); 2122 fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); 2123 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", 2124 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); 2125 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 2126 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); 2127 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); 2128 2129 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); 2130 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", 2131 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); 2132 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", 2133 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); 2134 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", 2135 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); 2136 2137 if (statsFile != stdout) 2138 fclose( statsFile ); 2139 } 2140 2141 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) 2142 #else 2143 # define KMP_INC_STAT(lck,stat) 2144 2145 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2146 2147 static inline bool 2148 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) 2149 { 2150 // It is enough to check that the head_id is zero. 2151 // We don't also need to check the tail. 2152 bool res = lck->lk.head_id == 0; 2153 2154 // We need a fence here, since we must ensure that no memory operations 2155 // from later in this thread float above that read. 2156 #if KMP_COMPILER_ICC 2157 _mm_mfence(); 2158 #else 2159 __sync_synchronize(); 2160 #endif 2161 2162 return res; 2163 } 2164 2165 // Functions for manipulating the badness 2166 static __inline void 2167 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) 2168 { 2169 // Reset the badness to zero so we eagerly try to speculate again 2170 lck->lk.adaptive.badness = 0; 2171 KMP_INC_STAT(lck,successfulSpeculations); 2172 } 2173 2174 // Create a bit mask with one more set bit. 2175 static __inline void 2176 __kmp_step_badness( kmp_adaptive_lock_t *lck ) 2177 { 2178 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; 2179 if ( newBadness > lck->lk.adaptive.max_badness) { 2180 return; 2181 } else { 2182 lck->lk.adaptive.badness = newBadness; 2183 } 2184 } 2185 2186 // Check whether speculation should be attempted. 2187 static __inline int 2188 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2189 { 2190 kmp_uint32 badness = lck->lk.adaptive.badness; 2191 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; 2192 int res = (attempts & badness) == 0; 2193 return res; 2194 } 2195 2196 // Attempt to acquire only the speculative lock. 2197 // Does not back off to the non-speculative lock. 2198 // 2199 static int 2200 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2201 { 2202 int retries = lck->lk.adaptive.max_soft_retries; 2203 2204 // We don't explicitly count the start of speculation, rather we record 2205 // the results (success, hard fail, soft fail). The sum of all of those 2206 // is the total number of times we started speculation since all 2207 // speculations must end one of those ways. 2208 do 2209 { 2210 kmp_uint32 status = _xbegin(); 2211 // Switch this in to disable actual speculation but exercise 2212 // at least some of the rest of the code. Useful for debugging... 2213 // kmp_uint32 status = _XABORT_NESTED; 2214 2215 if (status == _XBEGIN_STARTED ) 2216 { /* We have successfully started speculation 2217 * Check that no-one acquired the lock for real between when we last looked 2218 * and now. This also gets the lock cache line into our read-set, 2219 * which we need so that we'll abort if anyone later claims it for real. 2220 */ 2221 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2222 { 2223 // Lock is now visibly acquired, so someone beat us to it. 2224 // Abort the transaction so we'll restart from _xbegin with the 2225 // failure status. 2226 _xabort(0x01) 2227 KMP_ASSERT2( 0, "should not get here" ); 2228 } 2229 return 1; // Lock has been acquired (speculatively) 2230 } else { 2231 // We have aborted, update the statistics 2232 if ( status & SOFT_ABORT_MASK) 2233 { 2234 KMP_INC_STAT(lck,softFailedSpeculations); 2235 // and loop round to retry. 2236 } 2237 else 2238 { 2239 KMP_INC_STAT(lck,hardFailedSpeculations); 2240 // Give up if we had a hard failure. 2241 break; 2242 } 2243 } 2244 } while( retries-- ); // Loop while we have retries, and didn't fail hard. 2245 2246 // Either we had a hard failure or we didn't succeed softly after 2247 // the full set of attempts, so back off the badness. 2248 __kmp_step_badness( lck ); 2249 return 0; 2250 } 2251 2252 // Attempt to acquire the speculative lock, or back off to the non-speculative one 2253 // if the speculative lock cannot be acquired. 2254 // We can succeed speculatively, non-speculatively, or fail. 2255 static int 2256 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2257 { 2258 // First try to acquire the lock speculatively 2259 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) 2260 return 1; 2261 2262 // Speculative acquisition failed, so try to acquire it non-speculatively. 2263 // Count the non-speculative acquire attempt 2264 lck->lk.adaptive.acquire_attempts++; 2265 2266 // Use base, non-speculative lock. 2267 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) 2268 { 2269 KMP_INC_STAT(lck,nonSpeculativeAcquires); 2270 return 1; // Lock is acquired (non-speculatively) 2271 } 2272 else 2273 { 2274 return 0; // Failed to acquire the lock, it's already visibly locked. 2275 } 2276 } 2277 2278 static int 2279 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2280 { 2281 char const * const func = "omp_test_lock"; 2282 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2283 KMP_FATAL( LockIsUninitialized, func ); 2284 } 2285 2286 int retval = __kmp_test_adaptive_lock( lck, gtid ); 2287 2288 if ( retval ) { 2289 lck->lk.qlk.owner_id = gtid + 1; 2290 } 2291 return retval; 2292 } 2293 2294 // Block until we can acquire a speculative, adaptive lock. 2295 // We check whether we should be trying to speculate. 2296 // If we should be, we check the real lock to see if it is free, 2297 // and, if not, pause without attempting to acquire it until it is. 2298 // Then we try the speculative acquire. 2299 // This means that although we suffer from lemmings a little ( 2300 // because all we can't acquire the lock speculatively until 2301 // the queue of threads waiting has cleared), we don't get into a 2302 // state where we can never acquire the lock speculatively (because we 2303 // force the queue to clear by preventing new arrivals from entering the 2304 // queue). 2305 // This does mean that when we're trying to break lemmings, the lock 2306 // is no longer fair. However OpenMP makes no guarantee that its 2307 // locks are fair, so this isn't a real problem. 2308 static void 2309 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2310 { 2311 if ( __kmp_should_speculate( lck, gtid ) ) 2312 { 2313 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2314 { 2315 if ( __kmp_test_adaptive_lock_only( lck , gtid ) ) 2316 return; 2317 // We tried speculation and failed, so give up. 2318 } 2319 else 2320 { 2321 // We can't try speculation until the lock is free, so we 2322 // pause here (without suspending on the queueing lock, 2323 // to allow it to drain, then try again. 2324 // All other threads will also see the same result for 2325 // shouldSpeculate, so will be doing the same if they 2326 // try to claim the lock from now on. 2327 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2328 { 2329 KMP_INC_STAT(lck,lemmingYields); 2330 __kmp_yield (TRUE); 2331 } 2332 2333 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2334 return; 2335 } 2336 } 2337 2338 // Speculative acquisition failed, so acquire it non-speculatively. 2339 // Count the non-speculative acquire attempt 2340 lck->lk.adaptive.acquire_attempts++; 2341 2342 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid ); 2343 // We have acquired the base lock, so count that. 2344 KMP_INC_STAT(lck,nonSpeculativeAcquires ); 2345 } 2346 2347 static void 2348 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2349 { 2350 char const * const func = "omp_set_lock"; 2351 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2352 KMP_FATAL( LockIsUninitialized, func ); 2353 } 2354 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { 2355 KMP_FATAL( LockIsAlreadyOwned, func ); 2356 } 2357 2358 __kmp_acquire_adaptive_lock( lck, gtid ); 2359 2360 lck->lk.qlk.owner_id = gtid + 1; 2361 } 2362 2363 static int 2364 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2365 { 2366 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2367 { // If the lock doesn't look claimed we must be speculating. 2368 // (Or the user's code is buggy and they're releasing without locking; 2369 // if we had XTEST we'd be able to check that case...) 2370 _xend(); // Exit speculation 2371 __kmp_update_badness_after_success( lck ); 2372 } 2373 else 2374 { // Since the lock *is* visibly locked we're not speculating, 2375 // so should use the underlying lock's release scheme. 2376 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); 2377 } 2378 return KMP_LOCK_RELEASED; 2379 } 2380 2381 static int 2382 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2383 { 2384 char const * const func = "omp_unset_lock"; 2385 KMP_MB(); /* in case another processor initialized lock */ 2386 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2387 KMP_FATAL( LockIsUninitialized, func ); 2388 } 2389 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { 2390 KMP_FATAL( LockUnsettingFree, func ); 2391 } 2392 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { 2393 KMP_FATAL( LockUnsettingSetByAnother, func ); 2394 } 2395 lck->lk.qlk.owner_id = 0; 2396 __kmp_release_adaptive_lock( lck, gtid ); 2397 return KMP_LOCK_RELEASED; 2398 } 2399 2400 static void 2401 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) 2402 { 2403 __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); 2404 lck->lk.adaptive.badness = 0; 2405 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; 2406 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; 2407 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2408 #if KMP_DEBUG_ADAPTIVE_LOCKS 2409 __kmp_zero_speculative_stats( &lck->lk.adaptive ); 2410 #endif 2411 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2412 } 2413 2414 static void 2415 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) 2416 { 2417 __kmp_init_adaptive_lock( lck ); 2418 } 2419 2420 static void 2421 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) 2422 { 2423 #if KMP_DEBUG_ADAPTIVE_LOCKS 2424 __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); 2425 #endif 2426 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); 2427 // Nothing needed for the speculative part. 2428 } 2429 2430 static void 2431 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) 2432 { 2433 char const * const func = "omp_destroy_lock"; 2434 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2435 KMP_FATAL( LockIsUninitialized, func ); 2436 } 2437 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { 2438 KMP_FATAL( LockStillOwned, func ); 2439 } 2440 __kmp_destroy_adaptive_lock( lck ); 2441 } 2442 2443 2444 #endif // KMP_USE_ADAPTIVE_LOCKS 2445 2446 2447 /* ------------------------------------------------------------------------ */ 2448 /* DRDPA ticket locks */ 2449 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2450 2451 static kmp_int32 2452 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) 2453 { 2454 return TCR_4( lck->lk.owner_id ) - 1; 2455 } 2456 2457 static inline bool 2458 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) 2459 { 2460 return lck->lk.depth_locked != -1; 2461 } 2462 2463 __forceinline static void 2464 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2465 { 2466 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); 2467 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2468 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2469 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2470 TCR_PTR(lck->lk.polls); // volatile load 2471 2472 #ifdef USE_LOCK_PROFILE 2473 if (TCR_8(polls[ticket & mask].poll) != ticket) 2474 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2475 /* else __kmp_printf( "." );*/ 2476 #endif /* USE_LOCK_PROFILE */ 2477 2478 // 2479 // Now spin-wait, but reload the polls pointer and mask, in case the 2480 // polling area has been reconfigured. Unless it is reconfigured, the 2481 // reloads stay in L1 cache and are cheap. 2482 // 2483 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!! 2484 // 2485 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2486 // and poll to be re-read every spin iteration. 2487 // 2488 kmp_uint32 spins; 2489 2490 KMP_FSYNC_PREPARE(lck); 2491 KMP_INIT_YIELD(spins); 2492 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load 2493 // If we are oversubscribed, 2494 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2495 // CPU Pause is in the macros for yield. 2496 // 2497 KMP_YIELD(TCR_4(__kmp_nth) 2498 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2499 KMP_YIELD_SPIN(spins); 2500 2501 // Re-read the mask and the poll pointer from the lock structure. 2502 // 2503 // Make certain that "mask" is read before "polls" !!! 2504 // 2505 // If another thread picks reconfigures the polling area and updates 2506 // their values, and we get the new value of mask and the old polls 2507 // pointer, we could access memory beyond the end of the old polling 2508 // area. 2509 // 2510 mask = TCR_8(lck->lk.mask); // volatile load 2511 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2512 TCR_PTR(lck->lk.polls); // volatile load 2513 } 2514 2515 // 2516 // Critical section starts here 2517 // 2518 KMP_FSYNC_ACQUIRED(lck); 2519 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2520 ticket, lck)); 2521 lck->lk.now_serving = ticket; // non-volatile store 2522 2523 // 2524 // Deallocate a garbage polling area if we know that we are the last 2525 // thread that could possibly access it. 2526 // 2527 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2528 // ticket. 2529 // 2530 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2531 __kmp_free((void *)lck->lk.old_polls); 2532 lck->lk.old_polls = NULL; 2533 lck->lk.cleanup_ticket = 0; 2534 } 2535 2536 // 2537 // Check to see if we should reconfigure the polling area. 2538 // If there is still a garbage polling area to be deallocated from a 2539 // previous reconfiguration, let a later thread reconfigure it. 2540 // 2541 if (lck->lk.old_polls == NULL) { 2542 bool reconfigure = false; 2543 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2544 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2545 2546 if (TCR_4(__kmp_nth) 2547 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2548 // 2549 // We are in oversubscription mode. Contract the polling area 2550 // down to a single location, if that hasn't been done already. 2551 // 2552 if (num_polls > 1) { 2553 reconfigure = true; 2554 num_polls = TCR_4(lck->lk.num_polls); 2555 mask = 0; 2556 num_polls = 1; 2557 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2558 __kmp_allocate(num_polls * sizeof(*polls)); 2559 polls[0].poll = ticket; 2560 } 2561 } 2562 else { 2563 // 2564 // We are in under/fully subscribed mode. Check the number of 2565 // threads waiting on the lock. The size of the polling area 2566 // should be at least the number of threads waiting. 2567 // 2568 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2569 if (num_waiting > num_polls) { 2570 kmp_uint32 old_num_polls = num_polls; 2571 reconfigure = true; 2572 do { 2573 mask = (mask << 1) | 1; 2574 num_polls *= 2; 2575 } while (num_polls <= num_waiting); 2576 2577 // 2578 // Allocate the new polling area, and copy the relevant portion 2579 // of the old polling area to the new area. __kmp_allocate() 2580 // zeroes the memory it allocates, and most of the old area is 2581 // just zero padding, so we only copy the release counters. 2582 // 2583 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2584 __kmp_allocate(num_polls * sizeof(*polls)); 2585 kmp_uint32 i; 2586 for (i = 0; i < old_num_polls; i++) { 2587 polls[i].poll = old_polls[i].poll; 2588 } 2589 } 2590 } 2591 2592 if (reconfigure) { 2593 // 2594 // Now write the updated fields back to the lock structure. 2595 // 2596 // Make certain that "polls" is written before "mask" !!! 2597 // 2598 // If another thread picks up the new value of mask and the old 2599 // polls pointer , it could access memory beyond the end of the 2600 // old polling area. 2601 // 2602 // On x86, we need memory fences. 2603 // 2604 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", 2605 ticket, lck, num_polls)); 2606 2607 lck->lk.old_polls = old_polls; // non-volatile store 2608 lck->lk.polls = polls; // volatile store 2609 2610 KMP_MB(); 2611 2612 lck->lk.num_polls = num_polls; // non-volatile store 2613 lck->lk.mask = mask; // volatile store 2614 2615 KMP_MB(); 2616 2617 // 2618 // Only after the new polling area and mask have been flushed 2619 // to main memory can we update the cleanup ticket field. 2620 // 2621 // volatile load / non-volatile store 2622 // 2623 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2624 } 2625 } 2626 } 2627 2628 void 2629 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2630 { 2631 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2632 } 2633 2634 static void 2635 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2636 { 2637 char const * const func = "omp_set_lock"; 2638 if ( lck->lk.initialized != lck ) { 2639 KMP_FATAL( LockIsUninitialized, func ); 2640 } 2641 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2642 KMP_FATAL( LockNestableUsedAsSimple, func ); 2643 } 2644 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { 2645 KMP_FATAL( LockIsAlreadyOwned, func ); 2646 } 2647 2648 __kmp_acquire_drdpa_lock( lck, gtid ); 2649 2650 lck->lk.owner_id = gtid + 1; 2651 } 2652 2653 int 2654 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2655 { 2656 // 2657 // First get a ticket, then read the polls pointer and the mask. 2658 // The polls pointer must be read before the mask!!! (See above) 2659 // 2660 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2661 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2662 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2663 TCR_PTR(lck->lk.polls); // volatile load 2664 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2665 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2666 kmp_uint64 next_ticket = ticket + 1; 2667 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, 2668 ticket, next_ticket)) { 2669 KMP_FSYNC_ACQUIRED(lck); 2670 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2671 ticket, lck)); 2672 lck->lk.now_serving = ticket; // non-volatile store 2673 2674 // 2675 // Since no threads are waiting, there is no possibility that 2676 // we would want to reconfigure the polling area. We might 2677 // have the cleanup ticket value (which says that it is now 2678 // safe to deallocate old_polls), but we'll let a later thread 2679 // which calls __kmp_acquire_lock do that - this routine 2680 // isn't supposed to block, and we would risk blocks if we 2681 // called __kmp_free() to do the deallocation. 2682 // 2683 return TRUE; 2684 } 2685 } 2686 return FALSE; 2687 } 2688 2689 static int 2690 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2691 { 2692 char const * const func = "omp_test_lock"; 2693 if ( lck->lk.initialized != lck ) { 2694 KMP_FATAL( LockIsUninitialized, func ); 2695 } 2696 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2697 KMP_FATAL( LockNestableUsedAsSimple, func ); 2698 } 2699 2700 int retval = __kmp_test_drdpa_lock( lck, gtid ); 2701 2702 if ( retval ) { 2703 lck->lk.owner_id = gtid + 1; 2704 } 2705 return retval; 2706 } 2707 2708 int 2709 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2710 { 2711 // 2712 // Read the ticket value from the lock data struct, then the polls 2713 // pointer and the mask. The polls pointer must be read before the 2714 // mask!!! (See above) 2715 // 2716 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2717 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2718 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2719 TCR_PTR(lck->lk.polls); // volatile load 2720 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2721 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2722 ticket - 1, lck)); 2723 KMP_FSYNC_RELEASING(lck); 2724 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2725 return KMP_LOCK_RELEASED; 2726 } 2727 2728 static int 2729 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2730 { 2731 char const * const func = "omp_unset_lock"; 2732 KMP_MB(); /* in case another processor initialized lock */ 2733 if ( lck->lk.initialized != lck ) { 2734 KMP_FATAL( LockIsUninitialized, func ); 2735 } 2736 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2737 KMP_FATAL( LockNestableUsedAsSimple, func ); 2738 } 2739 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2740 KMP_FATAL( LockUnsettingFree, func ); 2741 } 2742 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) 2743 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { 2744 KMP_FATAL( LockUnsettingSetByAnother, func ); 2745 } 2746 lck->lk.owner_id = 0; 2747 return __kmp_release_drdpa_lock( lck, gtid ); 2748 } 2749 2750 void 2751 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) 2752 { 2753 lck->lk.location = NULL; 2754 lck->lk.mask = 0; 2755 lck->lk.num_polls = 1; 2756 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2757 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2758 lck->lk.cleanup_ticket = 0; 2759 lck->lk.old_polls = NULL; 2760 lck->lk.next_ticket = 0; 2761 lck->lk.now_serving = 0; 2762 lck->lk.owner_id = 0; // no thread owns the lock. 2763 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 2764 lck->lk.initialized = lck; 2765 2766 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2767 } 2768 2769 static void 2770 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2771 { 2772 __kmp_init_drdpa_lock( lck ); 2773 } 2774 2775 void 2776 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) 2777 { 2778 lck->lk.initialized = NULL; 2779 lck->lk.location = NULL; 2780 if (lck->lk.polls != NULL) { 2781 __kmp_free((void *)lck->lk.polls); 2782 lck->lk.polls = NULL; 2783 } 2784 if (lck->lk.old_polls != NULL) { 2785 __kmp_free((void *)lck->lk.old_polls); 2786 lck->lk.old_polls = NULL; 2787 } 2788 lck->lk.mask = 0; 2789 lck->lk.num_polls = 0; 2790 lck->lk.cleanup_ticket = 0; 2791 lck->lk.next_ticket = 0; 2792 lck->lk.now_serving = 0; 2793 lck->lk.owner_id = 0; 2794 lck->lk.depth_locked = -1; 2795 } 2796 2797 static void 2798 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2799 { 2800 char const * const func = "omp_destroy_lock"; 2801 if ( lck->lk.initialized != lck ) { 2802 KMP_FATAL( LockIsUninitialized, func ); 2803 } 2804 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2805 KMP_FATAL( LockNestableUsedAsSimple, func ); 2806 } 2807 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2808 KMP_FATAL( LockStillOwned, func ); 2809 } 2810 __kmp_destroy_drdpa_lock( lck ); 2811 } 2812 2813 2814 // 2815 // nested drdpa ticket locks 2816 // 2817 2818 void 2819 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2820 { 2821 KMP_DEBUG_ASSERT( gtid >= 0 ); 2822 2823 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2824 lck->lk.depth_locked += 1; 2825 } 2826 else { 2827 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2828 KMP_MB(); 2829 lck->lk.depth_locked = 1; 2830 KMP_MB(); 2831 lck->lk.owner_id = gtid + 1; 2832 } 2833 } 2834 2835 static void 2836 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2837 { 2838 char const * const func = "omp_set_nest_lock"; 2839 if ( lck->lk.initialized != lck ) { 2840 KMP_FATAL( LockIsUninitialized, func ); 2841 } 2842 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2843 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2844 } 2845 __kmp_acquire_nested_drdpa_lock( lck, gtid ); 2846 } 2847 2848 int 2849 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2850 { 2851 int retval; 2852 2853 KMP_DEBUG_ASSERT( gtid >= 0 ); 2854 2855 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2856 retval = ++lck->lk.depth_locked; 2857 } 2858 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { 2859 retval = 0; 2860 } 2861 else { 2862 KMP_MB(); 2863 retval = lck->lk.depth_locked = 1; 2864 KMP_MB(); 2865 lck->lk.owner_id = gtid + 1; 2866 } 2867 return retval; 2868 } 2869 2870 static int 2871 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2872 { 2873 char const * const func = "omp_test_nest_lock"; 2874 if ( lck->lk.initialized != lck ) { 2875 KMP_FATAL( LockIsUninitialized, func ); 2876 } 2877 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2878 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2879 } 2880 return __kmp_test_nested_drdpa_lock( lck, gtid ); 2881 } 2882 2883 int 2884 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2885 { 2886 KMP_DEBUG_ASSERT( gtid >= 0 ); 2887 2888 KMP_MB(); 2889 if ( --(lck->lk.depth_locked) == 0 ) { 2890 KMP_MB(); 2891 lck->lk.owner_id = 0; 2892 __kmp_release_drdpa_lock( lck, gtid ); 2893 return KMP_LOCK_RELEASED; 2894 } 2895 return KMP_LOCK_STILL_HELD; 2896 } 2897 2898 static int 2899 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2900 { 2901 char const * const func = "omp_unset_nest_lock"; 2902 KMP_MB(); /* in case another processor initialized lock */ 2903 if ( lck->lk.initialized != lck ) { 2904 KMP_FATAL( LockIsUninitialized, func ); 2905 } 2906 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2907 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2908 } 2909 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2910 KMP_FATAL( LockUnsettingFree, func ); 2911 } 2912 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { 2913 KMP_FATAL( LockUnsettingSetByAnother, func ); 2914 } 2915 return __kmp_release_nested_drdpa_lock( lck, gtid ); 2916 } 2917 2918 void 2919 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) 2920 { 2921 __kmp_init_drdpa_lock( lck ); 2922 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2923 } 2924 2925 static void 2926 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2927 { 2928 __kmp_init_nested_drdpa_lock( lck ); 2929 } 2930 2931 void 2932 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) 2933 { 2934 __kmp_destroy_drdpa_lock( lck ); 2935 lck->lk.depth_locked = 0; 2936 } 2937 2938 static void 2939 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2940 { 2941 char const * const func = "omp_destroy_nest_lock"; 2942 if ( lck->lk.initialized != lck ) { 2943 KMP_FATAL( LockIsUninitialized, func ); 2944 } 2945 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2946 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2947 } 2948 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2949 KMP_FATAL( LockStillOwned, func ); 2950 } 2951 __kmp_destroy_nested_drdpa_lock( lck ); 2952 } 2953 2954 2955 // 2956 // access functions to fields which don't exist for all lock kinds. 2957 // 2958 2959 static int 2960 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) 2961 { 2962 return lck == lck->lk.initialized; 2963 } 2964 2965 static const ident_t * 2966 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) 2967 { 2968 return lck->lk.location; 2969 } 2970 2971 static void 2972 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) 2973 { 2974 lck->lk.location = loc; 2975 } 2976 2977 static kmp_lock_flags_t 2978 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) 2979 { 2980 return lck->lk.flags; 2981 } 2982 2983 static void 2984 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) 2985 { 2986 lck->lk.flags = flags; 2987 } 2988 2989 #if KMP_USE_DYNAMIC_LOCK 2990 2991 // Definitions of lock hints. 2992 # ifndef __OMP_H 2993 typedef enum kmp_lock_hint_t { 2994 kmp_lock_hint_none = 0, 2995 kmp_lock_hint_contended, 2996 kmp_lock_hint_uncontended, 2997 kmp_lock_hint_nonspeculative, 2998 kmp_lock_hint_speculative, 2999 kmp_lock_hint_adaptive, 3000 } kmp_lock_hint_t; 3001 # endif 3002 3003 // Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word. 3004 #define expand_init_lock(l, a) \ 3005 static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \ 3006 *lck = DYNA_LOCK_FREE(l); \ 3007 KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \ 3008 } 3009 FOREACH_D_LOCK(expand_init_lock, 0) 3010 #undef expand_init_lock 3011 3012 #if DYNA_HAS_HLE 3013 3014 // HLE lock functions - imported from the testbed runtime. 3015 #if KMP_MIC 3016 # define machine_pause() _mm_delay_32(10) // TODO: find the right argument 3017 #else 3018 # define machine_pause() _mm_pause() 3019 #endif 3020 #define HLE_ACQUIRE ".byte 0xf2;" 3021 #define HLE_RELEASE ".byte 0xf3;" 3022 3023 static inline kmp_uint32 3024 swap4(kmp_uint32 volatile *p, kmp_uint32 v) 3025 { 3026 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" 3027 : "+r"(v), "+m"(*p) 3028 : 3029 : "memory"); 3030 return v; 3031 } 3032 3033 static void 3034 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) 3035 { 3036 *lck = 0; 3037 } 3038 3039 static void 3040 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3041 { 3042 // Use gtid for DYNA_LOCK_BUSY if necessary 3043 if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) { 3044 int delay = 1; 3045 do { 3046 while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) { 3047 for (int i = delay; i != 0; --i) 3048 machine_pause(); 3049 delay = ((delay << 1) | 1) & 7; 3050 } 3051 } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)); 3052 } 3053 } 3054 3055 static void 3056 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3057 { 3058 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 3059 } 3060 3061 static void 3062 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3063 { 3064 __asm__ volatile(HLE_RELEASE "movl %1,%0" 3065 : "=m"(*lck) 3066 : "r"(DYNA_LOCK_FREE(hle)) 3067 : "memory"); 3068 } 3069 3070 static void 3071 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3072 { 3073 __kmp_release_hle_lock(lck, gtid); // TODO: add checks 3074 } 3075 3076 static int 3077 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3078 { 3079 return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle); 3080 } 3081 3082 static int 3083 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3084 { 3085 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 3086 } 3087 3088 #endif // DYNA_HAS_HLE 3089 3090 // Entry functions for indirect locks (first element of direct_*_ops[]). 3091 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); 3092 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); 3093 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3094 static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3095 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3096 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3097 static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3098 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3099 3100 // 3101 // Jump tables for the indirect lock functions. 3102 // Only fill in the odd entries, that avoids the need to shift out the low bit. 3103 // 3104 #define expand_func0(l, op) 0,op##_##l##_##lock, 3105 void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) 3106 = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) }; 3107 3108 #define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock, 3109 void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *) 3110 = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) }; 3111 3112 // Differentiates *lock and *lock_with_checks. 3113 #define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, 3114 #define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3115 static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3116 = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) }, 3117 { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } }; 3118 static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3119 = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) }, 3120 { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } }; 3121 3122 #define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, 3123 #define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3124 static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3125 = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) }, 3126 { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } }; 3127 3128 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3129 void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3130 void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3131 int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3132 3133 // 3134 // Jump tables for the indirect lock functions. 3135 // 3136 #define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, 3137 void (*__kmp_indirect_init_ops[])(kmp_user_lock_p) 3138 = { FOREACH_I_LOCK(expand_func4, init) }; 3139 void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p) 3140 = { FOREACH_I_LOCK(expand_func4, destroy) }; 3141 3142 // Differentiates *lock and *lock_with_checks. 3143 #define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3144 #define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3145 static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3146 = { { FOREACH_I_LOCK(expand_func5, acquire) }, 3147 { FOREACH_I_LOCK(expand_func5c, acquire) } }; 3148 static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3149 = { { FOREACH_I_LOCK(expand_func5, release) }, 3150 { FOREACH_I_LOCK(expand_func5c, release) } }; 3151 3152 #define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3153 #define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3154 static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3155 = { { FOREACH_I_LOCK(expand_func6, test) }, 3156 { FOREACH_I_LOCK(expand_func6c, test) } }; 3157 3158 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3159 void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0; 3160 void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0; 3161 int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0; 3162 3163 // Lock index table. 3164 kmp_indirect_lock_t **__kmp_indirect_lock_table; 3165 kmp_lock_index_t __kmp_indirect_lock_table_size; 3166 kmp_lock_index_t __kmp_indirect_lock_table_next; 3167 3168 // Size of indirect locks. 3169 static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = { 3170 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), 3171 #if KMP_USE_ADAPTIVE_LOCKS 3172 sizeof(kmp_adaptive_lock_t), 3173 #endif 3174 sizeof(kmp_drdpa_lock_t), 3175 sizeof(kmp_tas_lock_t), 3176 #if DYNA_HAS_FUTEX 3177 sizeof(kmp_futex_lock_t), 3178 #endif 3179 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), 3180 sizeof(kmp_drdpa_lock_t) 3181 }; 3182 3183 // Jump tables for lock accessor/modifier. 3184 void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; 3185 void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; 3186 const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3187 kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3188 3189 // Use different lock pools for different lock types. 3190 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 }; 3191 3192 // Inserts the given lock ptr to the lock table. 3193 kmp_lock_index_t 3194 __kmp_insert_indirect_lock(kmp_indirect_lock_t *lck) 3195 { 3196 kmp_lock_index_t next = __kmp_indirect_lock_table_next; 3197 // Check capacity and double the size if required 3198 if (next >= __kmp_indirect_lock_table_size) { 3199 kmp_lock_index_t i; 3200 kmp_lock_index_t size = __kmp_indirect_lock_table_size; 3201 kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table; 3202 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *)); 3203 KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *)); 3204 __kmp_free(old_table); 3205 __kmp_indirect_lock_table_size = 2*next; 3206 } 3207 // Insert lck to the table and return the index. 3208 __kmp_indirect_lock_table[next] = lck; 3209 __kmp_indirect_lock_table_next++; 3210 return next; 3211 } 3212 3213 // User lock allocator for dynamically dispatched locks. 3214 kmp_indirect_lock_t * 3215 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) 3216 { 3217 kmp_indirect_lock_t *lck; 3218 kmp_lock_index_t idx; 3219 3220 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3221 3222 if (__kmp_indirect_lock_pool[tag] != NULL) { 3223 lck = __kmp_indirect_lock_pool[tag]; 3224 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3225 idx = lck->lock->pool.index; 3226 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3227 } else { 3228 lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t)); 3229 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3230 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3231 idx = __kmp_insert_indirect_lock(lck); 3232 } 3233 3234 __kmp_release_lock(&__kmp_global_lock, gtid); 3235 3236 lck->type = tag; 3237 3238 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3239 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. 3240 } else { 3241 *((kmp_indirect_lock_t **)user_lock) = lck; 3242 } 3243 3244 return lck; 3245 } 3246 3247 // User lock lookup for dynamically dispatched locks. 3248 static __forceinline 3249 kmp_indirect_lock_t * 3250 __kmp_lookup_indirect_lock(void **user_lock, const char *func) 3251 { 3252 if (__kmp_env_consistency_check) { 3253 kmp_indirect_lock_t *lck = NULL; 3254 if (user_lock == NULL) { 3255 KMP_FATAL(LockIsUninitialized, func); 3256 } 3257 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3258 kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock); 3259 if (idx < 0 || idx >= __kmp_indirect_lock_table_size) { 3260 KMP_FATAL(LockIsUninitialized, func); 3261 } 3262 lck = __kmp_indirect_lock_table[idx]; 3263 } else { 3264 lck = *((kmp_indirect_lock_t **)user_lock); 3265 } 3266 if (lck == NULL) { 3267 KMP_FATAL(LockIsUninitialized, func); 3268 } 3269 return lck; 3270 } else { 3271 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3272 return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)]; 3273 } else { 3274 return *((kmp_indirect_lock_t **)user_lock); 3275 } 3276 } 3277 } 3278 3279 static void 3280 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) 3281 { 3282 #if KMP_USE_ADAPTIVE_LOCKS 3283 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3284 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3285 seq = lockseq_queuing; 3286 } 3287 #endif 3288 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq); 3289 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3290 DYNA_I_LOCK_FUNC(l, init)(l->lock); 3291 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type)); 3292 } 3293 3294 static void 3295 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) 3296 { 3297 kmp_uint32 gtid = __kmp_entry_gtid(); 3298 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3299 DYNA_I_LOCK_FUNC(l, destroy)(l->lock); 3300 kmp_indirect_locktag_t tag = l->type; 3301 3302 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3303 3304 // Use the base lock's space to keep the pool chain. 3305 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3306 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3307 l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock); 3308 } 3309 __kmp_indirect_lock_pool[tag] = l; 3310 3311 __kmp_release_lock(&__kmp_global_lock, gtid); 3312 } 3313 3314 static void 3315 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3316 { 3317 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3318 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); 3319 } 3320 3321 static void 3322 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3323 { 3324 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3325 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3326 } 3327 3328 static int 3329 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3330 { 3331 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3332 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); 3333 } 3334 3335 static void 3336 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3337 { 3338 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3339 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); 3340 } 3341 3342 static void 3343 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3344 { 3345 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3346 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3347 } 3348 3349 static int 3350 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3351 { 3352 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3353 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); 3354 } 3355 3356 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3357 3358 // Initialize a hinted lock. 3359 void 3360 __kmp_init_lock_hinted(void **lock, int hint) 3361 { 3362 kmp_dyna_lockseq_t seq; 3363 switch (hint) { 3364 case kmp_lock_hint_uncontended: 3365 seq = lockseq_tas; 3366 break; 3367 case kmp_lock_hint_speculative: 3368 #if DYNA_HAS_HLE 3369 seq = lockseq_hle; 3370 #else 3371 seq = lockseq_tas; 3372 #endif 3373 break; 3374 case kmp_lock_hint_adaptive: 3375 #if KMP_USE_ADAPTIVE_LOCKS 3376 seq = lockseq_adaptive; 3377 #else 3378 seq = lockseq_queuing; 3379 #endif 3380 break; 3381 // Defaults to queuing locks. 3382 case kmp_lock_hint_contended: 3383 case kmp_lock_hint_nonspeculative: 3384 default: 3385 seq = lockseq_queuing; 3386 break; 3387 } 3388 if (DYNA_IS_D_LOCK(seq)) { 3389 DYNA_INIT_D_LOCK(lock, seq); 3390 #if USE_ITT_BUILD 3391 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 3392 #endif 3393 } else { 3394 DYNA_INIT_I_LOCK(lock, seq); 3395 #if USE_ITT_BUILD 3396 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); 3397 __kmp_itt_lock_creating(ilk->lock, NULL); 3398 #endif 3399 } 3400 } 3401 3402 // This is used only in kmp_error.c when consistency checking is on. 3403 kmp_int32 3404 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) 3405 { 3406 switch (seq) { 3407 case lockseq_tas: 3408 case lockseq_nested_tas: 3409 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3410 #if DYNA_HAS_FUTEX 3411 case lockseq_futex: 3412 case lockseq_nested_futex: 3413 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3414 #endif 3415 case lockseq_ticket: 3416 case lockseq_nested_ticket: 3417 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3418 case lockseq_queuing: 3419 case lockseq_nested_queuing: 3420 #if KMP_USE_ADAPTIVE_LOCKS 3421 case lockseq_adaptive: 3422 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3423 #endif 3424 case lockseq_drdpa: 3425 case lockseq_nested_drdpa: 3426 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3427 default: 3428 return 0; 3429 } 3430 } 3431 3432 // The value initialized from KMP_LOCK_KIND needs to be translated to its 3433 // nested version. 3434 void 3435 __kmp_init_nest_lock_hinted(void **lock, int hint) 3436 { 3437 kmp_dyna_lockseq_t seq; 3438 switch (hint) { 3439 case kmp_lock_hint_uncontended: 3440 seq = lockseq_nested_tas; 3441 break; 3442 // Defaults to queuing locks. 3443 case kmp_lock_hint_contended: 3444 case kmp_lock_hint_nonspeculative: 3445 default: 3446 seq = lockseq_nested_queuing; 3447 break; 3448 } 3449 DYNA_INIT_I_LOCK(lock, seq); 3450 #if USE_ITT_BUILD 3451 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); 3452 __kmp_itt_lock_creating(ilk->lock, NULL); 3453 #endif 3454 } 3455 3456 // Initializes the lock table for indirect locks. 3457 static void 3458 __kmp_init_indirect_lock_table() 3459 { 3460 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024); 3461 __kmp_indirect_lock_table_size = 1024; 3462 __kmp_indirect_lock_table_next = 0; 3463 } 3464 3465 #if KMP_USE_ADAPTIVE_LOCKS 3466 # define init_lock_func(table, expand) { \ 3467 table[locktag_ticket] = expand(ticket); \ 3468 table[locktag_queuing] = expand(queuing); \ 3469 table[locktag_adaptive] = expand(queuing); \ 3470 table[locktag_drdpa] = expand(drdpa); \ 3471 table[locktag_nested_ticket] = expand(ticket); \ 3472 table[locktag_nested_queuing] = expand(queuing); \ 3473 table[locktag_nested_drdpa] = expand(drdpa); \ 3474 } 3475 #else 3476 # define init_lock_func(table, expand) { \ 3477 table[locktag_ticket] = expand(ticket); \ 3478 table[locktag_queuing] = expand(queuing); \ 3479 table[locktag_drdpa] = expand(drdpa); \ 3480 table[locktag_nested_ticket] = expand(ticket); \ 3481 table[locktag_nested_queuing] = expand(queuing); \ 3482 table[locktag_nested_drdpa] = expand(drdpa); \ 3483 } 3484 #endif // KMP_USE_ADAPTIVE_LOCKS 3485 3486 // Initializes data for dynamic user locks. 3487 void 3488 __kmp_init_dynamic_user_locks() 3489 { 3490 // Initialize jump table location 3491 int offset = (__kmp_env_consistency_check)? 1: 0; 3492 __kmp_direct_set_ops = direct_set_tab[offset]; 3493 __kmp_direct_unset_ops = direct_unset_tab[offset]; 3494 __kmp_direct_test_ops = direct_test_tab[offset]; 3495 __kmp_indirect_set_ops = indirect_set_tab[offset]; 3496 __kmp_indirect_unset_ops = indirect_unset_tab[offset]; 3497 __kmp_indirect_test_ops = indirect_test_tab[offset]; 3498 __kmp_init_indirect_lock_table(); 3499 3500 // Initialize lock accessor/modifier 3501 // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe. 3502 #define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location 3503 init_lock_func(__kmp_indirect_set_location, expand_func); 3504 #undef expand_func 3505 #define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags 3506 init_lock_func(__kmp_indirect_set_flags, expand_func); 3507 #undef expand_func 3508 #define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location 3509 init_lock_func(__kmp_indirect_get_location, expand_func); 3510 #undef expand_func 3511 #define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags 3512 init_lock_func(__kmp_indirect_get_flags, expand_func); 3513 #undef expand_func 3514 3515 __kmp_init_user_locks = TRUE; 3516 } 3517 3518 // Clean up the lock table. 3519 void 3520 __kmp_cleanup_indirect_user_locks() 3521 { 3522 kmp_lock_index_t i; 3523 int k; 3524 3525 // Clean up locks in the pools first (they were already destroyed before going into the pools). 3526 for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) { 3527 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; 3528 while (l != NULL) { 3529 kmp_indirect_lock_t *ll = l; 3530 l = (kmp_indirect_lock_t *)l->lock->pool.next; 3531 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3532 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL; 3533 } 3534 __kmp_free(ll->lock); 3535 __kmp_free(ll); 3536 } 3537 } 3538 // Clean up the remaining undestroyed locks. 3539 for (i = 0; i < __kmp_indirect_lock_table_next; i++) { 3540 kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i]; 3541 if (l != NULL) { 3542 // Locks not destroyed explicitly need to be destroyed here. 3543 DYNA_I_LOCK_FUNC(l, destroy)(l->lock); 3544 __kmp_free(l->lock); 3545 __kmp_free(l); 3546 } 3547 } 3548 // Free the table 3549 __kmp_free(__kmp_indirect_lock_table); 3550 3551 __kmp_init_user_locks = FALSE; 3552 } 3553 3554 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3555 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3556 3557 #else // KMP_USE_DYNAMIC_LOCK 3558 3559 /* ------------------------------------------------------------------------ */ 3560 /* user locks 3561 * 3562 * They are implemented as a table of function pointers which are set to the 3563 * lock functions of the appropriate kind, once that has been determined. 3564 */ 3565 3566 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3567 3568 size_t __kmp_base_user_lock_size = 0; 3569 size_t __kmp_user_lock_size = 0; 3570 3571 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL; 3572 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3573 3574 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3575 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3576 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3577 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL; 3578 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3579 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3580 3581 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3582 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3583 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3584 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3585 3586 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL; 3587 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL; 3588 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL; 3589 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL; 3590 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL; 3591 3592 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) 3593 { 3594 switch ( user_lock_kind ) { 3595 case lk_default: 3596 default: 3597 KMP_ASSERT( 0 ); 3598 3599 case lk_tas: { 3600 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t ); 3601 __kmp_user_lock_size = sizeof( kmp_tas_lock_t ); 3602 3603 __kmp_get_user_lock_owner_ = 3604 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3605 ( &__kmp_get_tas_lock_owner ); 3606 3607 if ( __kmp_env_consistency_check ) { 3608 KMP_BIND_USER_LOCK_WITH_CHECKS(tas); 3609 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); 3610 } 3611 else { 3612 KMP_BIND_USER_LOCK(tas); 3613 KMP_BIND_NESTED_USER_LOCK(tas); 3614 } 3615 3616 __kmp_destroy_user_lock_ = 3617 ( void ( * )( kmp_user_lock_p ) ) 3618 ( &__kmp_destroy_tas_lock ); 3619 3620 __kmp_is_user_lock_initialized_ = 3621 ( int ( * )( kmp_user_lock_p ) ) NULL; 3622 3623 __kmp_get_user_lock_location_ = 3624 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; 3625 3626 __kmp_set_user_lock_location_ = 3627 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; 3628 3629 __kmp_get_user_lock_flags_ = 3630 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; 3631 3632 __kmp_set_user_lock_flags_ = 3633 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; 3634 } 3635 break; 3636 3637 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 3638 3639 case lk_futex: { 3640 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t ); 3641 __kmp_user_lock_size = sizeof( kmp_futex_lock_t ); 3642 3643 __kmp_get_user_lock_owner_ = 3644 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3645 ( &__kmp_get_futex_lock_owner ); 3646 3647 if ( __kmp_env_consistency_check ) { 3648 KMP_BIND_USER_LOCK_WITH_CHECKS(futex); 3649 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); 3650 } 3651 else { 3652 KMP_BIND_USER_LOCK(futex); 3653 KMP_BIND_NESTED_USER_LOCK(futex); 3654 } 3655 3656 __kmp_destroy_user_lock_ = 3657 ( void ( * )( kmp_user_lock_p ) ) 3658 ( &__kmp_destroy_futex_lock ); 3659 3660 __kmp_is_user_lock_initialized_ = 3661 ( int ( * )( kmp_user_lock_p ) ) NULL; 3662 3663 __kmp_get_user_lock_location_ = 3664 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; 3665 3666 __kmp_set_user_lock_location_ = 3667 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; 3668 3669 __kmp_get_user_lock_flags_ = 3670 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; 3671 3672 __kmp_set_user_lock_flags_ = 3673 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; 3674 } 3675 break; 3676 3677 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 3678 3679 case lk_ticket: { 3680 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t ); 3681 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t ); 3682 3683 __kmp_get_user_lock_owner_ = 3684 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3685 ( &__kmp_get_ticket_lock_owner ); 3686 3687 if ( __kmp_env_consistency_check ) { 3688 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); 3689 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); 3690 } 3691 else { 3692 KMP_BIND_USER_LOCK(ticket); 3693 KMP_BIND_NESTED_USER_LOCK(ticket); 3694 } 3695 3696 __kmp_destroy_user_lock_ = 3697 ( void ( * )( kmp_user_lock_p ) ) 3698 ( &__kmp_destroy_ticket_lock ); 3699 3700 __kmp_is_user_lock_initialized_ = 3701 ( int ( * )( kmp_user_lock_p ) ) 3702 ( &__kmp_is_ticket_lock_initialized ); 3703 3704 __kmp_get_user_lock_location_ = 3705 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3706 ( &__kmp_get_ticket_lock_location ); 3707 3708 __kmp_set_user_lock_location_ = 3709 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3710 ( &__kmp_set_ticket_lock_location ); 3711 3712 __kmp_get_user_lock_flags_ = 3713 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3714 ( &__kmp_get_ticket_lock_flags ); 3715 3716 __kmp_set_user_lock_flags_ = 3717 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3718 ( &__kmp_set_ticket_lock_flags ); 3719 } 3720 break; 3721 3722 case lk_queuing: { 3723 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t ); 3724 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t ); 3725 3726 __kmp_get_user_lock_owner_ = 3727 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3728 ( &__kmp_get_queuing_lock_owner ); 3729 3730 if ( __kmp_env_consistency_check ) { 3731 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); 3732 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); 3733 } 3734 else { 3735 KMP_BIND_USER_LOCK(queuing); 3736 KMP_BIND_NESTED_USER_LOCK(queuing); 3737 } 3738 3739 __kmp_destroy_user_lock_ = 3740 ( void ( * )( kmp_user_lock_p ) ) 3741 ( &__kmp_destroy_queuing_lock ); 3742 3743 __kmp_is_user_lock_initialized_ = 3744 ( int ( * )( kmp_user_lock_p ) ) 3745 ( &__kmp_is_queuing_lock_initialized ); 3746 3747 __kmp_get_user_lock_location_ = 3748 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3749 ( &__kmp_get_queuing_lock_location ); 3750 3751 __kmp_set_user_lock_location_ = 3752 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3753 ( &__kmp_set_queuing_lock_location ); 3754 3755 __kmp_get_user_lock_flags_ = 3756 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3757 ( &__kmp_get_queuing_lock_flags ); 3758 3759 __kmp_set_user_lock_flags_ = 3760 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3761 ( &__kmp_set_queuing_lock_flags ); 3762 } 3763 break; 3764 3765 #if KMP_USE_ADAPTIVE_LOCKS 3766 case lk_adaptive: { 3767 __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t ); 3768 __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t ); 3769 3770 __kmp_get_user_lock_owner_ = 3771 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3772 ( &__kmp_get_queuing_lock_owner ); 3773 3774 if ( __kmp_env_consistency_check ) { 3775 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); 3776 } 3777 else { 3778 KMP_BIND_USER_LOCK(adaptive); 3779 } 3780 3781 __kmp_destroy_user_lock_ = 3782 ( void ( * )( kmp_user_lock_p ) ) 3783 ( &__kmp_destroy_adaptive_lock ); 3784 3785 __kmp_is_user_lock_initialized_ = 3786 ( int ( * )( kmp_user_lock_p ) ) 3787 ( &__kmp_is_queuing_lock_initialized ); 3788 3789 __kmp_get_user_lock_location_ = 3790 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3791 ( &__kmp_get_queuing_lock_location ); 3792 3793 __kmp_set_user_lock_location_ = 3794 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3795 ( &__kmp_set_queuing_lock_location ); 3796 3797 __kmp_get_user_lock_flags_ = 3798 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3799 ( &__kmp_get_queuing_lock_flags ); 3800 3801 __kmp_set_user_lock_flags_ = 3802 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3803 ( &__kmp_set_queuing_lock_flags ); 3804 3805 } 3806 break; 3807 #endif // KMP_USE_ADAPTIVE_LOCKS 3808 3809 case lk_drdpa: { 3810 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t ); 3811 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t ); 3812 3813 __kmp_get_user_lock_owner_ = 3814 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3815 ( &__kmp_get_drdpa_lock_owner ); 3816 3817 if ( __kmp_env_consistency_check ) { 3818 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa); 3819 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa); 3820 } 3821 else { 3822 KMP_BIND_USER_LOCK(drdpa); 3823 KMP_BIND_NESTED_USER_LOCK(drdpa); 3824 } 3825 3826 __kmp_destroy_user_lock_ = 3827 ( void ( * )( kmp_user_lock_p ) ) 3828 ( &__kmp_destroy_drdpa_lock ); 3829 3830 __kmp_is_user_lock_initialized_ = 3831 ( int ( * )( kmp_user_lock_p ) ) 3832 ( &__kmp_is_drdpa_lock_initialized ); 3833 3834 __kmp_get_user_lock_location_ = 3835 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3836 ( &__kmp_get_drdpa_lock_location ); 3837 3838 __kmp_set_user_lock_location_ = 3839 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3840 ( &__kmp_set_drdpa_lock_location ); 3841 3842 __kmp_get_user_lock_flags_ = 3843 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3844 ( &__kmp_get_drdpa_lock_flags ); 3845 3846 __kmp_set_user_lock_flags_ = 3847 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3848 ( &__kmp_set_drdpa_lock_flags ); 3849 } 3850 break; 3851 } 3852 } 3853 3854 3855 // ---------------------------------------------------------------------------- 3856 // User lock table & lock allocation 3857 3858 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL }; 3859 kmp_user_lock_p __kmp_lock_pool = NULL; 3860 3861 // Lock block-allocation support. 3862 kmp_block_of_locks* __kmp_lock_blocks = NULL; 3863 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3864 3865 static kmp_lock_index_t 3866 __kmp_lock_table_insert( kmp_user_lock_p lck ) 3867 { 3868 // Assume that kmp_global_lock is held upon entry/exit. 3869 kmp_lock_index_t index; 3870 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) { 3871 kmp_lock_index_t size; 3872 kmp_user_lock_p *table; 3873 // Reallocate lock table. 3874 if ( __kmp_user_lock_table.allocated == 0 ) { 3875 size = 1024; 3876 } 3877 else { 3878 size = __kmp_user_lock_table.allocated * 2; 3879 } 3880 table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size ); 3881 KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) ); 3882 table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table; 3883 // We cannot free the previous table now, since it may be in use by other 3884 // threads. So save the pointer to the previous table in in the first element of the 3885 // new table. All the tables will be organized into a list, and could be freed when 3886 // library shutting down. 3887 __kmp_user_lock_table.table = table; 3888 __kmp_user_lock_table.allocated = size; 3889 } 3890 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated ); 3891 index = __kmp_user_lock_table.used; 3892 __kmp_user_lock_table.table[ index ] = lck; 3893 ++ __kmp_user_lock_table.used; 3894 return index; 3895 } 3896 3897 static kmp_user_lock_p 3898 __kmp_lock_block_allocate() 3899 { 3900 // Assume that kmp_global_lock is held upon entry/exit. 3901 static int last_index = 0; 3902 if ( ( last_index >= __kmp_num_locks_in_block ) 3903 || ( __kmp_lock_blocks == NULL ) ) { 3904 // Restart the index. 3905 last_index = 0; 3906 // Need to allocate a new block. 3907 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); 3908 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; 3909 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) ); 3910 // Set up the new block. 3911 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]); 3912 new_block->next_block = __kmp_lock_blocks; 3913 new_block->locks = (void *)buffer; 3914 // Publish the new block. 3915 KMP_MB(); 3916 __kmp_lock_blocks = new_block; 3917 } 3918 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) ) 3919 [ last_index * __kmp_user_lock_size ] ) ); 3920 last_index++; 3921 return ret; 3922 } 3923 3924 // 3925 // Get memory for a lock. It may be freshly allocated memory or reused memory 3926 // from lock pool. 3927 // 3928 kmp_user_lock_p 3929 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, 3930 kmp_lock_flags_t flags ) 3931 { 3932 kmp_user_lock_p lck; 3933 kmp_lock_index_t index; 3934 KMP_DEBUG_ASSERT( user_lock ); 3935 3936 __kmp_acquire_lock( &__kmp_global_lock, gtid ); 3937 3938 if ( __kmp_lock_pool == NULL ) { 3939 // Lock pool is empty. Allocate new memory. 3940 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point. 3941 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size ); 3942 } 3943 else { 3944 lck = __kmp_lock_block_allocate(); 3945 } 3946 3947 // Insert lock in the table so that it can be freed in __kmp_cleanup, 3948 // and debugger has info on all allocated locks. 3949 index = __kmp_lock_table_insert( lck ); 3950 } 3951 else { 3952 // Pick up lock from pool. 3953 lck = __kmp_lock_pool; 3954 index = __kmp_lock_pool->pool.index; 3955 __kmp_lock_pool = __kmp_lock_pool->pool.next; 3956 } 3957 3958 // 3959 // We could potentially differentiate between nested and regular locks 3960 // here, and do the lock table lookup for regular locks only. 3961 // 3962 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { 3963 * ( (kmp_lock_index_t *) user_lock ) = index; 3964 } 3965 else { 3966 * ( (kmp_user_lock_p *) user_lock ) = lck; 3967 } 3968 3969 // mark the lock if it is critical section lock. 3970 __kmp_set_user_lock_flags( lck, flags ); 3971 3972 __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper 3973 3974 return lck; 3975 } 3976 3977 // Put lock's memory to pool for reusing. 3978 void 3979 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ) 3980 { 3981 KMP_DEBUG_ASSERT( user_lock != NULL ); 3982 KMP_DEBUG_ASSERT( lck != NULL ); 3983 3984 __kmp_acquire_lock( & __kmp_global_lock, gtid ); 3985 3986 lck->pool.next = __kmp_lock_pool; 3987 __kmp_lock_pool = lck; 3988 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { 3989 kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock ); 3990 KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used ); 3991 lck->pool.index = index; 3992 } 3993 3994 __kmp_release_lock( & __kmp_global_lock, gtid ); 3995 } 3996 3997 kmp_user_lock_p 3998 __kmp_lookup_user_lock( void **user_lock, char const *func ) 3999 { 4000 kmp_user_lock_p lck = NULL; 4001 4002 if ( __kmp_env_consistency_check ) { 4003 if ( user_lock == NULL ) { 4004 KMP_FATAL( LockIsUninitialized, func ); 4005 } 4006 } 4007 4008 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { 4009 kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock ); 4010 if ( __kmp_env_consistency_check ) { 4011 if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) { 4012 KMP_FATAL( LockIsUninitialized, func ); 4013 } 4014 } 4015 KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used ); 4016 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); 4017 lck = __kmp_user_lock_table.table[index]; 4018 } 4019 else { 4020 lck = *( (kmp_user_lock_p *)user_lock ); 4021 } 4022 4023 if ( __kmp_env_consistency_check ) { 4024 if ( lck == NULL ) { 4025 KMP_FATAL( LockIsUninitialized, func ); 4026 } 4027 } 4028 4029 return lck; 4030 } 4031 4032 void 4033 __kmp_cleanup_user_locks( void ) 4034 { 4035 // 4036 // Reset lock pool. Do not worry about lock in the pool -- we will free 4037 // them when iterating through lock table (it includes all the locks, 4038 // dead or alive). 4039 // 4040 __kmp_lock_pool = NULL; 4041 4042 #define IS_CRITICAL(lck) \ 4043 ( ( __kmp_get_user_lock_flags_ != NULL ) && \ 4044 ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) ) 4045 4046 // 4047 // Loop through lock table, free all locks. 4048 // 4049 // Do not free item [0], it is reserved for lock tables list. 4050 // 4051 // FIXME - we are iterating through a list of (pointers to) objects of 4052 // type union kmp_user_lock, but we have no way of knowing whether the 4053 // base type is currently "pool" or whatever the global user lock type 4054 // is. 4055 // 4056 // We are relying on the fact that for all of the user lock types 4057 // (except "tas"), the first field in the lock struct is the "initialized" 4058 // field, which is set to the address of the lock object itself when 4059 // the lock is initialized. When the union is of type "pool", the 4060 // first field is a pointer to the next object in the free list, which 4061 // will not be the same address as the object itself. 4062 // 4063 // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck ) 4064 // will fail for "pool" objects on the free list. This must happen as 4065 // the "location" field of real user locks overlaps the "index" field 4066 // of "pool" objects. 4067 // 4068 // It would be better to run through the free list, and remove all "pool" 4069 // objects from the lock table before executing this loop. However, 4070 // "pool" objects do not always have their index field set (only on 4071 // lin_32e), and I don't want to search the lock table for the address 4072 // of every "pool" object on the free list. 4073 // 4074 while ( __kmp_user_lock_table.used > 1 ) { 4075 const ident *loc; 4076 4077 // 4078 // reduce __kmp_user_lock_table.used before freeing the lock, 4079 // so that state of locks is consistent 4080 // 4081 kmp_user_lock_p lck = __kmp_user_lock_table.table[ 4082 --__kmp_user_lock_table.used ]; 4083 4084 if ( ( __kmp_is_user_lock_initialized_ != NULL ) && 4085 ( *__kmp_is_user_lock_initialized_ )( lck ) ) { 4086 // 4087 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is 4088 // initialized AND it is NOT a critical section (user is not 4089 // responsible for destroying criticals) AND we know source 4090 // location to report. 4091 // 4092 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) && 4093 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) && 4094 ( loc->psource != NULL ) ) { 4095 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 ); 4096 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line ); 4097 __kmp_str_loc_free( &str_loc); 4098 } 4099 4100 #ifdef KMP_DEBUG 4101 if ( IS_CRITICAL( lck ) ) { 4102 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) ); 4103 } 4104 else { 4105 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) ); 4106 } 4107 #endif // KMP_DEBUG 4108 4109 // 4110 // Cleanup internal lock dynamic resources 4111 // (for drdpa locks particularly). 4112 // 4113 __kmp_destroy_user_lock( lck ); 4114 } 4115 4116 // 4117 // Free the lock if block allocation of locks is not used. 4118 // 4119 if ( __kmp_lock_blocks == NULL ) { 4120 __kmp_free( lck ); 4121 } 4122 } 4123 4124 #undef IS_CRITICAL 4125 4126 // 4127 // delete lock table(s). 4128 // 4129 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; 4130 __kmp_user_lock_table.table = NULL; 4131 __kmp_user_lock_table.allocated = 0; 4132 4133 while ( table_ptr != NULL ) { 4134 // 4135 // In the first element we saved the pointer to the previous 4136 // (smaller) lock table. 4137 // 4138 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] ); 4139 __kmp_free( table_ptr ); 4140 table_ptr = next; 4141 } 4142 4143 // 4144 // Free buffers allocated for blocks of locks. 4145 // 4146 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; 4147 __kmp_lock_blocks = NULL; 4148 4149 while ( block_ptr != NULL ) { 4150 kmp_block_of_locks_t *next = block_ptr->next_block; 4151 __kmp_free( block_ptr->locks ); 4152 // 4153 // *block_ptr itself was allocated at the end of the locks vector. 4154 // 4155 block_ptr = next; 4156 } 4157 4158 TCW_4(__kmp_init_user_locks, FALSE); 4159 } 4160 4161 #endif // KMP_USE_DYNAMIC_LOCK 4162