/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
// we just define the constants here and don't include <futex.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT    0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE    1
# endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32  x, y;

    /* Check to make sure unsigned arithmetic wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}
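// Illustrative note on the check above: it relies on well-defined unsigned
// wraparound.  With x starting at 0xFFFFFFFD and y at 0xFFFFFFFB, the third
// increment takes x through 0xFFFFFFFF and wraps it to 0 while y becomes
// 0xFFFFFFFE, yet (x - y) is still computed modulo 2^32 and remains 2
// (0x00000000 - 0xFFFFFFFE == 2).  The ticket (bakery) lock below depends on
// the same property when next_ticket eventually overflows past now_serving.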
/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//

static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) {
        //
        // FIXME - use exponential backoff here
        //
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_tas_lock_timed_template( lck, gtid );
}

static int
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_tas_lock( lck, gtid );
}

int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}
int
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) );
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_tas_lock( lck, gtid );
}

void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) );
}

static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}

void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}
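//
// Note on the nested variants that follow: the same lock structure is reused,
// with lk.depth_locked distinguishing the two flavors -- it stays -1 for simple
// (non-nestable) locks and counts the nesting depth (0 when free) for nestable
// ones.  For illustration, one plausible sequence for a nestable lock acquired
// repeatedly by its owning thread is:
//
//     acquire  -> depth_locked == 1   (KMP_LOCK_ACQUIRED_FIRST)
//     acquire  -> depth_locked == 2   (KMP_LOCK_ACQUIRED_NEXT)
//     release  -> depth_locked == 1   (KMP_LOCK_STILL_HELD)
//     release  -> depth_locked == 0   (KMP_LOCK_RELEASED, lock actually freed)
//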
//
// nested test and set locks
//

int
__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_tas_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_tas_lock( lck, gtid );
}

int
__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}

int
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_tas_lock( lck, gtid );
}

void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}

void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}


#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

static kmp_int32
__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
{
    return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
}

static inline bool
__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}
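//
// Layout of lk.poll used by the futex lock, as read from the code below (an
// explanatory summary, not a change in behavior): 0 means the lock is free,
// the owner stores (gtid + 1) << 1, and bit 0 is set by a contending thread to
// tell the owner "somebody is (or may be) blocked in futex_wait, so issue a
// futex_wake on release".  A waiter first tries to CAS the lock from free to
// (gtid + 1) << 1; if that fails it sets bit 0 with another CAS and then calls
// futex_wait on the value it observed, so a stale value simply makes the wait
// return immediately and the loop retries.
//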
__forceinline static int
__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    kmp_int32 gtid_code = ( gtid + 1 ) << 1;

    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid_code ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    KMP_FSYNC_PREPARE( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    kmp_int32 poll_val;

    while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex),
      DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) {

        kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1;
        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
           lck, gtid, poll_val, cond ) );

        //
        // NOTE: if you try to use the following condition for this branch
        //
        // if ( poll_val & 1 == 0 )
        //
        // Then the 12.0 compiler has a bug where the following block will
        // always be skipped, regardless of the value of the LSB of poll_val.
        //
        if ( ! cond ) {
            //
            // Try to set the lsb in the poll to indicate to the owner
            // thread that they need to wake this thread up.
            //
            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) {
                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
                  lck, lck->lk.poll, gtid ) );
                continue;
            }
            poll_val |= DYNA_LOCK_BUSY(1, futex);

            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
              lck, lck->lk.poll, gtid ) );
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );

        kmp_int32 rc;
        if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
          poll_val, NULL, NULL, 0 ) ) != 0 ) {
            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
               lck, gtid, poll_val, rc, errno ) );
            continue;
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );
        //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
        // wake call when releasing the lock, as we have no idea how many
        // other threads are in the queue.
        //
        gtid_code |= 1;
    }

    KMP_FSYNC_ACQUIRED( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );
    return KMP_LOCK_ACQUIRED_FIRST;
}
int
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_futex_lock_timed_template( lck, gtid );
}

static int
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_futex_lock( lck, gtid );
}

int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}

int
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);

    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
       lck, gtid, poll_val ) );

    if ( DYNA_LOCK_STRIP(poll_val) & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
           lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 );
    }

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}
static int
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_futex_lock( lck, gtid );
}

void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) );
}

static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}

void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}


//
// nested futex locks
//

int
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_futex_lock( lck, gtid );
}

int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}
int
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_futex_lock( lck, gtid );
}

void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}

void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)


/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

static kmp_uint32
__kmp_bakery_check(kmp_uint value, kmp_uint checker)
{
    register kmp_uint32 pause;

    if (value == checker) {
        return TRUE;
    }
    for (pause = checker - value; pause != 0; --pause);
    return FALSE;
}
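//
// The ticket (bakery) lock below is FIFO-fair: each acquiring thread atomically
// takes a ticket from next_ticket and then waits until now_serving catches up
// to it; release simply advances now_serving by one.  As a sketch of the
// intended behavior (not additional code), if three threads arrive at a free
// lock they draw tickets 0, 1 and 2, the holder of ticket 0 runs first, and
// each release admits exactly the next ticket, so waiters are served in arrival
// order even when next_ticket eventually wraps around 2^32 (the wraparound
// property checked in __kmp_validate_locks).
//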
__forceinline static int
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket;
    KMP_MB();

    my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );

#ifdef USE_LOCK_PROFILE
    if ( TCR_4( lck->lk.now_serving ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST;
    }
    KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
}

static int
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
          my_ticket, next_ticket ) ) {
            KMP_FSYNC_ACQUIRED( lck );
            return TRUE;
        }
    }
    return FALSE;
}

static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}
int
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32  distance;

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );

    KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_ticket_lock( lck, gtid );
}

void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    TCW_4( lck->lk.next_ticket, 0 );
    TCW_4( lck->lk.now_serving, 0 );
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // -1 => not a nested lock.
    lck->lk.initialized = (kmp_ticket_lock *)lck;
}

static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}

void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location    = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}


//
// nested ticket locks
//

int
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}
static int
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_ticket_lock( lck, gtid );
}

int
__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}

int
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_ticket_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_ticket_lock( lck, gtid );
}

void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_nested_ticket_lock( lck );
}

void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_ticket_lock( lck );
}
//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

static const ident_t *
__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/*
 * First the states
 * (head,tail) =  0, 0  means lock is unheld, nobody on queue
 *   UINT_MAX or -1, 0  means lock is held, nobody on queue
 *                h, h  means lock is held or about to transition, 1 element on queue
 *                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *    Acquire(0,0)  = -1 ,0
 *    Release(0,0)  = Error
 *    Acquire(-1,0) =  h ,h    h > 0
 *    Release(-1,0) =  0 ,0
 *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *    Release(h,h)  = -1 ,0    h > 0
 *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *             ^                                      |
 *             |                                      |
 *             +--------------------------------------+
 *
 */
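/*
 * A concrete walk-through of the table above (illustration only; the ids are
 * the gtid+1 values stored in the queue fields).  Suppose thread 3 (id 4)
 * acquires an idle lock and threads 5 and 7 (ids 6 and 8) then contend:
 *
 *    (0,0)  --acquire by id 4-->  (-1,0)   lock held, queue empty
 *    (-1,0) --acquire by id 6-->  (6,6)    one waiter
 *    (6,6)  --acquire by id 8-->  (6,8)    two waiters, 6 links to 8
 *    (6,8)  --release by id 4-->  (8,8)    id 6 dequeued and becomes the owner
 *    (8,8)  --release by id 6-->  (-1,0)   id 8 dequeued, queue empty again
 *    (-1,0) --release by id 8-->  (0,0)    lock free
 */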
\n" ); 1163 1164 i = tc % TRACE_BUF_ELE; 1165 __kmp_printf_no_lock( "%s\n", traces[i] ); 1166 i = (i+1) % TRACE_BUF_ELE; 1167 while ( i != (tc % TRACE_BUF_ELE) ) { 1168 __kmp_printf_no_lock( "%s", traces[i] ); 1169 i = (i+1) % TRACE_BUF_ELE; 1170 } 1171 __kmp_printf_no_lock( "\n" ); 1172 1173 __kmp_printf_no_lock( 1174 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n", 1175 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting, 1176 head_id, tail_id ); 1177 1178 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id ); 1179 1180 if ( lck->lk.head_id >= 1 ) { 1181 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting; 1182 while (t > 0) { 1183 __kmp_printf_no_lock( "-> %d ", t ); 1184 t = __kmp_threads[t-1]->th.th_next_waiting; 1185 } 1186 } 1187 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id ); 1188 __kmp_printf_no_lock( "\n\n" ); 1189 } 1190 1191 #endif /* DEBUG_QUEUING_LOCKS */ 1192 1193 static kmp_int32 1194 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck ) 1195 { 1196 return TCR_4( lck->lk.owner_id ) - 1; 1197 } 1198 1199 static inline bool 1200 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck ) 1201 { 1202 return lck->lk.depth_locked != -1; 1203 } 1204 1205 /* Acquire a lock using a the queuing lock implementation */ 1206 template <bool takeTime> 1207 /* [TLW] The unused template above is left behind because of what BEB believes is a 1208 potential compiler problem with __forceinline. */ 1209 __forceinline static int 1210 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck, 1211 kmp_int32 gtid ) 1212 { 1213 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid ); 1214 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1215 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1216 volatile kmp_uint32 *spin_here_p; 1217 kmp_int32 need_mf = 1; 1218 1219 #if OMPT_SUPPORT 1220 ompt_state_t prev_state = ompt_state_undefined; 1221 #endif 1222 1223 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1224 1225 KMP_FSYNC_PREPARE( lck ); 1226 KMP_DEBUG_ASSERT( this_thr != NULL ); 1227 spin_here_p = & this_thr->th.th_spin_here; 1228 1229 #ifdef DEBUG_QUEUING_LOCKS 1230 TRACE_LOCK( gtid+1, "acq ent" ); 1231 if ( *spin_here_p ) 1232 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1233 if ( this_thr->th.th_next_waiting != 0 ) 1234 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1235 #endif 1236 KMP_DEBUG_ASSERT( !*spin_here_p ); 1237 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1238 1239 1240 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p 1241 that may follow, not just in execution order, but also in visibility order. This way, 1242 when a releasing thread observes the changes to the queue by this thread, it can 1243 rightly assume that spin_here_p has already been set to TRUE, so that when it sets 1244 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p 1245 to FALSE before this thread sets it to TRUE, this thread will hang. 
/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
    volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
    volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
    volatile kmp_uint32 *spin_here_p;
    kmp_int32 need_mf = 1;

#if OMPT_SUPPORT
    ompt_state_t prev_state = ompt_state_undefined;
#endif

    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));

    KMP_FSYNC_PREPARE( lck );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    spin_here_p = & this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "acq ent" );
    if ( *spin_here_p )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !*spin_here_p );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );


    /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
       that may follow, not just in execution order, but also in visibility order.  This way,
       when a releasing thread observes the changes to the queue by this thread, it can
       rightly assume that spin_here_p has already been set to TRUE, so that when it sets
       spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
       to FALSE before this thread sets it to TRUE, this thread will hang.
    */
    *spin_here_p = TRUE;  /* before enqueuing to prevent race */

    while( 1 ) {
        kmp_int32 enqueued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

        switch ( head ) {

            case -1:
            {
#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                              this assignment prevents us from entering the if ( t > 0 )
                              condition in the enqueued case below, which is not necessary for
                              this state transition */

                need_mf = 0;
                /* try (-1,0)->(tid,tid) */
                enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
                  KMP_PACK_64( -1, 0 ),
                  KMP_PACK_64( gtid+1, gtid+1 ) );
#ifdef DEBUG_QUEUING_LOCKS
                if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
#endif
            }
            break;

            default:
            {
                tail = *tail_id_p;
                KMP_DEBUG_ASSERT( tail != gtid + 1 );

#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif

                if ( tail == 0 ) {
                    enqueued = FALSE;
                }
                else {
                    need_mf = 0;
                    /* try (h,t) or (h,h)->(h,tid) */
                    enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );

#ifdef DEBUG_QUEUING_LOCKS
                    if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
#endif
                }
            }
            break;

            case 0: /* empty queue */
            {
                kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                /* try (0,0)->(-1,0) */

                /* only legal transition out of head = 0 is head = -1 with no change to tail */
                grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );

                if ( grabbed_lock ) {

                    *spin_here_p = FALSE;

                    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
                              lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                    TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif

#if OMPT_SUPPORT
                    if (ompt_enabled && prev_state != ompt_state_undefined) {
                        /* change the state before clearing wait_id */
                        this_thr->th.ompt_thread_info.state = prev_state;
                        this_thr->th.ompt_thread_info.wait_id = 0;
                    }
#endif

                    KMP_FSYNC_ACQUIRED( lck );
                    return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
                }
                enqueued = FALSE;
            }
            break;
        }

#if OMPT_SUPPORT
        if (ompt_enabled && prev_state == ompt_state_undefined) {
            /* this thread will spin; set wait_id before entering wait state */
            prev_state = this_thr->th.ompt_thread_info.state;
            this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
            this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
        }
#endif
        if ( enqueued ) {
            if ( tail > 0 ) {
                kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
                KMP_ASSERT( tail_thr != NULL );
                tail_thr->th.th_next_waiting = gtid+1;
                /* corresponding wait for this write in release code */
            }
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));


            /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
             *       throughput only here.
             */
            KMP_MB();
            KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq spin" );

            if ( this_thr->th.th_next_waiting != 0 )
                __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
            KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
                      lck, gtid ));

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq exit 2" );
#endif

#if OMPT_SUPPORT
            /* change the state before clearing wait_id */
            this_thr->th.ompt_thread_info.state = prev_state;
            this_thr->th.ompt_thread_info.wait_id = 0;
#endif

            /* got lock, we were dequeued by the thread that released lock */
            return KMP_LOCK_ACQUIRED_FIRST;
        }

        /* Yield if number of threads > number of logical processors */
        /* ToDo: Not sure why this should only be in oversubscription case,
           maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
        KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) );
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "acq retry" );
#endif

    }
    KMP_ASSERT2( 0, "should not get here" );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
}

static int
__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_queuing_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
    kmp_int32 head;
#ifdef KMP_DEBUG
    kmp_info_t *this_thr;
#endif

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
#ifdef KMP_DEBUG
    this_thr = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
#endif

    head = *head_id_p;

    if ( head == 0 ) { /* nobody on queue, nobody holding */

        /* try (0,0)->(-1,0) */

        if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
            KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
            KMP_FSYNC_ACQUIRED(lck);
            return TRUE;
        }
    }

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
    return FALSE;
}
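/*
 * Explanatory note (behavior unchanged): the test ("trylock") routine above only
 * attempts the uncontended (0,0)->(-1,0) transition.  It never enqueues, so a
 * failed omp_test_lock leaves no trace in the queue and the caller can simply
 * retry or move on; all blocking paths go through the acquire template.
 */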
static int
__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_queuing_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

int
__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    register kmp_info_t *this_thr;
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;

    KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
    this_thr    = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "rel ent" );

    if ( this_thr->th.th_spin_here )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );

    KMP_FSYNC_RELEASING(lck);

    while( 1 ) {
        kmp_int32 dequeued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
        tail = *tail_id_p;
        TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
        if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
        KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */

        if ( head == -1 ) { /* nobody on queue */

            /* try (-1,0)->(0,0) */
            if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
                KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
                  lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
#endif

#if OMPT_SUPPORT
                /* nothing to do - no other thread is trying to shift blame */
#endif

                return KMP_LOCK_RELEASED;
            }
            dequeued = FALSE;

        }
        else {

            tail = *tail_id_p;
            if ( head == tail ) {  /* only one thread on the queue */

#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 );

                /* try (h,h)->(-1,0) */
                dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
                  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
#endif

            }
            else {
                volatile kmp_int32 *waiting_id_p;
                kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
                KMP_DEBUG_ASSERT( head_thr != NULL );
                waiting_id_p = & head_thr->th.th_next_waiting;

                /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

                /* try (h,t)->(h',t) or (t,t) */

                KMP_MB();
                /* make sure enqueuing thread has time to update next waiting thread field */
                *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
#endif
                dequeued = TRUE;
            }
        }

        if ( dequeued ) {
            kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
            KMP_DEBUG_ASSERT( head_thr != NULL );

            /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
            if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
            KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

            /* For clean code only.
             * Thread not released until next statement prevents race with acquire code.
             */
            head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
#endif

            KMP_MB();
            /* reset spin value */
            head_thr->th.th_spin_here = FALSE;

            KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
              lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "rel exit 2" );
#endif
            return KMP_LOCK_RELEASED;
        }
        /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */

#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "rel retry" );
#endif

    } /* while */
    KMP_ASSERT2( 0, "should not get here" );
    return KMP_LOCK_RELEASED;
}
static int
__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_queuing_lock( lck, gtid );
}
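/*
 * Explanatory note on the release path above: ownership is handed directly to
 * the dequeued head thread rather than being dropped and re-contended.  The
 * releasing thread clears head_thr->th.th_spin_here, which is exactly the flag
 * the waiter is spinning on in the acquire template, so the waiter returns from
 * KMP_WAIT_YIELD already holding the lock.  That is why the acquire side must
 * publish spin_here = TRUE before it enqueues itself (see the visibility-order
 * comment in the acquire template).
 */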
void
__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
    lck->lk.initialized = lck;

    KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}

static void
__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
}

void
__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_queuing_lock( lck );
}


//
// nested queuing locks
//

int
__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_queuing_lock( lck, gtid );
}

int
__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_queuing_lock( lck, gtid );
}
int
__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_queuing_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_queuing_lock( lck, gtid );
}

void
__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_nested_queuing_lock( lck );
}

void
__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
{
    __kmp_destroy_queuing_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_queuing_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

static const ident_t *
__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

#if KMP_USE_ADAPTIVE_LOCKS

/*
    RTM Adaptive locks
*/
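//
// Overview (descriptive note): an adaptive lock tries to run the critical
// section speculatively inside an RTM transaction, using the _xbegin/_xend/
// _xabort wrappers defined below, and falls back to really acquiring the
// underlying queuing lock when speculation keeps failing.  The per-lock
// "badness" value together with __kmp_adaptive_backoff_params
// (max_soft_retries, max_badness) governs how eagerly speculation is retried;
// SOFT_ABORT_MASK classifies the abort causes that are worth an immediate
// retry.
//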
// TODO: Use the header for intrinsics below with the compiler 13.0
//#include <immintrin.h>

// Values from the status register after failed speculation.
#define _XBEGIN_STARTED          (~0u)
#define _XABORT_EXPLICIT         (1 << 0)
#define _XABORT_RETRY            (1 << 1)
#define _XABORT_CONFLICT         (1 << 2)
#define _XABORT_CAPACITY         (1 << 3)
#define _XABORT_DEBUG            (1 << 4)
#define _XABORT_NESTED           (1 << 5)
#define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))

// Aborts for which it's worth trying again immediately
#define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#define STRINGIZE_INTERNAL(arg) #arg
#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)

// Access to RTM instructions

/*
  A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
  This is the same definition as the compiler intrinsic that will be supported at some point.
*/
static __inline int _xbegin()
{
    int res = -1;

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86_64
    _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
    }
#else /* IA32 */
    _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
    }
#endif // KMP_ARCH_X86_64
#else
    /* Note that %eax must be noted as killed (clobbered), because
     * the XSR is returned in %eax(%rax) on abort.  Other register
     * values are restored, so don't need to be killed.
     *
     * We must also mark 'res' as an input and an output, since otherwise
     * 'res=-1' may be dropped as being dead, whereas we do need the
     * assignment on the successful (i.e., non-abort) path.
     */
    __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
                      "   .long  1f-1b-6\n"
                      "    jmp   2f\n"
                      "1:  movl  %%eax,%0\n"
                      "2:"
                      :"+r"(res)::"memory","%eax");
#endif // KMP_OS_WINDOWS
    return res;
}

/*
  Transaction end
*/
static __inline void _xend()
{
#if KMP_OS_WINDOWS
    __asm  {
        _emit 0x0f
        _emit 0x01
        _emit 0xd5
    }
#else
    __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
#endif
}

/*
  This is a macro, the argument must be a single byte constant which
  can be evaluated by the inline assembler, since it is emitted as a
  byte into the assembly code.
*/
#if KMP_OS_WINDOWS
#define _xabort(ARG)                            \
    _asm _emit 0xc6                             \
    _asm _emit 0xf8                             \
    _asm _emit ARG
#else
#define _xabort(ARG) \
    __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
#endif

//
// Statistics are collected for testing purposes
//
#if KMP_DEBUG_ADAPTIVE_LOCKS

// We accumulate speculative lock statistics when the lock is destroyed.
// We keep locks that haven't been destroyed in the liveLocks list
// so that we can grab their statistics too.
static kmp_adaptive_lock_statistics_t destroyedStats;

// To hold the list of live locks.
static kmp_adaptive_lock_info_t liveLocks;

// A lock so we can safely update the list of locks.
static kmp_bootstrap_lock_t chain_lock;
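// Bookkeeping note (descriptive): liveLocks is the sentinel node of a circular
// doubly-linked list threaded through each lock's stats.next/stats.prev fields.
// __kmp_remember_lock / __kmp_forget_lock below add and remove locks under
// chain_lock, and __kmp_print_speculative_stats walks the ring, starting and
// ending at the sentinel, to fold the still-live locks' counters into the
// totals accumulated from destroyed locks.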
2006 void 2007 __kmp_init_speculative_stats() 2008 { 2009 kmp_adaptive_lock_info_t *lck = &liveLocks; 2010 2011 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); 2012 lck->stats.next = lck; 2013 lck->stats.prev = lck; 2014 2015 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2016 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2017 2018 __kmp_init_bootstrap_lock( &chain_lock ); 2019 2020 } 2021 2022 // Insert the lock into the circular list 2023 static void 2024 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) 2025 { 2026 __kmp_acquire_bootstrap_lock( &chain_lock ); 2027 2028 lck->stats.next = liveLocks.stats.next; 2029 lck->stats.prev = &liveLocks; 2030 2031 liveLocks.stats.next = lck; 2032 lck->stats.next->stats.prev = lck; 2033 2034 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2035 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2036 2037 __kmp_release_bootstrap_lock( &chain_lock ); 2038 } 2039 2040 static void 2041 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) 2042 { 2043 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2044 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2045 2046 kmp_adaptive_lock_info_t * n = lck->stats.next; 2047 kmp_adaptive_lock_info_t * p = lck->stats.prev; 2048 2049 n->stats.prev = p; 2050 p->stats.next = n; 2051 } 2052 2053 static void 2054 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) 2055 { 2056 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); 2057 __kmp_remember_lock( lck ); 2058 } 2059 2060 static void 2061 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) 2062 { 2063 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 2064 2065 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 2066 t->successfulSpeculations += s->successfulSpeculations; 2067 t->hardFailedSpeculations += s->hardFailedSpeculations; 2068 t->softFailedSpeculations += s->softFailedSpeculations; 2069 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 2070 t->lemmingYields += s->lemmingYields; 2071 } 2072 2073 static void 2074 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) 2075 { 2076 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 2077 2078 __kmp_acquire_bootstrap_lock( &chain_lock ); 2079 2080 __kmp_add_stats( &destroyedStats, lck ); 2081 __kmp_forget_lock( lck ); 2082 2083 __kmp_release_bootstrap_lock( &chain_lock ); 2084 } 2085 2086 static float 2087 percent (kmp_uint32 count, kmp_uint32 total) 2088 { 2089 return (total == 0) ? 0.0: (100.0 * count)/total; 2090 } 2091 2092 static 2093 FILE * __kmp_open_stats_file() 2094 { 2095 if (strcmp (__kmp_speculative_statsfile, "-") == 0) 2096 return stdout; 2097 2098 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20; 2099 char buffer[buffLen]; 2100 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile, 2101 (kmp_int32)getpid()); 2102 FILE * result = fopen(&buffer[0], "w"); 2103 2104 // Maybe we should issue a warning here... 2105 return result ? 
result : stdout; 2106 } 2107 2108 void 2109 __kmp_print_speculative_stats() 2110 { 2111 if (__kmp_user_lock_kind != lk_adaptive) 2112 return; 2113 2114 FILE * statsFile = __kmp_open_stats_file(); 2115 2116 kmp_adaptive_lock_statistics_t total = destroyedStats; 2117 kmp_adaptive_lock_info_t *lck; 2118 2119 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 2120 __kmp_add_stats( &total, lck ); 2121 } 2122 kmp_adaptive_lock_statistics_t *t = &total; 2123 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; 2124 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + 2125 t->softFailedSpeculations; 2126 2127 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); 2128 fprintf ( statsFile, " Lock parameters: \n" 2129 " max_soft_retries : %10d\n" 2130 " max_badness : %10d\n", 2131 __kmp_adaptive_backoff_params.max_soft_retries, 2132 __kmp_adaptive_backoff_params.max_badness); 2133 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); 2134 fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); 2135 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", 2136 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); 2137 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 2138 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); 2139 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); 2140 2141 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); 2142 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", 2143 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); 2144 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", 2145 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); 2146 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", 2147 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); 2148 2149 if (statsFile != stdout) 2150 fclose( statsFile ); 2151 } 2152 2153 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) 2154 #else 2155 # define KMP_INC_STAT(lck,stat) 2156 2157 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2158 2159 static inline bool 2160 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) 2161 { 2162 // It is enough to check that the head_id is zero. 2163 // We don't also need to check the tail. 2164 bool res = lck->lk.head_id == 0; 2165 2166 // We need a fence here, since we must ensure that no memory operations 2167 // from later in this thread float above that read. 2168 #if KMP_COMPILER_ICC 2169 _mm_mfence(); 2170 #else 2171 __sync_synchronize(); 2172 #endif 2173 2174 return res; 2175 } 2176 2177 // Functions for manipulating the badness 2178 static __inline void 2179 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) 2180 { 2181 // Reset the badness to zero so we eagerly try to speculate again 2182 lck->lk.adaptive.badness = 0; 2183 KMP_INC_STAT(lck,successfulSpeculations); 2184 } 2185 2186 // Create a bit mask with one more set bit. 
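// Illustrative summary of the badness scheme implemented by the functions above
// and below (numbers are examples only): each failed speculation grows badness
// 0 -> 1 -> 3 -> 7 -> ..., saturating at max_badness, and speculation is only
// re-attempted when (acquire_attempts & badness) == 0. With badness == 7 that
// means roughly every 8th non-speculative acquire attempt retries speculation,
// while a single successful speculation resets badness to 0 and re-enables
// speculation on every attempt.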
2187 static __inline void 2188 __kmp_step_badness( kmp_adaptive_lock_t *lck ) 2189 { 2190 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; 2191 if ( newBadness > lck->lk.adaptive.max_badness) { 2192 return; 2193 } else { 2194 lck->lk.adaptive.badness = newBadness; 2195 } 2196 } 2197 2198 // Check whether speculation should be attempted. 2199 static __inline int 2200 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2201 { 2202 kmp_uint32 badness = lck->lk.adaptive.badness; 2203 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; 2204 int res = (attempts & badness) == 0; 2205 return res; 2206 } 2207 2208 // Attempt to acquire only the speculative lock. 2209 // Does not back off to the non-speculative lock. 2210 // 2211 static int 2212 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2213 { 2214 int retries = lck->lk.adaptive.max_soft_retries; 2215 2216 // We don't explicitly count the start of speculation, rather we record 2217 // the results (success, hard fail, soft fail). The sum of all of those 2218 // is the total number of times we started speculation since all 2219 // speculations must end one of those ways. 2220 do 2221 { 2222 kmp_uint32 status = _xbegin(); 2223 // Switch this in to disable actual speculation but exercise 2224 // at least some of the rest of the code. Useful for debugging... 2225 // kmp_uint32 status = _XABORT_NESTED; 2226 2227 if (status == _XBEGIN_STARTED ) 2228 { /* We have successfully started speculation 2229 * Check that no-one acquired the lock for real between when we last looked 2230 * and now. This also gets the lock cache line into our read-set, 2231 * which we need so that we'll abort if anyone later claims it for real. 2232 */ 2233 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2234 { 2235 // Lock is now visibly acquired, so someone beat us to it. 2236 // Abort the transaction so we'll restart from _xbegin with the 2237 // failure status. 2238 _xabort(0x01) 2239 KMP_ASSERT2( 0, "should not get here" ); 2240 } 2241 return 1; // Lock has been acquired (speculatively) 2242 } else { 2243 // We have aborted, update the statistics 2244 if ( status & SOFT_ABORT_MASK) 2245 { 2246 KMP_INC_STAT(lck,softFailedSpeculations); 2247 // and loop round to retry. 2248 } 2249 else 2250 { 2251 KMP_INC_STAT(lck,hardFailedSpeculations); 2252 // Give up if we had a hard failure. 2253 break; 2254 } 2255 } 2256 } while( retries-- ); // Loop while we have retries, and didn't fail hard. 2257 2258 // Either we had a hard failure or we didn't succeed softly after 2259 // the full set of attempts, so back off the badness. 2260 __kmp_step_badness( lck ); 2261 return 0; 2262 } 2263 2264 // Attempt to acquire the speculative lock, or back off to the non-speculative one 2265 // if the speculative lock cannot be acquired. 2266 // We can succeed speculatively, non-speculatively, or fail. 2267 static int 2268 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2269 { 2270 // First try to acquire the lock speculatively 2271 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) 2272 return 1; 2273 2274 // Speculative acquisition failed, so try to acquire it non-speculatively. 2275 // Count the non-speculative acquire attempt 2276 lck->lk.adaptive.acquire_attempts++; 2277 2278 // Use base, non-speculative lock. 
if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2280 {
2281 KMP_INC_STAT(lck,nonSpeculativeAcquires);
2282 return 1; // Lock is acquired (non-speculatively)
2283 }
2284 else
2285 {
2286 return 0; // Failed to acquire the lock, it's already visibly locked.
2287 }
2288 }
2289
2290 static int
2291 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2292 {
2293 char const * const func = "omp_test_lock";
2294 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2295 KMP_FATAL( LockIsUninitialized, func );
2296 }
2297
2298 int retval = __kmp_test_adaptive_lock( lck, gtid );
2299
2300 if ( retval ) {
2301 lck->lk.qlk.owner_id = gtid + 1;
2302 }
2303 return retval;
2304 }
2305
2306 // Block until we can acquire a speculative, adaptive lock.
2307 // We check whether we should be trying to speculate.
2308 // If we should be, we check the real lock to see if it is free,
2309 // and, if not, pause without attempting to acquire it until it is.
2310 // Then we try the speculative acquire.
2311 // This means that although we suffer from lemmings a little (because
2312 // we can't acquire the lock speculatively until the queue of waiting
2313 // threads has cleared), we don't get into a
2314 // state where we can never acquire the lock speculatively (because we
2315 // force the queue to clear by preventing new arrivals from entering the
2316 // queue).
2317 // This does mean that when we're trying to break lemmings, the lock
2318 // is no longer fair. However, OpenMP makes no guarantee that its
2319 // locks are fair, so this isn't a real problem.
2320 static void
2321 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2322 {
2323 if ( __kmp_should_speculate( lck, gtid ) )
2324 {
2325 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2326 {
2327 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2328 return;
2329 // We tried speculation and failed, so give up.
2330 }
2331 else
2332 {
2333 // We can't try speculation until the lock is free, so we
2334 // pause here (without suspending on the queuing lock)
2335 // to allow it to drain, then try again.
2336 // All other threads will also see the same result for
2337 // __kmp_should_speculate(), so they will be doing the same if they
2338 // try to claim the lock from now on.
2339 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2340 {
2341 KMP_INC_STAT(lck,lemmingYields);
2342 __kmp_yield (TRUE);
2343 }
2344
2345 if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2346 return;
2347 }
2348 }
2349
2350 // Speculative acquisition failed, so acquire it non-speculatively.
2351 // Count the non-speculative acquire attempt
2352 lck->lk.adaptive.acquire_attempts++;
2353
2354 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2355 // We have acquired the base lock, so count that.
2356 KMP_INC_STAT(lck,nonSpeculativeAcquires ); 2357 } 2358 2359 static void 2360 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2361 { 2362 char const * const func = "omp_set_lock"; 2363 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2364 KMP_FATAL( LockIsUninitialized, func ); 2365 } 2366 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { 2367 KMP_FATAL( LockIsAlreadyOwned, func ); 2368 } 2369 2370 __kmp_acquire_adaptive_lock( lck, gtid ); 2371 2372 lck->lk.qlk.owner_id = gtid + 1; 2373 } 2374 2375 static int 2376 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2377 { 2378 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2379 { // If the lock doesn't look claimed we must be speculating. 2380 // (Or the user's code is buggy and they're releasing without locking; 2381 // if we had XTEST we'd be able to check that case...) 2382 _xend(); // Exit speculation 2383 __kmp_update_badness_after_success( lck ); 2384 } 2385 else 2386 { // Since the lock *is* visibly locked we're not speculating, 2387 // so should use the underlying lock's release scheme. 2388 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); 2389 } 2390 return KMP_LOCK_RELEASED; 2391 } 2392 2393 static int 2394 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2395 { 2396 char const * const func = "omp_unset_lock"; 2397 KMP_MB(); /* in case another processor initialized lock */ 2398 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2399 KMP_FATAL( LockIsUninitialized, func ); 2400 } 2401 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { 2402 KMP_FATAL( LockUnsettingFree, func ); 2403 } 2404 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { 2405 KMP_FATAL( LockUnsettingSetByAnother, func ); 2406 } 2407 lck->lk.qlk.owner_id = 0; 2408 __kmp_release_adaptive_lock( lck, gtid ); 2409 return KMP_LOCK_RELEASED; 2410 } 2411 2412 static void 2413 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) 2414 { 2415 __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); 2416 lck->lk.adaptive.badness = 0; 2417 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; 2418 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; 2419 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2420 #if KMP_DEBUG_ADAPTIVE_LOCKS 2421 __kmp_zero_speculative_stats( &lck->lk.adaptive ); 2422 #endif 2423 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2424 } 2425 2426 static void 2427 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) 2428 { 2429 __kmp_init_adaptive_lock( lck ); 2430 } 2431 2432 static void 2433 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) 2434 { 2435 #if KMP_DEBUG_ADAPTIVE_LOCKS 2436 __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); 2437 #endif 2438 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); 2439 // Nothing needed for the speculative part. 
2440 } 2441 2442 static void 2443 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) 2444 { 2445 char const * const func = "omp_destroy_lock"; 2446 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2447 KMP_FATAL( LockIsUninitialized, func ); 2448 } 2449 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { 2450 KMP_FATAL( LockStillOwned, func ); 2451 } 2452 __kmp_destroy_adaptive_lock( lck ); 2453 } 2454 2455 2456 #endif // KMP_USE_ADAPTIVE_LOCKS 2457 2458 2459 /* ------------------------------------------------------------------------ */ 2460 /* DRDPA ticket locks */ 2461 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2462 2463 static kmp_int32 2464 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) 2465 { 2466 return TCR_4( lck->lk.owner_id ) - 1; 2467 } 2468 2469 static inline bool 2470 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) 2471 { 2472 return lck->lk.depth_locked != -1; 2473 } 2474 2475 __forceinline static int 2476 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2477 { 2478 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); 2479 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2480 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2481 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2482 TCR_PTR(lck->lk.polls); // volatile load 2483 2484 #ifdef USE_LOCK_PROFILE 2485 if (TCR_8(polls[ticket & mask].poll) != ticket) 2486 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2487 /* else __kmp_printf( "." );*/ 2488 #endif /* USE_LOCK_PROFILE */ 2489 2490 // 2491 // Now spin-wait, but reload the polls pointer and mask, in case the 2492 // polling area has been reconfigured. Unless it is reconfigured, the 2493 // reloads stay in L1 cache and are cheap. 2494 // 2495 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!! 2496 // 2497 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2498 // and poll to be re-read every spin iteration. 2499 // 2500 kmp_uint32 spins; 2501 2502 KMP_FSYNC_PREPARE(lck); 2503 KMP_INIT_YIELD(spins); 2504 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load 2505 // If we are oversubscribed, 2506 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2507 // CPU Pause is in the macros for yield. 2508 // 2509 KMP_YIELD(TCR_4(__kmp_nth) 2510 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2511 KMP_YIELD_SPIN(spins); 2512 2513 // Re-read the mask and the poll pointer from the lock structure. 2514 // 2515 // Make certain that "mask" is read before "polls" !!! 2516 // 2517 // If another thread picks reconfigures the polling area and updates 2518 // their values, and we get the new value of mask and the old polls 2519 // pointer, we could access memory beyond the end of the old polling 2520 // area. 2521 // 2522 mask = TCR_8(lck->lk.mask); // volatile load 2523 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2524 TCR_PTR(lck->lk.polls); // volatile load 2525 } 2526 2527 // 2528 // Critical section starts here 2529 // 2530 KMP_FSYNC_ACQUIRED(lck); 2531 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2532 ticket, lck)); 2533 lck->lk.now_serving = ticket; // non-volatile store 2534 2535 // 2536 // Deallocate a garbage polling area if we know that we are the last 2537 // thread that could possibly access it. 2538 // 2539 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2540 // ticket. 
2541 // 2542 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2543 __kmp_free((void *)lck->lk.old_polls); 2544 lck->lk.old_polls = NULL; 2545 lck->lk.cleanup_ticket = 0; 2546 } 2547 2548 // 2549 // Check to see if we should reconfigure the polling area. 2550 // If there is still a garbage polling area to be deallocated from a 2551 // previous reconfiguration, let a later thread reconfigure it. 2552 // 2553 if (lck->lk.old_polls == NULL) { 2554 bool reconfigure = false; 2555 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2556 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2557 2558 if (TCR_4(__kmp_nth) 2559 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2560 // 2561 // We are in oversubscription mode. Contract the polling area 2562 // down to a single location, if that hasn't been done already. 2563 // 2564 if (num_polls > 1) { 2565 reconfigure = true; 2566 num_polls = TCR_4(lck->lk.num_polls); 2567 mask = 0; 2568 num_polls = 1; 2569 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2570 __kmp_allocate(num_polls * sizeof(*polls)); 2571 polls[0].poll = ticket; 2572 } 2573 } 2574 else { 2575 // 2576 // We are in under/fully subscribed mode. Check the number of 2577 // threads waiting on the lock. The size of the polling area 2578 // should be at least the number of threads waiting. 2579 // 2580 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2581 if (num_waiting > num_polls) { 2582 kmp_uint32 old_num_polls = num_polls; 2583 reconfigure = true; 2584 do { 2585 mask = (mask << 1) | 1; 2586 num_polls *= 2; 2587 } while (num_polls <= num_waiting); 2588 2589 // 2590 // Allocate the new polling area, and copy the relevant portion 2591 // of the old polling area to the new area. __kmp_allocate() 2592 // zeroes the memory it allocates, and most of the old area is 2593 // just zero padding, so we only copy the release counters. 2594 // 2595 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2596 __kmp_allocate(num_polls * sizeof(*polls)); 2597 kmp_uint32 i; 2598 for (i = 0; i < old_num_polls; i++) { 2599 polls[i].poll = old_polls[i].poll; 2600 } 2601 } 2602 } 2603 2604 if (reconfigure) { 2605 // 2606 // Now write the updated fields back to the lock structure. 2607 // 2608 // Make certain that "polls" is written before "mask" !!! 2609 // 2610 // If another thread picks up the new value of mask and the old 2611 // polls pointer , it could access memory beyond the end of the 2612 // old polling area. 2613 // 2614 // On x86, we need memory fences. 2615 // 2616 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", 2617 ticket, lck, num_polls)); 2618 2619 lck->lk.old_polls = old_polls; // non-volatile store 2620 lck->lk.polls = polls; // volatile store 2621 2622 KMP_MB(); 2623 2624 lck->lk.num_polls = num_polls; // non-volatile store 2625 lck->lk.mask = mask; // volatile store 2626 2627 KMP_MB(); 2628 2629 // 2630 // Only after the new polling area and mask have been flushed 2631 // to main memory can we update the cleanup ticket field. 
2632 // 2633 // volatile load / non-volatile store 2634 // 2635 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2636 } 2637 } 2638 return KMP_LOCK_ACQUIRED_FIRST; 2639 } 2640 2641 int 2642 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2643 { 2644 return __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2645 } 2646 2647 static int 2648 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2649 { 2650 char const * const func = "omp_set_lock"; 2651 if ( lck->lk.initialized != lck ) { 2652 KMP_FATAL( LockIsUninitialized, func ); 2653 } 2654 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2655 KMP_FATAL( LockNestableUsedAsSimple, func ); 2656 } 2657 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { 2658 KMP_FATAL( LockIsAlreadyOwned, func ); 2659 } 2660 2661 __kmp_acquire_drdpa_lock( lck, gtid ); 2662 2663 lck->lk.owner_id = gtid + 1; 2664 return KMP_LOCK_ACQUIRED_FIRST; 2665 } 2666 2667 int 2668 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2669 { 2670 // 2671 // First get a ticket, then read the polls pointer and the mask. 2672 // The polls pointer must be read before the mask!!! (See above) 2673 // 2674 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2675 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2676 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2677 TCR_PTR(lck->lk.polls); // volatile load 2678 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2679 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2680 kmp_uint64 next_ticket = ticket + 1; 2681 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, 2682 ticket, next_ticket)) { 2683 KMP_FSYNC_ACQUIRED(lck); 2684 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2685 ticket, lck)); 2686 lck->lk.now_serving = ticket; // non-volatile store 2687 2688 // 2689 // Since no threads are waiting, there is no possibility that 2690 // we would want to reconfigure the polling area. We might 2691 // have the cleanup ticket value (which says that it is now 2692 // safe to deallocate old_polls), but we'll let a later thread 2693 // which calls __kmp_acquire_lock do that - this routine 2694 // isn't supposed to block, and we would risk blocks if we 2695 // called __kmp_free() to do the deallocation. 2696 // 2697 return TRUE; 2698 } 2699 } 2700 return FALSE; 2701 } 2702 2703 static int 2704 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2705 { 2706 char const * const func = "omp_test_lock"; 2707 if ( lck->lk.initialized != lck ) { 2708 KMP_FATAL( LockIsUninitialized, func ); 2709 } 2710 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2711 KMP_FATAL( LockNestableUsedAsSimple, func ); 2712 } 2713 2714 int retval = __kmp_test_drdpa_lock( lck, gtid ); 2715 2716 if ( retval ) { 2717 lck->lk.owner_id = gtid + 1; 2718 } 2719 return retval; 2720 } 2721 2722 int 2723 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2724 { 2725 // 2726 // Read the ticket value from the lock data struct, then the polls 2727 // pointer and the mask. The polls pointer must be read before the 2728 // mask!!! 
(See above) 2729 // 2730 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2731 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2732 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2733 TCR_PTR(lck->lk.polls); // volatile load 2734 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2735 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2736 ticket - 1, lck)); 2737 KMP_FSYNC_RELEASING(lck); 2738 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2739 return KMP_LOCK_RELEASED; 2740 } 2741 2742 static int 2743 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2744 { 2745 char const * const func = "omp_unset_lock"; 2746 KMP_MB(); /* in case another processor initialized lock */ 2747 if ( lck->lk.initialized != lck ) { 2748 KMP_FATAL( LockIsUninitialized, func ); 2749 } 2750 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2751 KMP_FATAL( LockNestableUsedAsSimple, func ); 2752 } 2753 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2754 KMP_FATAL( LockUnsettingFree, func ); 2755 } 2756 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) 2757 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { 2758 KMP_FATAL( LockUnsettingSetByAnother, func ); 2759 } 2760 lck->lk.owner_id = 0; 2761 return __kmp_release_drdpa_lock( lck, gtid ); 2762 } 2763 2764 void 2765 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) 2766 { 2767 lck->lk.location = NULL; 2768 lck->lk.mask = 0; 2769 lck->lk.num_polls = 1; 2770 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2771 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2772 lck->lk.cleanup_ticket = 0; 2773 lck->lk.old_polls = NULL; 2774 lck->lk.next_ticket = 0; 2775 lck->lk.now_serving = 0; 2776 lck->lk.owner_id = 0; // no thread owns the lock. 2777 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
2778 lck->lk.initialized = lck; 2779 2780 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2781 } 2782 2783 static void 2784 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2785 { 2786 __kmp_init_drdpa_lock( lck ); 2787 } 2788 2789 void 2790 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) 2791 { 2792 lck->lk.initialized = NULL; 2793 lck->lk.location = NULL; 2794 if (lck->lk.polls != NULL) { 2795 __kmp_free((void *)lck->lk.polls); 2796 lck->lk.polls = NULL; 2797 } 2798 if (lck->lk.old_polls != NULL) { 2799 __kmp_free((void *)lck->lk.old_polls); 2800 lck->lk.old_polls = NULL; 2801 } 2802 lck->lk.mask = 0; 2803 lck->lk.num_polls = 0; 2804 lck->lk.cleanup_ticket = 0; 2805 lck->lk.next_ticket = 0; 2806 lck->lk.now_serving = 0; 2807 lck->lk.owner_id = 0; 2808 lck->lk.depth_locked = -1; 2809 } 2810 2811 static void 2812 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2813 { 2814 char const * const func = "omp_destroy_lock"; 2815 if ( lck->lk.initialized != lck ) { 2816 KMP_FATAL( LockIsUninitialized, func ); 2817 } 2818 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2819 KMP_FATAL( LockNestableUsedAsSimple, func ); 2820 } 2821 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2822 KMP_FATAL( LockStillOwned, func ); 2823 } 2824 __kmp_destroy_drdpa_lock( lck ); 2825 } 2826 2827 2828 // 2829 // nested drdpa ticket locks 2830 // 2831 2832 int 2833 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2834 { 2835 KMP_DEBUG_ASSERT( gtid >= 0 ); 2836 2837 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2838 lck->lk.depth_locked += 1; 2839 return KMP_LOCK_ACQUIRED_NEXT; 2840 } 2841 else { 2842 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2843 KMP_MB(); 2844 lck->lk.depth_locked = 1; 2845 KMP_MB(); 2846 lck->lk.owner_id = gtid + 1; 2847 return KMP_LOCK_ACQUIRED_FIRST; 2848 } 2849 } 2850 2851 static void 2852 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2853 { 2854 char const * const func = "omp_set_nest_lock"; 2855 if ( lck->lk.initialized != lck ) { 2856 KMP_FATAL( LockIsUninitialized, func ); 2857 } 2858 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2859 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2860 } 2861 __kmp_acquire_nested_drdpa_lock( lck, gtid ); 2862 } 2863 2864 int 2865 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2866 { 2867 int retval; 2868 2869 KMP_DEBUG_ASSERT( gtid >= 0 ); 2870 2871 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2872 retval = ++lck->lk.depth_locked; 2873 } 2874 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { 2875 retval = 0; 2876 } 2877 else { 2878 KMP_MB(); 2879 retval = lck->lk.depth_locked = 1; 2880 KMP_MB(); 2881 lck->lk.owner_id = gtid + 1; 2882 } 2883 return retval; 2884 } 2885 2886 static int 2887 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2888 { 2889 char const * const func = "omp_test_nest_lock"; 2890 if ( lck->lk.initialized != lck ) { 2891 KMP_FATAL( LockIsUninitialized, func ); 2892 } 2893 if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { 2894 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2895 } 2896 return __kmp_test_nested_drdpa_lock( lck, gtid ); 2897 } 2898 2899 int 2900 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2901 { 2902 KMP_DEBUG_ASSERT( gtid >= 0 ); 2903 2904 KMP_MB(); 2905 if ( --(lck->lk.depth_locked) == 0 ) { 2906 KMP_MB(); 2907 lck->lk.owner_id = 0; 2908 __kmp_release_drdpa_lock( lck, gtid ); 2909 return KMP_LOCK_RELEASED; 2910 } 2911 return KMP_LOCK_STILL_HELD; 2912 } 2913 2914 static int 2915 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2916 { 2917 char const * const func = "omp_unset_nest_lock"; 2918 KMP_MB(); /* in case another processor initialized lock */ 2919 if ( lck->lk.initialized != lck ) { 2920 KMP_FATAL( LockIsUninitialized, func ); 2921 } 2922 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2923 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2924 } 2925 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2926 KMP_FATAL( LockUnsettingFree, func ); 2927 } 2928 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { 2929 KMP_FATAL( LockUnsettingSetByAnother, func ); 2930 } 2931 return __kmp_release_nested_drdpa_lock( lck, gtid ); 2932 } 2933 2934 void 2935 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) 2936 { 2937 __kmp_init_drdpa_lock( lck ); 2938 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2939 } 2940 2941 static void 2942 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2943 { 2944 __kmp_init_nested_drdpa_lock( lck ); 2945 } 2946 2947 void 2948 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) 2949 { 2950 __kmp_destroy_drdpa_lock( lck ); 2951 lck->lk.depth_locked = 0; 2952 } 2953 2954 static void 2955 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2956 { 2957 char const * const func = "omp_destroy_nest_lock"; 2958 if ( lck->lk.initialized != lck ) { 2959 KMP_FATAL( LockIsUninitialized, func ); 2960 } 2961 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2962 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2963 } 2964 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2965 KMP_FATAL( LockStillOwned, func ); 2966 } 2967 __kmp_destroy_nested_drdpa_lock( lck ); 2968 } 2969 2970 2971 // 2972 // access functions to fields which don't exist for all lock kinds. 2973 // 2974 2975 static int 2976 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) 2977 { 2978 return lck == lck->lk.initialized; 2979 } 2980 2981 static const ident_t * 2982 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) 2983 { 2984 return lck->lk.location; 2985 } 2986 2987 static void 2988 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) 2989 { 2990 lck->lk.location = loc; 2991 } 2992 2993 static kmp_lock_flags_t 2994 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) 2995 { 2996 return lck->lk.flags; 2997 } 2998 2999 static void 3000 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) 3001 { 3002 lck->lk.flags = flags; 3003 } 3004 3005 #if KMP_USE_DYNAMIC_LOCK 3006 3007 // Definitions of lock hints. 3008 # ifndef __OMP_H 3009 typedef enum kmp_lock_hint_t { 3010 kmp_lock_hint_none = 0, 3011 kmp_lock_hint_contended, 3012 kmp_lock_hint_uncontended, 3013 kmp_lock_hint_nonspeculative, 3014 kmp_lock_hint_speculative, 3015 kmp_lock_hint_adaptive, 3016 } kmp_lock_hint_t; 3017 # endif 3018 3019 // Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word. 
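// For instance, the FOREACH_D_LOCK expansion below generates initializers of
// roughly this shape (TAS case shown, KA_TRACE elided):
//
//   static void init_tas_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) {
//       *lck = DYNA_LOCK_FREE(tas);   // free-state tag for this lock kind
//   }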
3020 #define expand_init_lock(l, a) \ 3021 static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \ 3022 *lck = DYNA_LOCK_FREE(l); \ 3023 KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \ 3024 } 3025 FOREACH_D_LOCK(expand_init_lock, 0) 3026 #undef expand_init_lock 3027 3028 #if DYNA_HAS_HLE 3029 3030 // HLE lock functions - imported from the testbed runtime. 3031 #if KMP_MIC 3032 # define machine_pause() _mm_delay_32(10) // TODO: find the right argument 3033 #else 3034 # define machine_pause() _mm_pause() 3035 #endif 3036 #define HLE_ACQUIRE ".byte 0xf2;" 3037 #define HLE_RELEASE ".byte 0xf3;" 3038 3039 static inline kmp_uint32 3040 swap4(kmp_uint32 volatile *p, kmp_uint32 v) 3041 { 3042 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" 3043 : "+r"(v), "+m"(*p) 3044 : 3045 : "memory"); 3046 return v; 3047 } 3048 3049 static void 3050 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) 3051 { 3052 *lck = 0; 3053 } 3054 3055 static void 3056 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3057 { 3058 // Use gtid for DYNA_LOCK_BUSY if necessary 3059 if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) { 3060 int delay = 1; 3061 do { 3062 while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) { 3063 for (int i = delay; i != 0; --i) 3064 machine_pause(); 3065 delay = ((delay << 1) | 1) & 7; 3066 } 3067 } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)); 3068 } 3069 } 3070 3071 static void 3072 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3073 { 3074 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 3075 } 3076 3077 static void 3078 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3079 { 3080 __asm__ volatile(HLE_RELEASE "movl %1,%0" 3081 : "=m"(*lck) 3082 : "r"(DYNA_LOCK_FREE(hle)) 3083 : "memory"); 3084 } 3085 3086 static void 3087 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3088 { 3089 __kmp_release_hle_lock(lck, gtid); // TODO: add checks 3090 } 3091 3092 static int 3093 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3094 { 3095 return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle); 3096 } 3097 3098 static int 3099 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3100 { 3101 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 3102 } 3103 3104 #endif // DYNA_HAS_HLE 3105 3106 // Entry functions for indirect locks (first element of direct_*_ops[]). 3107 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); 3108 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); 3109 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3110 static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3111 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3112 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3113 static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3114 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3115 3116 // 3117 // Jump tables for the indirect lock functions. 3118 // Only fill in the odd entries, that avoids the need to shift out the low bit. 
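// (Inference from the table shapes, not stated explicitly here: a direct lock
// word appears to carry its tag as an odd value in the low bits, so it can index
// these tables directly at the odd slots, while an indirect lock word is kept
// even -- it stores index << 1 -- and is dispatched through entry 0, the
// __kmp_*_indirect_lock forwarding functions.)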
3119 // 3120 #define expand_func0(l, op) 0,op##_##l##_##lock, 3121 void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) 3122 = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) }; 3123 3124 #define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock, 3125 void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *) 3126 = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) }; 3127 3128 // Differentiates *lock and *lock_with_checks. 3129 #define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, 3130 #define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3131 static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3132 = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) }, 3133 { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } }; 3134 static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3135 = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) }, 3136 { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } }; 3137 3138 #define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, 3139 #define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3140 static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3141 = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) }, 3142 { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } }; 3143 3144 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3145 void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3146 void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3147 int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3148 3149 // 3150 // Jump tables for the indirect lock functions. 3151 // 3152 #define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, 3153 void (*__kmp_indirect_init_ops[])(kmp_user_lock_p) 3154 = { FOREACH_I_LOCK(expand_func4, init) }; 3155 void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p) 3156 = { FOREACH_I_LOCK(expand_func4, destroy) }; 3157 3158 // Differentiates *lock and *lock_with_checks. 3159 #define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3160 #define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3161 static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3162 = { { FOREACH_I_LOCK(expand_func5, acquire) }, 3163 { FOREACH_I_LOCK(expand_func5c, acquire) } }; 3164 static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3165 = { { FOREACH_I_LOCK(expand_func5, release) }, 3166 { FOREACH_I_LOCK(expand_func5c, release) } }; 3167 3168 #define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3169 #define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3170 static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3171 = { { FOREACH_I_LOCK(expand_func6, test) }, 3172 { FOREACH_I_LOCK(expand_func6c, test) } }; 3173 3174 // Exposes only one set of jump tables (*lock or *lock_with_checks). 
3175 void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0; 3176 void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0; 3177 int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0; 3178 3179 // Lock index table. 3180 kmp_indirect_lock_t **__kmp_indirect_lock_table; 3181 kmp_lock_index_t __kmp_indirect_lock_table_size; 3182 kmp_lock_index_t __kmp_indirect_lock_table_next; 3183 3184 // Size of indirect locks. 3185 static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = { 3186 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), 3187 #if KMP_USE_ADAPTIVE_LOCKS 3188 sizeof(kmp_adaptive_lock_t), 3189 #endif 3190 sizeof(kmp_drdpa_lock_t), 3191 sizeof(kmp_tas_lock_t), 3192 #if DYNA_HAS_FUTEX 3193 sizeof(kmp_futex_lock_t), 3194 #endif 3195 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), 3196 sizeof(kmp_drdpa_lock_t) 3197 }; 3198 3199 // Jump tables for lock accessor/modifier. 3200 void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; 3201 void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; 3202 const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3203 kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3204 3205 // Use different lock pools for different lock types. 3206 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 }; 3207 3208 // Inserts the given lock ptr to the lock table. 3209 kmp_lock_index_t 3210 __kmp_insert_indirect_lock(kmp_indirect_lock_t *lck) 3211 { 3212 kmp_lock_index_t next = __kmp_indirect_lock_table_next; 3213 // Check capacity and double the size if required 3214 if (next >= __kmp_indirect_lock_table_size) { 3215 kmp_lock_index_t i; 3216 kmp_lock_index_t size = __kmp_indirect_lock_table_size; 3217 kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table; 3218 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *)); 3219 KMP_MEMCPY(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *)); 3220 __kmp_free(old_table); 3221 __kmp_indirect_lock_table_size = 2*next; 3222 } 3223 // Insert lck to the table and return the index. 3224 __kmp_indirect_lock_table[next] = lck; 3225 __kmp_indirect_lock_table_next++; 3226 return next; 3227 } 3228 3229 // User lock allocator for dynamically dispatched locks. 3230 kmp_indirect_lock_t * 3231 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) 3232 { 3233 kmp_indirect_lock_t *lck; 3234 kmp_lock_index_t idx; 3235 3236 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3237 3238 if (__kmp_indirect_lock_pool[tag] != NULL) { 3239 lck = __kmp_indirect_lock_pool[tag]; 3240 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3241 idx = lck->lock->pool.index; 3242 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3243 } else { 3244 lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t)); 3245 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3246 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3247 idx = __kmp_insert_indirect_lock(lck); 3248 } 3249 3250 __kmp_release_lock(&__kmp_global_lock, gtid); 3251 3252 lck->type = tag; 3253 3254 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3255 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. 
3256 } else { 3257 *((kmp_indirect_lock_t **)user_lock) = lck; 3258 } 3259 3260 return lck; 3261 } 3262 3263 // User lock lookup for dynamically dispatched locks. 3264 static __forceinline 3265 kmp_indirect_lock_t * 3266 __kmp_lookup_indirect_lock(void **user_lock, const char *func) 3267 { 3268 if (__kmp_env_consistency_check) { 3269 kmp_indirect_lock_t *lck = NULL; 3270 if (user_lock == NULL) { 3271 KMP_FATAL(LockIsUninitialized, func); 3272 } 3273 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3274 kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock); 3275 if (idx < 0 || idx >= __kmp_indirect_lock_table_size) { 3276 KMP_FATAL(LockIsUninitialized, func); 3277 } 3278 lck = __kmp_indirect_lock_table[idx]; 3279 } else { 3280 lck = *((kmp_indirect_lock_t **)user_lock); 3281 } 3282 if (lck == NULL) { 3283 KMP_FATAL(LockIsUninitialized, func); 3284 } 3285 return lck; 3286 } else { 3287 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3288 return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)]; 3289 } else { 3290 return *((kmp_indirect_lock_t **)user_lock); 3291 } 3292 } 3293 } 3294 3295 static void 3296 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) 3297 { 3298 #if KMP_USE_ADAPTIVE_LOCKS 3299 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3300 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3301 seq = lockseq_queuing; 3302 } 3303 #endif 3304 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq); 3305 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3306 DYNA_I_LOCK_FUNC(l, init)(l->lock); 3307 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type)); 3308 } 3309 3310 static void 3311 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) 3312 { 3313 kmp_uint32 gtid = __kmp_entry_gtid(); 3314 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3315 DYNA_I_LOCK_FUNC(l, destroy)(l->lock); 3316 kmp_indirect_locktag_t tag = l->type; 3317 3318 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3319 3320 // Use the base lock's space to keep the pool chain. 
3321 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3322 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3323 l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock); 3324 } 3325 __kmp_indirect_lock_pool[tag] = l; 3326 3327 __kmp_release_lock(&__kmp_global_lock, gtid); 3328 } 3329 3330 static void 3331 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3332 { 3333 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3334 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); 3335 } 3336 3337 static void 3338 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3339 { 3340 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3341 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3342 } 3343 3344 static int 3345 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3346 { 3347 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3348 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); 3349 } 3350 3351 static void 3352 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3353 { 3354 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3355 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); 3356 } 3357 3358 static void 3359 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3360 { 3361 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3362 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3363 } 3364 3365 static int 3366 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3367 { 3368 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3369 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); 3370 } 3371 3372 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3373 3374 // Initialize a hinted lock. 3375 void 3376 __kmp_init_lock_hinted(void **lock, int hint) 3377 { 3378 kmp_dyna_lockseq_t seq; 3379 switch (hint) { 3380 case kmp_lock_hint_uncontended: 3381 seq = lockseq_tas; 3382 break; 3383 case kmp_lock_hint_speculative: 3384 #if DYNA_HAS_HLE 3385 seq = lockseq_hle; 3386 #else 3387 seq = lockseq_tas; 3388 #endif 3389 break; 3390 case kmp_lock_hint_adaptive: 3391 #if KMP_USE_ADAPTIVE_LOCKS 3392 seq = lockseq_adaptive; 3393 #else 3394 seq = lockseq_queuing; 3395 #endif 3396 break; 3397 // Defaults to queuing locks. 3398 case kmp_lock_hint_contended: 3399 case kmp_lock_hint_nonspeculative: 3400 default: 3401 seq = lockseq_queuing; 3402 break; 3403 } 3404 if (DYNA_IS_D_LOCK(seq)) { 3405 DYNA_INIT_D_LOCK(lock, seq); 3406 #if USE_ITT_BUILD 3407 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 3408 #endif 3409 } else { 3410 DYNA_INIT_I_LOCK(lock, seq); 3411 #if USE_ITT_BUILD 3412 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); 3413 __kmp_itt_lock_creating(ilk->lock, NULL); 3414 #endif 3415 } 3416 } 3417 3418 // This is used only in kmp_error.c when consistency checking is on. 
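// The per-kind owner queries it dispatches to follow the same convention as the
// consistency checks elsewhere in this file: they return the owning gtid, or -1
// when the lock is free (the default branch simply returns 0 for lock kinds that
// record no owner).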
3419 kmp_int32 3420 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) 3421 { 3422 switch (seq) { 3423 case lockseq_tas: 3424 case lockseq_nested_tas: 3425 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3426 #if DYNA_HAS_FUTEX 3427 case lockseq_futex: 3428 case lockseq_nested_futex: 3429 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3430 #endif 3431 case lockseq_ticket: 3432 case lockseq_nested_ticket: 3433 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3434 case lockseq_queuing: 3435 case lockseq_nested_queuing: 3436 #if KMP_USE_ADAPTIVE_LOCKS 3437 case lockseq_adaptive: 3438 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3439 #endif 3440 case lockseq_drdpa: 3441 case lockseq_nested_drdpa: 3442 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3443 default: 3444 return 0; 3445 } 3446 } 3447 3448 // The value initialized from KMP_LOCK_KIND needs to be translated to its 3449 // nested version. 3450 void 3451 __kmp_init_nest_lock_hinted(void **lock, int hint) 3452 { 3453 kmp_dyna_lockseq_t seq; 3454 switch (hint) { 3455 case kmp_lock_hint_uncontended: 3456 seq = lockseq_nested_tas; 3457 break; 3458 // Defaults to queuing locks. 3459 case kmp_lock_hint_contended: 3460 case kmp_lock_hint_nonspeculative: 3461 default: 3462 seq = lockseq_nested_queuing; 3463 break; 3464 } 3465 DYNA_INIT_I_LOCK(lock, seq); 3466 #if USE_ITT_BUILD 3467 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); 3468 __kmp_itt_lock_creating(ilk->lock, NULL); 3469 #endif 3470 } 3471 3472 // Initializes the lock table for indirect locks. 3473 static void 3474 __kmp_init_indirect_lock_table() 3475 { 3476 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024); 3477 __kmp_indirect_lock_table_size = 1024; 3478 __kmp_indirect_lock_table_next = 0; 3479 } 3480 3481 #if KMP_USE_ADAPTIVE_LOCKS 3482 # define init_lock_func(table, expand) { \ 3483 table[locktag_ticket] = expand(ticket); \ 3484 table[locktag_queuing] = expand(queuing); \ 3485 table[locktag_adaptive] = expand(queuing); \ 3486 table[locktag_drdpa] = expand(drdpa); \ 3487 table[locktag_nested_ticket] = expand(ticket); \ 3488 table[locktag_nested_queuing] = expand(queuing); \ 3489 table[locktag_nested_drdpa] = expand(drdpa); \ 3490 } 3491 #else 3492 # define init_lock_func(table, expand) { \ 3493 table[locktag_ticket] = expand(ticket); \ 3494 table[locktag_queuing] = expand(queuing); \ 3495 table[locktag_drdpa] = expand(drdpa); \ 3496 table[locktag_nested_ticket] = expand(ticket); \ 3497 table[locktag_nested_queuing] = expand(queuing); \ 3498 table[locktag_nested_drdpa] = expand(drdpa); \ 3499 } 3500 #endif // KMP_USE_ADAPTIVE_LOCKS 3501 3502 // Initializes data for dynamic user locks. 3503 void 3504 __kmp_init_dynamic_user_locks() 3505 { 3506 // Initialize jump table location 3507 int offset = (__kmp_env_consistency_check)? 1: 0; 3508 __kmp_direct_set_ops = direct_set_tab[offset]; 3509 __kmp_direct_unset_ops = direct_unset_tab[offset]; 3510 __kmp_direct_test_ops = direct_test_tab[offset]; 3511 __kmp_indirect_set_ops = indirect_set_tab[offset]; 3512 __kmp_indirect_unset_ops = indirect_unset_tab[offset]; 3513 __kmp_indirect_test_ops = indirect_test_tab[offset]; 3514 __kmp_init_indirect_lock_table(); 3515 3516 // Initialize lock accessor/modifier 3517 // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe. 
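// For example, the first expansion below is equivalent to writing out:
//
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
//
// with matching assignments for the queuing, adaptive (which reuses the queuing
// accessors), drdpa and nested entries.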
3518 #define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location 3519 init_lock_func(__kmp_indirect_set_location, expand_func); 3520 #undef expand_func 3521 #define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags 3522 init_lock_func(__kmp_indirect_set_flags, expand_func); 3523 #undef expand_func 3524 #define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location 3525 init_lock_func(__kmp_indirect_get_location, expand_func); 3526 #undef expand_func 3527 #define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags 3528 init_lock_func(__kmp_indirect_get_flags, expand_func); 3529 #undef expand_func 3530 3531 __kmp_init_user_locks = TRUE; 3532 } 3533 3534 // Clean up the lock table. 3535 void 3536 __kmp_cleanup_indirect_user_locks() 3537 { 3538 kmp_lock_index_t i; 3539 int k; 3540 3541 // Clean up locks in the pools first (they were already destroyed before going into the pools). 3542 for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) { 3543 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; 3544 while (l != NULL) { 3545 kmp_indirect_lock_t *ll = l; 3546 l = (kmp_indirect_lock_t *)l->lock->pool.next; 3547 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3548 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL; 3549 } 3550 __kmp_free(ll->lock); 3551 __kmp_free(ll); 3552 } 3553 } 3554 // Clean up the remaining undestroyed locks. 3555 for (i = 0; i < __kmp_indirect_lock_table_next; i++) { 3556 kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i]; 3557 if (l != NULL) { 3558 // Locks not destroyed explicitly need to be destroyed here. 3559 DYNA_I_LOCK_FUNC(l, destroy)(l->lock); 3560 __kmp_free(l->lock); 3561 __kmp_free(l); 3562 } 3563 } 3564 // Free the table 3565 __kmp_free(__kmp_indirect_lock_table); 3566 3567 __kmp_init_user_locks = FALSE; 3568 } 3569 3570 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3571 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3572 3573 #else // KMP_USE_DYNAMIC_LOCK 3574 3575 /* ------------------------------------------------------------------------ */ 3576 /* user locks 3577 * 3578 * They are implemented as a table of function pointers which are set to the 3579 * lock functions of the appropriate kind, once that has been determined. 
3580 */ 3581 3582 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3583 3584 size_t __kmp_base_user_lock_size = 0; 3585 size_t __kmp_user_lock_size = 0; 3586 3587 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL; 3588 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3589 3590 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3591 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3592 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3593 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL; 3594 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3595 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3596 3597 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3598 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3599 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3600 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3601 3602 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL; 3603 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL; 3604 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL; 3605 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL; 3606 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL; 3607 3608 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) 3609 { 3610 switch ( user_lock_kind ) { 3611 case lk_default: 3612 default: 3613 KMP_ASSERT( 0 ); 3614 3615 case lk_tas: { 3616 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t ); 3617 __kmp_user_lock_size = sizeof( kmp_tas_lock_t ); 3618 3619 __kmp_get_user_lock_owner_ = 3620 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3621 ( &__kmp_get_tas_lock_owner ); 3622 3623 if ( __kmp_env_consistency_check ) { 3624 KMP_BIND_USER_LOCK_WITH_CHECKS(tas); 3625 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); 3626 } 3627 else { 3628 KMP_BIND_USER_LOCK(tas); 3629 KMP_BIND_NESTED_USER_LOCK(tas); 3630 } 3631 3632 __kmp_destroy_user_lock_ = 3633 ( void ( * )( kmp_user_lock_p ) ) 3634 ( &__kmp_destroy_tas_lock ); 3635 3636 __kmp_is_user_lock_initialized_ = 3637 ( int ( * )( kmp_user_lock_p ) ) NULL; 3638 3639 __kmp_get_user_lock_location_ = 3640 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; 3641 3642 __kmp_set_user_lock_location_ = 3643 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; 3644 3645 __kmp_get_user_lock_flags_ = 3646 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; 3647 3648 __kmp_set_user_lock_flags_ = 3649 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; 3650 } 3651 break; 3652 3653 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 3654 3655 case lk_futex: { 3656 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t ); 3657 __kmp_user_lock_size = sizeof( kmp_futex_lock_t ); 3658 3659 __kmp_get_user_lock_owner_ = 3660 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3661 ( &__kmp_get_futex_lock_owner ); 3662 3663 if ( __kmp_env_consistency_check ) { 3664 KMP_BIND_USER_LOCK_WITH_CHECKS(futex); 3665 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); 3666 } 3667 else { 3668 KMP_BIND_USER_LOCK(futex); 3669 

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)

        case lk_futex: {
            __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
            __kmp_user_lock_size = sizeof( kmp_futex_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_futex_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
            }
            else {
                KMP_BIND_USER_LOCK(futex);
                KMP_BIND_NESTED_USER_LOCK(futex);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_futex_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)

        case lk_ticket: {
            __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
            __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_ticket_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
            }
            else {
                KMP_BIND_USER_LOCK(ticket);
                KMP_BIND_NESTED_USER_LOCK(ticket);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_ticket_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_ticket_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_ticket_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_ticket_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_ticket_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_ticket_lock_flags );
        }
        break;

        case lk_queuing: {
            __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
            __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
            }
            else {
                KMP_BIND_USER_LOCK(queuing);
                KMP_BIND_NESTED_USER_LOCK(queuing);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_queuing_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_queuing_lock_flags );
        }
        break;
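
        // Note (descriptive, added for clarity): the adaptive case below reuses the
        // queuing lock's owner/location/flags accessors, which presumably relies on
        // the adaptive lock sharing the queuing lock's layout for those fields; also,
        // only the simple (non-nested) entry points are bound for adaptive locks here.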

#if KMP_USE_ADAPTIVE_LOCKS
        case lk_adaptive: {
            __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
            __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
            }
            else {
                KMP_BIND_USER_LOCK(adaptive);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_adaptive_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_queuing_lock_flags );

        }
        break;
#endif // KMP_USE_ADAPTIVE_LOCKS

        case lk_drdpa: {
            __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
            __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );

            __kmp_get_user_lock_owner_ =
                ( kmp_int32 ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_drdpa_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
            }
            else {
                KMP_BIND_USER_LOCK(drdpa);
                KMP_BIND_NESTED_USER_LOCK(drdpa);
            }

            __kmp_destroy_user_lock_ =
                ( void ( * )( kmp_user_lock_p ) )
                ( &__kmp_destroy_drdpa_lock );

            __kmp_is_user_lock_initialized_ =
                ( int ( * )( kmp_user_lock_p ) )
                ( &__kmp_is_drdpa_lock_initialized );

            __kmp_get_user_lock_location_ =
                ( const ident_t * ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_drdpa_lock_location );

            __kmp_set_user_lock_location_ =
                ( void ( * )( kmp_user_lock_p, const ident_t * ) )
                ( &__kmp_set_drdpa_lock_location );

            __kmp_get_user_lock_flags_ =
                ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
                ( &__kmp_get_drdpa_lock_flags );

            __kmp_set_user_lock_flags_ =
                ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
                ( &__kmp_set_drdpa_lock_flags );
        }
        break;
    }
}


// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks* __kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value
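
//
// Layout note (illustrative, added for clarity): __kmp_user_lock_table starts with
// used == 1 because slot [0] never holds a lock -- after a reallocation it stores
// the pointer to the previous, smaller table, so the tables form a chain that
// __kmp_cleanup_user_locks() can walk to free every generation:
//
//     table[0] -> old_table[0] -> older_table[0] -> ... -> NULL
//     table[1 .. used-1] = live lock pointers
//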

static kmp_lock_index_t
__kmp_lock_table_insert( kmp_user_lock_p lck )
{
    // Assume that kmp_global_lock is held upon entry/exit.
    kmp_lock_index_t index;
    if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
        kmp_lock_index_t size;
        kmp_user_lock_p *table;
        // Reallocate lock table.
        if ( __kmp_user_lock_table.allocated == 0 ) {
            size = 1024;
        }
        else {
            size = __kmp_user_lock_table.allocated * 2;
        }
        table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
        KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
        table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
        // We cannot free the previous table now, since it may be in use by other
        // threads. So save the pointer to the previous table in the first element of
        // the new table. All the tables will be organized into a list, and can be
        // freed when the library is shutting down.
        __kmp_user_lock_table.table = table;
        __kmp_user_lock_table.allocated = size;
    }
    KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
    index = __kmp_user_lock_table.used;
    __kmp_user_lock_table.table[ index ] = lck;
    ++ __kmp_user_lock_table.used;
    return index;
}

static kmp_user_lock_p
__kmp_lock_block_allocate()
{
    // Assume that kmp_global_lock is held upon entry/exit.
    static int last_index = 0;
    if ( ( last_index >= __kmp_num_locks_in_block )
      || ( __kmp_lock_blocks == NULL ) ) {
        // Restart the index.
        last_index = 0;
        // Need to allocate a new block.
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
        char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
        // Set up the new block.
        kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
        new_block->next_block = __kmp_lock_blocks;
        new_block->locks = (void *)buffer;
        // Publish the new block.
        KMP_MB();
        __kmp_lock_blocks = new_block;
    }
    kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
                                                [ last_index * __kmp_user_lock_size ] ) );
    last_index++;
    return ret;
}
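
//
// Memory picture (illustrative) of one block produced by __kmp_lock_block_allocate():
// the lock storage comes first and the kmp_block_of_locks header lives at the end of
// the same allocation, which is why the cleanup code frees only block_ptr->locks.
//
//     buffer:  [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
//               ^                                   ^
//               new_block->locks                    new_block (buffer + space_for_locks)
//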

//
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
//
kmp_user_lock_p
__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
  kmp_lock_flags_t flags )
{
    kmp_user_lock_p lck;
    kmp_lock_index_t index;
    KMP_DEBUG_ASSERT( user_lock );

    __kmp_acquire_lock( &__kmp_global_lock, gtid );

    if ( __kmp_lock_pool == NULL ) {
        // Lock pool is empty. Allocate new memory.
        if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
            lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
        }
        else {
            lck = __kmp_lock_block_allocate();
        }

        // Insert lock in the table so that it can be freed in __kmp_cleanup,
        // and the debugger has info on all allocated locks.
        index = __kmp_lock_table_insert( lck );
    }
    else {
        // Pick up lock from pool.
        lck = __kmp_lock_pool;
        index = __kmp_lock_pool->pool.index;
        __kmp_lock_pool = __kmp_lock_pool->pool.next;
    }

    //
    // We could potentially differentiate between nested and regular locks
    // here, and do the lock table lookup for regular locks only.
    //
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        * ( (kmp_lock_index_t *) user_lock ) = index;
    }
    else {
        * ( (kmp_user_lock_p *) user_lock ) = lck;
    }

    // Mark the lock if it is a critical section lock.
    __kmp_set_user_lock_flags( lck, flags );

    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up

    return lck;
}

// Return the lock's memory to the pool for reuse.
void
__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( user_lock != NULL );
    KMP_DEBUG_ASSERT( lck != NULL );

    __kmp_acquire_lock( & __kmp_global_lock, gtid );

    lck->pool.next = __kmp_lock_pool;
    __kmp_lock_pool = lck;
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
        KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
        lck->pool.index = index;
    }

    __kmp_release_lock( & __kmp_global_lock, gtid );
}

kmp_user_lock_p
__kmp_lookup_user_lock( void **user_lock, char const *func )
{
    kmp_user_lock_p lck = NULL;

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
        if ( __kmp_env_consistency_check ) {
            if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
                KMP_FATAL( LockIsUninitialized, func );
            }
        }
        KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        lck = __kmp_user_lock_table.table[index];
    }
    else {
        lck = *( (kmp_user_lock_p *)user_lock );
    }

    if ( __kmp_env_consistency_check ) {
        if ( lck == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    return lck;
}
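
//
// Illustrative summary (not part of the library) of how the allocate/lookup pair
// above encodes a user-visible lock: when the user's lock type is too small to hold
// a pointer (OMP_LOCK_T_SIZE < sizeof(void *)), it stores a 1-based index into
// __kmp_user_lock_table; otherwise it stores the kmp_user_lock_p directly.
//
//     encode:  *(kmp_lock_index_t *)user_lock = index;    or  *(kmp_user_lock_p *)user_lock = lck;
//     decode:  lck = __kmp_user_lock_table.table[index];  or  lck = *(kmp_user_lock_p *)user_lock;
//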

void
__kmp_cleanup_user_locks( void )
{
    //
    // Reset lock pool. Do not worry about the locks in the pool -- we will free
    // them when iterating through the lock table (it includes all the locks,
    // dead or alive).
    //
    __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck) \
        ( ( __kmp_get_user_lock_flags_ != NULL ) && \
        ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )

    //
    // Loop through lock table, free all locks.
    //
    // Do not free item [0]; it is reserved for the lock tables list.
    //
    // FIXME - we are iterating through a list of (pointers to) objects of
    // type union kmp_user_lock, but we have no way of knowing whether the
    // base type is currently "pool" or whatever the global user lock type
    // is.
    //
    // We are relying on the fact that for all of the user lock types
    // (except "tas"), the first field in the lock struct is the "initialized"
    // field, which is set to the address of the lock object itself when
    // the lock is initialized. When the union is of type "pool", the
    // first field is a pointer to the next object in the free list, which
    // will not be the same address as the object itself.
    //
    // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
    // will fail for "pool" objects on the free list. This must happen as
    // the "location" field of real user locks overlaps the "index" field
    // of "pool" objects.
    //
    // It would be better to run through the free list, and remove all "pool"
    // objects from the lock table before executing this loop. However,
    // "pool" objects do not always have their index field set (only on
    // lin_32e), and I don't want to search the lock table for the address
    // of every "pool" object on the free list.
    //
    while ( __kmp_user_lock_table.used > 1 ) {
        const ident *loc;

        //
        // Reduce __kmp_user_lock_table.used before freeing the lock,
        // so that the state of the locks stays consistent.
        //
        kmp_user_lock_p lck = __kmp_user_lock_table.table[
          --__kmp_user_lock_table.used ];

        if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
          ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
            //
            // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
            // initialized AND it is NOT a critical section (user is not
            // responsible for destroying criticals) AND we know source
            // location to report.
            //
            if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
              ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
              ( loc->psource != NULL ) ) {
                kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
                KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
                __kmp_str_loc_free( &str_loc );
            }

#ifdef KMP_DEBUG
            if ( IS_CRITICAL( lck ) ) {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
            }
            else {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
            }
#endif // KMP_DEBUG

            //
            // Cleanup internal lock dynamic resources
            // (for drdpa locks particularly).
            //
            __kmp_destroy_user_lock( lck );
        }

        //
        // Free the lock if block allocation of locks is not used.
        //
        if ( __kmp_lock_blocks == NULL ) {
            __kmp_free( lck );
        }
    }

#undef IS_CRITICAL

    //
    // Delete lock table(s).
    //
    kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
    __kmp_user_lock_table.table = NULL;
    __kmp_user_lock_table.allocated = 0;

    while ( table_ptr != NULL ) {
        //
        // In the first element we saved the pointer to the previous
        // (smaller) lock table.
        //
        kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
        __kmp_free( table_ptr );
        table_ptr = next;
    }

    //
    // Free buffers allocated for blocks of locks.
    //
    kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
    __kmp_lock_blocks = NULL;

    while ( block_ptr != NULL ) {
        kmp_block_of_locks_t *next = block_ptr->next_block;
        __kmp_free( block_ptr->locks );
        //
        // *block_ptr itself was allocated at the end of the locks vector.
        //
        block_ptr = next;
    }

    TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK
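
//
// Illustrative sketch (not part of the library) of the lifecycle implemented by the
// non-dynamic-lock helpers in this section. The real call sites live elsewhere in
// the runtime; "gtid" stands for whatever global thread id the caller already has,
// and the "omp_destroy_lock" string is just an example of the func argument used in
// consistency-check error messages.
//
//     void *user_lock;                                       // user's lock storage
//     kmp_user_lock_p lck =
//         __kmp_user_lock_allocate( &user_lock, gtid, 0 );   // lock creation
//     ...                                                    // acquire/release via the vtable
//     lck = __kmp_lookup_user_lock( &user_lock, "omp_destroy_lock" );
//     __kmp_user_lock_free( &user_lock, gtid, lck );         // return memory to the pool
//     ...
//     __kmp_cleanup_user_locks();                            // library shutdown
//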