/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
// we just define the constants here and don't include <futex.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT 0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE 1
# endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32 x, y;

    /* Check to make sure unsigned arithmetic does wrap properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}


/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//

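//
// The test-and-set lock keeps its state in the single word lk.poll:
// KMP_LOCK_FREE(tas) when the lock is free, and KMP_LOCK_BUSY(gtid+1, tas)
// when it is held, so __kmp_get_tas_lock_owner() can recover the owner's
// gtid by stripping the tag and subtracting 1.
//
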
static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
        //
        // FIXME - use exponential backoff here
        //
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_tas_lock_timed_template( lck, gtid );
}

static int
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_tas_lock( lck, gtid );
}

int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}

int
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates. */

    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
    KMP_MB();       /* Flush all pending memory write invalidates. */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ?
      __kmp_avail_proc : __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_tas_lock( lck, gtid );
}

void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
}

static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}

void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}


//
// nested test and set locks
//

int
__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_tas_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_tas_lock( lck, gtid );
}

int
__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}

int
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_tas_lock( lck, gtid );
}

void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}

void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}


#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

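//
// The futex lock stores (gtid+1) << 1 in lk.poll while held, with bit 0 used
// as a "waiters present" flag: a contending thread sets bit 0 before calling
// futex_wait, and the releasing thread issues a futex_wake only when it sees
// that bit set, so the syscall is skipped entirely in the uncontended case.
//
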
static kmp_int32
__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
{
    return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
}

static inline bool
__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    kmp_int32 gtid_code = ( gtid + 1 ) << 1;

    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid_code ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    KMP_FSYNC_PREPARE( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    kmp_int32 poll_val;

    while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
      KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {

        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
          lck, gtid, poll_val, cond ) );

        //
        // NOTE: if you try to use the following condition for this branch
        //
        // if ( poll_val & 1 == 0 )
        //
        // Then the 12.0 compiler has a bug where the following block will
        // always be skipped, regardless of the value of the LSB of poll_val.
        //
        if ( ! cond ) {
            //
            // Try to set the lsb in the poll to indicate to the owner
            // thread that they need to wake this thread up.
            //
            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
                  lck, lck->lk.poll, gtid ) );
                continue;
            }
            poll_val |= KMP_LOCK_BUSY(1, futex);

            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
              lck, lck->lk.poll, gtid ) );
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
          lck, gtid, poll_val ) );

        kmp_int32 rc;
        if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
          poll_val, NULL, NULL, 0 ) ) != 0 ) {
            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
              lck, gtid, poll_val, rc, errno ) );
            continue;
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
          lck, gtid, poll_val ) );
        //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
        // wake call when releasing the lock, as we have no idea how many
        // other threads are in the queue.
        //
        gtid_code |= 1;
    }

    KMP_FSYNC_ACQUIRED( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_futex_lock_timed_template( lck, gtid );
}

static int
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    return __kmp_acquire_futex_lock( lck, gtid );
}

int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}

int
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates. */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);

    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
      lck, gtid, poll_val ) );

    if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
          lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
    }

    KMP_MB();       /* Flush all pending memory write invalidates. */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ?
      __kmp_avail_proc : __kmp_xproc ) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_futex_lock( lck, gtid );
}

void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
}

static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}

void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}


//
// nested futex locks
//

int
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_futex_lock( lck, gtid );
}

int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}

int
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_futex_lock( lck, gtid );
}

void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}

void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)


/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

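//
// A ticket lock: each acquirer atomically takes a ticket from next_ticket and
// spins until now_serving catches up with it; release simply advances
// now_serving, so waiting threads are granted the lock in FIFO order.
//
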
static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

static kmp_uint32
__kmp_bakery_check(kmp_uint32 value, kmp_uint32 checker)
{
    register kmp_uint32 pause;

    if (value == checker) {
        return TRUE;
    }
    for (pause = checker - value; pause != 0; --pause);
    return FALSE;
}

__forceinline static int
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket;
    KMP_MB();

    my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );

#ifdef USE_LOCK_PROFILE
    if ( TCR_4( lck->lk.now_serving ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST;
    }
    KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
}

static int
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
          my_ticket, next_ticket ) ) {
            KMP_FSYNC_ACQUIRED( lck );
            return TRUE;
        }
    }
    return FALSE;
}

static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

int
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 distance;

    KMP_MB();       /* Flush all pending memory write invalidates. */

    KMP_FSYNC_RELEASING(lck);
    distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );

    KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );

    KMP_MB();       /* Flush all pending memory write invalidates. */

    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_ticket_lock( lck, gtid );
}

void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    TCW_4( lck->lk.next_ticket, 0 );
    TCW_4( lck->lk.now_serving, 0 );
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // -1 => not a nested lock.
    lck->lk.initialized = (kmp_ticket_lock *)lck;
}

static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}

void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}


//
// nested ticket locks
//

int
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_ticket_lock( lck, gtid );
}

int
__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}

int
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_ticket_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_ticket_lock( lck, gtid );
}

void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_nested_ticket_lock( lck );
}

void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_ticket_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

static const ident_t *
__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/*
 * First the states
 * (head,tail) =              0, 0  means lock is unheld, nobody on queue
 *              UINT_MAX or -1, 0  means lock is held, nobody on queue
 *                           h, h  means lock is held or about to transition, 1 element on queue
 *                           h, t  h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *    Acquire(0,0)  = -1 ,0
 *    Release(0,0)  = Error
 *    Acquire(-1,0) =  h ,h    h > 0
 *    Release(-1,0) =  0 ,0
 *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *    Release(h,h)  = -1 ,0    h > 0
 *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *             ^                                      |
 *             |                                      |
 *             +--------------------------------------+
 *
 */
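
/*
 * The queue itself is threaded through the threads' th_next_waiting fields:
 * head_id and tail_id hold gtid+1 of the first and last waiter (or the
 * special values above), and each waiter spins on its own th_spin_here flag
 * until the releasing thread dequeues it and clears that flag.
 */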
\n" ); 1163 1164 i = tc % TRACE_BUF_ELE; 1165 __kmp_printf_no_lock( "%s\n", traces[i] ); 1166 i = (i+1) % TRACE_BUF_ELE; 1167 while ( i != (tc % TRACE_BUF_ELE) ) { 1168 __kmp_printf_no_lock( "%s", traces[i] ); 1169 i = (i+1) % TRACE_BUF_ELE; 1170 } 1171 __kmp_printf_no_lock( "\n" ); 1172 1173 __kmp_printf_no_lock( 1174 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n", 1175 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting, 1176 head_id, tail_id ); 1177 1178 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id ); 1179 1180 if ( lck->lk.head_id >= 1 ) { 1181 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting; 1182 while (t > 0) { 1183 __kmp_printf_no_lock( "-> %d ", t ); 1184 t = __kmp_threads[t-1]->th.th_next_waiting; 1185 } 1186 } 1187 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id ); 1188 __kmp_printf_no_lock( "\n\n" ); 1189 } 1190 1191 #endif /* DEBUG_QUEUING_LOCKS */ 1192 1193 static kmp_int32 1194 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck ) 1195 { 1196 return TCR_4( lck->lk.owner_id ) - 1; 1197 } 1198 1199 static inline bool 1200 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck ) 1201 { 1202 return lck->lk.depth_locked != -1; 1203 } 1204 1205 /* Acquire a lock using a the queuing lock implementation */ 1206 template <bool takeTime> 1207 /* [TLW] The unused template above is left behind because of what BEB believes is a 1208 potential compiler problem with __forceinline. */ 1209 __forceinline static int 1210 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck, 1211 kmp_int32 gtid ) 1212 { 1213 register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid ); 1214 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1215 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1216 volatile kmp_uint32 *spin_here_p; 1217 kmp_int32 need_mf = 1; 1218 1219 #if OMPT_SUPPORT 1220 ompt_state_t prev_state = ompt_state_undefined; 1221 #endif 1222 1223 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1224 1225 KMP_FSYNC_PREPARE( lck ); 1226 KMP_DEBUG_ASSERT( this_thr != NULL ); 1227 spin_here_p = & this_thr->th.th_spin_here; 1228 1229 #ifdef DEBUG_QUEUING_LOCKS 1230 TRACE_LOCK( gtid+1, "acq ent" ); 1231 if ( *spin_here_p ) 1232 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1233 if ( this_thr->th.th_next_waiting != 0 ) 1234 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1235 #endif 1236 KMP_DEBUG_ASSERT( !*spin_here_p ); 1237 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1238 1239 1240 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p 1241 that may follow, not just in execution order, but also in visibility order. This way, 1242 when a releasing thread observes the changes to the queue by this thread, it can 1243 rightly assume that spin_here_p has already been set to TRUE, so that when it sets 1244 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p 1245 to FALSE before this thread sets it to TRUE, this thread will hang. 
    *spin_here_p = TRUE;  /* before enqueuing to prevent race */

    while( 1 ) {
        kmp_int32 enqueued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

        switch ( head ) {

            case -1:
            {
#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                              this assignment prevents us from entering the if ( t > 0 )
                              condition in the enqueued case below, which is not necessary for
                              this state transition */

                need_mf = 0;
                /* try (-1,0)->(tid,tid) */
                enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
                  KMP_PACK_64( -1, 0 ),
                  KMP_PACK_64( gtid+1, gtid+1 ) );
#ifdef DEBUG_QUEUING_LOCKS
                if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
#endif
            }
            break;

            default:
            {
                tail = *tail_id_p;
                KMP_DEBUG_ASSERT( tail != gtid + 1 );

#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif

                if ( tail == 0 ) {
                    enqueued = FALSE;
                }
                else {
                    need_mf = 0;
                    /* try (h,t) or (h,h)->(h,tid) */
                    enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );

#ifdef DEBUG_QUEUING_LOCKS
                    if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
#endif
                }
            }
            break;

            case 0: /* empty queue */
            {
                kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                /* try (0,0)->(-1,0) */

                /* only legal transition out of head = 0 is head = -1 with no change to tail */
                grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );

                if ( grabbed_lock ) {

                    *spin_here_p = FALSE;

                    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
                      lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                    TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif

#if OMPT_SUPPORT
                    if (ompt_enabled && prev_state != ompt_state_undefined) {
                        /* change the state before clearing wait_id */
                        this_thr->th.ompt_thread_info.state = prev_state;
                        this_thr->th.ompt_thread_info.wait_id = 0;
                    }
#endif

                    KMP_FSYNC_ACQUIRED( lck );
                    return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
                }
                enqueued = FALSE;
            }
            break;
        }

#if OMPT_SUPPORT
        if (ompt_enabled && prev_state == ompt_state_undefined) {
            /* this thread will spin; set wait_id before entering wait state */
            prev_state = this_thr->th.ompt_thread_info.state;
            this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
            this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
        }
#endif

        if ( enqueued ) {
            if ( tail > 0 ) {
                kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
                KMP_ASSERT( tail_thr != NULL );
                tail_thr->th.th_next_waiting = gtid+1;
                /* corresponding wait for this write in release code */
            }
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));


            /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
             *       throughput only here.
             */
            KMP_MB();
            KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq spin" );

            if ( this_thr->th.th_next_waiting != 0 )
                __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
            KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
              lck, gtid ));

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq exit 2" );
#endif

#if OMPT_SUPPORT
            /* change the state before clearing wait_id */
            this_thr->th.ompt_thread_info.state = prev_state;
            this_thr->th.ompt_thread_info.wait_id = 0;
#endif

            /* got lock, we were dequeued by the thread that released lock */
            return KMP_LOCK_ACQUIRED_FIRST;
        }

        /* Yield if number of threads > number of logical processors */
        /* ToDo: Not sure why this should only be in oversubscription case,
           maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
        KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) );
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "acq retry" );
#endif

    }
    KMP_ASSERT2( 0, "should not get here" );
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
}

static int
__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_queuing_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
}

int
__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    kmp_int32 head;
#ifdef KMP_DEBUG
    kmp_info_t *this_thr;
#endif

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
#ifdef KMP_DEBUG
    this_thr = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
#endif

    head = *head_id_p;

    if ( head == 0 ) { /* nobody on queue, nobody holding */

        /* try (0,0)->(-1,0) */

        if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
            KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
            KMP_FSYNC_ACQUIRED(lck);
            return TRUE;
        }
    }

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
    return FALSE;
}

static int
__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_queuing_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

int
__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    register kmp_info_t *this_thr;
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;

    KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
    this_thr = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "rel ent" );

    if ( this_thr->th.th_spin_here )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );

    KMP_FSYNC_RELEASING(lck);

    while( 1 ) {
        kmp_int32 dequeued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
        tail = *tail_id_p;
        TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
        if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
        KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */

        if ( head == -1 ) { /* nobody on queue */

            /* try (-1,0)->(0,0) */
            if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
                KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
                  lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
#endif

#if OMPT_SUPPORT
                /* nothing to do - no other thread is trying to shift blame */
#endif

                return KMP_LOCK_RELEASED;
            }
            dequeued = FALSE;

        }
        else {

            tail = *tail_id_p;
            if ( head == tail ) {  /* only one thread on the queue */

#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 );

                /* try (h,h)->(-1,0) */
                dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
                  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
#endif

            }
            else {
                volatile kmp_int32 *waiting_id_p;
                kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
                KMP_DEBUG_ASSERT( head_thr != NULL );
                waiting_id_p = & head_thr->th.th_next_waiting;

                /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

                /* try (h,t)->(h',t) or (t,t) */

                KMP_MB();
                /* make sure enqueuing thread has time to update next waiting thread field */
                *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
#endif
                dequeued = TRUE;
            }
        }

        if ( dequeued ) {
            kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
            KMP_DEBUG_ASSERT( head_thr != NULL );

            /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
            if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
            KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

            /* For clean code only.
             * Thread not released until next statement prevents race with acquire code.
             */
            head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
#endif

            KMP_MB();
            /* reset spin value */
            head_thr->th.th_spin_here = FALSE;

            KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
              lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "rel exit 2" );
#endif
            return KMP_LOCK_RELEASED;
        }
        /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */

#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "rel retry" );
#endif

    } /* while */
    KMP_ASSERT2( 0, "should not get here" );
    return KMP_LOCK_RELEASED;
}

static int
__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    return __kmp_release_queuing_lock( lck, gtid );
}

void
__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
    lck->lk.initialized = lck;

    KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}

static void
__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
}

void
__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_queuing_lock( lck );
}


//
// nested queuing locks
//

int
__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
        return KMP_LOCK_ACQUIRED_NEXT;
    }
    else {
        __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
        return KMP_LOCK_ACQUIRED_FIRST;
    }
}

static int
__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_acquire_nested_queuing_lock( lck, gtid );
}

int
__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_queuing_lock( lck, gtid );
}

int
__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_queuing_lock( lck, gtid );
        return KMP_LOCK_RELEASED;
    }
    return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    return __kmp_release_nested_queuing_lock( lck, gtid );
}

void
__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_nested_queuing_lock( lck );
}

void
__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
{
    __kmp_destroy_queuing_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_queuing_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

static const ident_t *
__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

#if KMP_USE_ADAPTIVE_LOCKS

/*
    RTM Adaptive locks
*/

#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300

#include <immintrin.h>
#define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#else

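// When the compiler does not provide the RTM intrinsics, the abort status
// bits and small inline-asm versions of xbegin/xend/xabort (emitted as raw
// opcode bytes) are defined below instead.
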
// Values from the status register after failed speculation.
#define _XBEGIN_STARTED          (~0u)
#define _XABORT_EXPLICIT         (1 << 0)
#define _XABORT_RETRY            (1 << 1)
#define _XABORT_CONFLICT         (1 << 2)
#define _XABORT_CAPACITY         (1 << 3)
#define _XABORT_DEBUG            (1 << 4)
#define _XABORT_NESTED           (1 << 5)
#define _XABORT_CODE(x)          ((unsigned char)(((x) >> 24) & 0xFF))

// Aborts for which it's worth trying again immediately
#define SOFT_ABORT_MASK  (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#define STRINGIZE_INTERNAL(arg) #arg
#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)

// Access to RTM instructions

/*
  A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
  This is the same definition as the compiler intrinsic that will be supported at some point.
*/
static __inline int _xbegin()
{
    int res = -1;

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86_64
    _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
    }
#else /* IA32 */
    _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
    }
#endif // KMP_ARCH_X86_64
#else
    /* Note that %eax must be noted as killed (clobbered), because
     * the XSR is returned in %eax(%rax) on abort.  Other register
     * values are restored, so don't need to be killed.
     *
     * We must also mark 'res' as an input and an output, since otherwise
     * 'res=-1' may be dropped as being dead, whereas we do need the
     * assignment on the successful (i.e., non-abort) path.
     */
    __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
                      "   .long 1f-1b-6\n"
                      "   jmp 2f\n"
                      "1: movl %%eax,%0\n"
                      "2:"
                      :"+r"(res)::"memory","%eax");
#endif // KMP_OS_WINDOWS
    return res;
}

/*
  Transaction end
*/
static __inline void _xend()
{
#if KMP_OS_WINDOWS
    __asm {
        _emit 0x0f
        _emit 0x01
        _emit 0xd5
    }
#else
    __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
#endif
}

/*
  This is a macro, the argument must be a single byte constant which
  can be evaluated by the inline assembler, since it is emitted as a
  byte into the assembly code.
*/
#if KMP_OS_WINDOWS
#define _xabort(ARG)          \
    _asm _emit 0xc6           \
    _asm _emit 0xf8           \
    _asm _emit ARG
#else
#define _xabort(ARG) \
    __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
#endif

#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300

//
// Statistics are collected for testing purposes.
//
#if KMP_DEBUG_ADAPTIVE_LOCKS

// We accumulate speculative lock statistics when the lock is destroyed.
// We keep locks that haven't been destroyed in the liveLocks list
// so that we can grab their statistics too.
static kmp_adaptive_lock_statistics_t destroyedStats;

// To hold the list of live locks.
static kmp_adaptive_lock_info_t liveLocks;

// A lock so we can safely update the list of locks.
static kmp_bootstrap_lock_t chain_lock;

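//
// Live adaptive locks are kept on a circular, doubly-linked list headed by
// liveLocks and protected by chain_lock; when a lock is destroyed its
// counters are folded into destroyedStats, so the totals printed at shutdown
// cover both live and already-destroyed locks.
//
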
2012 void 2013 __kmp_init_speculative_stats() 2014 { 2015 kmp_adaptive_lock_info_t *lck = &liveLocks; 2016 2017 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); 2018 lck->stats.next = lck; 2019 lck->stats.prev = lck; 2020 2021 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2022 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2023 2024 __kmp_init_bootstrap_lock( &chain_lock ); 2025 2026 } 2027 2028 // Insert the lock into the circular list 2029 static void 2030 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) 2031 { 2032 __kmp_acquire_bootstrap_lock( &chain_lock ); 2033 2034 lck->stats.next = liveLocks.stats.next; 2035 lck->stats.prev = &liveLocks; 2036 2037 liveLocks.stats.next = lck; 2038 lck->stats.next->stats.prev = lck; 2039 2040 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2041 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2042 2043 __kmp_release_bootstrap_lock( &chain_lock ); 2044 } 2045 2046 static void 2047 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) 2048 { 2049 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 2050 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 2051 2052 kmp_adaptive_lock_info_t * n = lck->stats.next; 2053 kmp_adaptive_lock_info_t * p = lck->stats.prev; 2054 2055 n->stats.prev = p; 2056 p->stats.next = n; 2057 } 2058 2059 static void 2060 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) 2061 { 2062 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); 2063 __kmp_remember_lock( lck ); 2064 } 2065 2066 static void 2067 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) 2068 { 2069 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 2070 2071 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 2072 t->successfulSpeculations += s->successfulSpeculations; 2073 t->hardFailedSpeculations += s->hardFailedSpeculations; 2074 t->softFailedSpeculations += s->softFailedSpeculations; 2075 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 2076 t->lemmingYields += s->lemmingYields; 2077 } 2078 2079 static void 2080 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) 2081 { 2082 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 2083 2084 __kmp_acquire_bootstrap_lock( &chain_lock ); 2085 2086 __kmp_add_stats( &destroyedStats, lck ); 2087 __kmp_forget_lock( lck ); 2088 2089 __kmp_release_bootstrap_lock( &chain_lock ); 2090 } 2091 2092 static float 2093 percent (kmp_uint32 count, kmp_uint32 total) 2094 { 2095 return (total == 0) ? 0.0: (100.0 * count)/total; 2096 } 2097 2098 static 2099 FILE * __kmp_open_stats_file() 2100 { 2101 if (strcmp (__kmp_speculative_statsfile, "-") == 0) 2102 return stdout; 2103 2104 size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20; 2105 char buffer[buffLen]; 2106 KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile, 2107 (kmp_int32)getpid()); 2108 FILE * result = fopen(&buffer[0], "w"); 2109 2110 // Maybe we should issue a warning here... 2111 return result ? 
result : stdout; 2112 } 2113 2114 void 2115 __kmp_print_speculative_stats() 2116 { 2117 if (__kmp_user_lock_kind != lk_adaptive) 2118 return; 2119 2120 FILE * statsFile = __kmp_open_stats_file(); 2121 2122 kmp_adaptive_lock_statistics_t total = destroyedStats; 2123 kmp_adaptive_lock_info_t *lck; 2124 2125 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 2126 __kmp_add_stats( &total, lck ); 2127 } 2128 kmp_adaptive_lock_statistics_t *t = &total; 2129 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; 2130 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + 2131 t->softFailedSpeculations; 2132 2133 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); 2134 fprintf ( statsFile, " Lock parameters: \n" 2135 " max_soft_retries : %10d\n" 2136 " max_badness : %10d\n", 2137 __kmp_adaptive_backoff_params.max_soft_retries, 2138 __kmp_adaptive_backoff_params.max_badness); 2139 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); 2140 fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); 2141 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", 2142 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); 2143 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 2144 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); 2145 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); 2146 2147 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); 2148 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", 2149 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); 2150 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", 2151 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); 2152 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", 2153 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); 2154 2155 if (statsFile != stdout) 2156 fclose( statsFile ); 2157 } 2158 2159 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) 2160 #else 2161 # define KMP_INC_STAT(lck,stat) 2162 2163 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2164 2165 static inline bool 2166 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) 2167 { 2168 // It is enough to check that the head_id is zero. 2169 // We don't also need to check the tail. 2170 bool res = lck->lk.head_id == 0; 2171 2172 // We need a fence here, since we must ensure that no memory operations 2173 // from later in this thread float above that read. 2174 #if KMP_COMPILER_ICC 2175 _mm_mfence(); 2176 #else 2177 __sync_synchronize(); 2178 #endif 2179 2180 return res; 2181 } 2182 2183 // Functions for manipulating the badness 2184 static __inline void 2185 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) 2186 { 2187 // Reset the badness to zero so we eagerly try to speculate again 2188 lck->lk.adaptive.badness = 0; 2189 KMP_INC_STAT(lck,successfulSpeculations); 2190 } 2191 2192 // Create a bit mask with one more set bit. 
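// Before __kmp_step_badness and __kmp_should_speculate below, a worked illustration of the
// back-off: "badness" is a mask of low set bits, and speculation is attempted only when
// (acquire_attempts & badness) == 0, so each additional bit halves the speculation rate.
// A standalone sketch (not part of the runtime; the function name is hypothetical):
#include <cstdio>

static void
print_speculation_pattern( void )
{
    unsigned badness = 0x7;                     // three soft failures so far
    // With badness == 0b111 only every 8th attempt speculates.
    for ( unsigned attempts = 0; attempts < 16; ++attempts )
        printf( "attempt %2u -> %s\n", attempts,
                ( attempts & badness ) == 0 ? "speculate" : "skip" );
}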
2193 static __inline void 2194 __kmp_step_badness( kmp_adaptive_lock_t *lck ) 2195 { 2196 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; 2197 if ( newBadness > lck->lk.adaptive.max_badness) { 2198 return; 2199 } else { 2200 lck->lk.adaptive.badness = newBadness; 2201 } 2202 } 2203 2204 // Check whether speculation should be attempted. 2205 static __inline int 2206 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2207 { 2208 kmp_uint32 badness = lck->lk.adaptive.badness; 2209 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; 2210 int res = (attempts & badness) == 0; 2211 return res; 2212 } 2213 2214 // Attempt to acquire only the speculative lock. 2215 // Does not back off to the non-speculative lock. 2216 // 2217 static int 2218 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2219 { 2220 int retries = lck->lk.adaptive.max_soft_retries; 2221 2222 // We don't explicitly count the start of speculation, rather we record 2223 // the results (success, hard fail, soft fail). The sum of all of those 2224 // is the total number of times we started speculation since all 2225 // speculations must end one of those ways. 2226 do 2227 { 2228 kmp_uint32 status = _xbegin(); 2229 // Switch this in to disable actual speculation but exercise 2230 // at least some of the rest of the code. Useful for debugging... 2231 // kmp_uint32 status = _XABORT_NESTED; 2232 2233 if (status == _XBEGIN_STARTED ) 2234 { /* We have successfully started speculation 2235 * Check that no-one acquired the lock for real between when we last looked 2236 * and now. This also gets the lock cache line into our read-set, 2237 * which we need so that we'll abort if anyone later claims it for real. 2238 */ 2239 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2240 { 2241 // Lock is now visibly acquired, so someone beat us to it. 2242 // Abort the transaction so we'll restart from _xbegin with the 2243 // failure status. 2244 _xabort(0x01); 2245 KMP_ASSERT2( 0, "should not get here" ); 2246 } 2247 return 1; // Lock has been acquired (speculatively) 2248 } else { 2249 // We have aborted, update the statistics 2250 if ( status & SOFT_ABORT_MASK) 2251 { 2252 KMP_INC_STAT(lck,softFailedSpeculations); 2253 // and loop round to retry. 2254 } 2255 else 2256 { 2257 KMP_INC_STAT(lck,hardFailedSpeculations); 2258 // Give up if we had a hard failure. 2259 break; 2260 } 2261 } 2262 } while( retries-- ); // Loop while we have retries, and didn't fail hard. 2263 2264 // Either we had a hard failure or we didn't succeed softly after 2265 // the full set of attempts, so back off the badness. 2266 __kmp_step_badness( lck ); 2267 return 0; 2268 } 2269 2270 // Attempt to acquire the speculative lock, or back off to the non-speculative one 2271 // if the speculative lock cannot be acquired. 2272 // We can succeed speculatively, non-speculatively, or fail. 2273 static int 2274 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2275 { 2276 // First try to acquire the lock speculatively 2277 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) 2278 return 1; 2279 2280 // Speculative acquisition failed, so try to acquire it non-speculatively. 2281 // Count the non-speculative acquire attempt 2282 lck->lk.adaptive.acquire_attempts++; 2283 2284 // Use base, non-speculative lock. 
2285 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) 2286 { 2287 KMP_INC_STAT(lck,nonSpeculativeAcquires); 2288 return 1; // Lock is acquired (non-speculatively) 2289 } 2290 else 2291 { 2292 return 0; // Failed to acquire the lock, it's already visibly locked. 2293 } 2294 } 2295 2296 static int 2297 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2298 { 2299 char const * const func = "omp_test_lock"; 2300 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2301 KMP_FATAL( LockIsUninitialized, func ); 2302 } 2303 2304 int retval = __kmp_test_adaptive_lock( lck, gtid ); 2305 2306 if ( retval ) { 2307 lck->lk.qlk.owner_id = gtid + 1; 2308 } 2309 return retval; 2310 } 2311 2312 // Block until we can acquire a speculative, adaptive lock. 2313 // We check whether we should be trying to speculate. 2314 // If we should be, we check the real lock to see if it is free, 2315 // and, if not, pause without attempting to acquire it until it is. 2316 // Then we try the speculative acquire. 2317 // This means that although we suffer from lemmings a little ( 2318 // because all we can't acquire the lock speculatively until 2319 // the queue of threads waiting has cleared), we don't get into a 2320 // state where we can never acquire the lock speculatively (because we 2321 // force the queue to clear by preventing new arrivals from entering the 2322 // queue). 2323 // This does mean that when we're trying to break lemmings, the lock 2324 // is no longer fair. However OpenMP makes no guarantee that its 2325 // locks are fair, so this isn't a real problem. 2326 static void 2327 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2328 { 2329 if ( __kmp_should_speculate( lck, gtid ) ) 2330 { 2331 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2332 { 2333 if ( __kmp_test_adaptive_lock_only( lck , gtid ) ) 2334 return; 2335 // We tried speculation and failed, so give up. 2336 } 2337 else 2338 { 2339 // We can't try speculation until the lock is free, so we 2340 // pause here (without suspending on the queueing lock, 2341 // to allow it to drain, then try again. 2342 // All other threads will also see the same result for 2343 // shouldSpeculate, so will be doing the same if they 2344 // try to claim the lock from now on. 2345 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2346 { 2347 KMP_INC_STAT(lck,lemmingYields); 2348 __kmp_yield (TRUE); 2349 } 2350 2351 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2352 return; 2353 } 2354 } 2355 2356 // Speculative acquisition failed, so acquire it non-speculatively. 2357 // Count the non-speculative acquire attempt 2358 lck->lk.adaptive.acquire_attempts++; 2359 2360 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid ); 2361 // We have acquired the base lock, so count that. 
2362 KMP_INC_STAT(lck,nonSpeculativeAcquires ); 2363 } 2364 2365 static void 2366 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2367 { 2368 char const * const func = "omp_set_lock"; 2369 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2370 KMP_FATAL( LockIsUninitialized, func ); 2371 } 2372 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { 2373 KMP_FATAL( LockIsAlreadyOwned, func ); 2374 } 2375 2376 __kmp_acquire_adaptive_lock( lck, gtid ); 2377 2378 lck->lk.qlk.owner_id = gtid + 1; 2379 } 2380 2381 static int 2382 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2383 { 2384 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2385 { // If the lock doesn't look claimed we must be speculating. 2386 // (Or the user's code is buggy and they're releasing without locking; 2387 // if we had XTEST we'd be able to check that case...) 2388 _xend(); // Exit speculation 2389 __kmp_update_badness_after_success( lck ); 2390 } 2391 else 2392 { // Since the lock *is* visibly locked we're not speculating, 2393 // so should use the underlying lock's release scheme. 2394 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); 2395 } 2396 return KMP_LOCK_RELEASED; 2397 } 2398 2399 static int 2400 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2401 { 2402 char const * const func = "omp_unset_lock"; 2403 KMP_MB(); /* in case another processor initialized lock */ 2404 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2405 KMP_FATAL( LockIsUninitialized, func ); 2406 } 2407 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { 2408 KMP_FATAL( LockUnsettingFree, func ); 2409 } 2410 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { 2411 KMP_FATAL( LockUnsettingSetByAnother, func ); 2412 } 2413 lck->lk.qlk.owner_id = 0; 2414 __kmp_release_adaptive_lock( lck, gtid ); 2415 return KMP_LOCK_RELEASED; 2416 } 2417 2418 static void 2419 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) 2420 { 2421 __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); 2422 lck->lk.adaptive.badness = 0; 2423 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; 2424 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; 2425 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2426 #if KMP_DEBUG_ADAPTIVE_LOCKS 2427 __kmp_zero_speculative_stats( &lck->lk.adaptive ); 2428 #endif 2429 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2430 } 2431 2432 static void 2433 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) 2434 { 2435 __kmp_init_adaptive_lock( lck ); 2436 } 2437 2438 static void 2439 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) 2440 { 2441 #if KMP_DEBUG_ADAPTIVE_LOCKS 2442 __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); 2443 #endif 2444 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); 2445 // Nothing needed for the speculative part. 
2446 } 2447 2448 static void 2449 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) 2450 { 2451 char const * const func = "omp_destroy_lock"; 2452 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2453 KMP_FATAL( LockIsUninitialized, func ); 2454 } 2455 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { 2456 KMP_FATAL( LockStillOwned, func ); 2457 } 2458 __kmp_destroy_adaptive_lock( lck ); 2459 } 2460 2461 2462 #endif // KMP_USE_ADAPTIVE_LOCKS 2463 2464 2465 /* ------------------------------------------------------------------------ */ 2466 /* DRDPA ticket locks */ 2467 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2468 2469 static kmp_int32 2470 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) 2471 { 2472 return TCR_4( lck->lk.owner_id ) - 1; 2473 } 2474 2475 static inline bool 2476 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) 2477 { 2478 return lck->lk.depth_locked != -1; 2479 } 2480 2481 __forceinline static int 2482 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2483 { 2484 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); 2485 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2486 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2487 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2488 TCR_PTR(lck->lk.polls); // volatile load 2489 2490 #ifdef USE_LOCK_PROFILE 2491 if (TCR_8(polls[ticket & mask].poll) != ticket) 2492 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2493 /* else __kmp_printf( "." );*/ 2494 #endif /* USE_LOCK_PROFILE */ 2495 2496 // 2497 // Now spin-wait, but reload the polls pointer and mask, in case the 2498 // polling area has been reconfigured. Unless it is reconfigured, the 2499 // reloads stay in L1 cache and are cheap. 2500 // 2501 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!! 2502 // 2503 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2504 // and poll to be re-read every spin iteration. 2505 // 2506 kmp_uint32 spins; 2507 2508 KMP_FSYNC_PREPARE(lck); 2509 KMP_INIT_YIELD(spins); 2510 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load 2511 // If we are oversubscribed, 2512 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2513 // CPU Pause is in the macros for yield. 2514 // 2515 KMP_YIELD(TCR_4(__kmp_nth) 2516 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2517 KMP_YIELD_SPIN(spins); 2518 2519 // Re-read the mask and the poll pointer from the lock structure. 2520 // 2521 // Make certain that "mask" is read before "polls" !!! 2522 // 2523 // If another thread picks reconfigures the polling area and updates 2524 // their values, and we get the new value of mask and the old polls 2525 // pointer, we could access memory beyond the end of the old polling 2526 // area. 2527 // 2528 mask = TCR_8(lck->lk.mask); // volatile load 2529 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2530 TCR_PTR(lck->lk.polls); // volatile load 2531 } 2532 2533 // 2534 // Critical section starts here 2535 // 2536 KMP_FSYNC_ACQUIRED(lck); 2537 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2538 ticket, lck)); 2539 lck->lk.now_serving = ticket; // non-volatile store 2540 2541 // 2542 // Deallocate a garbage polling area if we know that we are the last 2543 // thread that could possibly access it. 2544 // 2545 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2546 // ticket. 
2547 // 2548 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2549 __kmp_free((void *)lck->lk.old_polls); 2550 lck->lk.old_polls = NULL; 2551 lck->lk.cleanup_ticket = 0; 2552 } 2553 2554 // 2555 // Check to see if we should reconfigure the polling area. 2556 // If there is still a garbage polling area to be deallocated from a 2557 // previous reconfiguration, let a later thread reconfigure it. 2558 // 2559 if (lck->lk.old_polls == NULL) { 2560 bool reconfigure = false; 2561 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2562 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2563 2564 if (TCR_4(__kmp_nth) 2565 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2566 // 2567 // We are in oversubscription mode. Contract the polling area 2568 // down to a single location, if that hasn't been done already. 2569 // 2570 if (num_polls > 1) { 2571 reconfigure = true; 2572 num_polls = TCR_4(lck->lk.num_polls); 2573 mask = 0; 2574 num_polls = 1; 2575 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2576 __kmp_allocate(num_polls * sizeof(*polls)); 2577 polls[0].poll = ticket; 2578 } 2579 } 2580 else { 2581 // 2582 // We are in under/fully subscribed mode. Check the number of 2583 // threads waiting on the lock. The size of the polling area 2584 // should be at least the number of threads waiting. 2585 // 2586 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2587 if (num_waiting > num_polls) { 2588 kmp_uint32 old_num_polls = num_polls; 2589 reconfigure = true; 2590 do { 2591 mask = (mask << 1) | 1; 2592 num_polls *= 2; 2593 } while (num_polls <= num_waiting); 2594 2595 // 2596 // Allocate the new polling area, and copy the relevant portion 2597 // of the old polling area to the new area. __kmp_allocate() 2598 // zeroes the memory it allocates, and most of the old area is 2599 // just zero padding, so we only copy the release counters. 2600 // 2601 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2602 __kmp_allocate(num_polls * sizeof(*polls)); 2603 kmp_uint32 i; 2604 for (i = 0; i < old_num_polls; i++) { 2605 polls[i].poll = old_polls[i].poll; 2606 } 2607 } 2608 } 2609 2610 if (reconfigure) { 2611 // 2612 // Now write the updated fields back to the lock structure. 2613 // 2614 // Make certain that "polls" is written before "mask" !!! 2615 // 2616 // If another thread picks up the new value of mask and the old 2617 // polls pointer , it could access memory beyond the end of the 2618 // old polling area. 2619 // 2620 // On x86, we need memory fences. 2621 // 2622 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", 2623 ticket, lck, num_polls)); 2624 2625 lck->lk.old_polls = old_polls; // non-volatile store 2626 lck->lk.polls = polls; // volatile store 2627 2628 KMP_MB(); 2629 2630 lck->lk.num_polls = num_polls; // non-volatile store 2631 lck->lk.mask = mask; // volatile store 2632 2633 KMP_MB(); 2634 2635 // 2636 // Only after the new polling area and mask have been flushed 2637 // to main memory can we update the cleanup ticket field. 
2638 // 2639 // volatile load / non-volatile store 2640 // 2641 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2642 } 2643 } 2644 return KMP_LOCK_ACQUIRED_FIRST; 2645 } 2646 2647 int 2648 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2649 { 2650 return __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2651 } 2652 2653 static int 2654 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2655 { 2656 char const * const func = "omp_set_lock"; 2657 if ( lck->lk.initialized != lck ) { 2658 KMP_FATAL( LockIsUninitialized, func ); 2659 } 2660 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2661 KMP_FATAL( LockNestableUsedAsSimple, func ); 2662 } 2663 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { 2664 KMP_FATAL( LockIsAlreadyOwned, func ); 2665 } 2666 2667 __kmp_acquire_drdpa_lock( lck, gtid ); 2668 2669 lck->lk.owner_id = gtid + 1; 2670 return KMP_LOCK_ACQUIRED_FIRST; 2671 } 2672 2673 int 2674 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2675 { 2676 // 2677 // First get a ticket, then read the polls pointer and the mask. 2678 // The polls pointer must be read before the mask!!! (See above) 2679 // 2680 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2681 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2682 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2683 TCR_PTR(lck->lk.polls); // volatile load 2684 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2685 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2686 kmp_uint64 next_ticket = ticket + 1; 2687 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, 2688 ticket, next_ticket)) { 2689 KMP_FSYNC_ACQUIRED(lck); 2690 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2691 ticket, lck)); 2692 lck->lk.now_serving = ticket; // non-volatile store 2693 2694 // 2695 // Since no threads are waiting, there is no possibility that 2696 // we would want to reconfigure the polling area. We might 2697 // have the cleanup ticket value (which says that it is now 2698 // safe to deallocate old_polls), but we'll let a later thread 2699 // which calls __kmp_acquire_lock do that - this routine 2700 // isn't supposed to block, and we would risk blocks if we 2701 // called __kmp_free() to do the deallocation. 2702 // 2703 return TRUE; 2704 } 2705 } 2706 return FALSE; 2707 } 2708 2709 static int 2710 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2711 { 2712 char const * const func = "omp_test_lock"; 2713 if ( lck->lk.initialized != lck ) { 2714 KMP_FATAL( LockIsUninitialized, func ); 2715 } 2716 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2717 KMP_FATAL( LockNestableUsedAsSimple, func ); 2718 } 2719 2720 int retval = __kmp_test_drdpa_lock( lck, gtid ); 2721 2722 if ( retval ) { 2723 lck->lk.owner_id = gtid + 1; 2724 } 2725 return retval; 2726 } 2727 2728 int 2729 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2730 { 2731 // 2732 // Read the ticket value from the lock data struct, then the polls 2733 // pointer and the mask. The polls pointer must be read before the 2734 // mask!!! 
(See above) 2735 // 2736 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2737 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2738 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2739 TCR_PTR(lck->lk.polls); // volatile load 2740 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2741 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2742 ticket - 1, lck)); 2743 KMP_FSYNC_RELEASING(lck); 2744 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2745 return KMP_LOCK_RELEASED; 2746 } 2747 2748 static int 2749 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2750 { 2751 char const * const func = "omp_unset_lock"; 2752 KMP_MB(); /* in case another processor initialized lock */ 2753 if ( lck->lk.initialized != lck ) { 2754 KMP_FATAL( LockIsUninitialized, func ); 2755 } 2756 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2757 KMP_FATAL( LockNestableUsedAsSimple, func ); 2758 } 2759 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2760 KMP_FATAL( LockUnsettingFree, func ); 2761 } 2762 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) 2763 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { 2764 KMP_FATAL( LockUnsettingSetByAnother, func ); 2765 } 2766 lck->lk.owner_id = 0; 2767 return __kmp_release_drdpa_lock( lck, gtid ); 2768 } 2769 2770 void 2771 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) 2772 { 2773 lck->lk.location = NULL; 2774 lck->lk.mask = 0; 2775 lck->lk.num_polls = 1; 2776 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2777 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2778 lck->lk.cleanup_ticket = 0; 2779 lck->lk.old_polls = NULL; 2780 lck->lk.next_ticket = 0; 2781 lck->lk.now_serving = 0; 2782 lck->lk.owner_id = 0; // no thread owns the lock. 2783 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
2784 lck->lk.initialized = lck; 2785 2786 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2787 } 2788 2789 static void 2790 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2791 { 2792 __kmp_init_drdpa_lock( lck ); 2793 } 2794 2795 void 2796 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) 2797 { 2798 lck->lk.initialized = NULL; 2799 lck->lk.location = NULL; 2800 if (lck->lk.polls != NULL) { 2801 __kmp_free((void *)lck->lk.polls); 2802 lck->lk.polls = NULL; 2803 } 2804 if (lck->lk.old_polls != NULL) { 2805 __kmp_free((void *)lck->lk.old_polls); 2806 lck->lk.old_polls = NULL; 2807 } 2808 lck->lk.mask = 0; 2809 lck->lk.num_polls = 0; 2810 lck->lk.cleanup_ticket = 0; 2811 lck->lk.next_ticket = 0; 2812 lck->lk.now_serving = 0; 2813 lck->lk.owner_id = 0; 2814 lck->lk.depth_locked = -1; 2815 } 2816 2817 static void 2818 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2819 { 2820 char const * const func = "omp_destroy_lock"; 2821 if ( lck->lk.initialized != lck ) { 2822 KMP_FATAL( LockIsUninitialized, func ); 2823 } 2824 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2825 KMP_FATAL( LockNestableUsedAsSimple, func ); 2826 } 2827 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2828 KMP_FATAL( LockStillOwned, func ); 2829 } 2830 __kmp_destroy_drdpa_lock( lck ); 2831 } 2832 2833 2834 // 2835 // nested drdpa ticket locks 2836 // 2837 2838 int 2839 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2840 { 2841 KMP_DEBUG_ASSERT( gtid >= 0 ); 2842 2843 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2844 lck->lk.depth_locked += 1; 2845 return KMP_LOCK_ACQUIRED_NEXT; 2846 } 2847 else { 2848 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2849 KMP_MB(); 2850 lck->lk.depth_locked = 1; 2851 KMP_MB(); 2852 lck->lk.owner_id = gtid + 1; 2853 return KMP_LOCK_ACQUIRED_FIRST; 2854 } 2855 } 2856 2857 static void 2858 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2859 { 2860 char const * const func = "omp_set_nest_lock"; 2861 if ( lck->lk.initialized != lck ) { 2862 KMP_FATAL( LockIsUninitialized, func ); 2863 } 2864 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2865 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2866 } 2867 __kmp_acquire_nested_drdpa_lock( lck, gtid ); 2868 } 2869 2870 int 2871 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2872 { 2873 int retval; 2874 2875 KMP_DEBUG_ASSERT( gtid >= 0 ); 2876 2877 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2878 retval = ++lck->lk.depth_locked; 2879 } 2880 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { 2881 retval = 0; 2882 } 2883 else { 2884 KMP_MB(); 2885 retval = lck->lk.depth_locked = 1; 2886 KMP_MB(); 2887 lck->lk.owner_id = gtid + 1; 2888 } 2889 return retval; 2890 } 2891 2892 static int 2893 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2894 { 2895 char const * const func = "omp_test_nest_lock"; 2896 if ( lck->lk.initialized != lck ) { 2897 KMP_FATAL( LockIsUninitialized, func ); 2898 } 2899 if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { 2900 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2901 } 2902 return __kmp_test_nested_drdpa_lock( lck, gtid ); 2903 } 2904 2905 int 2906 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2907 { 2908 KMP_DEBUG_ASSERT( gtid >= 0 ); 2909 2910 KMP_MB(); 2911 if ( --(lck->lk.depth_locked) == 0 ) { 2912 KMP_MB(); 2913 lck->lk.owner_id = 0; 2914 __kmp_release_drdpa_lock( lck, gtid ); 2915 return KMP_LOCK_RELEASED; 2916 } 2917 return KMP_LOCK_STILL_HELD; 2918 } 2919 2920 static int 2921 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2922 { 2923 char const * const func = "omp_unset_nest_lock"; 2924 KMP_MB(); /* in case another processor initialized lock */ 2925 if ( lck->lk.initialized != lck ) { 2926 KMP_FATAL( LockIsUninitialized, func ); 2927 } 2928 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2929 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2930 } 2931 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2932 KMP_FATAL( LockUnsettingFree, func ); 2933 } 2934 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { 2935 KMP_FATAL( LockUnsettingSetByAnother, func ); 2936 } 2937 return __kmp_release_nested_drdpa_lock( lck, gtid ); 2938 } 2939 2940 void 2941 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) 2942 { 2943 __kmp_init_drdpa_lock( lck ); 2944 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2945 } 2946 2947 static void 2948 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2949 { 2950 __kmp_init_nested_drdpa_lock( lck ); 2951 } 2952 2953 void 2954 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) 2955 { 2956 __kmp_destroy_drdpa_lock( lck ); 2957 lck->lk.depth_locked = 0; 2958 } 2959 2960 static void 2961 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2962 { 2963 char const * const func = "omp_destroy_nest_lock"; 2964 if ( lck->lk.initialized != lck ) { 2965 KMP_FATAL( LockIsUninitialized, func ); 2966 } 2967 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2968 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2969 } 2970 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2971 KMP_FATAL( LockStillOwned, func ); 2972 } 2973 __kmp_destroy_nested_drdpa_lock( lck ); 2974 } 2975 2976 2977 // 2978 // access functions to fields which don't exist for all lock kinds. 2979 // 2980 2981 static int 2982 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) 2983 { 2984 return lck == lck->lk.initialized; 2985 } 2986 2987 static const ident_t * 2988 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) 2989 { 2990 return lck->lk.location; 2991 } 2992 2993 static void 2994 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) 2995 { 2996 lck->lk.location = loc; 2997 } 2998 2999 static kmp_lock_flags_t 3000 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) 3001 { 3002 return lck->lk.flags; 3003 } 3004 3005 static void 3006 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) 3007 { 3008 lck->lk.flags = flags; 3009 } 3010 3011 #if KMP_USE_DYNAMIC_LOCK 3012 3013 // Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word. 3014 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) 3015 { 3016 TCW_4(*lck, KMP_GET_D_TAG(seq)); 3017 KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 3018 } 3019 3020 #if KMP_USE_TSX 3021 3022 // HLE lock functions - imported from the testbed runtime. 
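// Before the HLE code itself, a rough sketch (not part of the runtime) of the same acquire
// shape without the XACQUIRE/XRELEASE prefixes: a test-and-test-and-set exchange plus the
// small bounded backoff (delay cycles through 1, 3, 7, 7, ...) used by the functions below.
// std::atomic and _mm_pause() stand in for the runtime's own primitives here.
#include <atomic>
#include <immintrin.h>   // _mm_pause(); x86-specific, like the HLE code below

static void
spin_acquire( std::atomic<unsigned> &lock_word )
{
    if ( lock_word.exchange( 1, std::memory_order_acquire ) != 0 ) {
        int delay = 1;
        do {
            while ( lock_word.load( std::memory_order_relaxed ) != 0 ) {
                for ( int i = delay; i != 0; --i )
                    _mm_pause();                        // same role as KMP_CPU_PAUSE
                delay = ( ( delay << 1 ) | 1 ) & 7;     // 1, 3, 7, 7, ...
            }
        } while ( lock_word.exchange( 1, std::memory_order_acquire ) != 0 );
    }
}

static void
spin_release( std::atomic<unsigned> &lock_word )
{
    lock_word.store( 0, std::memory_order_release );
}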
3023 #define HLE_ACQUIRE ".byte 0xf2;" 3024 #define HLE_RELEASE ".byte 0xf3;" 3025 3026 static inline kmp_uint32 3027 swap4(kmp_uint32 volatile *p, kmp_uint32 v) 3028 { 3029 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" 3030 : "+r"(v), "+m"(*p) 3031 : 3032 : "memory"); 3033 return v; 3034 } 3035 3036 static void 3037 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) 3038 { 3039 TCW_4(*lck, 0); 3040 } 3041 3042 static void 3043 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3044 { 3045 // Use gtid for KMP_LOCK_BUSY if necessary 3046 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 3047 int delay = 1; 3048 do { 3049 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 3050 for (int i = delay; i != 0; --i) 3051 KMP_CPU_PAUSE(); 3052 delay = ((delay << 1) | 1) & 7; 3053 } 3054 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 3055 } 3056 } 3057 3058 static void 3059 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3060 { 3061 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 3062 } 3063 3064 static int 3065 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3066 { 3067 __asm__ volatile(HLE_RELEASE "movl %1,%0" 3068 : "=m"(*lck) 3069 : "r"(KMP_LOCK_FREE(hle)) 3070 : "memory"); 3071 return KMP_LOCK_RELEASED; 3072 } 3073 3074 static int 3075 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3076 { 3077 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 3078 } 3079 3080 static int 3081 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3082 { 3083 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 3084 } 3085 3086 static int 3087 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3088 { 3089 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 3090 } 3091 3092 static void 3093 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) 3094 { 3095 __kmp_init_queuing_lock(lck); 3096 } 3097 3098 static void 3099 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) 3100 { 3101 __kmp_destroy_queuing_lock(lck); 3102 } 3103 3104 static void 3105 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3106 { 3107 unsigned retries=3, status; 3108 do { 3109 status = _xbegin(); 3110 if (status == _XBEGIN_STARTED) { 3111 if (__kmp_is_unlocked_queuing_lock(lck)) 3112 return; 3113 _xabort(0xff); 3114 } 3115 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 3116 // Wait until lock becomes free 3117 while (! 
__kmp_is_unlocked_queuing_lock(lck)) 3118 __kmp_yield(TRUE); 3119 } 3120 else if (!(status & _XABORT_RETRY)) 3121 break; 3122 } while (retries--); 3123 3124 // Fall-back non-speculative lock (xchg) 3125 __kmp_acquire_queuing_lock(lck, gtid); 3126 } 3127 3128 static void 3129 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3130 { 3131 __kmp_acquire_rtm_lock(lck, gtid); 3132 } 3133 3134 static int 3135 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3136 { 3137 if (__kmp_is_unlocked_queuing_lock(lck)) { 3138 // Releasing from speculation 3139 _xend(); 3140 } 3141 else { 3142 // Releasing from a real lock 3143 __kmp_release_queuing_lock(lck, gtid); 3144 } 3145 return KMP_LOCK_RELEASED; 3146 } 3147 3148 static int 3149 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3150 { 3151 return __kmp_release_rtm_lock(lck, gtid); 3152 } 3153 3154 static int 3155 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3156 { 3157 unsigned retries=3, status; 3158 do { 3159 status = _xbegin(); 3160 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 3161 return 1; 3162 } 3163 if (!(status & _XABORT_RETRY)) 3164 break; 3165 } while (retries--); 3166 3167 return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0; 3168 } 3169 3170 static int 3171 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) 3172 { 3173 return __kmp_test_rtm_lock(lck, gtid); 3174 } 3175 3176 #endif // KMP_USE_TSX 3177 3178 // Entry functions for indirect locks (first element of direct lock jump tables). 3179 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); 3180 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); 3181 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3182 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3183 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3184 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3185 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3186 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3187 3188 // 3189 // Jump tables for the indirect lock functions. 3190 // Only fill in the odd entries, that avoids the need to shift out the low bit. 
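// A hypothetical sketch of the dispatch idea described above (not the runtime's actual
// macros, and the table layout is simplified): direct locks keep an odd tag in the lock
// word while indirect locks keep an even value, so collapsing even words to zero and
// masking the low bits of odd words indexes the jump table directly: slot 0 is the
// indirect path, the direct locks sit in the odd slots, and no shift is required.
#include <cstdio>

typedef void ( *lock_init_fn_t )( unsigned * );

static void init_indirect( unsigned * ) { printf( "indirect lock path\n" ); }
static void init_tas( unsigned * )      { printf( "direct TAS path\n" );    }
static void init_futex( unsigned * )    { printf( "direct futex path\n" );  }

// Slot 0 handles every indirect lock; only the odd slots hold direct-lock functions.
static lock_init_fn_t init_table[] = { init_indirect, init_tas, 0, init_futex };

static void
dispatch_init( unsigned *lock_word )
{
    unsigned w = *lock_word;
    // Branch-free tag extraction: an even (indirect) word collapses to 0,
    // an odd (direct) word keeps its low-bit tag (1 or 3 in this toy table).
    unsigned tag = ( w & 0x3u ) & (unsigned)-(int)( w & 1u );
    init_table[tag]( lock_word );
}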
3191 // 3192 3193 // init functions 3194 #define expand(l, op) 0,__kmp_init_direct_lock, 3195 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) 3196 = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) }; 3197 #undef expand 3198 3199 // destroy functions 3200 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 3201 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) 3202 = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) }; 3203 #undef expand 3204 3205 // set/acquire functions 3206 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 3207 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) 3208 = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) }; 3209 #undef expand 3210 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 3211 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) 3212 = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) }; 3213 #undef expand 3214 3215 // unset/release and test functions 3216 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 3217 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) 3218 = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) }; 3219 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) 3220 = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) }; 3221 #undef expand 3222 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 3223 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) 3224 = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) }; 3225 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) 3226 = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) }; 3227 #undef expand 3228 3229 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3230 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; 3231 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; 3232 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; 3233 3234 // 3235 // Jump tables for the indirect lock functions. 
3236 // 3237 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, 3238 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) }; 3239 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) }; 3240 #undef expand 3241 3242 // set/acquire functions 3243 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3244 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; 3245 #undef expand 3246 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3247 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; 3248 #undef expand 3249 3250 // unset/release and test functions 3251 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3252 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; 3253 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; 3254 #undef expand 3255 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3256 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; 3257 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; 3258 #undef expand 3259 3260 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3261 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; 3262 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; 3263 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; 3264 3265 // Lock index table. 3266 kmp_indirect_lock_table_t __kmp_i_lock_table; 3267 3268 // Size of indirect locks. 3269 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 }; 3270 3271 // Jump tables for lock accessor/modifier. 3272 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; 3273 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; 3274 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3275 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3276 3277 // Use different lock pools for different lock types. 3278 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 }; 3279 3280 // User lock allocator for dynamically dispatched indirect locks. 3281 // Every entry of the indirect lock table holds the address and type of the allocated indirect lock 3282 // (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock 3283 // object is returned to the reusable pool of locks, unique to each lock type.
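// Before the allocator itself, a generic sketch of the growth step it performs (not part
// of the runtime; CHUNK, entry_t and the libc allocation calls are placeholders for the
// runtime's own types and __kmp_allocate/__kmp_free). Only the array of chunk pointers is
// reallocated and copied; the chunks themselves never move, so pointers to entries that
// were already handed out stay valid while the capacity doubles.
#include <cstdlib>
#include <cstring>

enum { CHUNK = 1024 };
struct entry_t { void *payload; };

struct chunked_table_t {
    entry_t **chunks;     // array of chunk pointers
    unsigned  size;       // total capacity, a multiple of CHUNK
    unsigned  next;       // next unused index
};

static void
table_init( chunked_table_t *t )
{
    t->chunks = (entry_t **)calloc( 1, sizeof( entry_t * ) );
    t->chunks[0] = (entry_t *)calloc( CHUNK, sizeof( entry_t ) );
    t->size = CHUNK;
    t->next = 0;
}

static entry_t *
table_push( chunked_table_t *t )
{
    if ( t->next == t->size ) {                          // full: double the table
        unsigned rows = t->size / CHUNK;
        entry_t **bigger = (entry_t **)calloc( 2 * rows, sizeof( entry_t * ) );
        memcpy( bigger, t->chunks, rows * sizeof( entry_t * ) );
        free( t->chunks );
        t->chunks = bigger;
        for ( unsigned i = rows; i < 2 * rows; ++i )     // fresh chunks for the new half
            t->chunks[i] = (entry_t *)calloc( CHUNK, sizeof( entry_t ) );
        t->size *= 2;
    }
    unsigned idx = t->next++;
    return &t->chunks[idx / CHUNK][idx % CHUNK];         // stable address
}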
3284 kmp_indirect_lock_t * 3285 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) 3286 { 3287 kmp_indirect_lock_t *lck; 3288 kmp_lock_index_t idx; 3289 3290 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3291 3292 if (__kmp_indirect_lock_pool[tag] != NULL) { 3293 // Reuse the allocated and destroyed lock object 3294 lck = __kmp_indirect_lock_pool[tag]; 3295 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3296 idx = lck->lock->pool.index; 3297 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3298 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck)); 3299 } else { 3300 idx = __kmp_i_lock_table.next; 3301 // Check capacity and double the size if it is full 3302 if (idx == __kmp_i_lock_table.size) { 3303 // Double up the space for block pointers 3304 int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK; 3305 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; 3306 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *)); 3307 KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *)); 3308 __kmp_free(old_table); 3309 // Allocate new objects in the new blocks 3310 for (int i = row; i < 2*row; ++i) 3311 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *) 3312 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); 3313 __kmp_i_lock_table.size = 2*idx; 3314 } 3315 __kmp_i_lock_table.next++; 3316 lck = KMP_GET_I_LOCK(idx); 3317 // Allocate a new base lock object 3318 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3319 KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); 3320 } 3321 3322 __kmp_release_lock(&__kmp_global_lock, gtid); 3323 3324 lck->type = tag; 3325 3326 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3327 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. 3328 } else { 3329 *((kmp_indirect_lock_t **)user_lock) = lck; 3330 } 3331 3332 return lck; 3333 } 3334 3335 // User lock lookup for dynamically dispatched locks. 
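// The lookup that follows undoes the two encodings used by the allocator above: when
// omp_lock_t is smaller than a pointer the user's lock variable holds (index << 1) and the
// lock lives in the table, otherwise it holds the kmp_indirect_lock_t pointer directly.
// A simplified sketch (not part of the runtime; ilock_t and table_at() are hypothetical
// stand-ins for the real type and the KMP_GET_I_LOCK machinery):
#include <cstdint>

struct ilock_t;                                     // stands in for kmp_indirect_lock_t
static ilock_t *table_at( uint32_t idx ) { (void)idx; return nullptr; }   // stub accessor

static ilock_t *
lookup_indirect( void **user_lock, bool lock_word_is_smaller_than_ptr )
{
    if ( lock_word_is_smaller_than_ptr ) {
        // The stored value is (index << 1); the low bit is kept clear so the word can
        // never be mistaken for a direct-lock tag.
        uint32_t idx = *(uint32_t *)user_lock >> 1;
        return table_at( idx );
    }
    // The lock word is large enough to hold the pointer itself.
    return *(ilock_t **)user_lock;
}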
3336 static __forceinline 3337 kmp_indirect_lock_t * 3338 __kmp_lookup_indirect_lock(void **user_lock, const char *func) 3339 { 3340 if (__kmp_env_consistency_check) { 3341 kmp_indirect_lock_t *lck = NULL; 3342 if (user_lock == NULL) { 3343 KMP_FATAL(LockIsUninitialized, func); 3344 } 3345 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3346 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3347 if (idx >= __kmp_i_lock_table.size) { 3348 KMP_FATAL(LockIsUninitialized, func); 3349 } 3350 lck = KMP_GET_I_LOCK(idx); 3351 } else { 3352 lck = *((kmp_indirect_lock_t **)user_lock); 3353 } 3354 if (lck == NULL) { 3355 KMP_FATAL(LockIsUninitialized, func); 3356 } 3357 return lck; 3358 } else { 3359 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3360 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3361 } else { 3362 return *((kmp_indirect_lock_t **)user_lock); 3363 } 3364 } 3365 } 3366 3367 static void 3368 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) 3369 { 3370 #if KMP_USE_ADAPTIVE_LOCKS 3371 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3372 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3373 seq = lockseq_queuing; 3374 } 3375 #endif 3376 #if KMP_USE_TSX 3377 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3378 seq = lockseq_queuing; 3379 } 3380 #endif 3381 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3382 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3383 KMP_I_LOCK_FUNC(l, init)(l->lock); 3384 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq)); 3385 } 3386 3387 static void 3388 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) 3389 { 3390 kmp_uint32 gtid = __kmp_entry_gtid(); 3391 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3392 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3393 kmp_indirect_locktag_t tag = l->type; 3394 3395 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3396 3397 // Use the base lock's space to keep the pool chain. 
3398 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3399 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3400 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3401 } 3402 __kmp_indirect_lock_pool[tag] = l; 3403 3404 __kmp_release_lock(&__kmp_global_lock, gtid); 3405 } 3406 3407 static void 3408 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3409 { 3410 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3411 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3412 } 3413 3414 static int 3415 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3416 { 3417 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3418 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3419 } 3420 3421 static int 3422 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3423 { 3424 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3425 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3426 } 3427 3428 static void 3429 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3430 { 3431 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3432 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3433 } 3434 3435 static int 3436 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3437 { 3438 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3439 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3440 } 3441 3442 static int 3443 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3444 { 3445 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3446 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3447 } 3448 3449 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3450 3451 // This is used only in kmp_error.c when consistency checking is on. 3452 kmp_int32 3453 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) 3454 { 3455 switch (seq) { 3456 case lockseq_tas: 3457 case lockseq_nested_tas: 3458 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3459 #if KMP_HAS_FUTEX 3460 case lockseq_futex: 3461 case lockseq_nested_futex: 3462 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3463 #endif 3464 case lockseq_ticket: 3465 case lockseq_nested_ticket: 3466 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3467 case lockseq_queuing: 3468 case lockseq_nested_queuing: 3469 #if KMP_USE_ADAPTIVE_LOCKS 3470 case lockseq_adaptive: 3471 #endif 3472 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3473 case lockseq_drdpa: 3474 case lockseq_nested_drdpa: 3475 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3476 default: 3477 return 0; 3478 } 3479 } 3480 3481 // Initializes data for dynamic user locks. 
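// One more note before the initializer below: the checked and unchecked variants live in
// separate jump tables, and consistency checking is decided once, at initialization, by
// pointing the exported table pointers at one set or the other. Call sites then pay only
// an indirect call, never an "are checks enabled?" branch. A minimal sketch of that
// selection pattern (not part of the runtime; all names here are placeholders):
typedef void ( *set_fn_t )( void *lock, int gtid );

static void set_fast( void *, int )    { /* no argument checking */ }
static void set_checked( void *, int ) { /* would validate the lock first */ }

static set_fn_t set_table_fast[]    = { set_fast };
static set_fn_t set_table_checked[] = { set_checked };

static set_fn_t *active_set_table;   // what call sites dispatch through

static void
init_dispatch( bool consistency_check )
{
    active_set_table = consistency_check ? set_table_checked : set_table_fast;
}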
3482 void 3483 __kmp_init_dynamic_user_locks() 3484 { 3485 // Initialize jump table for the lock functions 3486 if (__kmp_env_consistency_check) { 3487 __kmp_direct_set = direct_set_check; 3488 __kmp_direct_unset = direct_unset_check; 3489 __kmp_direct_test = direct_test_check; 3490 __kmp_indirect_set = indirect_set_check; 3491 __kmp_indirect_unset = indirect_unset_check; 3492 __kmp_indirect_test = indirect_test_check; 3493 } 3494 else { 3495 __kmp_direct_set = direct_set; 3496 __kmp_direct_unset = direct_unset; 3497 __kmp_direct_test = direct_test; 3498 __kmp_indirect_set = indirect_set; 3499 __kmp_indirect_unset = indirect_unset; 3500 __kmp_indirect_test = indirect_test; 3501 } 3502 3503 // Initialize lock index table 3504 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3505 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3506 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *) 3507 __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); 3508 __kmp_i_lock_table.next = 0; 3509 3510 // Indirect lock size 3511 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3512 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3513 #if KMP_USE_ADAPTIVE_LOCKS 3514 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3515 #endif 3516 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3517 #if KMP_USE_TSX 3518 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3519 #endif 3520 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3521 #if KMP_USE_FUTEX 3522 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3523 #endif 3524 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3525 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3526 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3527 3528 // Initialize lock accessor/modifier 3529 #define fill_jumps(table, expand, sep) { \ 3530 table[locktag##sep##ticket] = expand(ticket); \ 3531 table[locktag##sep##queuing] = expand(queuing); \ 3532 table[locktag##sep##drdpa] = expand(drdpa); \ 3533 } 3534 3535 #if KMP_USE_ADAPTIVE_LOCKS 3536 # define fill_table(table, expand) { \ 3537 fill_jumps(table, expand, _); \ 3538 table[locktag_adaptive] = expand(queuing); \ 3539 fill_jumps(table, expand, _nested_); \ 3540 } 3541 #else 3542 # define fill_table(table, expand) { \ 3543 fill_jumps(table, expand, _); \ 3544 fill_jumps(table, expand, _nested_); \ 3545 } 3546 #endif // KMP_USE_ADAPTIVE_LOCKS 3547 3548 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location 3549 fill_table(__kmp_indirect_set_location, expand); 3550 #undef expand 3551 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags 3552 fill_table(__kmp_indirect_set_flags, expand); 3553 #undef expand 3554 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location 3555 fill_table(__kmp_indirect_get_location, expand); 3556 #undef expand 3557 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags 3558 fill_table(__kmp_indirect_get_flags, expand); 3559 #undef expand 3560 3561 __kmp_init_user_locks = TRUE; 3562 } 3563 3564 // Clean up the lock table. 
3565 void 3566 __kmp_cleanup_indirect_user_locks() 3567 { 3568 kmp_lock_index_t i; 3569 int k; 3570 3571 // Clean up locks in the pools first (they were already destroyed before going into the pools). 3572 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { 3573 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; 3574 while (l != NULL) { 3575 kmp_indirect_lock_t *ll = l; 3576 l = (kmp_indirect_lock_t *)l->lock->pool.next; 3577 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll)); 3578 __kmp_free(ll->lock); 3579 ll->lock = NULL; 3580 } 3581 __kmp_indirect_lock_pool[k] = NULL; 3582 } 3583 // Clean up the remaining undestroyed locks. 3584 for (i = 0; i < __kmp_i_lock_table.next; i++) { 3585 kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); 3586 if (l->lock != NULL) { 3587 // Locks not destroyed explicitly need to be destroyed here. 3588 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3589 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l)); 3590 __kmp_free(l->lock); 3591 } 3592 } 3593 // Free the table 3594 for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) 3595 __kmp_free(__kmp_i_lock_table.table[i]); 3596 __kmp_free(__kmp_i_lock_table.table); 3597 3598 __kmp_init_user_locks = FALSE; 3599 } 3600 3601 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3602 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3603 3604 #else // KMP_USE_DYNAMIC_LOCK 3605 3606 /* ------------------------------------------------------------------------ */ 3607 /* user locks 3608 * 3609 * They are implemented as a table of function pointers which are set to the 3610 * lock functions of the appropriate kind, once that has been determined. 3611 */ 3612 3613 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3614 3615 size_t __kmp_base_user_lock_size = 0; 3616 size_t __kmp_user_lock_size = 0; 3617 3618 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL; 3619 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3620 3621 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3622 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3623 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3624 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL; 3625 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3626 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3627 3628 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3629 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3630 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3631 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3632 3633 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL; 3634 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL; 3635 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL; 3636 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL; 3637 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL; 3638 3639 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) 3640 { 3641 switch ( user_lock_kind ) { 3642 case lk_default: 
void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
{
    switch ( user_lock_kind ) {
        case lk_default:
        default:
        KMP_ASSERT( 0 );

        case lk_tas: {
            __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
            __kmp_user_lock_size = sizeof( kmp_tas_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_tas_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
            }
            else {
                KMP_BIND_USER_LOCK(tas);
                KMP_BIND_NESTED_USER_LOCK(tas);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_tas_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)

        case lk_futex: {
            __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
            __kmp_user_lock_size = sizeof( kmp_futex_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_futex_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
            }
            else {
                KMP_BIND_USER_LOCK(futex);
                KMP_BIND_NESTED_USER_LOCK(futex);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_futex_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
        case lk_ticket: {
            __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
            __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
            }
            else {
                KMP_BIND_USER_LOCK(ticket);
                KMP_BIND_NESTED_USER_LOCK(ticket);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_ticket_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_ticket_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_ticket_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_ticket_lock_flags );
        }
        break;

        case lk_queuing: {
            __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
            __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
            }
            else {
                KMP_BIND_USER_LOCK(queuing);
                KMP_BIND_NESTED_USER_LOCK(queuing);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_queuing_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_queuing_lock_flags );
        }
        break;

#if KMP_USE_ADAPTIVE_LOCKS
        case lk_adaptive: {
            __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
            __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
            }
            else {
                KMP_BIND_USER_LOCK(adaptive);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_adaptive_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_queuing_lock_flags );
        }
        break;
#endif // KMP_USE_ADAPTIVE_LOCKS

        case lk_drdpa: {
            __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
            __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
            }
            else {
                KMP_BIND_USER_LOCK(drdpa);
                KMP_BIND_NESTED_USER_LOCK(drdpa);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_drdpa_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_drdpa_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_drdpa_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_drdpa_lock_flags );
        }
        break;
    }
}
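
/*
 * Illustrative call site (hypothetical, not part of this file): the runtime
 * picks one concrete kind -- e.g. driven by the KMP_LOCK_KIND setting -- and
 * binds the whole vtable in one shot:
 *
 *   __kmp_user_lock_kind = lk_ticket;
 *   __kmp_set_user_lock_vptrs( lk_ticket );
 *   // From here on, __kmp_get_user_lock_owner_ points at
 *   // __kmp_get_ticket_lock_owner (cast to the generic signature), and the
 *   // KMP_BIND_USER_LOCK* macros have bound the acquire/release/test entries.
 */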

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks* __kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t
__kmp_lock_table_insert( kmp_user_lock_p lck )
{
    // Assume that kmp_global_lock is held upon entry/exit.
    kmp_lock_index_t index;
    if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
        kmp_lock_index_t size;
        kmp_user_lock_p *table;
        // Reallocate lock table.
        if ( __kmp_user_lock_table.allocated == 0 ) {
            size = 1024;
        }
        else {
            size = __kmp_user_lock_table.allocated * 2;
        }
        table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
        KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
        table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
        // We cannot free the previous table now, since it may be in use by other
        // threads. So save the pointer to the previous table in the first element of
        // the new table. All the tables are thus organized into a list, and can be
        // freed when the library is shutting down.
        __kmp_user_lock_table.table = table;
        __kmp_user_lock_table.allocated = size;
    }
    KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
    index = __kmp_user_lock_table.used;
    __kmp_user_lock_table.table[ index ] = lck;
    ++ __kmp_user_lock_table.used;
    return index;
}

static kmp_user_lock_p
__kmp_lock_block_allocate()
{
    // Assume that kmp_global_lock is held upon entry/exit.
    static int last_index = 0;
    if ( ( last_index >= __kmp_num_locks_in_block )
      || ( __kmp_lock_blocks == NULL ) ) {
        // Restart the index.
        last_index = 0;
        // Need to allocate a new block.
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
        char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
        // Set up the new block.
        kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
        new_block->next_block = __kmp_lock_blocks;
        new_block->locks = (void *)buffer;
        // Publish the new block.
        KMP_MB();
        __kmp_lock_blocks = new_block;
    }
    kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
                                                [ last_index * __kmp_user_lock_size ] ) );
    last_index++;
    return ret;
}
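
/*
 * Illustrative memory layout of one block allocated above (not compiled):
 * the lock storage comes first and the kmp_block_of_locks header lives at the
 * very end of the same allocation, so freeing block_ptr->locks releases both:
 *
 *   buffer -> [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
 *             ^                                   ^
 *             new_block->locks                    new_block (== &buffer[space_for_locks])
 *
 * where N == __kmp_num_locks_in_block and each slot is __kmp_user_lock_size
 * bytes wide.
 */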
//
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
//
kmp_user_lock_p
__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
  kmp_lock_flags_t flags )
{
    kmp_user_lock_p lck;
    kmp_lock_index_t index;
    KMP_DEBUG_ASSERT( user_lock );

    __kmp_acquire_lock( &__kmp_global_lock, gtid );

    if ( __kmp_lock_pool == NULL ) {
        // Lock pool is empty. Allocate new memory.
        if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
            lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
        }
        else {
            lck = __kmp_lock_block_allocate();
        }

        // Insert lock in the table so that it can be freed in __kmp_cleanup,
        // and so the debugger has info on all allocated locks.
        index = __kmp_lock_table_insert( lck );
    }
    else {
        // Pick up lock from pool.
        lck = __kmp_lock_pool;
        index = __kmp_lock_pool->pool.index;
        __kmp_lock_pool = __kmp_lock_pool->pool.next;
    }

    //
    // We could potentially differentiate between nested and regular locks
    // here, and do the lock table lookup for regular locks only.
    //
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        * ( (kmp_lock_index_t *) user_lock ) = index;
    }
    else {
        * ( (kmp_user_lock_p *) user_lock ) = lck;
    }

    // Mark the lock if it is a critical section lock.
    __kmp_set_user_lock_flags( lck, flags );

    __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up

    return lck;
}

// Put the lock's memory into the pool for reuse.
void
__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( user_lock != NULL );
    KMP_DEBUG_ASSERT( lck != NULL );

    __kmp_acquire_lock( & __kmp_global_lock, gtid );

    lck->pool.next = __kmp_lock_pool;
    __kmp_lock_pool = lck;
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
        KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
        lck->pool.index = index;
    }

    __kmp_release_lock( & __kmp_global_lock, gtid );
}

kmp_user_lock_p
__kmp_lookup_user_lock( void **user_lock, char const *func )
{
    kmp_user_lock_p lck = NULL;

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
        if ( __kmp_env_consistency_check ) {
            if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
                KMP_FATAL( LockIsUninitialized, func );
            }
        }
        KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        lck = __kmp_user_lock_table.table[index];
    }
    else {
        lck = *( (kmp_user_lock_p *)user_lock );
    }

    if ( __kmp_env_consistency_check ) {
        if ( lck == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    return lck;
}
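
/*
 * Illustrative sketch (hypothetical helper, not part of this file) of the
 * encoding used above: when omp_lock_t is smaller than a pointer, the user's
 * lock variable stores a lock-table index; otherwise it stores the lock
 * pointer itself. __kmp_lookup_user_lock() decodes either form, so a later
 * call site (e.g. the omp_set_lock entry point) recovers the same object:
 *
 *   static void example_alloc_and_lookup( void **user_lock, kmp_int32 gtid ) {
 *       kmp_user_lock_p lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
 *       kmp_user_lock_p same = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
 *       KMP_DEBUG_ASSERT( same == lck );
 *   }
 */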
void
__kmp_cleanup_user_locks( void )
{
    //
    // Reset the lock pool. Do not worry about locks in the pool -- we will free
    // them when iterating through the lock table (it includes all the locks,
    // dead or alive).
    //
    __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck) \
        ( ( __kmp_get_user_lock_flags_ != NULL ) && \
          ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )

    //
    // Loop through the lock table, free all locks.
    //
    // Do not free item [0]; it is reserved for the lock tables list.
    //
    // FIXME - we are iterating through a list of (pointers to) objects of
    // type union kmp_user_lock, but we have no way of knowing whether the
    // base type is currently "pool" or whatever the global user lock type
    // is.
    //
    // We are relying on the fact that for all of the user lock types
    // (except "tas"), the first field in the lock struct is the "initialized"
    // field, which is set to the address of the lock object itself when
    // the lock is initialized. When the union is of type "pool", the
    // first field is a pointer to the next object in the free list, which
    // will not be the same address as the object itself.
    //
    // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
    // will fail for "pool" objects on the free list. This must happen as
    // the "location" field of real user locks overlaps the "index" field
    // of "pool" objects.
    //
    // It would be better to run through the free list, and remove all "pool"
    // objects from the lock table before executing this loop. However,
    // "pool" objects do not always have their index field set (only on
    // lin_32e), and I don't want to search the lock table for the address
    // of every "pool" object on the free list.
    //
    while ( __kmp_user_lock_table.used > 1 ) {
        const ident *loc;

        //
        // Reduce __kmp_user_lock_table.used before freeing the lock,
        // so that the state of the locks is consistent.
        //
        kmp_user_lock_p lck = __kmp_user_lock_table.table[
          --__kmp_user_lock_table.used ];

        if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
          ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
            //
            // Issue a warning if: KMP_CONSISTENCY_CHECK AND the lock is
            // initialized AND it is NOT a critical section (the user is not
            // responsible for destroying criticals) AND we know the source
            // location to report.
            //
            if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
              ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
              ( loc->psource != NULL ) ) {
                kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
                KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
                __kmp_str_loc_free( &str_loc );
            }

#ifdef KMP_DEBUG
            if ( IS_CRITICAL( lck ) ) {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
            }
            else {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
            }
#endif // KMP_DEBUG

            //
            // Clean up internal lock dynamic resources
            // (for drdpa locks particularly).
            //
            __kmp_destroy_user_lock( lck );
        }

        //
        // Free the lock if block allocation of locks is not used.
        //
        if ( __kmp_lock_blocks == NULL ) {
            __kmp_free( lck );
        }
    }

#undef IS_CRITICAL
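
    /*
     * Illustrative picture of what is freed below (not compiled): every time
     * __kmp_lock_table_insert() grew the table, it stashed the previous,
     * smaller table in element [0] of the new one, so the tables form a
     * singly linked chain that the loop below walks and frees:
     *
     *   __kmp_user_lock_table.table -> [ prev | lck ... ]
     *                                     |
     *                                     v
     *                                  [ prev | lck ... ]
     *                                     |
     *                                     v
     *                                    NULL
     */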
    //
    // Delete lock table(s).
    //
    kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
    __kmp_user_lock_table.table = NULL;
    __kmp_user_lock_table.allocated = 0;

    while ( table_ptr != NULL ) {
        //
        // In the first element we saved the pointer to the previous
        // (smaller) lock table.
        //
        kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
        __kmp_free( table_ptr );
        table_ptr = next;
    }

    //
    // Free buffers allocated for blocks of locks.
    //
    kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
    __kmp_lock_blocks = NULL;

    while ( block_ptr != NULL ) {
        kmp_block_of_locks_t *next = block_ptr->next_block;
        __kmp_free( block_ptr->locks );
        //
        // *block_ptr itself was allocated at the end of the locks vector.
        //
        block_ptr = next;
    }

    TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK