/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_i18n.h"
#include "kmp_lock.h"
#include "kmp_io.h"

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
# include <unistd.h>
# include <sys/syscall.h>
// We should really include <futex.h>, but that causes compatibility problems on different
// Linux* OS distributions that either require that you include (or break when you try to include)
// <pci/types.h>.
// Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
// we just define the constants here and don't include <futex.h>
# ifndef FUTEX_WAIT
#  define FUTEX_WAIT    0
# endif
# ifndef FUTEX_WAKE
#  define FUTEX_WAKE    1
# endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32 x, y;

    /* Check to make sure unsigned arithmetic wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}


/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//

static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
}

static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

__forceinline static void
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) ||
      ( !
KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) { 118 // 119 // FIXME - use exponential backoff here 120 // 121 if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 122 __kmp_xproc ) ) { 123 KMP_YIELD( TRUE ); 124 } 125 else { 126 KMP_YIELD_SPIN( spins ); 127 } 128 } 129 KMP_FSYNC_ACQUIRED( lck ); 130 } 131 132 void 133 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 134 { 135 __kmp_acquire_tas_lock_timed_template( lck, gtid ); 136 } 137 138 static void 139 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 140 { 141 char const * const func = "omp_set_lock"; 142 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 143 && __kmp_is_tas_lock_nestable( lck ) ) { 144 KMP_FATAL( LockNestableUsedAsSimple, func ); 145 } 146 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) { 147 KMP_FATAL( LockIsAlreadyOwned, func ); 148 } 149 __kmp_acquire_tas_lock( lck, gtid ); 150 } 151 152 int 153 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 154 { 155 if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) ) 156 && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) { 157 KMP_FSYNC_ACQUIRED( lck ); 158 return TRUE; 159 } 160 return FALSE; 161 } 162 163 static int 164 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 165 { 166 char const * const func = "omp_test_lock"; 167 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 168 && __kmp_is_tas_lock_nestable( lck ) ) { 169 KMP_FATAL( LockNestableUsedAsSimple, func ); 170 } 171 return __kmp_test_tas_lock( lck, gtid ); 172 } 173 174 void 175 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 176 { 177 KMP_MB(); /* Flush all pending memory write invalidates. */ 178 179 KMP_FSYNC_RELEASING(lck); 180 KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) ); 181 KMP_MB(); /* Flush all pending memory write invalidates. */ 182 183 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? 
__kmp_avail_proc : 184 __kmp_xproc ) ); 185 } 186 187 static void 188 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 189 { 190 char const * const func = "omp_unset_lock"; 191 KMP_MB(); /* in case another processor initialized lock */ 192 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 193 && __kmp_is_tas_lock_nestable( lck ) ) { 194 KMP_FATAL( LockNestableUsedAsSimple, func ); 195 } 196 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 197 KMP_FATAL( LockUnsettingFree, func ); 198 } 199 if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 ) 200 && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) { 201 KMP_FATAL( LockUnsettingSetByAnother, func ); 202 } 203 __kmp_release_tas_lock( lck, gtid ); 204 } 205 206 void 207 __kmp_init_tas_lock( kmp_tas_lock_t * lck ) 208 { 209 TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) ); 210 } 211 212 static void 213 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck ) 214 { 215 __kmp_init_tas_lock( lck ); 216 } 217 218 void 219 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ) 220 { 221 lck->lk.poll = 0; 222 } 223 224 static void 225 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck ) 226 { 227 char const * const func = "omp_destroy_lock"; 228 if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) 229 && __kmp_is_tas_lock_nestable( lck ) ) { 230 KMP_FATAL( LockNestableUsedAsSimple, func ); 231 } 232 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 233 KMP_FATAL( LockStillOwned, func ); 234 } 235 __kmp_destroy_tas_lock( lck ); 236 } 237 238 239 // 240 // nested test and set locks 241 // 242 243 void 244 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 245 { 246 KMP_DEBUG_ASSERT( gtid >= 0 ); 247 248 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 249 lck->lk.depth_locked += 1; 250 } 251 else { 252 __kmp_acquire_tas_lock_timed_template( lck, gtid ); 253 lck->lk.depth_locked = 1; 254 } 255 } 256 257 static void 258 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 259 { 260 char const * const func = "omp_set_nest_lock"; 261 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 262 KMP_FATAL( LockSimpleUsedAsNestable, func ); 263 } 264 __kmp_acquire_nested_tas_lock( lck, gtid ); 265 } 266 267 int 268 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 269 { 270 int retval; 271 272 KMP_DEBUG_ASSERT( gtid >= 0 ); 273 274 if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { 275 retval = ++lck->lk.depth_locked; 276 } 277 else if ( !__kmp_test_tas_lock( lck, gtid ) ) { 278 retval = 0; 279 } 280 else { 281 KMP_MB(); 282 retval = lck->lk.depth_locked = 1; 283 } 284 return retval; 285 } 286 287 static int 288 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 289 { 290 char const * const func = "omp_test_nest_lock"; 291 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 292 KMP_FATAL( LockSimpleUsedAsNestable, func ); 293 } 294 return __kmp_test_nested_tas_lock( lck, gtid ); 295 } 296 297 void 298 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) 299 { 300 KMP_DEBUG_ASSERT( gtid >= 0 ); 301 302 KMP_MB(); 303 if ( --(lck->lk.depth_locked) == 0 ) { 304 __kmp_release_tas_lock( lck, gtid ); 305 } 306 } 307 308 static void 309 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) 310 { 311 char const * const func = "omp_unset_nest_lock"; 312 KMP_MB(); /* in case another processor initialized lock */ 313 if ( ! 
__kmp_is_tas_lock_nestable( lck ) ) { 314 KMP_FATAL( LockSimpleUsedAsNestable, func ); 315 } 316 if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { 317 KMP_FATAL( LockUnsettingFree, func ); 318 } 319 if ( __kmp_get_tas_lock_owner( lck ) != gtid ) { 320 KMP_FATAL( LockUnsettingSetByAnother, func ); 321 } 322 __kmp_release_nested_tas_lock( lck, gtid ); 323 } 324 325 void 326 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck ) 327 { 328 __kmp_init_tas_lock( lck ); 329 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 330 } 331 332 static void 333 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck ) 334 { 335 __kmp_init_nested_tas_lock( lck ); 336 } 337 338 void 339 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ) 340 { 341 __kmp_destroy_tas_lock( lck ); 342 lck->lk.depth_locked = 0; 343 } 344 345 static void 346 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) 347 { 348 char const * const func = "omp_destroy_nest_lock"; 349 if ( ! __kmp_is_tas_lock_nestable( lck ) ) { 350 KMP_FATAL( LockSimpleUsedAsNestable, func ); 351 } 352 if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { 353 KMP_FATAL( LockStillOwned, func ); 354 } 355 __kmp_destroy_nested_tas_lock( lck ); 356 } 357 358 359 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 360 361 /* ------------------------------------------------------------------------ */ 362 /* futex locks */ 363 364 // futex locks are really just test and set locks, with a different method 365 // of handling contention. They take the same amount of space as test and 366 // set locks, and are allocated the same way (i.e. use the area allocated by 367 // the compiler for non-nested locks / allocate nested locks on the heap). 368 369 static kmp_int32 370 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck ) 371 { 372 return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1; 373 } 374 375 static inline bool 376 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck ) 377 { 378 return lck->lk.depth_locked != -1; 379 } 380 381 __forceinline static void 382 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid ) 383 { 384 kmp_int32 gtid_code = ( gtid + 1 ) << 1; 385 386 KMP_MB(); 387 388 #ifdef USE_LOCK_PROFILE 389 kmp_uint32 curr = TCR_4( lck->lk.poll ); 390 if ( ( curr != 0 ) && ( curr != gtid_code ) ) 391 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 392 /* else __kmp_printf( "." );*/ 393 #endif /* USE_LOCK_PROFILE */ 394 395 KMP_FSYNC_PREPARE( lck ); 396 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", 397 lck, lck->lk.poll, gtid ) ); 398 399 kmp_int32 poll_val; 400 401 while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), 402 DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) { 403 404 kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; 405 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", 406 lck, gtid, poll_val, cond ) ); 407 408 // 409 // NOTE: if you try to use the following condition for this branch 410 // 411 // if ( poll_val & 1 == 0 ) 412 // 413 // Then the 12.0 compiler has a bug where the following block will 414 // always be skipped, regardless of the value of the LSB of poll_val. 415 // 416 if ( ! cond ) { 417 // 418 // Try to set the lsb in the poll to indicate to the owner 419 // thread that they need to wake this thread up. 420 // 421 if ( ! 
KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) { 422 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", 423 lck, lck->lk.poll, gtid ) ); 424 continue; 425 } 426 poll_val |= DYNA_LOCK_BUSY(1, futex); 427 428 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", 429 lck, lck->lk.poll, gtid ) ); 430 } 431 432 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", 433 lck, gtid, poll_val ) ); 434 435 kmp_int32 rc; 436 if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, 437 poll_val, NULL, NULL, 0 ) ) != 0 ) { 438 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n", 439 lck, gtid, poll_val, rc, errno ) ); 440 continue; 441 } 442 443 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", 444 lck, gtid, poll_val ) ); 445 // 446 // This thread has now done a successful futex wait call and was 447 // entered on the OS futex queue. We must now perform a futex 448 // wake call when releasing the lock, as we have no idea how many 449 // other threads are in the queue. 450 // 451 gtid_code |= 1; 452 } 453 454 KMP_FSYNC_ACQUIRED( lck ); 455 KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", 456 lck, lck->lk.poll, gtid ) ); 457 } 458 459 void 460 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 461 { 462 __kmp_acquire_futex_lock_timed_template( lck, gtid ); 463 } 464 465 static void 466 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 467 { 468 char const * const func = "omp_set_lock"; 469 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 470 && __kmp_is_futex_lock_nestable( lck ) ) { 471 KMP_FATAL( LockNestableUsedAsSimple, func ); 472 } 473 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) { 474 KMP_FATAL( LockIsAlreadyOwned, func ); 475 } 476 __kmp_acquire_futex_lock( lck, gtid ); 477 } 478 479 int 480 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 481 { 482 if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) { 483 KMP_FSYNC_ACQUIRED( lck ); 484 return TRUE; 485 } 486 return FALSE; 487 } 488 489 static int 490 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 491 { 492 char const * const func = "omp_test_lock"; 493 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 494 && __kmp_is_futex_lock_nestable( lck ) ) { 495 KMP_FATAL( LockNestableUsedAsSimple, func ); 496 } 497 return __kmp_test_futex_lock( lck, gtid ); 498 } 499 500 void 501 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 502 { 503 KMP_MB(); /* Flush all pending memory write invalidates. */ 504 505 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", 506 lck, lck->lk.poll, gtid ) ); 507 508 KMP_FSYNC_RELEASING(lck); 509 510 kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) ); 511 512 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", 513 lck, gtid, poll_val ) ); 514 515 if ( DYNA_LOCK_STRIP(poll_val) & 1 ) { 516 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", 517 lck, gtid ) ); 518 syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 ); 519 } 520 521 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 522 523 KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", 524 lck, lck->lk.poll, gtid ) ); 525 526 KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : 527 __kmp_xproc ) ); 528 } 529 530 static void 531 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 532 { 533 char const * const func = "omp_unset_lock"; 534 KMP_MB(); /* in case another processor initialized lock */ 535 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 536 && __kmp_is_futex_lock_nestable( lck ) ) { 537 KMP_FATAL( LockNestableUsedAsSimple, func ); 538 } 539 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 540 KMP_FATAL( LockUnsettingFree, func ); 541 } 542 if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 ) 543 && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) { 544 KMP_FATAL( LockUnsettingSetByAnother, func ); 545 } 546 __kmp_release_futex_lock( lck, gtid ); 547 } 548 549 void 550 __kmp_init_futex_lock( kmp_futex_lock_t * lck ) 551 { 552 TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) ); 553 } 554 555 static void 556 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck ) 557 { 558 __kmp_init_futex_lock( lck ); 559 } 560 561 void 562 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ) 563 { 564 lck->lk.poll = 0; 565 } 566 567 static void 568 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck ) 569 { 570 char const * const func = "omp_destroy_lock"; 571 if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) 572 && __kmp_is_futex_lock_nestable( lck ) ) { 573 KMP_FATAL( LockNestableUsedAsSimple, func ); 574 } 575 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 576 KMP_FATAL( LockStillOwned, func ); 577 } 578 __kmp_destroy_futex_lock( lck ); 579 } 580 581 582 // 583 // nested futex locks 584 // 585 586 void 587 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 588 { 589 KMP_DEBUG_ASSERT( gtid >= 0 ); 590 591 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 592 lck->lk.depth_locked += 1; 593 } 594 else { 595 __kmp_acquire_futex_lock_timed_template( lck, gtid ); 596 lck->lk.depth_locked = 1; 597 } 598 } 599 600 static void 601 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 602 { 603 char const * const func = "omp_set_nest_lock"; 604 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 605 KMP_FATAL( LockSimpleUsedAsNestable, func ); 606 } 607 __kmp_acquire_nested_futex_lock( lck, gtid ); 608 } 609 610 int 611 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 612 { 613 int retval; 614 615 KMP_DEBUG_ASSERT( gtid >= 0 ); 616 617 if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { 618 retval = ++lck->lk.depth_locked; 619 } 620 else if ( !__kmp_test_futex_lock( lck, gtid ) ) { 621 retval = 0; 622 } 623 else { 624 KMP_MB(); 625 retval = lck->lk.depth_locked = 1; 626 } 627 return retval; 628 } 629 630 static int 631 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 632 { 633 char const * const func = "omp_test_nest_lock"; 634 if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { 635 KMP_FATAL( LockSimpleUsedAsNestable, func ); 636 } 637 return __kmp_test_nested_futex_lock( lck, gtid ); 638 } 639 640 void 641 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) 642 { 643 KMP_DEBUG_ASSERT( gtid >= 0 ); 644 645 KMP_MB(); 646 if ( --(lck->lk.depth_locked) == 0 ) { 647 __kmp_release_futex_lock( lck, gtid ); 648 } 649 } 650 651 static void 652 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) 653 { 654 char const * const func = "omp_unset_nest_lock"; 655 KMP_MB(); /* in case another processor initialized lock */ 656 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 657 KMP_FATAL( LockSimpleUsedAsNestable, func ); 658 } 659 if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { 660 KMP_FATAL( LockUnsettingFree, func ); 661 } 662 if ( __kmp_get_futex_lock_owner( lck ) != gtid ) { 663 KMP_FATAL( LockUnsettingSetByAnother, func ); 664 } 665 __kmp_release_nested_futex_lock( lck, gtid ); 666 } 667 668 void 669 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck ) 670 { 671 __kmp_init_futex_lock( lck ); 672 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 673 } 674 675 static void 676 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck ) 677 { 678 __kmp_init_nested_futex_lock( lck ); 679 } 680 681 void 682 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ) 683 { 684 __kmp_destroy_futex_lock( lck ); 685 lck->lk.depth_locked = 0; 686 } 687 688 static void 689 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck ) 690 { 691 char const * const func = "omp_destroy_nest_lock"; 692 if ( ! __kmp_is_futex_lock_nestable( lck ) ) { 693 KMP_FATAL( LockSimpleUsedAsNestable, func ); 694 } 695 if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { 696 KMP_FATAL( LockStillOwned, func ); 697 } 698 __kmp_destroy_nested_futex_lock( lck ); 699 } 700 701 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 702 703 704 /* ------------------------------------------------------------------------ */ 705 /* ticket (bakery) locks */ 706 707 static kmp_int32 708 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck ) 709 { 710 return TCR_4( lck->lk.owner_id ) - 1; 711 } 712 713 static inline bool 714 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck ) 715 { 716 return lck->lk.depth_locked != -1; 717 } 718 719 static kmp_uint32 720 __kmp_bakery_check(kmp_uint value, kmp_uint checker) 721 { 722 register kmp_uint32 pause; 723 724 if (value == checker) { 725 return TRUE; 726 } 727 for (pause = checker - value; pause != 0; --pause); 728 return FALSE; 729 } 730 731 __forceinline static void 732 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 733 { 734 kmp_uint32 my_ticket; 735 KMP_MB(); 736 737 my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket ); 738 739 #ifdef USE_LOCK_PROFILE 740 if ( TCR_4( lck->lk.now_serving ) != my_ticket ) 741 __kmp_printf( "LOCK CONTENTION: %p\n", lck ); 742 /* else __kmp_printf( "." 
);*/ 743 #endif /* USE_LOCK_PROFILE */ 744 745 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { 746 KMP_FSYNC_ACQUIRED(lck); 747 return; 748 } 749 KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck ); 750 KMP_FSYNC_ACQUIRED(lck); 751 } 752 753 void 754 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 755 { 756 __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 757 } 758 759 static void 760 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 761 { 762 char const * const func = "omp_set_lock"; 763 if ( lck->lk.initialized != lck ) { 764 KMP_FATAL( LockIsUninitialized, func ); 765 } 766 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 767 KMP_FATAL( LockNestableUsedAsSimple, func ); 768 } 769 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) { 770 KMP_FATAL( LockIsAlreadyOwned, func ); 771 } 772 773 __kmp_acquire_ticket_lock( lck, gtid ); 774 775 lck->lk.owner_id = gtid + 1; 776 } 777 778 int 779 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 780 { 781 kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket ); 782 if ( TCR_4( lck->lk.now_serving ) == my_ticket ) { 783 kmp_uint32 next_ticket = my_ticket + 1; 784 if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket, 785 my_ticket, next_ticket ) ) { 786 KMP_FSYNC_ACQUIRED( lck ); 787 return TRUE; 788 } 789 } 790 return FALSE; 791 } 792 793 static int 794 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 795 { 796 char const * const func = "omp_test_lock"; 797 if ( lck->lk.initialized != lck ) { 798 KMP_FATAL( LockIsUninitialized, func ); 799 } 800 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 801 KMP_FATAL( LockNestableUsedAsSimple, func ); 802 } 803 804 int retval = __kmp_test_ticket_lock( lck, gtid ); 805 806 if ( retval ) { 807 lck->lk.owner_id = gtid + 1; 808 } 809 return retval; 810 } 811 812 void 813 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 814 { 815 kmp_uint32 distance; 816 817 KMP_MB(); /* Flush all pending memory write invalidates. */ 818 819 KMP_FSYNC_RELEASING(lck); 820 distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) ); 821 822 KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 ); 823 824 KMP_MB(); /* Flush all pending memory write invalidates. */ 825 826 KMP_YIELD( distance 827 > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ); 828 } 829 830 static void 831 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 832 { 833 char const * const func = "omp_unset_lock"; 834 KMP_MB(); /* in case another processor initialized lock */ 835 if ( lck->lk.initialized != lck ) { 836 KMP_FATAL( LockIsUninitialized, func ); 837 } 838 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 839 KMP_FATAL( LockNestableUsedAsSimple, func ); 840 } 841 if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { 842 KMP_FATAL( LockUnsettingFree, func ); 843 } 844 if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 ) 845 && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) { 846 KMP_FATAL( LockUnsettingSetByAnother, func ); 847 } 848 lck->lk.owner_id = 0; 849 __kmp_release_ticket_lock( lck, gtid ); 850 } 851 852 void 853 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck ) 854 { 855 lck->lk.location = NULL; 856 TCW_4( lck->lk.next_ticket, 0 ); 857 TCW_4( lck->lk.now_serving, 0 ); 858 lck->lk.owner_id = 0; // no thread owns the lock. 859 lck->lk.depth_locked = -1; // -1 => not a nested lock. 
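    // Note: 'initialized' is stored last.  The *_with_checks routines treat
    // ( lck->lk.initialized != lck ) as an uninitialized lock and report LockIsUninitialized.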
860 lck->lk.initialized = (kmp_ticket_lock *)lck; 861 } 862 863 static void 864 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) 865 { 866 __kmp_init_ticket_lock( lck ); 867 } 868 869 void 870 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ) 871 { 872 lck->lk.initialized = NULL; 873 lck->lk.location = NULL; 874 lck->lk.next_ticket = 0; 875 lck->lk.now_serving = 0; 876 lck->lk.owner_id = 0; 877 lck->lk.depth_locked = -1; 878 } 879 880 static void 881 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) 882 { 883 char const * const func = "omp_destroy_lock"; 884 if ( lck->lk.initialized != lck ) { 885 KMP_FATAL( LockIsUninitialized, func ); 886 } 887 if ( __kmp_is_ticket_lock_nestable( lck ) ) { 888 KMP_FATAL( LockNestableUsedAsSimple, func ); 889 } 890 if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { 891 KMP_FATAL( LockStillOwned, func ); 892 } 893 __kmp_destroy_ticket_lock( lck ); 894 } 895 896 897 // 898 // nested ticket locks 899 // 900 901 void 902 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 903 { 904 KMP_DEBUG_ASSERT( gtid >= 0 ); 905 906 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 907 lck->lk.depth_locked += 1; 908 } 909 else { 910 __kmp_acquire_ticket_lock_timed_template( lck, gtid ); 911 KMP_MB(); 912 lck->lk.depth_locked = 1; 913 KMP_MB(); 914 lck->lk.owner_id = gtid + 1; 915 } 916 } 917 918 static void 919 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 920 { 921 char const * const func = "omp_set_nest_lock"; 922 if ( lck->lk.initialized != lck ) { 923 KMP_FATAL( LockIsUninitialized, func ); 924 } 925 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 926 KMP_FATAL( LockSimpleUsedAsNestable, func ); 927 } 928 __kmp_acquire_nested_ticket_lock( lck, gtid ); 929 } 930 931 int 932 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 933 { 934 int retval; 935 936 KMP_DEBUG_ASSERT( gtid >= 0 ); 937 938 if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { 939 retval = ++lck->lk.depth_locked; 940 } 941 else if ( !__kmp_test_ticket_lock( lck, gtid ) ) { 942 retval = 0; 943 } 944 else { 945 KMP_MB(); 946 retval = lck->lk.depth_locked = 1; 947 KMP_MB(); 948 lck->lk.owner_id = gtid + 1; 949 } 950 return retval; 951 } 952 953 static int 954 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, 955 kmp_int32 gtid ) 956 { 957 char const * const func = "omp_test_nest_lock"; 958 if ( lck->lk.initialized != lck ) { 959 KMP_FATAL( LockIsUninitialized, func ); 960 } 961 if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { 962 KMP_FATAL( LockSimpleUsedAsNestable, func ); 963 } 964 return __kmp_test_nested_ticket_lock( lck, gtid ); 965 } 966 967 void 968 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 969 { 970 KMP_DEBUG_ASSERT( gtid >= 0 ); 971 972 KMP_MB(); 973 if ( --(lck->lk.depth_locked) == 0 ) { 974 KMP_MB(); 975 lck->lk.owner_id = 0; 976 __kmp_release_ticket_lock( lck, gtid ); 977 } 978 } 979 980 static void 981 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) 982 { 983 char const * const func = "omp_unset_nest_lock"; 984 KMP_MB(); /* in case another processor initialized lock */ 985 if ( lck->lk.initialized != lck ) { 986 KMP_FATAL( LockIsUninitialized, func ); 987 } 988 if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_ticket_lock( lck, gtid );
}

void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_nested_ticket_lock( lck );
}

void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_ticket_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

static int
__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

static const ident_t *
__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

static kmp_lock_flags_t
__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/*
 * First the states
 * (head,tail) =              0, 0  means lock is unheld, nobody on queue
 *                  UINT_MAX or -1, 0  means lock is held, nobody on queue
 *                               h, h  means lock is held or about to transition, 1 element on queue
 *                               h, t  h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *    Acquire(0,0)  = -1 ,0
 *    Release(0,0)  = Error
 *    Acquire(-1,0) =  h ,h    h > 0
 *    Release(-1,0) =  0 ,0
 *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *    Release(h,h)  = -1 ,0    h > 0
 *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *             ^                                      |
 *             |                                      |
 *             +------------------------------------+
 *
 */

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE   1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
static int tc = 0;
#define TRACE_LOCK(X,Y)         sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z)     sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X, Y, Z );
#define TRACE_LOCK_HT(X,Y,Z,Q)  sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );

static void
__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
{
    kmp_int32 t, i;

    __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );

    i = tc % TRACE_BUF_ELE;
    __kmp_printf_no_lock( "%s\n", traces[i] );
    i = (i+1) % TRACE_BUF_ELE;
    while ( i != (tc % TRACE_BUF_ELE) ) {
        __kmp_printf_no_lock( "%s", traces[i] );
        i = (i+1) % TRACE_BUF_ELE;
    }
    __kmp_printf_no_lock( "\n" );

    __kmp_printf_no_lock(
             "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
             gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
             head_id, tail_id );

    __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );

    if ( lck->lk.head_id >= 1 ) {
        t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
        while (t > 0) {
            __kmp_printf_no_lock( "-> %d ", t );
            t = __kmp_threads[t-1]->th.th_next_waiting;
        }
    }
    __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
    __kmp_printf_no_lock( "\n\n" );
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32
__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

static inline bool
__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
__forceinline static void
__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
    volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
    volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
    volatile kmp_uint32 *spin_here_p;
    kmp_int32 need_mf = 1;

    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));

    KMP_FSYNC_PREPARE( lck );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    spin_here_p = & this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "acq ent" );
    if ( *spin_here_p )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !*spin_here_p );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );


    /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
       that may follow, not just in execution order, but also in visibility order.
This way, 1219 when a releasing thread observes the changes to the queue by this thread, it can 1220 rightly assume that spin_here_p has already been set to TRUE, so that when it sets 1221 spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p 1222 to FALSE before this thread sets it to TRUE, this thread will hang. 1223 */ 1224 *spin_here_p = TRUE; /* before enqueuing to prevent race */ 1225 1226 while( 1 ) { 1227 kmp_int32 enqueued; 1228 kmp_int32 head; 1229 kmp_int32 tail; 1230 1231 head = *head_id_p; 1232 1233 switch ( head ) { 1234 1235 case -1: 1236 { 1237 #ifdef DEBUG_QUEUING_LOCKS 1238 tail = *tail_id_p; 1239 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1240 #endif 1241 tail = 0; /* to make sure next link asynchronously read is not set accidentally; 1242 this assignment prevents us from entering the if ( t > 0 ) 1243 condition in the enqueued case below, which is not necessary for 1244 this state transition */ 1245 1246 need_mf = 0; 1247 /* try (-1,0)->(tid,tid) */ 1248 enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p, 1249 KMP_PACK_64( -1, 0 ), 1250 KMP_PACK_64( gtid+1, gtid+1 ) ); 1251 #ifdef DEBUG_QUEUING_LOCKS 1252 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" ); 1253 #endif 1254 } 1255 break; 1256 1257 default: 1258 { 1259 tail = *tail_id_p; 1260 KMP_DEBUG_ASSERT( tail != gtid + 1 ); 1261 1262 #ifdef DEBUG_QUEUING_LOCKS 1263 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1264 #endif 1265 1266 if ( tail == 0 ) { 1267 enqueued = FALSE; 1268 } 1269 else { 1270 need_mf = 0; 1271 /* try (h,t) or (h,h)->(h,tid) */ 1272 enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 ); 1273 1274 #ifdef DEBUG_QUEUING_LOCKS 1275 if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" ); 1276 #endif 1277 } 1278 } 1279 break; 1280 1281 case 0: /* empty queue */ 1282 { 1283 kmp_int32 grabbed_lock; 1284 1285 #ifdef DEBUG_QUEUING_LOCKS 1286 tail = *tail_id_p; 1287 TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); 1288 #endif 1289 /* try (0,0)->(-1,0) */ 1290 1291 /* only legal transition out of head = 0 is head = -1 with no change to tail */ 1292 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ); 1293 1294 if ( grabbed_lock ) { 1295 1296 *spin_here_p = FALSE; 1297 1298 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", 1299 lck, gtid )); 1300 #ifdef DEBUG_QUEUING_LOCKS 1301 TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 ); 1302 #endif 1303 KMP_FSYNC_ACQUIRED( lck ); 1304 return; /* lock holder cannot be on queue */ 1305 } 1306 enqueued = FALSE; 1307 } 1308 break; 1309 } 1310 1311 if ( enqueued ) { 1312 if ( tail > 0 ) { 1313 kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 ); 1314 KMP_ASSERT( tail_thr != NULL ); 1315 tail_thr->th.th_next_waiting = gtid+1; 1316 /* corresponding wait for this write in release code */ 1317 } 1318 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid )); 1319 1320 1321 /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for 1322 * throughput only here. 
1323 */ 1324 KMP_MB(); 1325 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); 1326 1327 #ifdef DEBUG_QUEUING_LOCKS 1328 TRACE_LOCK( gtid+1, "acq spin" ); 1329 1330 if ( this_thr->th.th_next_waiting != 0 ) 1331 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1332 #endif 1333 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1334 KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n", 1335 lck, gtid )); 1336 1337 #ifdef DEBUG_QUEUING_LOCKS 1338 TRACE_LOCK( gtid+1, "acq exit 2" ); 1339 #endif 1340 /* got lock, we were dequeued by the thread that released lock */ 1341 return; 1342 } 1343 1344 /* Yield if number of threads > number of logical processors */ 1345 /* ToDo: Not sure why this should only be in oversubscription case, 1346 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ 1347 KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc : 1348 __kmp_xproc ) ); 1349 #ifdef DEBUG_QUEUING_LOCKS 1350 TRACE_LOCK( gtid+1, "acq retry" ); 1351 #endif 1352 1353 } 1354 KMP_ASSERT2( 0, "should not get here" ); 1355 } 1356 1357 void 1358 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1359 { 1360 KMP_DEBUG_ASSERT( gtid >= 0 ); 1361 1362 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1363 } 1364 1365 static void 1366 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1367 kmp_int32 gtid ) 1368 { 1369 char const * const func = "omp_set_lock"; 1370 if ( lck->lk.initialized != lck ) { 1371 KMP_FATAL( LockIsUninitialized, func ); 1372 } 1373 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1374 KMP_FATAL( LockNestableUsedAsSimple, func ); 1375 } 1376 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1377 KMP_FATAL( LockIsAlreadyOwned, func ); 1378 } 1379 1380 __kmp_acquire_queuing_lock( lck, gtid ); 1381 1382 lck->lk.owner_id = gtid + 1; 1383 } 1384 1385 int 1386 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1387 { 1388 volatile kmp_int32 *head_id_p = & lck->lk.head_id; 1389 kmp_int32 head; 1390 #ifdef KMP_DEBUG 1391 kmp_info_t *this_thr; 1392 #endif 1393 1394 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid )); 1395 KMP_DEBUG_ASSERT( gtid >= 0 ); 1396 #ifdef KMP_DEBUG 1397 this_thr = __kmp_thread_from_gtid( gtid ); 1398 KMP_DEBUG_ASSERT( this_thr != NULL ); 1399 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1400 #endif 1401 1402 head = *head_id_p; 1403 1404 if ( head == 0 ) { /* nobody on queue, nobody holding */ 1405 1406 /* try (0,0)->(-1,0) */ 1407 1408 if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) { 1409 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid )); 1410 KMP_FSYNC_ACQUIRED(lck); 1411 return TRUE; 1412 } 1413 } 1414 1415 KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid )); 1416 return FALSE; 1417 } 1418 1419 static int 1420 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1421 { 1422 char const * const func = "omp_test_lock"; 1423 if ( lck->lk.initialized != lck ) { 1424 KMP_FATAL( LockIsUninitialized, func ); 1425 } 1426 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1427 KMP_FATAL( LockNestableUsedAsSimple, func ); 1428 } 1429 1430 int retval = __kmp_test_queuing_lock( lck, gtid ); 1431 1432 if ( retval ) { 1433 lck->lk.owner_id = gtid + 1; 1434 } 1435 return retval; 1436 } 1437 1438 void 1439 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1440 { 1441 register kmp_info_t *this_thr; 1442 volatile 
kmp_int32 *head_id_p = & lck->lk.head_id; 1443 volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; 1444 1445 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); 1446 KMP_DEBUG_ASSERT( gtid >= 0 ); 1447 this_thr = __kmp_thread_from_gtid( gtid ); 1448 KMP_DEBUG_ASSERT( this_thr != NULL ); 1449 #ifdef DEBUG_QUEUING_LOCKS 1450 TRACE_LOCK( gtid+1, "rel ent" ); 1451 1452 if ( this_thr->th.th_spin_here ) 1453 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1454 if ( this_thr->th.th_next_waiting != 0 ) 1455 __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); 1456 #endif 1457 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); 1458 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); 1459 1460 KMP_FSYNC_RELEASING(lck); 1461 1462 while( 1 ) { 1463 kmp_int32 dequeued; 1464 kmp_int32 head; 1465 kmp_int32 tail; 1466 1467 head = *head_id_p; 1468 1469 #ifdef DEBUG_QUEUING_LOCKS 1470 tail = *tail_id_p; 1471 TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail ); 1472 if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1473 #endif 1474 KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */ 1475 1476 if ( head == -1 ) { /* nobody on queue */ 1477 1478 /* try (-1,0)->(0,0) */ 1479 if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) { 1480 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1481 lck, gtid )); 1482 #ifdef DEBUG_QUEUING_LOCKS 1483 TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 ); 1484 #endif 1485 return; 1486 } 1487 dequeued = FALSE; 1488 1489 } 1490 else { 1491 1492 tail = *tail_id_p; 1493 if ( head == tail ) { /* only one thread on the queue */ 1494 1495 #ifdef DEBUG_QUEUING_LOCKS 1496 if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1497 #endif 1498 KMP_DEBUG_ASSERT( head > 0 ); 1499 1500 /* try (h,h)->(-1,0) */ 1501 dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p, 1502 KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) ); 1503 #ifdef DEBUG_QUEUING_LOCKS 1504 TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" ); 1505 #endif 1506 1507 } 1508 else { 1509 volatile kmp_int32 *waiting_id_p; 1510 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1511 KMP_DEBUG_ASSERT( head_thr != NULL ); 1512 waiting_id_p = & head_thr->th.th_next_waiting; 1513 1514 /* Does this require synchronous reads? */ 1515 #ifdef DEBUG_QUEUING_LOCKS 1516 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1517 #endif 1518 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1519 1520 /* try (h,t)->(h',t) or (t,t) */ 1521 1522 KMP_MB(); 1523 /* make sure enqueuing thread has time to update next waiting thread field */ 1524 *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL); 1525 #ifdef DEBUG_QUEUING_LOCKS 1526 TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" ); 1527 #endif 1528 dequeued = TRUE; 1529 } 1530 } 1531 1532 if ( dequeued ) { 1533 kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); 1534 KMP_DEBUG_ASSERT( head_thr != NULL ); 1535 1536 /* Does this require synchronous reads? */ 1537 #ifdef DEBUG_QUEUING_LOCKS 1538 if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); 1539 #endif 1540 KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); 1541 1542 /* For clean code only. 1543 * Thread not released until next statement prevents race with acquire code. 
1544 */ 1545 head_thr->th.th_next_waiting = 0; 1546 #ifdef DEBUG_QUEUING_LOCKS 1547 TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head ); 1548 #endif 1549 1550 KMP_MB(); 1551 /* reset spin value */ 1552 head_thr->th.th_spin_here = FALSE; 1553 1554 KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n", 1555 lck, gtid )); 1556 #ifdef DEBUG_QUEUING_LOCKS 1557 TRACE_LOCK( gtid+1, "rel exit 2" ); 1558 #endif 1559 return; 1560 } 1561 /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */ 1562 1563 #ifdef DEBUG_QUEUING_LOCKS 1564 TRACE_LOCK( gtid+1, "rel retry" ); 1565 #endif 1566 1567 } /* while */ 1568 KMP_ASSERT2( 0, "should not get here" ); 1569 } 1570 1571 static void 1572 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1573 kmp_int32 gtid ) 1574 { 1575 char const * const func = "omp_unset_lock"; 1576 KMP_MB(); /* in case another processor initialized lock */ 1577 if ( lck->lk.initialized != lck ) { 1578 KMP_FATAL( LockIsUninitialized, func ); 1579 } 1580 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1581 KMP_FATAL( LockNestableUsedAsSimple, func ); 1582 } 1583 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1584 KMP_FATAL( LockUnsettingFree, func ); 1585 } 1586 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1587 KMP_FATAL( LockUnsettingSetByAnother, func ); 1588 } 1589 lck->lk.owner_id = 0; 1590 __kmp_release_queuing_lock( lck, gtid ); 1591 } 1592 1593 void 1594 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ) 1595 { 1596 lck->lk.location = NULL; 1597 lck->lk.head_id = 0; 1598 lck->lk.tail_id = 0; 1599 lck->lk.next_ticket = 0; 1600 lck->lk.now_serving = 0; 1601 lck->lk.owner_id = 0; // no thread owns the lock. 1602 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
1603 lck->lk.initialized = lck; 1604 1605 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1606 } 1607 1608 static void 1609 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1610 { 1611 __kmp_init_queuing_lock( lck ); 1612 } 1613 1614 void 1615 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ) 1616 { 1617 lck->lk.initialized = NULL; 1618 lck->lk.location = NULL; 1619 lck->lk.head_id = 0; 1620 lck->lk.tail_id = 0; 1621 lck->lk.next_ticket = 0; 1622 lck->lk.now_serving = 0; 1623 lck->lk.owner_id = 0; 1624 lck->lk.depth_locked = -1; 1625 } 1626 1627 static void 1628 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1629 { 1630 char const * const func = "omp_destroy_lock"; 1631 if ( lck->lk.initialized != lck ) { 1632 KMP_FATAL( LockIsUninitialized, func ); 1633 } 1634 if ( __kmp_is_queuing_lock_nestable( lck ) ) { 1635 KMP_FATAL( LockNestableUsedAsSimple, func ); 1636 } 1637 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1638 KMP_FATAL( LockStillOwned, func ); 1639 } 1640 __kmp_destroy_queuing_lock( lck ); 1641 } 1642 1643 1644 // 1645 // nested queuing locks 1646 // 1647 1648 void 1649 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1650 { 1651 KMP_DEBUG_ASSERT( gtid >= 0 ); 1652 1653 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1654 lck->lk.depth_locked += 1; 1655 } 1656 else { 1657 __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid ); 1658 KMP_MB(); 1659 lck->lk.depth_locked = 1; 1660 KMP_MB(); 1661 lck->lk.owner_id = gtid + 1; 1662 } 1663 } 1664 1665 static void 1666 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1667 { 1668 char const * const func = "omp_set_nest_lock"; 1669 if ( lck->lk.initialized != lck ) { 1670 KMP_FATAL( LockIsUninitialized, func ); 1671 } 1672 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1673 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1674 } 1675 __kmp_acquire_nested_queuing_lock( lck, gtid ); 1676 } 1677 1678 int 1679 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1680 { 1681 int retval; 1682 1683 KMP_DEBUG_ASSERT( gtid >= 0 ); 1684 1685 if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { 1686 retval = ++lck->lk.depth_locked; 1687 } 1688 else if ( !__kmp_test_queuing_lock( lck, gtid ) ) { 1689 retval = 0; 1690 } 1691 else { 1692 KMP_MB(); 1693 retval = lck->lk.depth_locked = 1; 1694 KMP_MB(); 1695 lck->lk.owner_id = gtid + 1; 1696 } 1697 return retval; 1698 } 1699 1700 static int 1701 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, 1702 kmp_int32 gtid ) 1703 { 1704 char const * const func = "omp_test_nest_lock"; 1705 if ( lck->lk.initialized != lck ) { 1706 KMP_FATAL( LockIsUninitialized, func ); 1707 } 1708 if ( ! 
__kmp_is_queuing_lock_nestable( lck ) ) { 1709 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1710 } 1711 return __kmp_test_nested_queuing_lock( lck, gtid ); 1712 } 1713 1714 void 1715 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1716 { 1717 KMP_DEBUG_ASSERT( gtid >= 0 ); 1718 1719 KMP_MB(); 1720 if ( --(lck->lk.depth_locked) == 0 ) { 1721 KMP_MB(); 1722 lck->lk.owner_id = 0; 1723 __kmp_release_queuing_lock( lck, gtid ); 1724 } 1725 } 1726 1727 static void 1728 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) 1729 { 1730 char const * const func = "omp_unset_nest_lock"; 1731 KMP_MB(); /* in case another processor initialized lock */ 1732 if ( lck->lk.initialized != lck ) { 1733 KMP_FATAL( LockIsUninitialized, func ); 1734 } 1735 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1736 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1737 } 1738 if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { 1739 KMP_FATAL( LockUnsettingFree, func ); 1740 } 1741 if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { 1742 KMP_FATAL( LockUnsettingSetByAnother, func ); 1743 } 1744 __kmp_release_nested_queuing_lock( lck, gtid ); 1745 } 1746 1747 void 1748 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck ) 1749 { 1750 __kmp_init_queuing_lock( lck ); 1751 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1752 } 1753 1754 static void 1755 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) 1756 { 1757 __kmp_init_nested_queuing_lock( lck ); 1758 } 1759 1760 void 1761 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ) 1762 { 1763 __kmp_destroy_queuing_lock( lck ); 1764 lck->lk.depth_locked = 0; 1765 } 1766 1767 static void 1768 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) 1769 { 1770 char const * const func = "omp_destroy_nest_lock"; 1771 if ( lck->lk.initialized != lck ) { 1772 KMP_FATAL( LockIsUninitialized, func ); 1773 } 1774 if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { 1775 KMP_FATAL( LockSimpleUsedAsNestable, func ); 1776 } 1777 if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { 1778 KMP_FATAL( LockStillOwned, func ); 1779 } 1780 __kmp_destroy_nested_queuing_lock( lck ); 1781 } 1782 1783 1784 // 1785 // access functions to fields which don't exist for all lock kinds. 1786 // 1787 1788 static int 1789 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck ) 1790 { 1791 return lck == lck->lk.initialized; 1792 } 1793 1794 static const ident_t * 1795 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck ) 1796 { 1797 return lck->lk.location; 1798 } 1799 1800 static void 1801 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc ) 1802 { 1803 lck->lk.location = loc; 1804 } 1805 1806 static kmp_lock_flags_t 1807 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck ) 1808 { 1809 return lck->lk.flags; 1810 } 1811 1812 static void 1813 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags ) 1814 { 1815 lck->lk.flags = flags; 1816 } 1817 1818 #if KMP_USE_ADAPTIVE_LOCKS 1819 1820 /* 1821 RTM Adaptive locks 1822 */ 1823 1824 // TODO: Use the header for intrinsics below with the compiler 13.0 1825 //#include <immintrin.h> 1826 1827 // Values from the status register after failed speculation. 
1828 #define _XBEGIN_STARTED (~0u) 1829 #define _XABORT_EXPLICIT (1 << 0) 1830 #define _XABORT_RETRY (1 << 1) 1831 #define _XABORT_CONFLICT (1 << 2) 1832 #define _XABORT_CAPACITY (1 << 3) 1833 #define _XABORT_DEBUG (1 << 4) 1834 #define _XABORT_NESTED (1 << 5) 1835 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1836 1837 // Aborts for which it's worth trying again immediately 1838 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1839 1840 #define STRINGIZE_INTERNAL(arg) #arg 1841 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1842 1843 // Access to RTM instructions 1844 1845 /* 1846 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. 1847 This is the same definition as the compiler intrinsic that will be supported at some point. 1848 */ 1849 static __inline int _xbegin() 1850 { 1851 int res = -1; 1852 1853 #if KMP_OS_WINDOWS 1854 #if KMP_ARCH_X86_64 1855 _asm { 1856 _emit 0xC7 1857 _emit 0xF8 1858 _emit 2 1859 _emit 0 1860 _emit 0 1861 _emit 0 1862 jmp L2 1863 mov res, eax 1864 L2: 1865 } 1866 #else /* IA32 */ 1867 _asm { 1868 _emit 0xC7 1869 _emit 0xF8 1870 _emit 2 1871 _emit 0 1872 _emit 0 1873 _emit 0 1874 jmp L2 1875 mov res, eax 1876 L2: 1877 } 1878 #endif // KMP_ARCH_X86_64 1879 #else 1880 /* Note that %eax must be noted as killed (clobbered), because 1881 * the XSR is returned in %eax(%rax) on abort. Other register 1882 * values are restored, so don't need to be killed. 1883 * 1884 * We must also mark 'res' as an input and an output, since otherwise 1885 * 'res=-1' may be dropped as being dead, whereas we do need the 1886 * assignment on the successful (i.e., non-abort) path. 1887 */ 1888 __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" 1889 " .long 1f-1b-6\n" 1890 " jmp 2f\n" 1891 "1: movl %%eax,%0\n" 1892 "2:" 1893 :"+r"(res)::"memory","%eax"); 1894 #endif // KMP_OS_WINDOWS 1895 return res; 1896 } 1897 1898 /* 1899 Transaction end 1900 */ 1901 static __inline void _xend() 1902 { 1903 #if KMP_OS_WINDOWS 1904 __asm { 1905 _emit 0x0f 1906 _emit 0x01 1907 _emit 0xd5 1908 } 1909 #else 1910 __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory"); 1911 #endif 1912 } 1913 1914 /* 1915 This is a macro, the argument must be a single byte constant which 1916 can be evaluated by the inline assembler, since it is emitted as a 1917 byte into the assembly code. 1918 */ 1919 #if KMP_OS_WINDOWS 1920 #define _xabort(ARG) \ 1921 _asm _emit 0xc6 \ 1922 _asm _emit 0xf8 \ 1923 _asm _emit ARG 1924 #else 1925 #define _xabort(ARG) \ 1926 __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory"); 1927 #endif 1928 1929 // 1930 // Statistics is collected for testing purpose 1931 // 1932 #if KMP_DEBUG_ADAPTIVE_LOCKS 1933 1934 // We accumulate speculative lock statistics when the lock is destroyed. 1935 // We keep locks that haven't been destroyed in the liveLocks list 1936 // so that we can grab their statistics too. 1937 static kmp_adaptive_lock_statistics_t destroyedStats; 1938 1939 // To hold the list of live locks. 1940 static kmp_adaptive_lock_info_t liveLocks; 1941 1942 // A lock so we can safely update the list of locks. 1943 static kmp_bootstrap_lock_t chain_lock; 1944 1945 // Initialize the list of stats. 
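// (liveLocks is the sentinel of a circular doubly-linked list: __kmp_remember_lock()
// links a lock in and __kmp_forget_lock() unlinks it, so statistics can still be
// accumulated for locks that are never destroyed.)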
1946 void 1947 __kmp_init_speculative_stats() 1948 { 1949 kmp_adaptive_lock_info_t *lck = &liveLocks; 1950 1951 memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); 1952 lck->stats.next = lck; 1953 lck->stats.prev = lck; 1954 1955 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 1956 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 1957 1958 __kmp_init_bootstrap_lock( &chain_lock ); 1959 1960 } 1961 1962 // Insert the lock into the circular list 1963 static void 1964 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) 1965 { 1966 __kmp_acquire_bootstrap_lock( &chain_lock ); 1967 1968 lck->stats.next = liveLocks.stats.next; 1969 lck->stats.prev = &liveLocks; 1970 1971 liveLocks.stats.next = lck; 1972 lck->stats.next->stats.prev = lck; 1973 1974 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 1975 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 1976 1977 __kmp_release_bootstrap_lock( &chain_lock ); 1978 } 1979 1980 static void 1981 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) 1982 { 1983 KMP_ASSERT( lck->stats.next->stats.prev == lck ); 1984 KMP_ASSERT( lck->stats.prev->stats.next == lck ); 1985 1986 kmp_adaptive_lock_info_t * n = lck->stats.next; 1987 kmp_adaptive_lock_info_t * p = lck->stats.prev; 1988 1989 n->stats.prev = p; 1990 p->stats.next = n; 1991 } 1992 1993 static void 1994 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) 1995 { 1996 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); 1997 __kmp_remember_lock( lck ); 1998 } 1999 2000 static void 2001 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) 2002 { 2003 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 2004 2005 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 2006 t->successfulSpeculations += s->successfulSpeculations; 2007 t->hardFailedSpeculations += s->hardFailedSpeculations; 2008 t->softFailedSpeculations += s->softFailedSpeculations; 2009 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 2010 t->lemmingYields += s->lemmingYields; 2011 } 2012 2013 static void 2014 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) 2015 { 2016 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 2017 2018 __kmp_acquire_bootstrap_lock( &chain_lock ); 2019 2020 __kmp_add_stats( &destroyedStats, lck ); 2021 __kmp_forget_lock( lck ); 2022 2023 __kmp_release_bootstrap_lock( &chain_lock ); 2024 } 2025 2026 static float 2027 percent (kmp_uint32 count, kmp_uint32 total) 2028 { 2029 return (total == 0) ? 0.0: (100.0 * count)/total; 2030 } 2031 2032 static 2033 FILE * __kmp_open_stats_file() 2034 { 2035 if (strcmp (__kmp_speculative_statsfile, "-") == 0) 2036 return stdout; 2037 2038 size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20; 2039 char buffer[buffLen]; 2040 snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile, 2041 (kmp_int32)getpid()); 2042 FILE * result = fopen(&buffer[0], "w"); 2043 2044 // Maybe we should issue a warning here... 2045 return result ? 
result : stdout; 2046 } 2047 2048 void 2049 __kmp_print_speculative_stats() 2050 { 2051 if (__kmp_user_lock_kind != lk_adaptive) 2052 return; 2053 2054 FILE * statsFile = __kmp_open_stats_file(); 2055 2056 kmp_adaptive_lock_statistics_t total = destroyedStats; 2057 kmp_adaptive_lock_info_t *lck; 2058 2059 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 2060 __kmp_add_stats( &total, lck ); 2061 } 2062 kmp_adaptive_lock_statistics_t *t = &total; 2063 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; 2064 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + 2065 t->softFailedSpeculations; 2066 2067 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); 2068 fprintf ( statsFile, " Lock parameters: \n" 2069 " max_soft_retries : %10d\n" 2070 " max_badness : %10d\n", 2071 __kmp_adaptive_backoff_params.max_soft_retries, 2072 __kmp_adaptive_backoff_params.max_badness); 2073 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); 2074 fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); 2075 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", 2076 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); 2077 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 2078 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); 2079 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); 2080 2081 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); 2082 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", 2083 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); 2084 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", 2085 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); 2086 fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", 2087 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); 2088 2089 if (statsFile != stdout) 2090 fclose( statsFile ); 2091 } 2092 2093 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) 2094 #else 2095 # define KMP_INC_STAT(lck,stat) 2096 2097 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2098 2099 static inline bool 2100 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) 2101 { 2102 // It is enough to check that the head_id is zero. 2103 // We don't also need to check the tail. 2104 bool res = lck->lk.head_id == 0; 2105 2106 // We need a fence here, since we must ensure that no memory operations 2107 // from later in this thread float above that read. 2108 #if KMP_COMPILER_ICC 2109 _mm_mfence(); 2110 #else 2111 __sync_synchronize(); 2112 #endif 2113 2114 return res; 2115 } 2116 2117 // Functions for manipulating the badness 2118 static __inline void 2119 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) 2120 { 2121 // Reset the badness to zero so we eagerly try to speculate again 2122 lck->lk.adaptive.badness = 0; 2123 KMP_INC_STAT(lck,successfulSpeculations); 2124 } 2125 2126 // Create a bit mask with one more set bit. 
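/*
   Worked example of the badness scheme implemented by the functions below
   (numbers are illustrative): 'badness' is a low-order bit mask,
   'acquire_attempts' counts non-speculative acquires, and
   __kmp_should_speculate() allows speculation only when
   ( acquire_attempts & badness ) == 0:

       badness = 0  -> every attempt may speculate
       badness = 1  -> every 2nd attempt may speculate
       badness = 3  -> every 4th attempt may speculate
       badness = 7  -> every 8th attempt may speculate (growth stops at max_badness)

   A successful speculation resets badness to zero
   (__kmp_update_badness_after_success above); each failed round shifts one
   more bit in (__kmp_step_badness below), so the speculation rate backs off
   geometrically while aborts persist.
*/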
2127 static __inline void 2128 __kmp_step_badness( kmp_adaptive_lock_t *lck ) 2129 { 2130 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; 2131 if ( newBadness > lck->lk.adaptive.max_badness) { 2132 return; 2133 } else { 2134 lck->lk.adaptive.badness = newBadness; 2135 } 2136 } 2137 2138 // Check whether speculation should be attempted. 2139 static __inline int 2140 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2141 { 2142 kmp_uint32 badness = lck->lk.adaptive.badness; 2143 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; 2144 int res = (attempts & badness) == 0; 2145 return res; 2146 } 2147 2148 // Attempt to acquire only the speculative lock. 2149 // Does not back off to the non-speculative lock. 2150 // 2151 static int 2152 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2153 { 2154 int retries = lck->lk.adaptive.max_soft_retries; 2155 2156 // We don't explicitly count the start of speculation, rather we record 2157 // the results (success, hard fail, soft fail). The sum of all of those 2158 // is the total number of times we started speculation since all 2159 // speculations must end one of those ways. 2160 do 2161 { 2162 kmp_uint32 status = _xbegin(); 2163 // Switch this in to disable actual speculation but exercise 2164 // at least some of the rest of the code. Useful for debugging... 2165 // kmp_uint32 status = _XABORT_NESTED; 2166 2167 if (status == _XBEGIN_STARTED ) 2168 { /* We have successfully started speculation 2169 * Check that no-one acquired the lock for real between when we last looked 2170 * and now. This also gets the lock cache line into our read-set, 2171 * which we need so that we'll abort if anyone later claims it for real. 2172 */ 2173 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2174 { 2175 // Lock is now visibly acquired, so someone beat us to it. 2176 // Abort the transaction so we'll restart from _xbegin with the 2177 // failure status. 2178 _xabort(0x01) 2179 KMP_ASSERT2( 0, "should not get here" ); 2180 } 2181 return 1; // Lock has been acquired (speculatively) 2182 } else { 2183 // We have aborted, update the statistics 2184 if ( status & SOFT_ABORT_MASK) 2185 { 2186 KMP_INC_STAT(lck,softFailedSpeculations); 2187 // and loop round to retry. 2188 } 2189 else 2190 { 2191 KMP_INC_STAT(lck,hardFailedSpeculations); 2192 // Give up if we had a hard failure. 2193 break; 2194 } 2195 } 2196 } while( retries-- ); // Loop while we have retries, and didn't fail hard. 2197 2198 // Either we had a hard failure or we didn't succeed softly after 2199 // the full set of attempts, so back off the badness. 2200 __kmp_step_badness( lck ); 2201 return 0; 2202 } 2203 2204 // Attempt to acquire the speculative lock, or back off to the non-speculative one 2205 // if the speculative lock cannot be acquired. 2206 // We can succeed speculatively, non-speculatively, or fail. 2207 static int 2208 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2209 { 2210 // First try to acquire the lock speculatively 2211 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) 2212 return 1; 2213 2214 // Speculative acquisition failed, so try to acquire it non-speculatively. 2215 // Count the non-speculative acquire attempt 2216 lck->lk.adaptive.acquire_attempts++; 2217 2218 // Use base, non-speculative lock. 
if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) 2220 { 2221 KMP_INC_STAT(lck,nonSpeculativeAcquires); 2222 return 1; // Lock is acquired (non-speculatively) 2223 } 2224 else 2225 { 2226 return 0; // Failed to acquire the lock, it's already visibly locked. 2227 } 2228 } 2229 2230 static int 2231 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2232 { 2233 char const * const func = "omp_test_lock"; 2234 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2235 KMP_FATAL( LockIsUninitialized, func ); 2236 } 2237 2238 int retval = __kmp_test_adaptive_lock( lck, gtid ); 2239 2240 if ( retval ) { 2241 lck->lk.qlk.owner_id = gtid + 1; 2242 } 2243 return retval; 2244 } 2245 2246 // Block until we can acquire a speculative, adaptive lock. 2247 // We check whether we should be trying to speculate. 2248 // If we should be, we check the real lock to see if it is free, 2249 // and, if not, pause without attempting to acquire it until it is. 2250 // Then we try the speculative acquire. 2251 // This means that although we suffer from lemmings a little ( 2252 // because we can't acquire the lock speculatively until 2253 // the queue of threads waiting has cleared), we don't get into a 2254 // state where we can never acquire the lock speculatively (because we 2255 // force the queue to clear by preventing new arrivals from entering the 2256 // queue). 2257 // This does mean that when we're trying to break lemmings, the lock 2258 // is no longer fair. However OpenMP makes no guarantee that its 2259 // locks are fair, so this isn't a real problem. 2260 static void 2261 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2262 { 2263 if ( __kmp_should_speculate( lck, gtid ) ) 2264 { 2265 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2266 { 2267 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2268 return; 2269 // We tried speculation and failed, so give up. 2270 } 2271 else 2272 { 2273 // We can't try speculation until the lock is free, so we 2274 // pause here (without suspending on the queuing lock) 2275 // to allow it to drain, then try again. 2276 // All other threads will also see the same result for 2277 // __kmp_should_speculate, so will be doing the same if they 2278 // try to claim the lock from now on. 2279 while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2280 { 2281 KMP_INC_STAT(lck,lemmingYields); 2282 __kmp_yield (TRUE); 2283 } 2284 2285 if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) 2286 return; 2287 } 2288 } 2289 2290 // Speculative acquisition failed, so acquire it non-speculatively. 2291 // Count the non-speculative acquire attempt. 2292 lck->lk.adaptive.acquire_attempts++; 2293 2294 __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid ); 2295 // We have acquired the base lock, so count that.
2296 KMP_INC_STAT(lck,nonSpeculativeAcquires ); 2297 } 2298 2299 static void 2300 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2301 { 2302 char const * const func = "omp_set_lock"; 2303 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2304 KMP_FATAL( LockIsUninitialized, func ); 2305 } 2306 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { 2307 KMP_FATAL( LockIsAlreadyOwned, func ); 2308 } 2309 2310 __kmp_acquire_adaptive_lock( lck, gtid ); 2311 2312 lck->lk.qlk.owner_id = gtid + 1; 2313 } 2314 2315 static void 2316 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2317 { 2318 if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2319 { // If the lock doesn't look claimed we must be speculating. 2320 // (Or the user's code is buggy and they're releasing without locking; 2321 // if we had XTEST we'd be able to check that case...) 2322 _xend(); // Exit speculation 2323 __kmp_update_badness_after_success( lck ); 2324 } 2325 else 2326 { // Since the lock *is* visibly locked we're not speculating, 2327 // so should use the underlying lock's release scheme. 2328 __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); 2329 } 2330 } 2331 2332 static void 2333 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2334 { 2335 char const * const func = "omp_unset_lock"; 2336 KMP_MB(); /* in case another processor initialized lock */ 2337 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2338 KMP_FATAL( LockIsUninitialized, func ); 2339 } 2340 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { 2341 KMP_FATAL( LockUnsettingFree, func ); 2342 } 2343 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { 2344 KMP_FATAL( LockUnsettingSetByAnother, func ); 2345 } 2346 lck->lk.qlk.owner_id = 0; 2347 __kmp_release_adaptive_lock( lck, gtid ); 2348 } 2349 2350 static void 2351 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) 2352 { 2353 __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); 2354 lck->lk.adaptive.badness = 0; 2355 lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; 2356 lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; 2357 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2358 #if KMP_DEBUG_ADAPTIVE_LOCKS 2359 __kmp_zero_speculative_stats( &lck->lk.adaptive ); 2360 #endif 2361 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2362 } 2363 2364 static void 2365 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) 2366 { 2367 __kmp_init_adaptive_lock( lck ); 2368 } 2369 2370 static void 2371 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) 2372 { 2373 #if KMP_DEBUG_ADAPTIVE_LOCKS 2374 __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); 2375 #endif 2376 __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); 2377 // Nothing needed for the speculative part. 
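/*
   Sketch of how the speculative and real paths of the adaptive lock above
   pair up across acquire and release (informational only; it restates the
   logic of the functions defined above):

       acquire:  _xbegin() succeeded and the queuing lock looked free
                 -> we are inside a transaction, the lock word is in our
                    read-set, and the lock word itself is never written.
       release:  __kmp_is_unlocked_queuing_lock() is still true
                 -> we must have been speculating, so _xend() commits;
                    otherwise we really own the queuing lock and release it
                    with __kmp_release_queuing_lock().

   Any thread that writes the lock word while we speculate aborts our
   transaction, so a committed speculative section can only ever see the
   lock as free at release time.
*/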
2378 } 2379 2380 static void 2381 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) 2382 { 2383 char const * const func = "omp_destroy_lock"; 2384 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2385 KMP_FATAL( LockIsUninitialized, func ); 2386 } 2387 if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { 2388 KMP_FATAL( LockStillOwned, func ); 2389 } 2390 __kmp_destroy_adaptive_lock( lck ); 2391 } 2392 2393 2394 #endif // KMP_USE_ADAPTIVE_LOCKS 2395 2396 2397 /* ------------------------------------------------------------------------ */ 2398 /* DRDPA ticket locks */ 2399 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2400 2401 static kmp_int32 2402 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) 2403 { 2404 return TCR_4( lck->lk.owner_id ) - 1; 2405 } 2406 2407 static inline bool 2408 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) 2409 { 2410 return lck->lk.depth_locked != -1; 2411 } 2412 2413 __forceinline static void 2414 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2415 { 2416 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); 2417 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2418 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2419 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2420 TCR_PTR(lck->lk.polls); // volatile load 2421 2422 #ifdef USE_LOCK_PROFILE 2423 if (TCR_8(polls[ticket & mask].poll) != ticket) 2424 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2425 /* else __kmp_printf( "." );*/ 2426 #endif /* USE_LOCK_PROFILE */ 2427 2428 // 2429 // Now spin-wait, but reload the polls pointer and mask, in case the 2430 // polling area has been reconfigured. Unless it is reconfigured, the 2431 // reloads stay in L1 cache and are cheap. 2432 // 2433 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!! 2434 // 2435 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2436 // and poll to be re-read every spin iteration. 2437 // 2438 kmp_uint32 spins; 2439 2440 KMP_FSYNC_PREPARE(lck); 2441 KMP_INIT_YIELD(spins); 2442 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load 2443 // If we are oversubscribed, 2444 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2445 // CPU Pause is in the macros for yield. 2446 // 2447 KMP_YIELD(TCR_4(__kmp_nth) 2448 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2449 KMP_YIELD_SPIN(spins); 2450 2451 // Re-read the mask and the poll pointer from the lock structure. 2452 // 2453 // Make certain that "mask" is read before "polls" !!! 2454 // 2455 // If another thread picks reconfigures the polling area and updates 2456 // their values, and we get the new value of mask and the old polls 2457 // pointer, we could access memory beyond the end of the old polling 2458 // area. 2459 // 2460 mask = TCR_8(lck->lk.mask); // volatile load 2461 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2462 TCR_PTR(lck->lk.polls); // volatile load 2463 } 2464 2465 // 2466 // Critical section starts here 2467 // 2468 KMP_FSYNC_ACQUIRED(lck); 2469 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2470 ticket, lck)); 2471 lck->lk.now_serving = ticket; // non-volatile store 2472 2473 // 2474 // Deallocate a garbage polling area if we know that we are the last 2475 // thread that could possibly access it. 2476 // 2477 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2478 // ticket. 
2479 // 2480 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2481 __kmp_free((void *)lck->lk.old_polls); 2482 lck->lk.old_polls = NULL; 2483 lck->lk.cleanup_ticket = 0; 2484 } 2485 2486 // 2487 // Check to see if we should reconfigure the polling area. 2488 // If there is still a garbage polling area to be deallocated from a 2489 // previous reconfiguration, let a later thread reconfigure it. 2490 // 2491 if (lck->lk.old_polls == NULL) { 2492 bool reconfigure = false; 2493 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2494 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2495 2496 if (TCR_4(__kmp_nth) 2497 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2498 // 2499 // We are in oversubscription mode. Contract the polling area 2500 // down to a single location, if that hasn't been done already. 2501 // 2502 if (num_polls > 1) { 2503 reconfigure = true; 2504 num_polls = TCR_4(lck->lk.num_polls); 2505 mask = 0; 2506 num_polls = 1; 2507 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2508 __kmp_allocate(num_polls * sizeof(*polls)); 2509 polls[0].poll = ticket; 2510 } 2511 } 2512 else { 2513 // 2514 // We are in under/fully subscribed mode. Check the number of 2515 // threads waiting on the lock. The size of the polling area 2516 // should be at least the number of threads waiting. 2517 // 2518 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2519 if (num_waiting > num_polls) { 2520 kmp_uint32 old_num_polls = num_polls; 2521 reconfigure = true; 2522 do { 2523 mask = (mask << 1) | 1; 2524 num_polls *= 2; 2525 } while (num_polls <= num_waiting); 2526 2527 // 2528 // Allocate the new polling area, and copy the relevant portion 2529 // of the old polling area to the new area. __kmp_allocate() 2530 // zeroes the memory it allocates, and most of the old area is 2531 // just zero padding, so we only copy the release counters. 2532 // 2533 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2534 __kmp_allocate(num_polls * sizeof(*polls)); 2535 kmp_uint32 i; 2536 for (i = 0; i < old_num_polls; i++) { 2537 polls[i].poll = old_polls[i].poll; 2538 } 2539 } 2540 } 2541 2542 if (reconfigure) { 2543 // 2544 // Now write the updated fields back to the lock structure. 2545 // 2546 // Make certain that "polls" is written before "mask" !!! 2547 // 2548 // If another thread picks up the new value of mask and the old 2549 // polls pointer , it could access memory beyond the end of the 2550 // old polling area. 2551 // 2552 // On x86, we need memory fences. 2553 // 2554 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", 2555 ticket, lck, num_polls)); 2556 2557 lck->lk.old_polls = old_polls; // non-volatile store 2558 lck->lk.polls = polls; // volatile store 2559 2560 KMP_MB(); 2561 2562 lck->lk.num_polls = num_polls; // non-volatile store 2563 lck->lk.mask = mask; // volatile store 2564 2565 KMP_MB(); 2566 2567 // 2568 // Only after the new polling area and mask have been flushed 2569 // to main memory can we update the cleanup ticket field. 
2570 // 2571 // volatile load / non-volatile store 2572 // 2573 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2574 } 2575 } 2576 } 2577 2578 void 2579 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2580 { 2581 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2582 } 2583 2584 static void 2585 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2586 { 2587 char const * const func = "omp_set_lock"; 2588 if ( lck->lk.initialized != lck ) { 2589 KMP_FATAL( LockIsUninitialized, func ); 2590 } 2591 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2592 KMP_FATAL( LockNestableUsedAsSimple, func ); 2593 } 2594 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { 2595 KMP_FATAL( LockIsAlreadyOwned, func ); 2596 } 2597 2598 __kmp_acquire_drdpa_lock( lck, gtid ); 2599 2600 lck->lk.owner_id = gtid + 1; 2601 } 2602 2603 int 2604 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2605 { 2606 // 2607 // First get a ticket, then read the polls pointer and the mask. 2608 // The polls pointer must be read before the mask!!! (See above) 2609 // 2610 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2611 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2612 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2613 TCR_PTR(lck->lk.polls); // volatile load 2614 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2615 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2616 kmp_uint64 next_ticket = ticket + 1; 2617 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, 2618 ticket, next_ticket)) { 2619 KMP_FSYNC_ACQUIRED(lck); 2620 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2621 ticket, lck)); 2622 lck->lk.now_serving = ticket; // non-volatile store 2623 2624 // 2625 // Since no threads are waiting, there is no possibility that 2626 // we would want to reconfigure the polling area. We might 2627 // have the cleanup ticket value (which says that it is now 2628 // safe to deallocate old_polls), but we'll let a later thread 2629 // which calls __kmp_acquire_lock do that - this routine 2630 // isn't supposed to block, and we would risk blocks if we 2631 // called __kmp_free() to do the deallocation. 2632 // 2633 return TRUE; 2634 } 2635 } 2636 return FALSE; 2637 } 2638 2639 static int 2640 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2641 { 2642 char const * const func = "omp_test_lock"; 2643 if ( lck->lk.initialized != lck ) { 2644 KMP_FATAL( LockIsUninitialized, func ); 2645 } 2646 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2647 KMP_FATAL( LockNestableUsedAsSimple, func ); 2648 } 2649 2650 int retval = __kmp_test_drdpa_lock( lck, gtid ); 2651 2652 if ( retval ) { 2653 lck->lk.owner_id = gtid + 1; 2654 } 2655 return retval; 2656 } 2657 2658 void 2659 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2660 { 2661 // 2662 // Read the ticket value from the lock data struct, then the polls 2663 // pointer and the mask. The polls pointer must be read before the 2664 // mask!!! 
(See above) 2665 // 2666 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2667 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls 2668 = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2669 TCR_PTR(lck->lk.polls); // volatile load 2670 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2671 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2672 ticket - 1, lck)); 2673 KMP_FSYNC_RELEASING(lck); 2674 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2675 } 2676 2677 static void 2678 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2679 { 2680 char const * const func = "omp_unset_lock"; 2681 KMP_MB(); /* in case another processor initialized lock */ 2682 if ( lck->lk.initialized != lck ) { 2683 KMP_FATAL( LockIsUninitialized, func ); 2684 } 2685 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2686 KMP_FATAL( LockNestableUsedAsSimple, func ); 2687 } 2688 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2689 KMP_FATAL( LockUnsettingFree, func ); 2690 } 2691 if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) 2692 && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { 2693 KMP_FATAL( LockUnsettingSetByAnother, func ); 2694 } 2695 lck->lk.owner_id = 0; 2696 __kmp_release_drdpa_lock( lck, gtid ); 2697 } 2698 2699 void 2700 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) 2701 { 2702 lck->lk.location = NULL; 2703 lck->lk.mask = 0; 2704 lck->lk.num_polls = 1; 2705 lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2706 __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2707 lck->lk.cleanup_ticket = 0; 2708 lck->lk.old_polls = NULL; 2709 lck->lk.next_ticket = 0; 2710 lck->lk.now_serving = 0; 2711 lck->lk.owner_id = 0; // no thread owns the lock. 2712 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
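/*
   Worked example of the DRDPA mechanism being initialized here (numbers are
   illustrative): each acquirer atomically takes a 'ticket' from next_ticket
   and spins on polls[ticket & mask].poll until that slot reaches its ticket;
   release stores now_serving + 1 into polls[(now_serving + 1) & mask].poll.

   With num_polls = 1 (mask = 0) every waiter spins on the same slot, which
   behaves like a plain ticket lock.  When waiters pile up, the acquire path
   grows the polling area: for, say, 5 waiters it doubles 1 -> 2 -> 4 -> 8
   polls (mask 0 -> 1 -> 3 -> 7), so each waiter ends up polling its own
   slot (the slots are padded with the intent of landing on separate cache
   lines) and a release only touches its successor's slot.
*/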
2713 lck->lk.initialized = lck; 2714 2715 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2716 } 2717 2718 static void 2719 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2720 { 2721 __kmp_init_drdpa_lock( lck ); 2722 } 2723 2724 void 2725 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) 2726 { 2727 lck->lk.initialized = NULL; 2728 lck->lk.location = NULL; 2729 if (lck->lk.polls != NULL) { 2730 __kmp_free((void *)lck->lk.polls); 2731 lck->lk.polls = NULL; 2732 } 2733 if (lck->lk.old_polls != NULL) { 2734 __kmp_free((void *)lck->lk.old_polls); 2735 lck->lk.old_polls = NULL; 2736 } 2737 lck->lk.mask = 0; 2738 lck->lk.num_polls = 0; 2739 lck->lk.cleanup_ticket = 0; 2740 lck->lk.next_ticket = 0; 2741 lck->lk.now_serving = 0; 2742 lck->lk.owner_id = 0; 2743 lck->lk.depth_locked = -1; 2744 } 2745 2746 static void 2747 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2748 { 2749 char const * const func = "omp_destroy_lock"; 2750 if ( lck->lk.initialized != lck ) { 2751 KMP_FATAL( LockIsUninitialized, func ); 2752 } 2753 if ( __kmp_is_drdpa_lock_nestable( lck ) ) { 2754 KMP_FATAL( LockNestableUsedAsSimple, func ); 2755 } 2756 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2757 KMP_FATAL( LockStillOwned, func ); 2758 } 2759 __kmp_destroy_drdpa_lock( lck ); 2760 } 2761 2762 2763 // 2764 // nested drdpa ticket locks 2765 // 2766 2767 void 2768 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2769 { 2770 KMP_DEBUG_ASSERT( gtid >= 0 ); 2771 2772 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2773 lck->lk.depth_locked += 1; 2774 } 2775 else { 2776 __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); 2777 KMP_MB(); 2778 lck->lk.depth_locked = 1; 2779 KMP_MB(); 2780 lck->lk.owner_id = gtid + 1; 2781 } 2782 } 2783 2784 static void 2785 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2786 { 2787 char const * const func = "omp_set_nest_lock"; 2788 if ( lck->lk.initialized != lck ) { 2789 KMP_FATAL( LockIsUninitialized, func ); 2790 } 2791 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2792 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2793 } 2794 __kmp_acquire_nested_drdpa_lock( lck, gtid ); 2795 } 2796 2797 int 2798 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2799 { 2800 int retval; 2801 2802 KMP_DEBUG_ASSERT( gtid >= 0 ); 2803 2804 if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { 2805 retval = ++lck->lk.depth_locked; 2806 } 2807 else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { 2808 retval = 0; 2809 } 2810 else { 2811 KMP_MB(); 2812 retval = lck->lk.depth_locked = 1; 2813 KMP_MB(); 2814 lck->lk.owner_id = gtid + 1; 2815 } 2816 return retval; 2817 } 2818 2819 static int 2820 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2821 { 2822 char const * const func = "omp_test_nest_lock"; 2823 if ( lck->lk.initialized != lck ) { 2824 KMP_FATAL( LockIsUninitialized, func ); 2825 } 2826 if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { 2827 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2828 } 2829 return __kmp_test_nested_drdpa_lock( lck, gtid ); 2830 } 2831 2832 void 2833 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2834 { 2835 KMP_DEBUG_ASSERT( gtid >= 0 ); 2836 2837 KMP_MB(); 2838 if ( --(lck->lk.depth_locked) == 0 ) { 2839 KMP_MB(); 2840 lck->lk.owner_id = 0; 2841 __kmp_release_drdpa_lock( lck, gtid ); 2842 } 2843 } 2844 2845 static void 2846 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) 2847 { 2848 char const * const func = "omp_unset_nest_lock"; 2849 KMP_MB(); /* in case another processor initialized lock */ 2850 if ( lck->lk.initialized != lck ) { 2851 KMP_FATAL( LockIsUninitialized, func ); 2852 } 2853 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2854 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2855 } 2856 if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { 2857 KMP_FATAL( LockUnsettingFree, func ); 2858 } 2859 if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { 2860 KMP_FATAL( LockUnsettingSetByAnother, func ); 2861 } 2862 __kmp_release_nested_drdpa_lock( lck, gtid ); 2863 } 2864 2865 void 2866 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) 2867 { 2868 __kmp_init_drdpa_lock( lck ); 2869 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2870 } 2871 2872 static void 2873 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) 2874 { 2875 __kmp_init_nested_drdpa_lock( lck ); 2876 } 2877 2878 void 2879 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) 2880 { 2881 __kmp_destroy_drdpa_lock( lck ); 2882 lck->lk.depth_locked = 0; 2883 } 2884 2885 static void 2886 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) 2887 { 2888 char const * const func = "omp_destroy_nest_lock"; 2889 if ( lck->lk.initialized != lck ) { 2890 KMP_FATAL( LockIsUninitialized, func ); 2891 } 2892 if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { 2893 KMP_FATAL( LockSimpleUsedAsNestable, func ); 2894 } 2895 if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { 2896 KMP_FATAL( LockStillOwned, func ); 2897 } 2898 __kmp_destroy_nested_drdpa_lock( lck ); 2899 } 2900 2901 2902 // 2903 // access functions to fields which don't exist for all lock kinds. 2904 // 2905 2906 static int 2907 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) 2908 { 2909 return lck == lck->lk.initialized; 2910 } 2911 2912 static const ident_t * 2913 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) 2914 { 2915 return lck->lk.location; 2916 } 2917 2918 static void 2919 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) 2920 { 2921 lck->lk.location = loc; 2922 } 2923 2924 static kmp_lock_flags_t 2925 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) 2926 { 2927 return lck->lk.flags; 2928 } 2929 2930 static void 2931 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) 2932 { 2933 lck->lk.flags = flags; 2934 } 2935 2936 #if KMP_USE_DYNAMIC_LOCK 2937 2938 // Definitions of lock hints. 2939 # ifndef __OMP_H 2940 typedef enum kmp_lock_hint_t { 2941 kmp_lock_hint_none = 0, 2942 kmp_lock_hint_contended, 2943 kmp_lock_hint_uncontended, 2944 kmp_lock_hint_nonspeculative, 2945 kmp_lock_hint_speculative, 2946 kmp_lock_hint_adaptive, 2947 } kmp_lock_hint_t; 2948 # endif 2949 2950 // Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word. 
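/*
   Assumed layout of a direct lock word (sketch only -- DYNA_LOCK_FREE and
   DYNA_LOCK_BUSY are defined in the lock header, so exact bit widths are
   not shown here):

       kmp_dyna_lock_t:   [ implementation state | tag in the low bits ]

       DYNA_LOCK_FREE(tas)           -> the "tas" tag with no owner recorded
       DYNA_LOCK_BUSY(gtid + 1, tas) -> the same tag plus the owner encoding

   Because the tag sits in the low bits and direct-lock tags are odd, one
   32-bit word is enough both to select the lock implementation and to hold
   that implementation's state; indirect locks keep their word even so the
   two cases can be told apart.
*/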
2951 #define expand_init_lock(l, a) \ 2952 static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \ 2953 *lck = DYNA_LOCK_FREE(l); \ 2954 KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \ 2955 } 2956 FOREACH_D_LOCK(expand_init_lock, 0) 2957 #undef expand_init_lock 2958 2959 #if DYNA_HAS_HLE 2960 2961 // HLE lock functions - imported from the testbed runtime. 2962 #if KMP_MIC 2963 # define machine_pause() _mm_delay_32(10) // TODO: find the right argument 2964 #else 2965 # define machine_pause() _mm_pause() 2966 #endif 2967 #define HLE_ACQUIRE ".byte 0xf2;" 2968 #define HLE_RELEASE ".byte 0xf3;" 2969 2970 static inline kmp_uint32 2971 swap4(kmp_uint32 volatile *p, kmp_uint32 v) 2972 { 2973 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" 2974 : "+r"(v), "+m"(*p) 2975 : 2976 : "memory"); 2977 return v; 2978 } 2979 2980 static void 2981 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) 2982 { 2983 *lck = 0; 2984 } 2985 2986 static void 2987 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 2988 { 2989 // Use gtid for DYNA_LOCK_BUSY if necessary 2990 if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) { 2991 int delay = 1; 2992 do { 2993 while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) { 2994 for (int i = delay; i != 0; --i) 2995 machine_pause(); 2996 delay = ((delay << 1) | 1) & 7; 2997 } 2998 } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)); 2999 } 3000 } 3001 3002 static void 3003 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3004 { 3005 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 3006 } 3007 3008 static void 3009 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3010 { 3011 __asm__ volatile(HLE_RELEASE "movl %1,%0" 3012 : "=m"(*lck) 3013 : "r"(DYNA_LOCK_FREE(hle)) 3014 : "memory"); 3015 } 3016 3017 static void 3018 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3019 { 3020 __kmp_release_hle_lock(lck, gtid); // TODO: add checks 3021 } 3022 3023 static int 3024 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3025 { 3026 return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle); 3027 } 3028 3029 static int 3030 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) 3031 { 3032 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 3033 } 3034 3035 #endif // DYNA_HAS_HLE 3036 3037 // Entry functions for indirect locks (first element of direct_*_ops[]). 3038 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); 3039 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); 3040 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3041 static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3042 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); 3043 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3044 static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3045 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); 3046 3047 // 3048 // Jump tables for the indirect lock functions. 3049 // Only fill in the odd entries, that avoids the need to shift out the low bit. 
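/*
   Notes on the HLE path above (informational sketch, not extra code):

   - HLE_ACQUIRE (0xF2) and HLE_RELEASE (0xF3) are the XACQUIRE/XRELEASE
     prefixes.  On processors without HLE the prefixes are simply ignored,
     so the code degenerates to an ordinary atomic exchange and store; with
     HLE the first acquire is elided and runs transactionally.
   - The spin in __kmp_acquire_hle_lock backs off with
         delay = ((delay << 1) | 1) & 7;
     so the pause count between polls cycles 1, 3, 7, 7, ... and a waiter
     never pauses for more than 7 machine_pause() iterations at a time.
*/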
3050 // 3051 #define expand_func0(l, op) 0,op##_##l##_##lock, 3052 void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) 3053 = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) }; 3054 3055 #define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock, 3056 void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *) 3057 = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) }; 3058 3059 // Differentiates *lock and *lock_with_checks. 3060 #define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, 3061 #define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3062 static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3063 = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) }, 3064 { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } }; 3065 static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3066 = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) }, 3067 { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } }; 3068 3069 #define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, 3070 #define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3071 static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) 3072 = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) }, 3073 { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } }; 3074 3075 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3076 void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3077 void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3078 int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; 3079 3080 // 3081 // Jump tables for the indirect lock functions. 3082 // 3083 #define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, 3084 void (*__kmp_indirect_init_ops[])(kmp_user_lock_p) 3085 = { FOREACH_I_LOCK(expand_func4, init) }; 3086 void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p) 3087 = { FOREACH_I_LOCK(expand_func4, destroy) }; 3088 3089 // Differentiates *lock and *lock_with_checks. 3090 #define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3091 #define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3092 static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3093 = { { FOREACH_I_LOCK(expand_func5, acquire) }, 3094 { FOREACH_I_LOCK(expand_func5c, acquire) } }; 3095 static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3096 = { { FOREACH_I_LOCK(expand_func5, release) }, 3097 { FOREACH_I_LOCK(expand_func5c, release) } }; 3098 3099 #define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, 3100 #define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, 3101 static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) 3102 = { { FOREACH_I_LOCK(expand_func6, test) }, 3103 { FOREACH_I_LOCK(expand_func6c, test) } }; 3104 3105 // Exposes only one set of jump tables (*lock or *lock_with_checks). 
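/*
   Dispatch sketch for the direct-lock tables defined above (the tag
   extraction shown here is schematic; the real accessor macros live in the
   lock header):

       // The tag comes from the low bits of the lock word; direct tags are
       // odd, so they index the table without any shifting, and slot 0 holds
       // the entry point that forwards to the indirect-lock implementation.
       __kmp_direct_set_ops[ tag ]( lock, gtid );

   __kmp_init_dynamic_user_locks() selects row 0 (no consistency checks) or
   row 1 (with checks) of direct_set_tab / direct_unset_tab / direct_test_tab
   once at startup, so the per-operation dispatch never has to test
   __kmp_env_consistency_check.
*/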
3106 void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0; 3107 void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0; 3108 int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0; 3109 3110 // Lock index table. 3111 kmp_indirect_lock_t **__kmp_indirect_lock_table; 3112 kmp_lock_index_t __kmp_indirect_lock_table_size; 3113 kmp_lock_index_t __kmp_indirect_lock_table_next; 3114 3115 // Size of indirect locks. 3116 static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = { 3117 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), 3118 #if KMP_USE_ADAPTIVE_LOCKS 3119 sizeof(kmp_adaptive_lock_t), 3120 #endif 3121 sizeof(kmp_drdpa_lock_t), 3122 sizeof(kmp_tas_lock_t), 3123 #if DYNA_HAS_FUTEX 3124 sizeof(kmp_futex_lock_t), 3125 #endif 3126 sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), 3127 sizeof(kmp_drdpa_lock_t) 3128 }; 3129 3130 // Jump tables for lock accessor/modifier. 3131 void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; 3132 void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; 3133 const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3134 kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; 3135 3136 // Use different lock pools for different lock types. 3137 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 }; 3138 3139 // Inserts the given lock ptr to the lock table. 3140 kmp_lock_index_t 3141 __kmp_insert_indirect_lock(kmp_indirect_lock_t *lck) 3142 { 3143 kmp_lock_index_t next = __kmp_indirect_lock_table_next; 3144 // Check capacity and double the size if required 3145 if (next >= __kmp_indirect_lock_table_size) { 3146 kmp_lock_index_t i; 3147 kmp_lock_index_t size = __kmp_indirect_lock_table_size; 3148 kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table; 3149 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *)); 3150 memcpy(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *)); 3151 __kmp_free(old_table); 3152 __kmp_indirect_lock_table_size = 2*next; 3153 } 3154 // Insert lck to the table and return the index. 3155 __kmp_indirect_lock_table[next] = lck; 3156 __kmp_indirect_lock_table_next++; 3157 return next; 3158 } 3159 3160 // User lock allocator for dynamically dispatched locks. 3161 kmp_indirect_lock_t * 3162 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) 3163 { 3164 kmp_indirect_lock_t *lck; 3165 kmp_lock_index_t idx; 3166 3167 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3168 3169 if (__kmp_indirect_lock_pool[tag] != NULL) { 3170 lck = __kmp_indirect_lock_pool[tag]; 3171 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3172 idx = lck->lock->pool.index; 3173 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3174 } else { 3175 lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t)); 3176 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3177 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3178 idx = __kmp_insert_indirect_lock(lck); 3179 } 3180 3181 __kmp_release_lock(&__kmp_global_lock, gtid); 3182 3183 lck->type = tag; 3184 3185 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3186 *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. 
3187 } else { 3188 *((kmp_indirect_lock_t **)user_lock) = lck; 3189 } 3190 3191 return lck; 3192 } 3193 3194 // User lock lookup for dynamically dispatched locks. 3195 static __forceinline 3196 kmp_indirect_lock_t * 3197 __kmp_lookup_indirect_lock(void **user_lock, const char *func) 3198 { 3199 if (__kmp_env_consistency_check) { 3200 kmp_indirect_lock_t *lck = NULL; 3201 if (user_lock == NULL) { 3202 KMP_FATAL(LockIsUninitialized, func); 3203 } 3204 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3205 kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock); 3206 if (idx < 0 || idx >= __kmp_indirect_lock_table_size) { 3207 KMP_FATAL(LockIsUninitialized, func); 3208 } 3209 lck = __kmp_indirect_lock_table[idx]; 3210 } else { 3211 lck = *((kmp_indirect_lock_t **)user_lock); 3212 } 3213 if (lck == NULL) { 3214 KMP_FATAL(LockIsUninitialized, func); 3215 } 3216 return lck; 3217 } else { 3218 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3219 return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)]; 3220 } else { 3221 return *((kmp_indirect_lock_t **)user_lock); 3222 } 3223 } 3224 } 3225 3226 static void 3227 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) 3228 { 3229 #if KMP_USE_ADAPTIVE_LOCKS 3230 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3231 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3232 seq = lockseq_queuing; 3233 } 3234 #endif 3235 kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq); 3236 kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3237 DYNA_I_LOCK_FUNC(l, init)(l->lock); 3238 KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type)); 3239 } 3240 3241 static void 3242 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) 3243 { 3244 kmp_uint32 gtid = __kmp_entry_gtid(); 3245 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3246 DYNA_I_LOCK_FUNC(l, destroy)(l->lock); 3247 kmp_indirect_locktag_t tag = l->type; 3248 3249 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3250 3251 // Use the base lock's space to keep the pool chain. 
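/*
   Lifecycle sketch for the indirect-lock machinery above (illustrative,
   following the conditions as written):

   - allocation (__kmp_allocate_indirect_lock): pop a recycled wrapper from
     __kmp_indirect_lock_pool[tag] if one is available, otherwise heap-allocate
     the wrapper and its payload lock.  If the user's lock word is too small
     to hold a pointer (OMP_LOCK_T_SIZE < sizeof(void *), e.g. a 4-byte
     omp_lock_t on a 64-bit target), the wrapper is recorded in
     __kmp_indirect_lock_table and the user word stores (index << 1), kept
     even so it can never be confused with an odd direct-lock tag; otherwise
     the user word stores the wrapper pointer itself.
   - destruction (__kmp_destroy_indirect_lock, below): the payload lock is
     destroyed and the wrapper is pushed back onto the per-tag pool, reusing
     the payload lock's storage for the pool chain.
*/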
3252 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3253 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3254 l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock); 3255 } 3256 __kmp_indirect_lock_pool[tag] = l; 3257 3258 __kmp_release_lock(&__kmp_global_lock, gtid); 3259 } 3260 3261 static void 3262 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3263 { 3264 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3265 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); 3266 } 3267 3268 static void 3269 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3270 { 3271 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3272 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3273 } 3274 3275 static int 3276 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3277 { 3278 kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); 3279 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); 3280 } 3281 3282 static void 3283 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3284 { 3285 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3286 DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); 3287 } 3288 3289 static void 3290 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3291 { 3292 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3293 DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3294 } 3295 3296 static int 3297 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) 3298 { 3299 kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3300 return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); 3301 } 3302 3303 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3304 3305 // Initialize a hinted lock. 3306 void 3307 __kmp_init_lock_hinted(void **lock, int hint) 3308 { 3309 kmp_dyna_lockseq_t seq; 3310 switch (hint) { 3311 case kmp_lock_hint_uncontended: 3312 seq = lockseq_tas; 3313 break; 3314 case kmp_lock_hint_speculative: 3315 #if DYNA_HAS_HLE 3316 seq = lockseq_hle; 3317 #else 3318 seq = lockseq_tas; 3319 #endif 3320 break; 3321 case kmp_lock_hint_adaptive: 3322 #if KMP_USE_ADAPTIVE_LOCKS 3323 seq = lockseq_adaptive; 3324 #else 3325 seq = lockseq_queuing; 3326 #endif 3327 break; 3328 // Defaults to queuing locks. 3329 case kmp_lock_hint_contended: 3330 case kmp_lock_hint_nonspeculative: 3331 default: 3332 seq = lockseq_queuing; 3333 break; 3334 } 3335 if (DYNA_IS_D_LOCK(seq)) { 3336 DYNA_INIT_D_LOCK(lock, seq); 3337 #if USE_ITT_BUILD 3338 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 3339 #endif 3340 } else { 3341 DYNA_INIT_I_LOCK(lock, seq); 3342 #if USE_ITT_BUILD 3343 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); 3344 __kmp_itt_lock_creating(ilk->lock, NULL); 3345 #endif 3346 } 3347 } 3348 3349 // This is used only in kmp_error.c when consistency checking is on. 
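/*
   Usage sketch for __kmp_init_lock_hinted() above (the caller and the lock
   variable are hypothetical; the hint -> sequence mapping is the one in the
   switch above):

       omp_lock_t l;
       __kmp_init_lock_hinted( (void **)&l, kmp_lock_hint_speculative );
       //   -> lockseq_hle when DYNA_HAS_HLE, otherwise lockseq_tas
       __kmp_init_lock_hinted( (void **)&l, kmp_lock_hint_adaptive );
       //   -> lockseq_adaptive when KMP_USE_ADAPTIVE_LOCKS, otherwise lockseq_queuing
       __kmp_init_lock_hinted( (void **)&l, kmp_lock_hint_contended );
       //   -> lockseq_queuing (also the default)

   kmp_lock_hint_uncontended maps to lockseq_tas.  DYNA_IS_D_LOCK(seq) then
   routes the tas/hle sequences through DYNA_INIT_D_LOCK and the
   queuing-based sequences through DYNA_INIT_I_LOCK.
*/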
3350 kmp_int32 3351 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) 3352 { 3353 switch (seq) { 3354 case lockseq_tas: 3355 case lockseq_nested_tas: 3356 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3357 #if DYNA_HAS_FUTEX 3358 case lockseq_futex: 3359 case lockseq_nested_futex: 3360 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3361 #endif 3362 case lockseq_ticket: 3363 case lockseq_nested_ticket: 3364 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3365 case lockseq_queuing: 3366 case lockseq_nested_queuing: 3367 #if KMP_USE_ADAPTIVE_LOCKS 3368 case lockseq_adaptive: 3369 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3370 #endif 3371 case lockseq_drdpa: 3372 case lockseq_nested_drdpa: 3373 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3374 default: 3375 return 0; 3376 } 3377 } 3378 3379 // The value initialized from KMP_LOCK_KIND needs to be translated to its 3380 // nested version. 3381 void 3382 __kmp_init_nest_lock_hinted(void **lock, int hint) 3383 { 3384 kmp_dyna_lockseq_t seq; 3385 switch (hint) { 3386 case kmp_lock_hint_uncontended: 3387 seq = lockseq_nested_tas; 3388 break; 3389 // Defaults to queuing locks. 3390 case kmp_lock_hint_contended: 3391 case kmp_lock_hint_nonspeculative: 3392 default: 3393 seq = lockseq_nested_queuing; 3394 break; 3395 } 3396 DYNA_INIT_I_LOCK(lock, seq); 3397 #if USE_ITT_BUILD 3398 kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); 3399 __kmp_itt_lock_creating(ilk->lock, NULL); 3400 #endif 3401 } 3402 3403 // Initializes the lock table for indirect locks. 3404 static void 3405 __kmp_init_indirect_lock_table() 3406 { 3407 __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024); 3408 __kmp_indirect_lock_table_size = 1024; 3409 __kmp_indirect_lock_table_next = 0; 3410 } 3411 3412 #if KMP_USE_ADAPTIVE_LOCKS 3413 # define init_lock_func(table, expand) { \ 3414 table[locktag_ticket] = expand(ticket); \ 3415 table[locktag_queuing] = expand(queuing); \ 3416 table[locktag_adaptive] = expand(queuing); \ 3417 table[locktag_drdpa] = expand(drdpa); \ 3418 table[locktag_nested_ticket] = expand(ticket); \ 3419 table[locktag_nested_queuing] = expand(queuing); \ 3420 table[locktag_nested_drdpa] = expand(drdpa); \ 3421 } 3422 #else 3423 # define init_lock_func(table, expand) { \ 3424 table[locktag_ticket] = expand(ticket); \ 3425 table[locktag_queuing] = expand(queuing); \ 3426 table[locktag_drdpa] = expand(drdpa); \ 3427 table[locktag_nested_ticket] = expand(ticket); \ 3428 table[locktag_nested_queuing] = expand(queuing); \ 3429 table[locktag_nested_drdpa] = expand(drdpa); \ 3430 } 3431 #endif // KMP_USE_ADAPTIVE_LOCKS 3432 3433 // Initializes data for dynamic user locks. 3434 void 3435 __kmp_init_dynamic_user_locks() 3436 { 3437 // Initialize jump table location 3438 int offset = (__kmp_env_consistency_check)? 1: 0; 3439 __kmp_direct_set_ops = direct_set_tab[offset]; 3440 __kmp_direct_unset_ops = direct_unset_tab[offset]; 3441 __kmp_direct_test_ops = direct_test_tab[offset]; 3442 __kmp_indirect_set_ops = indirect_set_tab[offset]; 3443 __kmp_indirect_unset_ops = indirect_unset_tab[offset]; 3444 __kmp_indirect_test_ops = indirect_test_tab[offset]; 3445 __kmp_init_indirect_lock_table(); 3446 3447 // Initialize lock accessor/modifier 3448 // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe. 
3449 #define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location 3450 init_lock_func(__kmp_indirect_set_location, expand_func); 3451 #undef expand_func 3452 #define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags 3453 init_lock_func(__kmp_indirect_set_flags, expand_func); 3454 #undef expand_func 3455 #define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location 3456 init_lock_func(__kmp_indirect_get_location, expand_func); 3457 #undef expand_func 3458 #define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags 3459 init_lock_func(__kmp_indirect_get_flags, expand_func); 3460 #undef expand_func 3461 3462 __kmp_init_user_locks = TRUE; 3463 } 3464 3465 // Clean up the lock table. 3466 void 3467 __kmp_cleanup_indirect_user_locks() 3468 { 3469 kmp_lock_index_t i; 3470 int k; 3471 3472 // Clean up locks in the pools first (they were already destroyed before going into the pools). 3473 for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) { 3474 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; 3475 while (l != NULL) { 3476 kmp_indirect_lock_t *ll = l; 3477 l = (kmp_indirect_lock_t *)l->lock->pool.next; 3478 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3479 __kmp_indirect_lock_table[ll->lock->pool.index] = NULL; 3480 } 3481 __kmp_free(ll->lock); 3482 __kmp_free(ll); 3483 } 3484 } 3485 // Clean up the remaining undestroyed locks. 3486 for (i = 0; i < __kmp_indirect_lock_table_next; i++) { 3487 kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i]; 3488 if (l != NULL) { 3489 // Locks not destroyed explicitly need to be destroyed here. 3490 DYNA_I_LOCK_FUNC(l, destroy)(l->lock); 3491 __kmp_free(l->lock); 3492 __kmp_free(l); 3493 } 3494 } 3495 // Free the table 3496 __kmp_free(__kmp_indirect_lock_table); 3497 3498 __kmp_init_user_locks = FALSE; 3499 } 3500 3501 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3502 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3503 3504 #else // KMP_USE_DYNAMIC_LOCK 3505 3506 /* ------------------------------------------------------------------------ */ 3507 /* user locks 3508 * 3509 * They are implemented as a table of function pointers which are set to the 3510 * lock functions of the appropriate kind, once that has been determined. 
3511 */ 3512 3513 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3514 3515 size_t __kmp_base_user_lock_size = 0; 3516 size_t __kmp_user_lock_size = 0; 3517 3518 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL; 3519 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3520 3521 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3522 void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3523 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3524 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL; 3525 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3526 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3527 3528 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3529 void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; 3530 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3531 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; 3532 3533 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL; 3534 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL; 3535 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL; 3536 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL; 3537 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL; 3538 3539 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) 3540 { 3541 switch ( user_lock_kind ) { 3542 case lk_default: 3543 default: 3544 KMP_ASSERT( 0 ); 3545 3546 case lk_tas: { 3547 __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t ); 3548 __kmp_user_lock_size = sizeof( kmp_tas_lock_t ); 3549 3550 __kmp_get_user_lock_owner_ = 3551 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3552 ( &__kmp_get_tas_lock_owner ); 3553 3554 if ( __kmp_env_consistency_check ) { 3555 KMP_BIND_USER_LOCK_WITH_CHECKS(tas); 3556 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); 3557 } 3558 else { 3559 KMP_BIND_USER_LOCK(tas); 3560 KMP_BIND_NESTED_USER_LOCK(tas); 3561 } 3562 3563 __kmp_destroy_user_lock_ = 3564 ( void ( * )( kmp_user_lock_p ) ) 3565 ( &__kmp_destroy_tas_lock ); 3566 3567 __kmp_is_user_lock_initialized_ = 3568 ( int ( * )( kmp_user_lock_p ) ) NULL; 3569 3570 __kmp_get_user_lock_location_ = 3571 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; 3572 3573 __kmp_set_user_lock_location_ = 3574 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; 3575 3576 __kmp_get_user_lock_flags_ = 3577 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; 3578 3579 __kmp_set_user_lock_flags_ = 3580 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; 3581 } 3582 break; 3583 3584 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 3585 3586 case lk_futex: { 3587 __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t ); 3588 __kmp_user_lock_size = sizeof( kmp_futex_lock_t ); 3589 3590 __kmp_get_user_lock_owner_ = 3591 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3592 ( &__kmp_get_futex_lock_owner ); 3593 3594 if ( __kmp_env_consistency_check ) { 3595 KMP_BIND_USER_LOCK_WITH_CHECKS(futex); 3596 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); 3597 } 3598 else { 3599 KMP_BIND_USER_LOCK(futex); 3600 
KMP_BIND_NESTED_USER_LOCK(futex); 3601 } 3602 3603 __kmp_destroy_user_lock_ = 3604 ( void ( * )( kmp_user_lock_p ) ) 3605 ( &__kmp_destroy_futex_lock ); 3606 3607 __kmp_is_user_lock_initialized_ = 3608 ( int ( * )( kmp_user_lock_p ) ) NULL; 3609 3610 __kmp_get_user_lock_location_ = 3611 ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; 3612 3613 __kmp_set_user_lock_location_ = 3614 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; 3615 3616 __kmp_get_user_lock_flags_ = 3617 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; 3618 3619 __kmp_set_user_lock_flags_ = 3620 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; 3621 } 3622 break; 3623 3624 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) 3625 3626 case lk_ticket: { 3627 __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t ); 3628 __kmp_user_lock_size = sizeof( kmp_ticket_lock_t ); 3629 3630 __kmp_get_user_lock_owner_ = 3631 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3632 ( &__kmp_get_ticket_lock_owner ); 3633 3634 if ( __kmp_env_consistency_check ) { 3635 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); 3636 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); 3637 } 3638 else { 3639 KMP_BIND_USER_LOCK(ticket); 3640 KMP_BIND_NESTED_USER_LOCK(ticket); 3641 } 3642 3643 __kmp_destroy_user_lock_ = 3644 ( void ( * )( kmp_user_lock_p ) ) 3645 ( &__kmp_destroy_ticket_lock ); 3646 3647 __kmp_is_user_lock_initialized_ = 3648 ( int ( * )( kmp_user_lock_p ) ) 3649 ( &__kmp_is_ticket_lock_initialized ); 3650 3651 __kmp_get_user_lock_location_ = 3652 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3653 ( &__kmp_get_ticket_lock_location ); 3654 3655 __kmp_set_user_lock_location_ = 3656 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3657 ( &__kmp_set_ticket_lock_location ); 3658 3659 __kmp_get_user_lock_flags_ = 3660 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3661 ( &__kmp_get_ticket_lock_flags ); 3662 3663 __kmp_set_user_lock_flags_ = 3664 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3665 ( &__kmp_set_ticket_lock_flags ); 3666 } 3667 break; 3668 3669 case lk_queuing: { 3670 __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t ); 3671 __kmp_user_lock_size = sizeof( kmp_queuing_lock_t ); 3672 3673 __kmp_get_user_lock_owner_ = 3674 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3675 ( &__kmp_get_queuing_lock_owner ); 3676 3677 if ( __kmp_env_consistency_check ) { 3678 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); 3679 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); 3680 } 3681 else { 3682 KMP_BIND_USER_LOCK(queuing); 3683 KMP_BIND_NESTED_USER_LOCK(queuing); 3684 } 3685 3686 __kmp_destroy_user_lock_ = 3687 ( void ( * )( kmp_user_lock_p ) ) 3688 ( &__kmp_destroy_queuing_lock ); 3689 3690 __kmp_is_user_lock_initialized_ = 3691 ( int ( * )( kmp_user_lock_p ) ) 3692 ( &__kmp_is_queuing_lock_initialized ); 3693 3694 __kmp_get_user_lock_location_ = 3695 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3696 ( &__kmp_get_queuing_lock_location ); 3697 3698 __kmp_set_user_lock_location_ = 3699 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3700 ( &__kmp_set_queuing_lock_location ); 3701 3702 __kmp_get_user_lock_flags_ = 3703 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3704 ( &__kmp_get_queuing_lock_flags ); 3705 3706 __kmp_set_user_lock_flags_ = 3707 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3708 ( &__kmp_set_queuing_lock_flags ); 3709 } 3710 break; 3711 3712 #if KMP_USE_ADAPTIVE_LOCKS 3713 case lk_adaptive: { 3714 __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t ); 3715 __kmp_user_lock_size = sizeof( 
kmp_adaptive_lock_t ); 3716 3717 __kmp_get_user_lock_owner_ = 3718 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3719 ( &__kmp_get_queuing_lock_owner ); 3720 3721 if ( __kmp_env_consistency_check ) { 3722 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); 3723 } 3724 else { 3725 KMP_BIND_USER_LOCK(adaptive); 3726 } 3727 3728 __kmp_destroy_user_lock_ = 3729 ( void ( * )( kmp_user_lock_p ) ) 3730 ( &__kmp_destroy_adaptive_lock ); 3731 3732 __kmp_is_user_lock_initialized_ = 3733 ( int ( * )( kmp_user_lock_p ) ) 3734 ( &__kmp_is_queuing_lock_initialized ); 3735 3736 __kmp_get_user_lock_location_ = 3737 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3738 ( &__kmp_get_queuing_lock_location ); 3739 3740 __kmp_set_user_lock_location_ = 3741 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3742 ( &__kmp_set_queuing_lock_location ); 3743 3744 __kmp_get_user_lock_flags_ = 3745 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3746 ( &__kmp_get_queuing_lock_flags ); 3747 3748 __kmp_set_user_lock_flags_ = 3749 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3750 ( &__kmp_set_queuing_lock_flags ); 3751 3752 } 3753 break; 3754 #endif // KMP_USE_ADAPTIVE_LOCKS 3755 3756 case lk_drdpa: { 3757 __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t ); 3758 __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t ); 3759 3760 __kmp_get_user_lock_owner_ = 3761 ( kmp_int32 ( * )( kmp_user_lock_p ) ) 3762 ( &__kmp_get_drdpa_lock_owner ); 3763 3764 if ( __kmp_env_consistency_check ) { 3765 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa); 3766 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa); 3767 } 3768 else { 3769 KMP_BIND_USER_LOCK(drdpa); 3770 KMP_BIND_NESTED_USER_LOCK(drdpa); 3771 } 3772 3773 __kmp_destroy_user_lock_ = 3774 ( void ( * )( kmp_user_lock_p ) ) 3775 ( &__kmp_destroy_drdpa_lock ); 3776 3777 __kmp_is_user_lock_initialized_ = 3778 ( int ( * )( kmp_user_lock_p ) ) 3779 ( &__kmp_is_drdpa_lock_initialized ); 3780 3781 __kmp_get_user_lock_location_ = 3782 ( const ident_t * ( * )( kmp_user_lock_p ) ) 3783 ( &__kmp_get_drdpa_lock_location ); 3784 3785 __kmp_set_user_lock_location_ = 3786 ( void ( * )( kmp_user_lock_p, const ident_t * ) ) 3787 ( &__kmp_set_drdpa_lock_location ); 3788 3789 __kmp_get_user_lock_flags_ = 3790 ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) 3791 ( &__kmp_get_drdpa_lock_flags ); 3792 3793 __kmp_set_user_lock_flags_ = 3794 ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) 3795 ( &__kmp_set_drdpa_lock_flags ); 3796 } 3797 break; 3798 } 3799 } 3800 3801 3802 // ---------------------------------------------------------------------------- 3803 // User lock table & lock allocation 3804 3805 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL }; 3806 kmp_user_lock_p __kmp_lock_pool = NULL; 3807 3808 // Lock block-allocation support. 3809 kmp_block_of_locks* __kmp_lock_blocks = NULL; 3810 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3811 3812 static kmp_lock_index_t 3813 __kmp_lock_table_insert( kmp_user_lock_p lck ) 3814 { 3815 // Assume that kmp_global_lock is held upon entry/exit. 3816 kmp_lock_index_t index; 3817 if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) { 3818 kmp_lock_index_t size; 3819 kmp_user_lock_p *table; 3820 kmp_lock_index_t i; 3821 // Reallocate lock table. 
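        // A minimal sketch of the growth behavior implemented below (assuming
        // repeated insertions past the current capacity): the table starts at
        // 1024 entries and doubles on each reallocation,
        //
        //     allocated: 0 -> 1024 -> 2048 -> 4096 -> ...
        //
        // with element [0] of each new table holding a pointer to the previous
        // (smaller) table, so the chain of old tables can be walked and freed
        // in __kmp_cleanup_user_locks(). Lock entries therefore start at index 1.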
3822         if ( __kmp_user_lock_table.allocated == 0 ) {
3823             size = 1024;
3824         }
3825         else {
3826             size = __kmp_user_lock_table.allocated * 2;
3827         }
3828         table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3829         memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3830         table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3831             // We cannot free the previous table now, since it may be in use by other
3832             // threads. So save the pointer to the previous table in the first element of the
3833             // new table. All the tables are organized into a list, and can be freed when the
3834             // library is shutting down.
3835         __kmp_user_lock_table.table = table;
3836         __kmp_user_lock_table.allocated = size;
3837     }
3838     KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3839     index = __kmp_user_lock_table.used;
3840     __kmp_user_lock_table.table[ index ] = lck;
3841     ++ __kmp_user_lock_table.used;
3842     return index;
3843 }
3844 
3845 static kmp_user_lock_p
3846 __kmp_lock_block_allocate()
3847 {
3848     // Assume that kmp_global_lock is held upon entry/exit.
3849     static int last_index = 0;
3850     if ( ( last_index >= __kmp_num_locks_in_block )
3851       || ( __kmp_lock_blocks == NULL ) ) {
3852         // Restart the index.
3853         last_index = 0;
3854         // Need to allocate a new block.
3855         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3856         size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3857         char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3858         // Set up the new block.
3859         kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3860         new_block->next_block = __kmp_lock_blocks;
3861         new_block->locks = (void *)buffer;
3862         // Publish the new block.
3863         KMP_MB();
3864         __kmp_lock_blocks = new_block;
3865     }
3866     kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3867       [ last_index * __kmp_user_lock_size ] ) );
3868     last_index++;
3869     return ret;
3870 }
3871 
3872 //
3873 // Get memory for a lock. It may be freshly allocated memory or reused memory
3874 // from the lock pool.
3875 //
3876 kmp_user_lock_p
3877 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3878   kmp_lock_flags_t flags )
3879 {
3880     kmp_user_lock_p lck;
3881     kmp_lock_index_t index;
3882     KMP_DEBUG_ASSERT( user_lock );
3883 
3884     __kmp_acquire_lock( &__kmp_global_lock, gtid );
3885 
3886     if ( __kmp_lock_pool == NULL ) {
3887         // Lock pool is empty. Allocate new memory.
3888         if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3889             lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3890         }
3891         else {
3892             lck = __kmp_lock_block_allocate();
3893         }
3894 
3895         // Insert the lock into the table so that it can be freed in __kmp_cleanup,
3896         // and so the debugger has info on all allocated locks.
3897         index = __kmp_lock_table_insert( lck );
3898     }
3899     else {
3900         // Pick up a lock from the pool.
3901         lck = __kmp_lock_pool;
3902         index = __kmp_lock_pool->pool.index;
3903         __kmp_lock_pool = __kmp_lock_pool->pool.next;
3904     }
3905 
3906     //
3907     // We could potentially differentiate between nested and regular locks
3908     // here, and do the lock table lookup for regular locks only.
3909     //
3910     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3911         * ( (kmp_lock_index_t *) user_lock ) = index;
3912     }
3913     else {
3914         * ( (kmp_user_lock_p *) user_lock ) = lck;
3915     }
3916 
3917     // Mark the lock if it is a critical section lock.
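    // (The critical-section flag recorded here is what the IS_CRITICAL() check
    // in __kmp_cleanup_user_locks() later consults, so that locks backing
    // "critical" constructs are not reported as user locks left undestroyed.)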
3918     __kmp_set_user_lock_flags( lck, flags );
3919 
3920     __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
3921 
3922     return lck;
3923 }
3924 
3925 // Return the lock's memory to the pool for reuse.
3926 void
3927 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3928 {
3929     kmp_lock_pool_t * lock_pool;
3930 
3931     KMP_DEBUG_ASSERT( user_lock != NULL );
3932     KMP_DEBUG_ASSERT( lck != NULL );
3933 
3934     __kmp_acquire_lock( & __kmp_global_lock, gtid );
3935 
3936     lck->pool.next = __kmp_lock_pool;
3937     __kmp_lock_pool = lck;
3938     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3939         kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3940         KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3941         lck->pool.index = index;
3942     }
3943 
3944     __kmp_release_lock( & __kmp_global_lock, gtid );
3945 }
3946 
3947 kmp_user_lock_p
3948 __kmp_lookup_user_lock( void **user_lock, char const *func )
3949 {
3950     kmp_user_lock_p lck = NULL;
3951 
3952     if ( __kmp_env_consistency_check ) {
3953         if ( user_lock == NULL ) {
3954             KMP_FATAL( LockIsUninitialized, func );
3955         }
3956     }
3957 
3958     if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3959         kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3960         if ( __kmp_env_consistency_check ) {
3961             if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3962                 KMP_FATAL( LockIsUninitialized, func );
3963             }
3964         }
3965         KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3966         KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3967         lck = __kmp_user_lock_table.table[index];
3968     }
3969     else {
3970         lck = *( (kmp_user_lock_p *)user_lock );
3971     }
3972 
3973     if ( __kmp_env_consistency_check ) {
3974         if ( lck == NULL ) {
3975             KMP_FATAL( LockIsUninitialized, func );
3976         }
3977     }
3978 
3979     return lck;
3980 }
3981 
3982 void
3983 __kmp_cleanup_user_locks( void )
3984 {
3985     //
3986     // Reset the lock pool. Do not worry about the locks in the pool -- we will
3987     // free them when iterating through the lock table (it includes all the
3988     // locks, dead or alive).
3989     //
3990     __kmp_lock_pool = NULL;
3991 
3992 #define IS_CRITICAL(lck) \
3993     ( ( __kmp_get_user_lock_flags_ != NULL ) && \
3994       ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
3995 
3996     //
3997     // Loop through the lock table, freeing all locks.
3998     //
3999     // Do not free item [0]; it is reserved for the lock tables list.
4000     //
4001     // FIXME - we are iterating through a list of (pointers to) objects of
4002     // type union kmp_user_lock, but we have no way of knowing whether the
4003     // base type is currently "pool" or whatever the global user lock type
4004     // is.
4005     //
4006     // We are relying on the fact that for all of the user lock types
4007     // (except "tas"), the first field in the lock struct is the "initialized"
4008     // field, which is set to the address of the lock object itself when
4009     // the lock is initialized. When the union is of type "pool", the
4010     // first field is a pointer to the next object in the free list, which
4011     // will not be the same address as the object itself.
4012     //
4013     // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4014     // will fail for "pool" objects on the free list. This must happen as
4015     // the "location" field of real user locks overlaps the "index" field
4016     // of "pool" objects.
4017     //
4018     // It would be better to run through the free list, and remove all "pool"
4019     // objects from the lock table before executing this loop.
However, 4020 // "pool" objects do not always have their index field set (only on 4021 // lin_32e), and I don't want to search the lock table for the address 4022 // of every "pool" object on the free list. 4023 // 4024 while ( __kmp_user_lock_table.used > 1 ) { 4025 const ident *loc; 4026 4027 // 4028 // reduce __kmp_user_lock_table.used before freeing the lock, 4029 // so that state of locks is consistent 4030 // 4031 kmp_user_lock_p lck = __kmp_user_lock_table.table[ 4032 --__kmp_user_lock_table.used ]; 4033 4034 if ( ( __kmp_is_user_lock_initialized_ != NULL ) && 4035 ( *__kmp_is_user_lock_initialized_ )( lck ) ) { 4036 // 4037 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is 4038 // initialized AND it is NOT a critical section (user is not 4039 // responsible for destroying criticals) AND we know source 4040 // location to report. 4041 // 4042 if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) && 4043 ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) && 4044 ( loc->psource != NULL ) ) { 4045 kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 ); 4046 KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line ); 4047 __kmp_str_loc_free( &str_loc); 4048 } 4049 4050 #ifdef KMP_DEBUG 4051 if ( IS_CRITICAL( lck ) ) { 4052 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) ); 4053 } 4054 else { 4055 KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) ); 4056 } 4057 #endif // KMP_DEBUG 4058 4059 // 4060 // Cleanup internal lock dynamic resources 4061 // (for drdpa locks particularly). 4062 // 4063 __kmp_destroy_user_lock( lck ); 4064 } 4065 4066 // 4067 // Free the lock if block allocation of locks is not used. 4068 // 4069 if ( __kmp_lock_blocks == NULL ) { 4070 __kmp_free( lck ); 4071 } 4072 } 4073 4074 #undef IS_CRITICAL 4075 4076 // 4077 // delete lock table(s). 4078 // 4079 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; 4080 __kmp_user_lock_table.table = NULL; 4081 __kmp_user_lock_table.allocated = 0; 4082 4083 while ( table_ptr != NULL ) { 4084 // 4085 // In the first element we saved the pointer to the previous 4086 // (smaller) lock table. 4087 // 4088 kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] ); 4089 __kmp_free( table_ptr ); 4090 table_ptr = next; 4091 } 4092 4093 // 4094 // Free buffers allocated for blocks of locks. 4095 // 4096 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; 4097 __kmp_lock_blocks = NULL; 4098 4099 while ( block_ptr != NULL ) { 4100 kmp_block_of_locks_t *next = block_ptr->next_block; 4101 __kmp_free( block_ptr->locks ); 4102 // 4103 // *block_ptr itself was allocated at the end of the locks vector. 4104 // 4105 block_ptr = next; 4106 } 4107 4108 TCW_4(__kmp_init_user_locks, FALSE); 4109 } 4110 4111 #endif // KMP_USE_DYNAMIC_LOCK 4112
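
/*
 * Illustrative sketch (not compiled) of how the allocation, lookup, and free
 * routines in the !KMP_USE_DYNAMIC_LOCK path above cooperate on the storage
 * the user passes in as an omp_lock_t. The gtid and the zero flags value are
 * assumptions for an ordinary, non-critical lock.
 *
 *     void **user_lock = ...;   // address of the user's omp_lock_t storage
 *     kmp_user_lock_p lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
 *     // If OMP_LOCK_T_SIZE < sizeof(void *), *user_lock now holds a
 *     // kmp_lock_index_t into __kmp_user_lock_table; otherwise it holds
 *     // the kmp_user_lock_p itself.
 *
 *     lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
 *     // Decodes *user_lock back to the lock object with the same
 *     // OMP_LOCK_T_SIZE test used at allocation time.
 *
 *     __kmp_destroy_user_lock( lck );
 *     __kmp_user_lock_free( user_lock, gtid, lck );
 *     // Returns the memory to __kmp_lock_pool for reuse (recording the
 *     // table index in lck->pool.index when indices are in use).
 */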