1 /* 2 * kmp_lock.cpp -- lock-related functions 3 * $Revision: 43389 $ 4 * $Date: 2014-08-11 10:54:01 -0500 (Mon, 11 Aug 2014) $ 5 */ 6 7 8 //===----------------------------------------------------------------------===// 9 // 10 // The LLVM Compiler Infrastructure 11 // 12 // This file is dual licensed under the MIT and the University of Illinois Open 13 // Source Licenses. See LICENSE.txt for details. 14 // 15 //===----------------------------------------------------------------------===// 16 17 18 #include <stddef.h> 19 20 #include "kmp.h" 21 #include "kmp_itt.h" 22 #include "kmp_i18n.h" 23 #include "kmp_lock.h" 24 #include "kmp_io.h" 25 26 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 27 # include <unistd.h> 28 # include <sys/syscall.h> 29 // We should really include <futex.h>, but that causes compatibility problems on different 30 // Linux* OS distributions that either require that you include (or break when you try to include) 31 // <pci/types.h>. 32 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change) 33 // we just define the constants here and don't include <futex.h> 34 # ifndef FUTEX_WAIT 35 # define FUTEX_WAIT 0 36 # endif 37 # ifndef FUTEX_WAKE 38 # define FUTEX_WAKE 1 39 # endif 40 #endif 41 42 /* Implement spin locks for internal library use. */ 43 /* The algorithm implemented is Lamport's bakery lock [1974]. 
 */

/* Startup sanity checks: verify that unsigned 32-bit arithmetic wraps as the
   ticket (bakery) lock relies on, and that the queuing lock's head/tail pair
   is 8-byte aligned so it can be updated with a single 64-bit CAS. */
void
__kmp_validate_locks( void )
{
    int i;
    kmp_uint32  x, y;

    /* Check to make sure unsigned arithmetic does wraps properly */
    x = ~((kmp_uint32) 0) - 2;
    y = x - 2;

    for (i = 0; i < 8; ++i, ++x, ++y) {
        kmp_uint32 z = (x - y);
        KMP_ASSERT( z == 2 );
    }

    KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
}


/* ------------------------------------------------------------------------ */
/* test and set locks */

//
// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
//

// Owner gtid of a TAS lock, or -1 if the lock is free.
// lk.poll stores gtid + 1 (0 == unlocked).
static kmp_int32
__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
{
    return TCR_4( lck->lk.poll ) - 1;
}

// lk.depth_locked is -1 for simple locks, >= 0 for nestable locks.
static inline bool
__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

// Spin until the TAS lock is acquired.  Yields to the OS when the number of
// live threads exceeds the number of available processors (oversubscription).
__forceinline static void
__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    // Fast path: lock looks free, try to take it with one acquire-CAS.
    if ( ( lck->lk.poll == 0 )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }

    kmp_uint32 spins;
    KMP_FSYNC_PREPARE( lck );
    KMP_INIT_YIELD( spins );
    if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) ) {
        KMP_YIELD( TRUE );
    }
    else {
        KMP_YIELD_SPIN( spins );
    }

    while ( ( lck->lk.poll != 0 ) ||
      ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) {
        //
        // FIXME - use exponential backoff here
        //
        if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) ) {
            KMP_YIELD( TRUE );
        }
        else {
            KMP_YIELD_SPIN( spins );
        }
    }
    KMP_FSYNC_ACQUIRED( lck );
}

void
__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_tas_lock_timed_template( lck, gtid );
}

// omp_set_lock() wrapper with consistency checking: refuses a nested lock
// used as a simple lock, and a relock by the current owner (deadlock).
static void
__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_tas_lock( lck, gtid );
}

// Non-blocking acquire; returns TRUE iff the lock was obtained.
int
__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    if ( ( lck->lk.poll == 0 )
      && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_tas_lock( lck, gtid );
}

void
__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL32( &(lck->lk.poll), 0 );    // release-store frees the lock

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
}

// omp_unset_lock() wrapper: fatal error on unlocking a free lock or a lock
// owned by another thread.
static void
__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
      && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_tas_lock( lck, gtid );
}

void
__kmp_init_tas_lock( kmp_tas_lock_t * lck )
{
    TCW_4( lck->lk.poll, 0 );
}

static void
__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
}

void
__kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_tas_lock( lck );
}


//
// nested test and set locks
//

// Recursive acquire: the owner just bumps the depth count; anyone else
// contends for the underlying TAS lock first.
void
__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_tas_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
    }
}

static void
__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_tas_lock( lck, gtid );
}

// Returns the new nesting depth on success, 0 on failure (OpenMP
// omp_test_nest_lock semantics).
int
__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_tas_lock( lck, gtid );
}

// Decrement the nesting depth; only the outermost release frees the lock.
void
__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_tas_lock( lck, gtid );
    }
}

static void
__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_tas_lock( lck, gtid );
}

void
__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
{
    __kmp_init_tas_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
{
    __kmp_init_nested_tas_lock( lck );
}

void
__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
{
    __kmp_destroy_tas_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_tas_lock( lck );
}


#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

// Owner gtid of a futex lock, or -1 if free.
// lk.poll stores ( gtid + 1 ) << 1; bit 0 flags "sleeping waiters present".
static kmp_int32
__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
{
    return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
}

// lk.depth_locked is -1 for simple locks, >= 0 for nestable locks.
static inline bool
__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

// Acquire with futex-based blocking: on contention, set bit 0 of poll so the
// owner knows to FUTEX_WAKE on release, then sleep in FUTEX_WAIT.
__forceinline static void
__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    kmp_int32 gtid_code = ( gtid + 1 ) << 1;

    KMP_MB();

#ifdef USE_LOCK_PROFILE
    kmp_uint32 curr = TCR_4( lck->lk.poll );
    if ( ( curr != 0 ) && ( curr != gtid_code ) )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    KMP_FSYNC_PREPARE( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    kmp_int32 poll_val;
    while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0,
      gtid_code ) ) != 0 ) {
        kmp_int32 cond = poll_val & 1;
        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
           lck, gtid, poll_val, cond ) );

        //
        // NOTE: if you try to use the following condition for this branch
        //
        // if ( poll_val & 1 == 0 )
        //
        // Then the 12.0 compiler has a bug where the following block will
        // always be skipped, regardless of the value of the LSB of poll_val.
        //
        if ( ! cond ) {
            //
            // Try to set the lsb in the poll to indicate to the owner
            // thread that they need to wake this thread up.
            //
            if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ),
              poll_val, poll_val | 1 ) ) {
                KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
                  lck, lck->lk.poll, gtid ) );
                continue;
            }
            poll_val |= 1;

            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
              lck, lck->lk.poll, gtid ) );
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );

        // FUTEX_WAIT returns nonzero (e.g. EWOULDBLOCK) if poll no longer
        // equals poll_val; in that case just retry the CAS loop.
        kmp_int32 rc;
        if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
          poll_val, NULL, NULL, 0 ) ) != 0 ) {
            KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
               lck, gtid, poll_val, rc, errno ) );
            continue;
        }

        KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
           lck, gtid, poll_val ) );
        //
        // This thread has now done a successful futex wait call and was
        // entered on the OS futex queue.  We must now perform a futex
        // wake call when releasing the lock, as we have no idea how many
        // other threads are in the queue.
        //
        gtid_code |= 1;
    }

    KMP_FSYNC_ACQUIRED( lck );
    KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );
}

void
__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_futex_lock_timed_template( lck, gtid );
}

// omp_set_lock() wrapper with consistency checking.
static void
__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }
    __kmp_acquire_futex_lock( lck, gtid );
}

// Non-blocking acquire; returns TRUE iff the lock was obtained.
int
__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
        KMP_FSYNC_ACQUIRED( lck );
        return TRUE;
    }
    return FALSE;
}

static int
__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    return __kmp_test_futex_lock( lck, gtid );
}

// Release: swap poll to 0; if bit 0 of the old value was set, there is at
// least one sleeper, so wake exactly one with FUTEX_WAKE.
void
__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
      lck, lck->lk.poll, gtid ) );

    KMP_FSYNC_RELEASING(lck);

    kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 );

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
       lck, gtid, poll_val ) );

    if ( poll_val & 1 ) {
        KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
           lck, gtid ) );
        syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
    }

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
      lck, lck->lk.poll, gtid ) );

    KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
      __kmp_xproc ) );
}

static void
__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
      && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_futex_lock( lck, gtid );
}

void
__kmp_init_futex_lock( kmp_futex_lock_t * lck )
{
    TCW_4( lck->lk.poll, 0 );
}

static void
__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
}

void
__kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
{
    lck->lk.poll = 0;
}

static void
__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
      && __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_futex_lock( lck );
}


//
// nested futex locks
//

// Recursive acquire: the owner just bumps the depth count.
void
__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_futex_lock_timed_template( lck, gtid );
        lck->lk.depth_locked = 1;
    }
}

static void
__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_futex_lock( lck, gtid );
}

// Returns the new nesting depth on success, 0 on failure.
int
__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
    }
    return retval;
}

static int
__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_futex_lock( lck, gtid );
}

// Decrement the nesting depth; only the outermost release frees the lock.
void
__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        __kmp_release_futex_lock( lck, gtid );
    }
}

static void
__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_futex_lock( lck, gtid );
}

void
__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
{
    __kmp_init_futex_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
{
    __kmp_init_nested_futex_lock( lck );
}

void
__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
{
    __kmp_destroy_futex_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_futex_lock( lck );
}

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)


/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

// Owner gtid of a ticket lock, or -1 if free (owner_id stores gtid + 1).
static kmp_int32
__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

// lk.depth_locked is -1 for simple locks, >= 0 for nestable locks.
static inline bool
__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

// Spin predicate passed to KMP_WAIT_YIELD: TRUE when now_serving (value)
// reaches our ticket (checker); otherwise burn a short delay proportional to
// the queue distance and report FALSE.
// NOTE(review): the delay loop below has no side effects and a non-volatile
// counter, so an optimizer may remove it entirely — verify if the backoff
// matters on a given compiler.
static kmp_uint32
__kmp_bakery_check(kmp_uint value, kmp_uint checker)
{
    register kmp_uint32 pause;

    if (value == checker) {
        return TRUE;
    }
    for (pause = checker - value; pause != 0; --pause);
    return FALSE;
}

// Take a ticket, then wait until now_serving reaches it (FIFO, fair).
__forceinline static void
__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket;
    KMP_MB();

    my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );

#ifdef USE_LOCK_PROFILE
    if ( TCR_4( lck->lk.now_serving ) != my_ticket )
        __kmp_printf( "LOCK CONTENTION: %p\n", lck );
    /* else __kmp_printf( "." 
);*/
#endif /* USE_LOCK_PROFILE */

    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        KMP_FSYNC_ACQUIRED(lck);
        return;
    }
    KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
    KMP_FSYNC_ACQUIRED(lck);
}

void
__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_ticket_lock_timed_template( lck, gtid );
}

// omp_set_lock() wrapper with consistency checking; also records the owner
// (ticket locks track owner_id explicitly, unlike TAS/futex).
static void
__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_ticket_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}

// Non-blocking acquire: only succeeds if the lock is currently unheld
// (next_ticket == now_serving) and we can CAS next_ticket forward.
int
__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
        kmp_uint32 next_ticket = my_ticket + 1;
        if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
          my_ticket, next_ticket ) ) {
            KMP_FSYNC_ACQUIRED( lck );
            return TRUE;
        }
    }
    return FALSE;
}

static int
__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_ticket_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

// Release: advance now_serving to hand the lock to the next ticket holder.
// Yield only when the waiting queue is longer than the processor count.
void
__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32  distance;

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_FSYNC_RELEASING(lck);
    distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );

    KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );

    KMP_MB();       /* Flush all pending memory write invalidates.  */

    KMP_YIELD( distance
      > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
}

static void
__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
      && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    __kmp_release_ticket_lock( lck, gtid );
}

void
__kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
{
    lck->lk.location = NULL;
    TCW_4( lck->lk.next_ticket, 0 );
    TCW_4( lck->lk.now_serving, 0 );
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // -1 => not a nested lock.
    // initialized points at the lock itself; used by the checked variants
    // to detect use of an uninitialized lock.
    lck->lk.initialized = (kmp_ticket_lock *)lck;
}

static void
__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
}

void
__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location    = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_ticket_lock( lck );
}


//
// nested ticket locks
//

// Recursive acquire: the owner just bumps the depth count; anyone else
// queues on the underlying ticket lock first.
void
__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_ticket_lock_timed_template( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
}

static void
__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_ticket_lock( lck, gtid );
}

// Returns the new nesting depth on success, 0 on failure.
int
__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_ticket_lock( lck, gtid );
}

// Decrement the nesting depth; the outermost release clears owner_id and
// frees the underlying ticket lock.
void
__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_ticket_lock( lck, gtid );
    }
}

static void
__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_ticket_lock( lck, gtid );
}

void
__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
{
    __kmp_init_ticket_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
{
    __kmp_init_nested_ticket_lock( lck );
}

void
__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_ticket_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
1041 // 1042 1043 static int 1044 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck ) 1045 { 1046 return lck == lck->lk.initialized; 1047 } 1048 1049 static const ident_t * 1050 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck ) 1051 { 1052 return lck->lk.location; 1053 } 1054 1055 static void 1056 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc ) 1057 { 1058 lck->lk.location = loc; 1059 } 1060 1061 static kmp_lock_flags_t 1062 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck ) 1063 { 1064 return lck->lk.flags; 1065 } 1066 1067 static void 1068 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags ) 1069 { 1070 lck->lk.flags = flags; 1071 } 1072 1073 /* ------------------------------------------------------------------------ */ 1074 /* queuing locks */ 1075 1076 /* 1077 * First the states 1078 * (head,tail) = 0, 0 means lock is unheld, nobody on queue 1079 * UINT_MAX or -1, 0 means lock is held, nobody on queue 1080 * h, h means lock is held or about to transition, 1 element on queue 1081 * h, t h <> t, means lock is held or about to transition, >1 elements on queue 1082 * 1083 * Now the transitions 1084 * Acquire(0,0) = -1 ,0 1085 * Release(0,0) = Error 1086 * Acquire(-1,0) = h ,h h > 0 1087 * Release(-1,0) = 0 ,0 1088 * Acquire(h,h) = h ,t h > 0, t > 0, h <> t 1089 * Release(h,h) = -1 ,0 h > 0 1090 * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' 1091 * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t 1092 * 1093 * And pictorially 1094 * 1095 * 1096 * +-----+ 1097 * | 0, 0|------- release -------> Error 1098 * +-----+ 1099 * | ^ 1100 * acquire| |release 1101 * | | 1102 * | | 1103 * v | 1104 * +-----+ 1105 * |-1, 0| 1106 * +-----+ 1107 * | ^ 1108 * acquire| |release 1109 * | | 1110 * | | 1111 * v | 1112 * +-----+ 1113 * | h, h| 1114 * +-----+ 1115 * | ^ 1116 * acquire| |release 1117 * | | 1118 * | | 1119 * v | 1120 * +-----+ 1121 * | h, t|----- acquire, release 
loopback ---+ 1122 * +-----+ | 1123 * ^ | 1124 * | | 1125 * +------------------------------------+ 1126 * 1127 */ 1128 1129 #ifdef DEBUG_QUEUING_LOCKS 1130 1131 /* Stuff for circular trace buffer */ 1132 #define TRACE_BUF_ELE 1024 1133 static char traces[TRACE_BUF_ELE][128] = { 0 } 1134 static int tc = 0; 1135 #define TRACE_LOCK(X,Y) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y ); 1136 #define TRACE_LOCK_T(X,Y,Z) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z ); 1137 #define TRACE_LOCK_HT(X,Y,Z,Q) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q ); 1138 1139 static void 1140 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid, 1141 kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id ) 1142 { 1143 kmp_int32 t, i; 1144 1145 __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" ); 1146 1147 i = tc % TRACE_BUF_ELE; 1148 __kmp_printf_no_lock( "%s\n", traces[i] ); 1149 i = (i+1) % TRACE_BUF_ELE; 1150 while ( i != (tc % TRACE_BUF_ELE) ) { 1151 __kmp_printf_no_lock( "%s", traces[i] ); 1152 i = (i+1) % TRACE_BUF_ELE; 1153 } 1154 __kmp_printf_no_lock( "\n" ); 1155 1156 __kmp_printf_no_lock( 1157 "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n", 1158 gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting, 1159 head_id, tail_id ); 1160 1161 __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id ); 1162 1163 if ( lck->lk.head_id >= 1 ) { 1164 t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting; 1165 while (t > 0) { 1166 __kmp_printf_no_lock( "-> %d ", t ); 1167 t = __kmp_threads[t-1]->th.th_next_waiting; 1168 } 1169 } 1170 __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id ); 1171 __kmp_printf_no_lock( "\n\n" ); 1172 } 1173 1174 #endif /* DEBUG_QUEUING_LOCKS */ 1175 1176 static kmp_int32 1177 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck ) 1178 { 1179 return TCR_4( lck->lk.owner_id ) - 1; 1180 } 1181 1182 static inline 
bool
__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
{
    // Simple locks are initialized with depth_locked == -1; nestable locks
    // keep a count >= 0.
    return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes is a
   potential compiler problem with __forceinline. */
__forceinline static void
__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    register kmp_info_t *this_thr    = __kmp_thread_from_gtid( gtid );
    volatile kmp_int32  *head_id_p   = & lck->lk.head_id;
    volatile kmp_int32  *tail_id_p   = & lck->lk.tail_id;
    volatile kmp_uint32 *spin_here_p;
    kmp_int32 need_mf = 1;

    KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));

    KMP_FSYNC_PREPARE( lck );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    spin_here_p = & this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "acq ent" );
    if ( *spin_here_p )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !*spin_here_p );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );


    /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
       that may follow, not just in execution order, but also in visibility order. This way,
       when a releasing thread observes the changes to the queue by this thread, it can
       rightly assume that spin_here_p has already been set to TRUE, so that when it sets
       spin_here_p to FALSE, it is not premature.  If the releasing thread sets spin_here_p
       to FALSE before this thread sets it to TRUE, this thread will hang.
    */
    *spin_here_p = TRUE;  /* before enqueuing to prevent race */

    while( 1 ) {
        kmp_int32 enqueued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

        switch ( head ) {

            case -1:
            {
                // Lock held, queue empty: try to become the sole waiter.
#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                tail = 0;  /* to make sure next link asynchronously read is not set accidentally;
                              this assignment prevents us from entering the if ( t > 0 )
                              condition in the enqueued case below, which is not necessary for
                              this state transition */

                need_mf = 0;
                /* try (-1,0)->(tid,tid) -- both words updated atomically as one 64-bit CAS */
                enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
                  KMP_PACK_64( -1, 0 ),
                  KMP_PACK_64( gtid+1, gtid+1 ) );
#ifdef DEBUG_QUEUING_LOCKS
                  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
#endif
            }
            break;

            default:
            {
                // Lock held with a non-empty queue: append self at the tail.
                tail = *tail_id_p;
                KMP_DEBUG_ASSERT( tail != gtid + 1 );

#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif

                if ( tail == 0 ) {
                    // Queue is mid-transition; retry from the top.
                    enqueued = FALSE;
                }
                else {
                    need_mf = 0;
                    /* try (h,t) or (h,h)->(h,tid) */
                    enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );

#ifdef DEBUG_QUEUING_LOCKS
                        if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
#endif
                }
            }
            break;

            case 0: /* empty queue */
            {
                kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
                tail = *tail_id_p;
                TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
#endif
                /* try (0,0)->(-1,0) */

                /* only legal transition out of head = 0 is head = -1 with no change to tail */
                grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );

                if ( grabbed_lock ) {

                    *spin_here_p = FALSE;

                    KA_TRACE( 1000,
                             ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
                              lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                        TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
#endif
                    KMP_FSYNC_ACQUIRED( lck );
                    return; /* lock holder cannot be on queue */
                }
                enqueued = FALSE;
            }
            break;
        }

        if ( enqueued ) {
            if ( tail > 0 ) {
                // Link the previous tail to us so release can find us.
                kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
                KMP_ASSERT( tail_thr != NULL );
                tail_thr->th.th_next_waiting = gtid+1;
                /* corresponding wait for this write in release code */
            }
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));


            /* ToDo: May want to consider using __kmp_wait_sleep  or something that sleeps for
             *       throughput only here.
             */
            KMP_MB();
            // Spin until the releasing thread clears th_spin_here for us.
            KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq spin" );

            if ( this_thr->th.th_next_waiting != 0 )
                __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
            KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
            KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
                      lck, gtid ));

#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "acq exit 2" );
#endif
            /* got lock, we were dequeued by the thread that released lock */
            return;
        }

        /* Yield if number of threads > number of logical processors */
        /* ToDo: Not sure why this should only be in oversubscription case,
           maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
        KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
          __kmp_xproc ) );
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "acq retry" );
#endif

    }
    KMP_ASSERT2( 0, "should not get here" );
}

// Public entry point: acquire a (simple) queuing lock.
void
__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
}

// Consistency-checked variant used when the user requested runtime checks
// (reports uninitialized / wrong-kind / recursive use as fatal errors).
static void
__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_queuing_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}

// Non-blocking acquire: succeeds only when the lock is completely free
// (head == 0); never enqueues. Returns TRUE iff the lock was taken.
int
__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    volatile kmp_int32 *head_id_p  = & lck->lk.head_id;
    kmp_int32 head;
#ifdef KMP_DEBUG
    kmp_info_t *this_thr;
#endif

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
#ifdef KMP_DEBUG
    this_thr = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
#endif

    head = *head_id_p;

    if ( head == 0 ) { /* nobody on queue, nobody holding */

        /* try (0,0)->(-1,0) */

        if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
            KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
            KMP_FSYNC_ACQUIRED(lck);
            return TRUE;
        }
    }

    KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
    return FALSE;
}

static int
__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    // Consistency-checked omp_test_lock(): validates init state and lock kind,
    // then records ownership on success.
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_queuing_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

// Release a queuing lock: either clear it outright (empty queue) or hand it
// to the head waiter by clearing that thread's th_spin_here flag.
// See the state/transition diagram above for the (head,tail) protocol.
void
__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    register kmp_info_t *this_thr;
    volatile kmp_int32 *head_id_p = & lck->lk.head_id;
    volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;

    KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
    KMP_DEBUG_ASSERT( gtid >= 0 );
    this_thr    = __kmp_thread_from_gtid( gtid );
    KMP_DEBUG_ASSERT( this_thr != NULL );
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK( gtid+1, "rel ent" );

    if ( this_thr->th.th_spin_here )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
    if ( this_thr->th.th_next_waiting != 0 )
        __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
#endif
    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );

    KMP_FSYNC_RELEASING(lck);

    while( 1 ) {
        kmp_int32 dequeued;
        kmp_int32 head;
        kmp_int32 tail;

        head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
        tail = *tail_id_p;
        TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
        if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
        KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */

        if ( head == -1 ) { /* nobody on queue */

            /* try (-1,0)->(0,0) */
            if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
                KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
                          lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
#endif
                return;
            }
            dequeued = FALSE;

        }
        else {

            tail = *tail_id_p;
            if ( head == tail ) {  /* only one thread on the queue */

#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 );

                /* try (h,h)->(-1,0) -- single 64-bit CAS over both words */
                dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
                  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
#endif

            }
            else {
                volatile kmp_int32 *waiting_id_p;
                kmp_info_t         *head_thr = __kmp_thread_from_gtid( head - 1 );
                KMP_DEBUG_ASSERT( head_thr != NULL );
                waiting_id_p = & head_thr->th.th_next_waiting;

                /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
                if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
                KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

                /* try (h,t)->(h',t) or (t,t) */

                KMP_MB();
                /* make sure enqueuing thread has time to update next waiting thread field */
                // Spin until the head thread's next-waiting link is published,
                // then promote that thread to be the new queue head.
                *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
                TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
#endif
                dequeued = TRUE;
            }
        }

        if ( dequeued ) {
            kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
            KMP_DEBUG_ASSERT( head_thr != NULL );

            /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
            if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
#endif
            KMP_DEBUG_ASSERT( head > 0 && tail > 0 );

            /* For clean code only.
             * Thread not released until next statement prevents race with acquire code.
             */
            head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
#endif

            KMP_MB();
            /* reset spin value -- this is the handoff that wakes the new owner */
            head_thr->th.th_spin_here = FALSE;

            KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
                      lck, gtid ));
#ifdef DEBUG_QUEUING_LOCKS
            TRACE_LOCK( gtid+1, "rel exit 2" );
#endif
            return;
        }
        /* KMP_CPU_PAUSE( );  don't want to make releasing thread hold up acquiring threads */

#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK( gtid+1, "rel retry" );
#endif

    } /* while */
    KMP_ASSERT2( 0, "should not get here" );
}

// Consistency-checked omp_unset_lock(): validates init state, lock kind, and
// ownership before releasing.
static void
__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    __kmp_release_queuing_lock( lck, gtid );
}

// Initialize a simple (non-nested) queuing lock to the unheld, empty state.
void
__kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id =
0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
    lck->lk.initialized = lck;

    KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}

static void
__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
}

// Destroy a queuing lock: clear the self-pointer so the lock reads as
// uninitialized, and reset all queue/ownership state.
void
__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    lck->lk.head_id = 0;
    lck->lk.tail_id = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

static void
__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_queuing_lock( lck );
}


//
// nested queuing locks
//

// Nested acquire: recursive acquisition by the owner just bumps the depth
// count; otherwise take the underlying queuing lock and set depth to 1.
// NOTE: the KMP_MB() fences order depth_locked/owner_id updates relative to
// the lock acquisition -- do not reorder these statements.
void
__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
}

static void
__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_queuing_lock( lck, gtid );
}

// Nested test: returns the new nesting depth on success, 0 on failure
// (matching the omp_test_nest_lock() contract).
int
__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

static int
__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
  kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_queuing_lock( lck, gtid );
}

// Nested release: only the outermost release (depth reaches 0) actually
// releases the underlying queuing lock.
void
__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_queuing_lock( lck, gtid );
    }
}

static void
__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_queuing_lock( lck, gtid );
}

void
__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
{
    __kmp_init_queuing_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
{
    __kmp_init_nested_queuing_lock( lck );
}

void
__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
{
    __kmp_destroy_queuing_lock( lck );
    lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_queuing_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
1788 // 1789 1790 static int 1791 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck ) 1792 { 1793 return lck == lck->lk.initialized; 1794 } 1795 1796 static const ident_t * 1797 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck ) 1798 { 1799 return lck->lk.location; 1800 } 1801 1802 static void 1803 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc ) 1804 { 1805 lck->lk.location = loc; 1806 } 1807 1808 static kmp_lock_flags_t 1809 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck ) 1810 { 1811 return lck->lk.flags; 1812 } 1813 1814 static void 1815 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags ) 1816 { 1817 lck->lk.flags = flags; 1818 } 1819 1820 #if KMP_USE_ADAPTIVE_LOCKS 1821 1822 /* 1823 RTM Adaptive locks 1824 */ 1825 1826 // TODO: Use the header for intrinsics below with the compiler 13.0 1827 //#include <immintrin.h> 1828 1829 // Values from the status register after failed speculation. 1830 #define _XBEGIN_STARTED (~0u) 1831 #define _XABORT_EXPLICIT (1 << 0) 1832 #define _XABORT_RETRY (1 << 1) 1833 #define _XABORT_CONFLICT (1 << 2) 1834 #define _XABORT_CAPACITY (1 << 3) 1835 #define _XABORT_DEBUG (1 << 4) 1836 #define _XABORT_NESTED (1 << 5) 1837 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1838 1839 // Aborts for which it's worth trying again immediately 1840 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1841 1842 #define STRINGIZE_INTERNAL(arg) #arg 1843 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1844 1845 // Access to RTM instructions 1846 1847 /* 1848 A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. 1849 This is the same definition as the compiler intrinsic that will be supported at some point. 
*/
// NOTE: the XBEGIN/XEND/XABORT opcodes are hand-emitted as raw bytes because
// the toolchains targeted here predate compiler support for the intrinsics.
static __inline int _xbegin()
{
    int res = -1;

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86_64
    _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
    }
#else /* IA32 */
    _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
    }
#endif // KMP_ARCH_X86_64
#else
    /* Note that %eax must be noted as killed (clobbered), because
     * the XSR is returned in %eax(%rax) on abort.  Other register
     * values are restored, so don't need to be killed.
     *
     * We must also mark 'res' as an input and an output, since otherwise
     * 'res=-1' may be dropped as being dead, whereas we do need the
     * assignment on the successful (i.e., non-abort) path.
     */
    __asm__ volatile ("1: .byte  0xC7; .byte 0xF8;\n"
                      "   .long  1f-1b-6\n"
                      "    jmp   2f\n"
                      "1:  movl  %%eax,%0\n"
                      "2:"
                      :"+r"(res)::"memory","%eax");
#endif // KMP_OS_WINDOWS
    return res;
}

/*
    Transaction end
*/
static __inline void _xend()
{
#if KMP_OS_WINDOWS
    __asm  {
        _emit 0x0f
        _emit 0x01
        _emit 0xd5
    }
#else
    __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
#endif
}

/*
    This is a macro, the argument must be a single byte constant which
    can be evaluated by the inline assembler, since it is emitted as a
    byte into the assembly code.
*/
#if KMP_OS_WINDOWS
#define _xabort(ARG)                            \
    _asm _emit 0xc6                             \
    _asm _emit 0xf8                             \
    _asm _emit ARG
#else
#define _xabort(ARG) \
    __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
#endif

//
//    Statistics is collected for testing purpose
//
#if KMP_DEBUG_ADAPTIVE_LOCKS

// We accumulate speculative lock statistics when the lock is destroyed.
// We keep locks that haven't been destroyed in the liveLocks list
// so that we can grab their statistics too.
static kmp_adaptive_lock_statistics_t destroyedStats;

// To hold the list of live locks.
static kmp_adaptive_lock_info_t liveLocks;

// A lock so we can safely update the list of locks.
static kmp_bootstrap_lock_t chain_lock;

// Initialize the list of stats: liveLocks becomes the sentinel node of a
// circular doubly-linked list of per-lock statistics records.
void
__kmp_init_speculative_stats()
{
    kmp_adaptive_lock_info_t *lck = &liveLocks;

    memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
    lck->stats.next = lck;
    lck->stats.prev = lck;

    KMP_ASSERT( lck->stats.next->stats.prev == lck );
    KMP_ASSERT( lck->stats.prev->stats.next == lck );

    __kmp_init_bootstrap_lock( &chain_lock );

}

// Insert the lock into the circular list
static void
__kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
{
    __kmp_acquire_bootstrap_lock( &chain_lock );

    lck->stats.next = liveLocks.stats.next;
    lck->stats.prev = &liveLocks;

    liveLocks.stats.next = lck;
    lck->stats.next->stats.prev  = lck;

    KMP_ASSERT( lck->stats.next->stats.prev == lck );
    KMP_ASSERT( lck->stats.prev->stats.next == lck );

    __kmp_release_bootstrap_lock( &chain_lock );
}

// Unlink the lock from the circular list. Caller holds chain_lock
// (see __kmp_accumulate_speculative_stats).
static void
__kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
{
    KMP_ASSERT( lck->stats.next->stats.prev == lck );
    KMP_ASSERT( lck->stats.prev->stats.next
== lck ); 1987 1988 kmp_adaptive_lock_info_t * n = lck->stats.next; 1989 kmp_adaptive_lock_info_t * p = lck->stats.prev; 1990 1991 n->stats.prev = p; 1992 p->stats.next = n; 1993 } 1994 1995 static void 1996 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) 1997 { 1998 memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); 1999 __kmp_remember_lock( lck ); 2000 } 2001 2002 static void 2003 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) 2004 { 2005 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 2006 2007 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 2008 t->successfulSpeculations += s->successfulSpeculations; 2009 t->hardFailedSpeculations += s->hardFailedSpeculations; 2010 t->softFailedSpeculations += s->softFailedSpeculations; 2011 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 2012 t->lemmingYields += s->lemmingYields; 2013 } 2014 2015 static void 2016 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck) 2017 { 2018 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 2019 2020 __kmp_acquire_bootstrap_lock( &chain_lock ); 2021 2022 __kmp_add_stats( &destroyedStats, lck ); 2023 __kmp_forget_lock( lck ); 2024 2025 __kmp_release_bootstrap_lock( &chain_lock ); 2026 } 2027 2028 static float 2029 percent (kmp_uint32 count, kmp_uint32 total) 2030 { 2031 return (total == 0) ? 0.0: (100.0 * count)/total; 2032 } 2033 2034 static 2035 FILE * __kmp_open_stats_file() 2036 { 2037 if (strcmp (__kmp_speculative_statsfile, "-") == 0) 2038 return stdout; 2039 2040 size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20; 2041 char buffer[buffLen]; 2042 snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile, 2043 (kmp_int32)getpid()); 2044 FILE * result = fopen(&buffer[0], "w"); 2045 2046 // Maybe we should issue a warning here... 2047 return result ? 
result : stdout; 2048 } 2049 2050 void 2051 __kmp_print_speculative_stats() 2052 { 2053 if (__kmp_user_lock_kind != lk_adaptive) 2054 return; 2055 2056 FILE * statsFile = __kmp_open_stats_file(); 2057 2058 kmp_adaptive_lock_statistics_t total = destroyedStats; 2059 kmp_adaptive_lock_info_t *lck; 2060 2061 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 2062 __kmp_add_stats( &total, lck ); 2063 } 2064 kmp_adaptive_lock_statistics_t *t = &total; 2065 kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; 2066 kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + 2067 t->softFailedSpeculations; 2068 2069 fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); 2070 fprintf ( statsFile, " Lock parameters: \n" 2071 " max_soft_retries : %10d\n" 2072 " max_badness : %10d\n", 2073 __kmp_adaptive_backoff_params.max_soft_retries, 2074 __kmp_adaptive_backoff_params.max_badness); 2075 fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); 2076 fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); 2077 fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", 2078 t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); 2079 fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 2080 t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); 2081 fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); 2082 2083 fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); 2084 fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", 2085 t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); 2086 fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", 2087 t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); 2088 fprintf( statsFile, " Hard 
failures : %10d (%5.1f%%)\n", 2089 t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); 2090 2091 if (statsFile != stdout) 2092 fclose( statsFile ); 2093 } 2094 2095 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) 2096 #else 2097 # define KMP_INC_STAT(lck,stat) 2098 2099 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2100 2101 static inline bool 2102 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) 2103 { 2104 // It is enough to check that the head_id is zero. 2105 // We don't also need to check the tail. 2106 bool res = lck->lk.head_id == 0; 2107 2108 // We need a fence here, since we must ensure that no memory operations 2109 // from later in this thread float above that read. 2110 #if KMP_COMPILER_ICC 2111 _mm_mfence(); 2112 #else 2113 __sync_synchronize(); 2114 #endif 2115 2116 return res; 2117 } 2118 2119 // Functions for manipulating the badness 2120 static __inline void 2121 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) 2122 { 2123 // Reset the badness to zero so we eagerly try to speculate again 2124 lck->lk.adaptive.badness = 0; 2125 KMP_INC_STAT(lck,successfulSpeculations); 2126 } 2127 2128 // Create a bit mask with one more set bit. 2129 static __inline void 2130 __kmp_step_badness( kmp_adaptive_lock_t *lck ) 2131 { 2132 kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; 2133 if ( newBadness > lck->lk.adaptive.max_badness) { 2134 return; 2135 } else { 2136 lck->lk.adaptive.badness = newBadness; 2137 } 2138 } 2139 2140 // Check whether speculation should be attempted. 2141 static __inline int 2142 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2143 { 2144 kmp_uint32 badness = lck->lk.adaptive.badness; 2145 kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; 2146 int res = (attempts & badness) == 0; 2147 return res; 2148 } 2149 2150 // Attempt to acquire only the speculative lock. 2151 // Does not back off to the non-speculative lock. 
2152 // 2153 static int 2154 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) 2155 { 2156 int retries = lck->lk.adaptive.max_soft_retries; 2157 2158 // We don't explicitly count the start of speculation, rather we record 2159 // the results (success, hard fail, soft fail). The sum of all of those 2160 // is the total number of times we started speculation since all 2161 // speculations must end one of those ways. 2162 do 2163 { 2164 kmp_uint32 status = _xbegin(); 2165 // Switch this in to disable actual speculation but exercise 2166 // at least some of the rest of the code. Useful for debugging... 2167 // kmp_uint32 status = _XABORT_NESTED; 2168 2169 if (status == _XBEGIN_STARTED ) 2170 { /* We have successfully started speculation 2171 * Check that no-one acquired the lock for real between when we last looked 2172 * and now. This also gets the lock cache line into our read-set, 2173 * which we need so that we'll abort if anyone later claims it for real. 2174 */ 2175 if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) 2176 { 2177 // Lock is now visibly acquired, so someone beat us to it. 2178 // Abort the transaction so we'll restart from _xbegin with the 2179 // failure status. 2180 _xabort(0x01) 2181 KMP_ASSERT2( 0, "should not get here" ); 2182 } 2183 return 1; // Lock has been acquired (speculatively) 2184 } else { 2185 // We have aborted, update the statistics 2186 if ( status & SOFT_ABORT_MASK) 2187 { 2188 KMP_INC_STAT(lck,softFailedSpeculations); 2189 // and loop round to retry. 2190 } 2191 else 2192 { 2193 KMP_INC_STAT(lck,hardFailedSpeculations); 2194 // Give up if we had a hard failure. 2195 break; 2196 } 2197 } 2198 } while( retries-- ); // Loop while we have retries, and didn't fail hard. 2199 2200 // Either we had a hard failure or we didn't succeed softly after 2201 // the full set of attempts, so back off the badness. 
2202 __kmp_step_badness( lck ); 2203 return 0; 2204 } 2205 2206 // Attempt to acquire the speculative lock, or back off to the non-speculative one 2207 // if the speculative lock cannot be acquired. 2208 // We can succeed speculatively, non-speculatively, or fail. 2209 static int 2210 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2211 { 2212 // First try to acquire the lock speculatively 2213 if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) 2214 return 1; 2215 2216 // Speculative acquisition failed, so try to acquire it non-speculatively. 2217 // Count the non-speculative acquire attempt 2218 lck->lk.adaptive.acquire_attempts++; 2219 2220 // Use base, non-speculative lock. 2221 if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) 2222 { 2223 KMP_INC_STAT(lck,nonSpeculativeAcquires); 2224 return 1; // Lock is acquired (non-speculatively) 2225 } 2226 else 2227 { 2228 return 0; // Failed to acquire the lock, it's already visibly locked. 2229 } 2230 } 2231 2232 static int 2233 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) 2234 { 2235 char const * const func = "omp_test_lock"; 2236 if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { 2237 KMP_FATAL( LockIsUninitialized, func ); 2238 } 2239 2240 int retval = __kmp_test_adaptive_lock( lck, gtid ); 2241 2242 if ( retval ) { 2243 lck->lk.qlk.owner_id = gtid + 1; 2244 } 2245 return retval; 2246 } 2247 2248 // Block until we can acquire a speculative, adaptive lock. 2249 // We check whether we should be trying to speculate. 2250 // If we should be, we check the real lock to see if it is free, 2251 // and, if not, pause without attempting to acquire it until it is. 2252 // Then we try the speculative acquire. 
// This means that although we suffer from lemmings a little (because
// none of us can acquire the lock speculatively until the queue of
// threads waiting has cleared), we don't get into a state where we can
// never acquire the lock speculatively (because we force the queue to
// clear by preventing new arrivals from entering the queue).
// This does mean that when we're trying to break lemmings, the lock
// is no longer fair. However OpenMP makes no guarantee that its
// locks are fair, so this isn't a real problem.
static void
__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
{
    if ( __kmp_should_speculate( lck, gtid ) )
    {
        if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
        {
            if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
                return;
            // We tried speculation and failed, so give up.
        }
        else
        {
            // We can't try speculation until the lock is free, so we
            // pause here (without suspending on the queueing lock)
            // to allow it to drain, then try again.
            // All other threads will also see the same result for
            // shouldSpeculate, so will be doing the same if they
            // try to claim the lock from now on.
            while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
            {
                KMP_INC_STAT(lck,lemmingYields);
                __kmp_yield (TRUE);
            }

            if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
                return;
        }
    }

    // Speculative acquisition failed, so acquire it non-speculatively.
    // Count the non-speculative acquire attempt
    lck->lk.adaptive.acquire_attempts++;

    __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
    // We have acquired the base lock, so count that.
    KMP_INC_STAT(lck,nonSpeculativeAcquires );
}

// Consistency-checked acquire: validates initialization and rejects a
// recursive acquire by the current owner, then records the owner id.
static void
__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_adaptive_lock( lck, gtid );

    lck->lk.qlk.owner_id = gtid + 1;
}

// Release the adaptive lock.  Whether we are inside a speculative region
// is inferred from the visible state of the underlying queuing lock.
static void
__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
{
    if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
    {   // If the lock doesn't look claimed we must be speculating.
        // (Or the user's code is buggy and they're releasing without locking;
        // if we had XTEST we'd be able to check that case...)
        _xend();        // Exit speculation
        __kmp_update_badness_after_success( lck );
    }
    else
    {   // Since the lock *is* visibly locked we're not speculating,
        // so should use the underlying lock's release scheme.
        __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
    }
}

// Consistency-checked release: validates initialization and ownership
// before clearing the owner id and releasing.
static void
__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.qlk.owner_id = 0;
    __kmp_release_adaptive_lock( lck, gtid );
}

// Initialize the underlying queuing lock plus the adaptive (speculation
// back-off) state, taking the retry/badness limits from the global
// __kmp_adaptive_backoff_params.
static void
__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
{
    __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
    lck->lk.adaptive.badness = 0;
    lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
    lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
    lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
#if KMP_DEBUG_ADAPTIVE_LOCKS
    __kmp_zero_speculative_stats( &lck->lk.adaptive );
#endif
    KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
}

// Checked init is identical to the unchecked one: there is nothing to
// validate before first initialization.
static void
__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
{
    __kmp_init_adaptive_lock( lck );
}

// Destroy the lock.  Only the underlying queuing lock needs teardown;
// speculation state holds no resources.
static void
__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
{
#if KMP_DEBUG_ADAPTIVE_LOCKS
    __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
#endif
    __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
    // Nothing needed for the speculative part.
}

// Consistency-checked destroy: refuse to destroy an uninitialized or
// still-owned lock.
static void
__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_adaptive_lock( lck );
}


#endif // KMP_USE_ADAPTIVE_LOCKS


/* ------------------------------------------------------------------------ */
/* DRDPA ticket locks                                                       */
/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area        */

// Owner is stored 1-based in owner_id; -1 means "no owner".
static kmp_int32
__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
{
    return TCR_4( lck->lk.owner_id ) - 1;
}

// depth_locked is -1 for simple locks, >= 0 for nestable ones.
static inline bool
__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
{
    return lck->lk.depth_locked != -1;
}

// Core DRDPA acquire: take a ticket, spin on our slot of the distributed
// polling area until our ticket is served, then (inside the critical
// section) garbage-collect and possibly resize the polling area.
__forceinline static void
__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
    kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
    volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
      = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      TCR_PTR(lck->lk.polls);                           // volatile load

#ifdef USE_LOCK_PROFILE
    if (TCR_8(polls[ticket & mask].poll) != ticket)
        __kmp_printf("LOCK CONTENTION: %p\n", lck);
    /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

    //
    // Now spin-wait, but reload the polls pointer and mask, in case the
    // polling area has been reconfigured.  Unless it is reconfigured, the
    // reloads stay in L1 cache and are cheap.
    //
    // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2436 // 2437 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2438 // and poll to be re-read every spin iteration. 2439 // 2440 kmp_uint32 spins; 2441 2442 KMP_FSYNC_PREPARE(lck); 2443 KMP_INIT_YIELD(spins); 2444 while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load 2445 // If we are oversubscribed, 2446 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2447 // CPU Pause is in the macros for yield. 2448 // 2449 KMP_YIELD(TCR_4(__kmp_nth) 2450 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2451 KMP_YIELD_SPIN(spins); 2452 2453 // Re-read the mask and the poll pointer from the lock structure. 2454 // 2455 // Make certain that "mask" is read before "polls" !!! 2456 // 2457 // If another thread picks reconfigures the polling area and updates 2458 // their values, and we get the new value of mask and the old polls 2459 // pointer, we could access memory beyond the end of the old polling 2460 // area. 2461 // 2462 mask = TCR_8(lck->lk.mask); // volatile load 2463 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2464 TCR_PTR(lck->lk.polls); // volatile load 2465 } 2466 2467 // 2468 // Critical section starts here 2469 // 2470 KMP_FSYNC_ACQUIRED(lck); 2471 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2472 ticket, lck)); 2473 lck->lk.now_serving = ticket; // non-volatile store 2474 2475 // 2476 // Deallocate a garbage polling area if we know that we are the last 2477 // thread that could possibly access it. 2478 // 2479 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2480 // ticket. 2481 // 2482 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2483 __kmp_free((void *)lck->lk.old_polls); 2484 lck->lk.old_polls = NULL; 2485 lck->lk.cleanup_ticket = 0; 2486 } 2487 2488 // 2489 // Check to see if we should reconfigure the polling area. 
2490 // If there is still a garbage polling area to be deallocated from a 2491 // previous reconfiguration, let a later thread reconfigure it. 2492 // 2493 if (lck->lk.old_polls == NULL) { 2494 bool reconfigure = false; 2495 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2496 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2497 2498 if (TCR_4(__kmp_nth) 2499 > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2500 // 2501 // We are in oversubscription mode. Contract the polling area 2502 // down to a single location, if that hasn't been done already. 2503 // 2504 if (num_polls > 1) { 2505 reconfigure = true; 2506 num_polls = TCR_4(lck->lk.num_polls); 2507 mask = 0; 2508 num_polls = 1; 2509 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2510 __kmp_allocate(num_polls * sizeof(*polls)); 2511 polls[0].poll = ticket; 2512 } 2513 } 2514 else { 2515 // 2516 // We are in under/fully subscribed mode. Check the number of 2517 // threads waiting on the lock. The size of the polling area 2518 // should be at least the number of threads waiting. 2519 // 2520 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2521 if (num_waiting > num_polls) { 2522 kmp_uint32 old_num_polls = num_polls; 2523 reconfigure = true; 2524 do { 2525 mask = (mask << 1) | 1; 2526 num_polls *= 2; 2527 } while (num_polls <= num_waiting); 2528 2529 // 2530 // Allocate the new polling area, and copy the relevant portion 2531 // of the old polling area to the new area. __kmp_allocate() 2532 // zeroes the memory it allocates, and most of the old area is 2533 // just zero padding, so we only copy the release counters. 
2534 // 2535 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2536 __kmp_allocate(num_polls * sizeof(*polls)); 2537 kmp_uint32 i; 2538 for (i = 0; i < old_num_polls; i++) { 2539 polls[i].poll = old_polls[i].poll; 2540 } 2541 } 2542 } 2543 2544 if (reconfigure) { 2545 // 2546 // Now write the updated fields back to the lock structure. 2547 // 2548 // Make certain that "polls" is written before "mask" !!! 2549 // 2550 // If another thread picks up the new value of mask and the old 2551 // polls pointer , it could access memory beyond the end of the 2552 // old polling area. 2553 // 2554 // On x86, we need memory fences. 2555 // 2556 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", 2557 ticket, lck, num_polls)); 2558 2559 lck->lk.old_polls = old_polls; // non-volatile store 2560 lck->lk.polls = polls; // volatile store 2561 2562 KMP_MB(); 2563 2564 lck->lk.num_polls = num_polls; // non-volatile store 2565 lck->lk.mask = mask; // volatile store 2566 2567 KMP_MB(); 2568 2569 // 2570 // Only after the new polling area and mask have been flushed 2571 // to main memory can we update the cleanup ticket field. 
            //
            // volatile load / non-volatile store
            //
            lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
        }
    }
}

// Public DRDPA acquire: just the timed template (no timing variant here).
void
__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
}

// Consistency-checked acquire: validates initialization, lock kind, and
// rejects a recursive acquire; records the owner id (1-based) on success.
static void
__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
        KMP_FATAL( LockIsAlreadyOwned, func );
    }

    __kmp_acquire_drdpa_lock( lck, gtid );

    lck->lk.owner_id = gtid + 1;
}

// Non-blocking DRDPA acquire: succeeds only if the next ticket is already
// being served (i.e. no one holds or waits for the lock) and we win the
// CAS on next_ticket.  Returns TRUE on success, FALSE otherwise.
int
__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    //
    // First get a ticket, then read the polls pointer and the mask.
    // The polls pointer must be read before the mask!!! (See above)
    //
    kmp_uint64 ticket = TCR_8(lck->lk.next_ticket);     // volatile load
    volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
      = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      TCR_PTR(lck->lk.polls);                           // volatile load
    kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
    if (TCR_8(polls[ticket & mask].poll) == ticket) {
        kmp_uint64 next_ticket = ticket + 1;
        if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
          ticket, next_ticket)) {
            KMP_FSYNC_ACQUIRED(lck);
            KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
               ticket, lck));
            lck->lk.now_serving = ticket;               // non-volatile store

            //
            // Since no threads are waiting, there is no possibility that
            // we would want to reconfigure the polling area.  We might
            // have the cleanup ticket value (which says that it is now
            // safe to deallocate old_polls), but we'll let a later thread
            // which calls __kmp_acquire_lock do that - this routine
            // isn't supposed to block, and we would risk blocks if we
            // called __kmp_free() to do the deallocation.
            //
            return TRUE;
        }
    }
    return FALSE;
}

// Consistency-checked test: validates initialization and lock kind, and
// records the owner id if the lock was acquired.
static int
__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }

    int retval = __kmp_test_drdpa_lock( lck, gtid );

    if ( retval ) {
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

// Release the DRDPA lock: publish the next ticket into its polling slot so
// the thread waiting on that slot can proceed.
void
__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    //
    // Read the ticket value from the lock data struct, then the polls
    // pointer and the mask.  The polls pointer must be read before the
    // mask!!! (See above)
    //
    kmp_uint64 ticket = lck->lk.now_serving + 1;        // non-volatile load
    volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
      = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      TCR_PTR(lck->lk.polls);                           // volatile load
    kmp_uint64 mask = TCR_8(lck->lk.mask);              // volatile load
    KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
       ticket - 1, lck));
    KMP_FSYNC_RELEASING(lck);
    KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
}

// Consistency-checked release: validates initialization, lock kind, and
// ownership before clearing the owner id and releasing.
static void
__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
      && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    lck->lk.owner_id = 0;
    __kmp_release_drdpa_lock( lck, gtid );
}

// Initialize a DRDPA lock with a single-slot polling area; the area grows
// on demand in the acquire path as waiters accumulate.
void
__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
{
    lck->lk.location = NULL;
    lck->lk.mask = 0;
    lck->lk.num_polls = 1;
    lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
      __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
    lck->lk.cleanup_ticket = 0;
    lck->lk.old_polls = NULL;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;      // no thread owns the lock.
    lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
    // Publishing the self-pointer last marks the lock as fully initialized.
    lck->lk.initialized = lck;

    KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
}

// Checked init is identical to the unchecked one: there is nothing to
// validate before first initialization.
static void
__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
{
    __kmp_init_drdpa_lock( lck );
}

// Tear down a DRDPA lock, freeing both the live and any garbage polling
// area, and clearing all fields back to the uninitialized state.
void
__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
{
    lck->lk.initialized = NULL;
    lck->lk.location = NULL;
    if (lck->lk.polls != NULL) {
        __kmp_free((void *)lck->lk.polls);
        lck->lk.polls = NULL;
    }
    if (lck->lk.old_polls != NULL) {
        __kmp_free((void *)lck->lk.old_polls);
        lck->lk.old_polls = NULL;
    }
    lck->lk.mask = 0;
    lck->lk.num_polls = 0;
    lck->lk.cleanup_ticket = 0;
    lck->lk.next_ticket = 0;
    lck->lk.now_serving = 0;
    lck->lk.owner_id = 0;
    lck->lk.depth_locked = -1;
}

// Consistency-checked destroy: refuse to destroy an uninitialized,
// nestable-misused, or still-owned lock.
static void
__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
{
    char const * const func = "omp_destroy_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockNestableUsedAsSimple, func );
    }
    if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_drdpa_lock( lck );
}


//
// nested drdpa ticket locks
//

// Nested acquire: bump the depth if we already own the lock, otherwise do
// a full acquire and start the depth at 1.
void
__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
        lck->lk.depth_locked += 1;
    }
    else {
        __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
        KMP_MB();
        lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
}

// Consistency-checked nested acquire: validates initialization and that
// the lock really is the nestable kind.
static void
__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_set_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    __kmp_acquire_nested_drdpa_lock( lck, gtid );
}

// Nested test: returns the new nesting depth on success, 0 on failure.
int
__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    int retval;

    KMP_DEBUG_ASSERT( gtid >= 0 );

    if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
        retval = ++lck->lk.depth_locked;
    }
    else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
        retval = 0;
    }
    else {
        KMP_MB();
        retval = lck->lk.depth_locked = 1;
        KMP_MB();
        lck->lk.owner_id = gtid + 1;
    }
    return retval;
}

// Consistency-checked nested test.
static int
__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_test_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    return __kmp_test_nested_drdpa_lock( lck, gtid );
}

// Nested release: decrement the depth and only release the underlying
// lock when the depth reaches zero.
void
__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( gtid >= 0 );

    KMP_MB();
    if ( --(lck->lk.depth_locked) == 0 ) {
        KMP_MB();
        lck->lk.owner_id = 0;
        __kmp_release_drdpa_lock( lck, gtid );
    }
}

// Consistency-checked nested release: validates initialization, kind,
// and ownership before releasing.
static void
__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
{
    char const * const func = "omp_unset_nest_lock";
    KMP_MB();  /* in case another processor initialized lock */
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
        KMP_FATAL( LockUnsettingFree, func );
    }
    if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
        KMP_FATAL( LockUnsettingSetByAnother, func );
    }
    __kmp_release_nested_drdpa_lock( lck, gtid );
}

// Initialize a nestable DRDPA lock: same as the simple lock, but with a
// non-negative depth to mark it as nestable.
void
__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
{
    __kmp_init_drdpa_lock( lck );
    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
{
    __kmp_init_nested_drdpa_lock( lck );
}

// Destroy a nestable DRDPA lock.
void
__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
{
    __kmp_destroy_drdpa_lock( lck );
    lck->lk.depth_locked = 0;
}

// Consistency-checked destroy for the nestable variant.
static void
__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
{
    char const * const func = "omp_destroy_nest_lock";
    if ( lck->lk.initialized != lck ) {
        KMP_FATAL( LockIsUninitialized, func );
    }
    if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
        KMP_FATAL( LockSimpleUsedAsNestable, func );
    }
    if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
        KMP_FATAL( LockStillOwned, func );
    }
    __kmp_destroy_nested_drdpa_lock( lck );
}


//
// access functions to fields which don't exist for all lock kinds.
//

// A DRDPA lock is initialized iff its self-pointer has been published.
static int
__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
{
    return lck == lck->lk.initialized;
}

// Source-location bookkeeping (used for error reporting).
static const ident_t *
__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
{
    return lck->lk.location;
}

static void
__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
{
    lck->lk.location = loc;
}

// Lock-flags bookkeeping (e.g. marking critical-section locks).
static kmp_lock_flags_t
__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
{
    return lck->lk.flags;
}

static void
__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
{
    lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* user locks
 *
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined.
 */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;

int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;

int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;

// Bind the user-lock function-pointer table above to the concrete lock
// implementation selected by user_lock_kind.  Accessors that a lock kind
// does not support are set to NULL.
void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
{
    switch ( user_lock_kind ) {
        case lk_default:
        default:
        KMP_ASSERT( 0 );
        // Note: deliberately falls through to lk_tas if the assert does
        // not terminate the process.

        case lk_tas: {
            __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
            __kmp_user_lock_size = sizeof( kmp_tas_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_tas_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
            }
            else {
                KMP_BIND_USER_LOCK(tas);
                KMP_BIND_NESTED_USER_LOCK(tas);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_tas_lock );

            // TAS locks carry no location/flags/init metadata.
            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)

        case lk_futex: {
            __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
            __kmp_user_lock_size = sizeof( kmp_futex_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_futex_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
            }
            else {
                KMP_BIND_USER_LOCK(futex);
                KMP_BIND_NESTED_USER_LOCK(futex);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_futex_lock );

            // Futex locks carry no location/flags/init metadata.
            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
        }
        break;

#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)

        case lk_ticket: {
            __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
            __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
            }
            else {
                KMP_BIND_USER_LOCK(ticket);
                KMP_BIND_NESTED_USER_LOCK(ticket);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_ticket_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_ticket_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_ticket_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_ticket_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_ticket_lock_flags );
        }
        break;

        case lk_queuing: {
            __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
            __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
            }
            else {
                KMP_BIND_USER_LOCK(queuing);
                KMP_BIND_NESTED_USER_LOCK(queuing);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_queuing_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_queuing_lock_flags );
        }
        break;

#if KMP_USE_ADAPTIVE_LOCKS
        case lk_adaptive: {
            __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
            __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );

            // The adaptive lock embeds a queuing lock, so owner/metadata
            // accessors come from the queuing implementation.
            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
            }
            else {
                KMP_BIND_USER_LOCK(adaptive);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_adaptive_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_queuing_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_queuing_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_queuing_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_queuing_lock_flags );

        }
        break;
#endif // KMP_USE_ADAPTIVE_LOCKS

        case lk_drdpa: {
            __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
            __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );

            __kmp_get_user_lock_owner_ =
              ( kmp_int32 ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_owner );

            if ( __kmp_env_consistency_check ) {
                KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
                KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
            }
            else {
                KMP_BIND_USER_LOCK(drdpa);
                KMP_BIND_NESTED_USER_LOCK(drdpa);
            }

            __kmp_destroy_user_lock_ =
              ( void ( * )( kmp_user_lock_p ) )
              ( &__kmp_destroy_drdpa_lock );

            __kmp_is_user_lock_initialized_ =
              ( int ( * )( kmp_user_lock_p ) )
              ( &__kmp_is_drdpa_lock_initialized );

            __kmp_get_user_lock_location_ =
              ( const ident_t * ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_location );

            __kmp_set_user_lock_location_ =
              ( void ( * )( kmp_user_lock_p, const ident_t * ) )
              ( &__kmp_set_drdpa_lock_location );

            __kmp_get_user_lock_flags_ =
              ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
              ( &__kmp_get_drdpa_lock_flags );

            __kmp_set_user_lock_flags_ =
              ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
              ( &__kmp_set_drdpa_lock_flags );
        }
        break;
    }
}


// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks* __kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

// Append lck to the global user-lock table (growing it if full) and return
// its index.  Caller must hold __kmp_global_lock.
static kmp_lock_index_t
__kmp_lock_table_insert( kmp_user_lock_p lck )
{
    // Assume that kmp_global_lock is held upon entry/exit.
    kmp_lock_index_t index;
    if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
        kmp_lock_index_t size;
        kmp_user_lock_p *table;
        kmp_lock_index_t i;  // NOTE(review): declared but unused in this scope
        // Reallocate lock table.
        if ( __kmp_user_lock_table.allocated == 0 ) {
            size = 1024;
        }
        else {
            size = __kmp_user_lock_table.allocated * 2;
        }
        table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
        // Entry 0 is reserved for chaining old tables, so copy entries 1..used-1.
        memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
        table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
        // We cannot free the previous table now, since it may be in use by other
        // threads. So save the pointer to the previous table in the first element of the
All the tables will be organized into a list, and could be freed when 3266 // library shutting down. 3267 __kmp_user_lock_table.table = table; 3268 __kmp_user_lock_table.allocated = size; 3269 } 3270 KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated ); 3271 index = __kmp_user_lock_table.used; 3272 __kmp_user_lock_table.table[ index ] = lck; 3273 ++ __kmp_user_lock_table.used; 3274 return index; 3275 } 3276 3277 static kmp_user_lock_p 3278 __kmp_lock_block_allocate() 3279 { 3280 // Assume that kmp_global_lock is held upon entry/exit. 3281 static int last_index = 0; 3282 if ( ( last_index >= __kmp_num_locks_in_block ) 3283 || ( __kmp_lock_blocks == NULL ) ) { 3284 // Restart the index. 3285 last_index = 0; 3286 // Need to allocate a new block. 3287 KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); 3288 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; 3289 char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) ); 3290 // Set up the new block. 3291 kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]); 3292 new_block->next_block = __kmp_lock_blocks; 3293 new_block->locks = (void *)buffer; 3294 // Publish the new block. 3295 KMP_MB(); 3296 __kmp_lock_blocks = new_block; 3297 } 3298 kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) ) 3299 [ last_index * __kmp_user_lock_size ] ) ); 3300 last_index++; 3301 return ret; 3302 } 3303 3304 // 3305 // Get memory for a lock. It may be freshly allocated memory or reused memory 3306 // from lock pool. 3307 // 3308 kmp_user_lock_p 3309 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, 3310 kmp_lock_flags_t flags ) 3311 { 3312 kmp_user_lock_p lck; 3313 kmp_lock_index_t index; 3314 KMP_DEBUG_ASSERT( user_lock ); 3315 3316 __kmp_acquire_lock( &__kmp_global_lock, gtid ); 3317 3318 if ( __kmp_lock_pool == NULL ) { 3319 // Lock pool is empty. Allocate new memory. 
3320 if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point. 3321 lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size ); 3322 } 3323 else { 3324 lck = __kmp_lock_block_allocate(); 3325 } 3326 3327 // Insert lock in the table so that it can be freed in __kmp_cleanup, 3328 // and debugger has info on all allocated locks. 3329 index = __kmp_lock_table_insert( lck ); 3330 } 3331 else { 3332 // Pick up lock from pool. 3333 lck = __kmp_lock_pool; 3334 index = __kmp_lock_pool->pool.index; 3335 __kmp_lock_pool = __kmp_lock_pool->pool.next; 3336 } 3337 3338 // 3339 // We could potentially differentiate between nested and regular locks 3340 // here, and do the lock table lookup for regular locks only. 3341 // 3342 if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { 3343 * ( (kmp_lock_index_t *) user_lock ) = index; 3344 } 3345 else { 3346 * ( (kmp_user_lock_p *) user_lock ) = lck; 3347 } 3348 3349 // mark the lock if it is critical section lock. 3350 __kmp_set_user_lock_flags( lck, flags ); 3351 3352 __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper 3353 3354 return lck; 3355 } 3356 3357 // Put lock's memory to pool for reusing. 
//
// Return lck to the global free pool (under __kmp_global_lock).  When the
// user handle holds a table index rather than a pointer, the index is saved
// in pool.index so the lock can be reissued with the same table slot.
//
void
__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
{
    kmp_lock_pool_t * lock_pool;   // NOTE(review): unused local, kept as-is

    KMP_DEBUG_ASSERT( user_lock != NULL );
    KMP_DEBUG_ASSERT( lck != NULL );

    __kmp_acquire_lock( & __kmp_global_lock, gtid );

    // Push onto the LIFO free list.
    lck->pool.next = __kmp_lock_pool;
    __kmp_lock_pool = lck;
    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
        // NOTE(review): this bound uses "<= used" while __kmp_lookup_user_lock
        // checks "< used"; valid indices are 1..used-1, so "<" looks like the
        // tighter, intended bound -- confirm before changing.
        KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
        lck->pool.index = index;
    }

    __kmp_release_lock( & __kmp_global_lock, gtid );
}

//
// Translate a user lock handle (*user_lock) into the internal lock pointer.
// The handle is a table index when omp_lock_t is smaller than a pointer,
// otherwise it is the pointer itself.  With KMP_CONSISTENCY_CHECK enabled,
// reports a fatal "uninitialized lock" error (attributed to func) for NULL
// or out-of-range handles.
//
kmp_user_lock_p
__kmp_lookup_user_lock( void **user_lock, char const *func )
{
    kmp_user_lock_p lck = NULL;

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
        kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
        if ( __kmp_env_consistency_check ) {
            if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
                KMP_FATAL( LockIsUninitialized, func );
            }
        }
        KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
        KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
        lck = __kmp_user_lock_table.table[index];
    }
    else {
        lck = *( (kmp_user_lock_p *)user_lock );
    }

    if ( __kmp_env_consistency_check ) {
        if ( lck == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    return lck;
}

//
// Library-shutdown cleanup: destroy and free every lock still in the table,
// then free the chain of lock tables and any block-allocated lock buffers.
//
void
__kmp_cleanup_user_locks( void )
{
    //
    // Reset lock pool. Do not worry about lock in the pool -- we will free
    // them when iterating through lock table (it includes all the locks,
    // dead or alive).
    //
    __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck) \
        ( ( __kmp_get_user_lock_flags_ != NULL ) && \
        ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )

    //
    // Loop through lock table, free all locks.
    //
    // Do not free item [0], it is reserved for lock tables list.
    //
    // FIXME - we are iterating through a list of (pointers to) objects of
    // type union kmp_user_lock, but we have no way of knowing whether the
    // base type is currently "pool" or whatever the global user lock type
    // is.
    //
    // We are relying on the fact that for all of the user lock types
    // (except "tas"), the first field in the lock struct is the "initialized"
    // field, which is set to the address of the lock object itself when
    // the lock is initialized. When the union is of type "pool", the
    // first field is a pointer to the next object in the free list, which
    // will not be the same address as the object itself.
    //
    // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
    // will fail for "pool" objects on the free list. This must happen as
    // the "location" field of real user locks overlaps the "index" field
    // of "pool" objects.
    //
    // It would be better to run through the free list, and remove all "pool"
    // objects from the lock table before executing this loop. However,
    // "pool" objects do not always have their index field set (only on
    // lin_32e), and I don't want to search the lock table for the address
    // of every "pool" object on the free list.
    //
    while ( __kmp_user_lock_table.used > 1 ) {
        const ident *loc;

        //
        // reduce __kmp_user_lock_table.used before freeing the lock,
        // so that state of locks is consistent
        //
        kmp_user_lock_p lck = __kmp_user_lock_table.table[
            --__kmp_user_lock_table.used ];

        if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
            ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
            //
            // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
            // initialized AND it is NOT a critical section (user is not
            // responsible for destroying criticals) AND we know source
            // location to report.
            //
            if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
                ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
                ( loc->psource != NULL ) ) {
                kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
                KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
                    str_loc.line, str_loc.col );
                __kmp_str_loc_free( &str_loc);
            }

#ifdef KMP_DEBUG
            if ( IS_CRITICAL( lck ) ) {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
            }
            else {
                KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
            }
#endif // KMP_DEBUG

            //
            // Cleanup internal lock dynamic resources
            // (for drdpa locks particularly).
            //
            __kmp_destroy_user_lock( lck );
        }

        //
        // Free the lock if block allocation of locks is not used.
        // (Block-allocated locks are freed via __kmp_lock_blocks below.)
        //
        if ( __kmp_lock_blocks == NULL ) {
            __kmp_free( lck );
        }
    }

#undef IS_CRITICAL

    //
    // delete lock table(s).
    //
    kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
    __kmp_user_lock_table.table = NULL;
    __kmp_user_lock_table.allocated = 0;

    while ( table_ptr != NULL ) {
        //
        // In the first element we saved the pointer to the previous
        // (smaller) lock table.
        //
        kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
        __kmp_free( table_ptr );
        table_ptr = next;
    }

    //
    // Free buffers allocated for blocks of locks.
    //
    kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
    __kmp_lock_blocks = NULL;

    while ( block_ptr != NULL ) {
        kmp_block_of_locks_t *next = block_ptr->next_block;
        __kmp_free( block_ptr->locks );
        //
        // *block_ptr itself was allocated at the end of the locks vector.
        //
        block_ptr = next;
    }

    TCW_4(__kmp_init_user_locks, FALSE);
}