1 /* 2 * kmp_lock.cpp -- lock-related functions 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // The LLVM Compiler Infrastructure 8 // 9 // This file is dual licensed under the MIT and the University of Illinois Open 10 // Source Licenses. See LICENSE.txt for details. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include <stddef.h> 15 #include <atomic> 16 17 #include "kmp.h" 18 #include "kmp_i18n.h" 19 #include "kmp_io.h" 20 #include "kmp_itt.h" 21 #include "kmp_lock.h" 22 #include "kmp_wait_release.h" 23 #include "kmp_wrapper_getpid.h" 24 25 #include "tsan_annotations.h" 26 27 #if KMP_USE_FUTEX 28 #include <sys/syscall.h> 29 #include <unistd.h> 30 // We should really include <futex.h>, but that causes compatibility problems on 31 // different Linux* OS distributions that either require that you include (or 32 // break when you try to include) <pci/types.h>. Since all we need is the two 33 // macros below (which are part of the kernel ABI, so can't change) we just 34 // define the constants here and don't include <futex.h> 35 #ifndef FUTEX_WAIT 36 #define FUTEX_WAIT 0 37 #endif 38 #ifndef FUTEX_WAKE 39 #define FUTEX_WAKE 1 40 #endif 41 #endif 42 43 /* Implement spin locks for internal library use. */ 44 /* The algorithm implemented is Lamport's bakery lock [1974]. 
*/ 45 46 void __kmp_validate_locks(void) { 47 int i; 48 kmp_uint32 x, y; 49 50 /* Check to make sure unsigned arithmetic does wraps properly */ 51 x = ~((kmp_uint32)0) - 2; 52 y = x - 2; 53 54 for (i = 0; i < 8; ++i, ++x, ++y) { 55 kmp_uint32 z = (x - y); 56 KMP_ASSERT(z == 2); 57 } 58 59 KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0); 60 } 61 62 /* ------------------------------------------------------------------------ */ 63 /* test and set locks */ 64 65 // For the non-nested locks, we can only assume that the first 4 bytes were 66 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel 67 // compiler only allocates a 4 byte pointer on IA-32 architecture. On 68 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated. 69 // 70 // gcc reserves >= 8 bytes for nested locks, so we can assume that the 71 // entire 8 bytes were allocated for nested locks on all 64-bit platforms. 72 73 static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) { 74 return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1; 75 } 76 77 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) { 78 return lck->lk.depth_locked != -1; 79 } 80 81 __forceinline static int 82 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) { 83 KMP_MB(); 84 85 #ifdef USE_LOCK_PROFILE 86 kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll); 87 if ((curr != 0) && (curr != gtid + 1)) 88 __kmp_printf("LOCK CONTENTION: %p\n", lck); 89 /* else __kmp_printf( "." );*/ 90 #endif /* USE_LOCK_PROFILE */ 91 92 kmp_int32 tas_free = KMP_LOCK_FREE(tas); 93 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); 94 95 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free && 96 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { 97 KMP_FSYNC_ACQUIRED(lck); 98 return KMP_LOCK_ACQUIRED_FIRST; 99 } 100 101 kmp_uint32 spins; 102 KMP_FSYNC_PREPARE(lck); 103 KMP_INIT_YIELD(spins); 104 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { 105 KMP_YIELD(TRUE); 106 } else { 107 KMP_YIELD_SPIN(spins); 108 } 109 110 kmp_backoff_t backoff = __kmp_spin_backoff_params; 111 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free || 112 !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { 113 __kmp_spin_backoff(&backoff); 114 if (TCR_4(__kmp_nth) > 115 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 116 KMP_YIELD(TRUE); 117 } else { 118 KMP_YIELD_SPIN(spins); 119 } 120 } 121 KMP_FSYNC_ACQUIRED(lck); 122 return KMP_LOCK_ACQUIRED_FIRST; 123 } 124 125 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 126 int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid); 127 ANNOTATE_TAS_ACQUIRED(lck); 128 return retval; 129 } 130 131 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck, 132 kmp_int32 gtid) { 133 char const *const func = "omp_set_lock"; 134 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 135 __kmp_is_tas_lock_nestable(lck)) { 136 KMP_FATAL(LockNestableUsedAsSimple, func); 137 } 138 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) { 139 KMP_FATAL(LockIsAlreadyOwned, func); 140 } 141 return __kmp_acquire_tas_lock(lck, gtid); 142 } 143 144 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 145 kmp_int32 tas_free = KMP_LOCK_FREE(tas); 146 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); 147 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free && 148 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { 149 KMP_FSYNC_ACQUIRED(lck); 150 return TRUE; 151 } 152 return FALSE; 153 } 154 155 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck, 156 kmp_int32 gtid) { 157 char const *const func = "omp_test_lock"; 158 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 159 __kmp_is_tas_lock_nestable(lck)) { 160 KMP_FATAL(LockNestableUsedAsSimple, func); 161 } 162 return __kmp_test_tas_lock(lck, gtid); 163 } 164 165 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 166 
KMP_MB(); /* Flush all pending memory write invalidates. */ 167 168 KMP_FSYNC_RELEASING(lck); 169 ANNOTATE_TAS_RELEASED(lck); 170 KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas)); 171 KMP_MB(); /* Flush all pending memory write invalidates. */ 172 173 KMP_YIELD(TCR_4(__kmp_nth) > 174 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 175 return KMP_LOCK_RELEASED; 176 } 177 178 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck, 179 kmp_int32 gtid) { 180 char const *const func = "omp_unset_lock"; 181 KMP_MB(); /* in case another processor initialized lock */ 182 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 183 __kmp_is_tas_lock_nestable(lck)) { 184 KMP_FATAL(LockNestableUsedAsSimple, func); 185 } 186 if (__kmp_get_tas_lock_owner(lck) == -1) { 187 KMP_FATAL(LockUnsettingFree, func); 188 } 189 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) && 190 (__kmp_get_tas_lock_owner(lck) != gtid)) { 191 KMP_FATAL(LockUnsettingSetByAnother, func); 192 } 193 return __kmp_release_tas_lock(lck, gtid); 194 } 195 196 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) { 197 lck->lk.poll = KMP_LOCK_FREE(tas); 198 } 199 200 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) { 201 __kmp_init_tas_lock(lck); 202 } 203 204 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; } 205 206 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) { 207 char const *const func = "omp_destroy_lock"; 208 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 209 __kmp_is_tas_lock_nestable(lck)) { 210 KMP_FATAL(LockNestableUsedAsSimple, func); 211 } 212 if (__kmp_get_tas_lock_owner(lck) != -1) { 213 KMP_FATAL(LockStillOwned, func); 214 } 215 __kmp_destroy_tas_lock(lck); 216 } 217 218 // nested test and set locks 219 220 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 221 KMP_DEBUG_ASSERT(gtid >= 0); 222 223 if (__kmp_get_tas_lock_owner(lck) == gtid) { 224 lck->lk.depth_locked += 1; 225 return 
KMP_LOCK_ACQUIRED_NEXT; 226 } else { 227 __kmp_acquire_tas_lock_timed_template(lck, gtid); 228 ANNOTATE_TAS_ACQUIRED(lck); 229 lck->lk.depth_locked = 1; 230 return KMP_LOCK_ACQUIRED_FIRST; 231 } 232 } 233 234 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 235 kmp_int32 gtid) { 236 char const *const func = "omp_set_nest_lock"; 237 if (!__kmp_is_tas_lock_nestable(lck)) { 238 KMP_FATAL(LockSimpleUsedAsNestable, func); 239 } 240 return __kmp_acquire_nested_tas_lock(lck, gtid); 241 } 242 243 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 244 int retval; 245 246 KMP_DEBUG_ASSERT(gtid >= 0); 247 248 if (__kmp_get_tas_lock_owner(lck) == gtid) { 249 retval = ++lck->lk.depth_locked; 250 } else if (!__kmp_test_tas_lock(lck, gtid)) { 251 retval = 0; 252 } else { 253 KMP_MB(); 254 retval = lck->lk.depth_locked = 1; 255 } 256 return retval; 257 } 258 259 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 260 kmp_int32 gtid) { 261 char const *const func = "omp_test_nest_lock"; 262 if (!__kmp_is_tas_lock_nestable(lck)) { 263 KMP_FATAL(LockSimpleUsedAsNestable, func); 264 } 265 return __kmp_test_nested_tas_lock(lck, gtid); 266 } 267 268 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 269 KMP_DEBUG_ASSERT(gtid >= 0); 270 271 KMP_MB(); 272 if (--(lck->lk.depth_locked) == 0) { 273 __kmp_release_tas_lock(lck, gtid); 274 return KMP_LOCK_RELEASED; 275 } 276 return KMP_LOCK_STILL_HELD; 277 } 278 279 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 280 kmp_int32 gtid) { 281 char const *const func = "omp_unset_nest_lock"; 282 KMP_MB(); /* in case another processor initialized lock */ 283 if (!__kmp_is_tas_lock_nestable(lck)) { 284 KMP_FATAL(LockSimpleUsedAsNestable, func); 285 } 286 if (__kmp_get_tas_lock_owner(lck) == -1) { 287 KMP_FATAL(LockUnsettingFree, func); 288 } 289 if (__kmp_get_tas_lock_owner(lck) != gtid) { 290 KMP_FATAL(LockUnsettingSetByAnother, func); 291 } 
292 return __kmp_release_nested_tas_lock(lck, gtid); 293 } 294 295 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) { 296 __kmp_init_tas_lock(lck); 297 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 298 } 299 300 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { 301 __kmp_init_nested_tas_lock(lck); 302 } 303 304 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) { 305 __kmp_destroy_tas_lock(lck); 306 lck->lk.depth_locked = 0; 307 } 308 309 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { 310 char const *const func = "omp_destroy_nest_lock"; 311 if (!__kmp_is_tas_lock_nestable(lck)) { 312 KMP_FATAL(LockSimpleUsedAsNestable, func); 313 } 314 if (__kmp_get_tas_lock_owner(lck) != -1) { 315 KMP_FATAL(LockStillOwned, func); 316 } 317 __kmp_destroy_nested_tas_lock(lck); 318 } 319 320 #if KMP_USE_FUTEX 321 322 /* ------------------------------------------------------------------------ */ 323 /* futex locks */ 324 325 // futex locks are really just test and set locks, with a different method 326 // of handling contention. They take the same amount of space as test and 327 // set locks, and are allocated the same way (i.e. use the area allocated by 328 // the compiler for non-nested locks / allocate nested locks on the heap). 329 330 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) { 331 return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1; 332 } 333 334 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) { 335 return lck->lk.depth_locked != -1; 336 } 337 338 __forceinline static int 339 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) { 340 kmp_int32 gtid_code = (gtid + 1) << 1; 341 342 KMP_MB(); 343 344 #ifdef USE_LOCK_PROFILE 345 kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll)); 346 if ((curr != 0) && (curr != gtid_code)) 347 __kmp_printf("LOCK CONTENTION: %p\n", lck); 348 /* else __kmp_printf( "." 
);*/ 349 #endif /* USE_LOCK_PROFILE */ 350 351 KMP_FSYNC_PREPARE(lck); 352 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", 353 lck, lck->lk.poll, gtid)); 354 355 kmp_int32 poll_val; 356 357 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( 358 &(lck->lk.poll), KMP_LOCK_FREE(futex), 359 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { 360 361 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; 362 KA_TRACE( 363 1000, 364 ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", 365 lck, gtid, poll_val, cond)); 366 367 // NOTE: if you try to use the following condition for this branch 368 // 369 // if ( poll_val & 1 == 0 ) 370 // 371 // Then the 12.0 compiler has a bug where the following block will 372 // always be skipped, regardless of the value of the LSB of poll_val. 373 if (!cond) { 374 // Try to set the lsb in the poll to indicate to the owner 375 // thread that they need to wake this thread up. 376 if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val, 377 poll_val | KMP_LOCK_BUSY(1, futex))) { 378 KA_TRACE( 379 1000, 380 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", 381 lck, lck->lk.poll, gtid)); 382 continue; 383 } 384 poll_val |= KMP_LOCK_BUSY(1, futex); 385 386 KA_TRACE(1000, 387 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck, 388 lck->lk.poll, gtid)); 389 } 390 391 KA_TRACE( 392 1000, 393 ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", 394 lck, gtid, poll_val)); 395 396 kmp_int32 rc; 397 if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL, 398 NULL, 0)) != 0) { 399 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) " 400 "failed (rc=%d errno=%d)\n", 401 lck, gtid, poll_val, rc, errno)); 402 continue; 403 } 404 405 KA_TRACE(1000, 406 ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", 407 lck, gtid, poll_val)); 408 // This thread has now done a successful futex wait call and was entered 
on 409 // the OS futex queue. We must now perform a futex wake call when releasing 410 // the lock, as we have no idea how many other threads are in the queue. 411 gtid_code |= 1; 412 } 413 414 KMP_FSYNC_ACQUIRED(lck); 415 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, 416 lck->lk.poll, gtid)); 417 return KMP_LOCK_ACQUIRED_FIRST; 418 } 419 420 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 421 int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid); 422 ANNOTATE_FUTEX_ACQUIRED(lck); 423 return retval; 424 } 425 426 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck, 427 kmp_int32 gtid) { 428 char const *const func = "omp_set_lock"; 429 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 430 __kmp_is_futex_lock_nestable(lck)) { 431 KMP_FATAL(LockNestableUsedAsSimple, func); 432 } 433 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) { 434 KMP_FATAL(LockIsAlreadyOwned, func); 435 } 436 return __kmp_acquire_futex_lock(lck, gtid); 437 } 438 439 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 440 if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex), 441 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { 442 KMP_FSYNC_ACQUIRED(lck); 443 return TRUE; 444 } 445 return FALSE; 446 } 447 448 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck, 449 kmp_int32 gtid) { 450 char const *const func = "omp_test_lock"; 451 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 452 __kmp_is_futex_lock_nestable(lck)) { 453 KMP_FATAL(LockNestableUsedAsSimple, func); 454 } 455 return __kmp_test_futex_lock(lck, gtid); 456 } 457 458 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 459 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 460 461 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", 462 lck, lck->lk.poll, gtid)); 463 464 KMP_FSYNC_RELEASING(lck); 465 ANNOTATE_FUTEX_RELEASED(lck); 466 467 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex)); 468 469 KA_TRACE(1000, 470 ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", 471 lck, gtid, poll_val)); 472 473 if (KMP_LOCK_STRIP(poll_val) & 1) { 474 KA_TRACE(1000, 475 ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", 476 lck, gtid)); 477 syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), 478 NULL, NULL, 0); 479 } 480 481 KMP_MB(); /* Flush all pending memory write invalidates. */ 482 483 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, 484 lck->lk.poll, gtid)); 485 486 KMP_YIELD(TCR_4(__kmp_nth) > 487 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 488 return KMP_LOCK_RELEASED; 489 } 490 491 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck, 492 kmp_int32 gtid) { 493 char const *const func = "omp_unset_lock"; 494 KMP_MB(); /* in case another processor initialized lock */ 495 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 496 __kmp_is_futex_lock_nestable(lck)) { 497 KMP_FATAL(LockNestableUsedAsSimple, func); 498 } 499 if (__kmp_get_futex_lock_owner(lck) == -1) { 500 KMP_FATAL(LockUnsettingFree, func); 501 } 502 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) && 503 (__kmp_get_futex_lock_owner(lck) != gtid)) { 504 KMP_FATAL(LockUnsettingSetByAnother, func); 505 } 506 return __kmp_release_futex_lock(lck, gtid); 507 } 508 509 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) { 510 TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex)); 511 } 512 513 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) { 514 __kmp_init_futex_lock(lck); 515 } 516 517 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; } 518 519 static void 
__kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) { 520 char const *const func = "omp_destroy_lock"; 521 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 522 __kmp_is_futex_lock_nestable(lck)) { 523 KMP_FATAL(LockNestableUsedAsSimple, func); 524 } 525 if (__kmp_get_futex_lock_owner(lck) != -1) { 526 KMP_FATAL(LockStillOwned, func); 527 } 528 __kmp_destroy_futex_lock(lck); 529 } 530 531 // nested futex locks 532 533 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 534 KMP_DEBUG_ASSERT(gtid >= 0); 535 536 if (__kmp_get_futex_lock_owner(lck) == gtid) { 537 lck->lk.depth_locked += 1; 538 return KMP_LOCK_ACQUIRED_NEXT; 539 } else { 540 __kmp_acquire_futex_lock_timed_template(lck, gtid); 541 ANNOTATE_FUTEX_ACQUIRED(lck); 542 lck->lk.depth_locked = 1; 543 return KMP_LOCK_ACQUIRED_FIRST; 544 } 545 } 546 547 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 548 kmp_int32 gtid) { 549 char const *const func = "omp_set_nest_lock"; 550 if (!__kmp_is_futex_lock_nestable(lck)) { 551 KMP_FATAL(LockSimpleUsedAsNestable, func); 552 } 553 return __kmp_acquire_nested_futex_lock(lck, gtid); 554 } 555 556 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 557 int retval; 558 559 KMP_DEBUG_ASSERT(gtid >= 0); 560 561 if (__kmp_get_futex_lock_owner(lck) == gtid) { 562 retval = ++lck->lk.depth_locked; 563 } else if (!__kmp_test_futex_lock(lck, gtid)) { 564 retval = 0; 565 } else { 566 KMP_MB(); 567 retval = lck->lk.depth_locked = 1; 568 } 569 return retval; 570 } 571 572 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 573 kmp_int32 gtid) { 574 char const *const func = "omp_test_nest_lock"; 575 if (!__kmp_is_futex_lock_nestable(lck)) { 576 KMP_FATAL(LockSimpleUsedAsNestable, func); 577 } 578 return __kmp_test_nested_futex_lock(lck, gtid); 579 } 580 581 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 582 KMP_DEBUG_ASSERT(gtid >= 0); 583 584 
KMP_MB(); 585 if (--(lck->lk.depth_locked) == 0) { 586 __kmp_release_futex_lock(lck, gtid); 587 return KMP_LOCK_RELEASED; 588 } 589 return KMP_LOCK_STILL_HELD; 590 } 591 592 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 593 kmp_int32 gtid) { 594 char const *const func = "omp_unset_nest_lock"; 595 KMP_MB(); /* in case another processor initialized lock */ 596 if (!__kmp_is_futex_lock_nestable(lck)) { 597 KMP_FATAL(LockSimpleUsedAsNestable, func); 598 } 599 if (__kmp_get_futex_lock_owner(lck) == -1) { 600 KMP_FATAL(LockUnsettingFree, func); 601 } 602 if (__kmp_get_futex_lock_owner(lck) != gtid) { 603 KMP_FATAL(LockUnsettingSetByAnother, func); 604 } 605 return __kmp_release_nested_futex_lock(lck, gtid); 606 } 607 608 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) { 609 __kmp_init_futex_lock(lck); 610 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 611 } 612 613 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { 614 __kmp_init_nested_futex_lock(lck); 615 } 616 617 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) { 618 __kmp_destroy_futex_lock(lck); 619 lck->lk.depth_locked = 0; 620 } 621 622 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { 623 char const *const func = "omp_destroy_nest_lock"; 624 if (!__kmp_is_futex_lock_nestable(lck)) { 625 KMP_FATAL(LockSimpleUsedAsNestable, func); 626 } 627 if (__kmp_get_futex_lock_owner(lck) != -1) { 628 KMP_FATAL(LockStillOwned, func); 629 } 630 __kmp_destroy_nested_futex_lock(lck); 631 } 632 633 #endif // KMP_USE_FUTEX 634 635 /* ------------------------------------------------------------------------ */ 636 /* ticket (bakery) locks */ 637 638 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) { 639 return std::atomic_load_explicit(&lck->lk.owner_id, 640 std::memory_order_relaxed) - 641 1; 642 } 643 644 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) 
{ 645 return std::atomic_load_explicit(&lck->lk.depth_locked, 646 std::memory_order_relaxed) != -1; 647 } 648 649 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) { 650 return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving, 651 std::memory_order_acquire) == my_ticket; 652 } 653 654 __forceinline static int 655 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck, 656 kmp_int32 gtid) { 657 kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( 658 &lck->lk.next_ticket, 1U, std::memory_order_relaxed); 659 660 #ifdef USE_LOCK_PROFILE 661 if (std::atomic_load_explicit(&lck->lk.now_serving, 662 std::memory_order_relaxed) != my_ticket) 663 __kmp_printf("LOCK CONTENTION: %p\n", lck); 664 /* else __kmp_printf( "." );*/ 665 #endif /* USE_LOCK_PROFILE */ 666 667 if (std::atomic_load_explicit(&lck->lk.now_serving, 668 std::memory_order_acquire) == my_ticket) { 669 return KMP_LOCK_ACQUIRED_FIRST; 670 } 671 KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck); 672 return KMP_LOCK_ACQUIRED_FIRST; 673 } 674 675 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 676 int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid); 677 ANNOTATE_TICKET_ACQUIRED(lck); 678 return retval; 679 } 680 681 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 682 kmp_int32 gtid) { 683 char const *const func = "omp_set_lock"; 684 685 if (!std::atomic_load_explicit(&lck->lk.initialized, 686 std::memory_order_relaxed)) { 687 KMP_FATAL(LockIsUninitialized, func); 688 } 689 if (lck->lk.self != lck) { 690 KMP_FATAL(LockIsUninitialized, func); 691 } 692 if (__kmp_is_ticket_lock_nestable(lck)) { 693 KMP_FATAL(LockNestableUsedAsSimple, func); 694 } 695 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) { 696 KMP_FATAL(LockIsAlreadyOwned, func); 697 } 698 699 __kmp_acquire_ticket_lock(lck, gtid); 700 701 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 702 
std::memory_order_relaxed); 703 return KMP_LOCK_ACQUIRED_FIRST; 704 } 705 706 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 707 kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket, 708 std::memory_order_relaxed); 709 710 if (std::atomic_load_explicit(&lck->lk.now_serving, 711 std::memory_order_relaxed) == my_ticket) { 712 kmp_uint32 next_ticket = my_ticket + 1; 713 if (std::atomic_compare_exchange_strong_explicit( 714 &lck->lk.next_ticket, &my_ticket, next_ticket, 715 std::memory_order_acquire, std::memory_order_acquire)) { 716 return TRUE; 717 } 718 } 719 return FALSE; 720 } 721 722 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 723 kmp_int32 gtid) { 724 char const *const func = "omp_test_lock"; 725 726 if (!std::atomic_load_explicit(&lck->lk.initialized, 727 std::memory_order_relaxed)) { 728 KMP_FATAL(LockIsUninitialized, func); 729 } 730 if (lck->lk.self != lck) { 731 KMP_FATAL(LockIsUninitialized, func); 732 } 733 if (__kmp_is_ticket_lock_nestable(lck)) { 734 KMP_FATAL(LockNestableUsedAsSimple, func); 735 } 736 737 int retval = __kmp_test_ticket_lock(lck, gtid); 738 739 if (retval) { 740 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 741 std::memory_order_relaxed); 742 } 743 return retval; 744 } 745 746 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 747 kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket, 748 std::memory_order_relaxed) - 749 std::atomic_load_explicit(&lck->lk.now_serving, 750 std::memory_order_relaxed); 751 752 ANNOTATE_TICKET_RELEASED(lck); 753 std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U, 754 std::memory_order_release); 755 756 KMP_YIELD(distance > 757 (kmp_uint32)(__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); 758 return KMP_LOCK_RELEASED; 759 } 760 761 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 762 kmp_int32 gtid) { 763 char const *const func = "omp_unset_lock"; 764 765 if (!std::atomic_load_explicit(&lck->lk.initialized, 766 std::memory_order_relaxed)) { 767 KMP_FATAL(LockIsUninitialized, func); 768 } 769 if (lck->lk.self != lck) { 770 KMP_FATAL(LockIsUninitialized, func); 771 } 772 if (__kmp_is_ticket_lock_nestable(lck)) { 773 KMP_FATAL(LockNestableUsedAsSimple, func); 774 } 775 if (__kmp_get_ticket_lock_owner(lck) == -1) { 776 KMP_FATAL(LockUnsettingFree, func); 777 } 778 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) && 779 (__kmp_get_ticket_lock_owner(lck) != gtid)) { 780 KMP_FATAL(LockUnsettingSetByAnother, func); 781 } 782 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 783 return __kmp_release_ticket_lock(lck, gtid); 784 } 785 786 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) { 787 lck->lk.location = NULL; 788 lck->lk.self = lck; 789 std::atomic_store_explicit(&lck->lk.next_ticket, 0U, 790 std::memory_order_relaxed); 791 std::atomic_store_explicit(&lck->lk.now_serving, 0U, 792 std::memory_order_relaxed); 793 std::atomic_store_explicit( 794 &lck->lk.owner_id, 0, 795 std::memory_order_relaxed); // no thread owns the lock. 796 std::atomic_store_explicit( 797 &lck->lk.depth_locked, -1, 798 std::memory_order_relaxed); // -1 => not a nested lock. 
799 std::atomic_store_explicit(&lck->lk.initialized, true, 800 std::memory_order_release); 801 } 802 803 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 804 __kmp_init_ticket_lock(lck); 805 } 806 807 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) { 808 std::atomic_store_explicit(&lck->lk.initialized, false, 809 std::memory_order_release); 810 lck->lk.self = NULL; 811 lck->lk.location = NULL; 812 std::atomic_store_explicit(&lck->lk.next_ticket, 0U, 813 std::memory_order_relaxed); 814 std::atomic_store_explicit(&lck->lk.now_serving, 0U, 815 std::memory_order_relaxed); 816 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 817 std::atomic_store_explicit(&lck->lk.depth_locked, -1, 818 std::memory_order_relaxed); 819 } 820 821 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 822 char const *const func = "omp_destroy_lock"; 823 824 if (!std::atomic_load_explicit(&lck->lk.initialized, 825 std::memory_order_relaxed)) { 826 KMP_FATAL(LockIsUninitialized, func); 827 } 828 if (lck->lk.self != lck) { 829 KMP_FATAL(LockIsUninitialized, func); 830 } 831 if (__kmp_is_ticket_lock_nestable(lck)) { 832 KMP_FATAL(LockNestableUsedAsSimple, func); 833 } 834 if (__kmp_get_ticket_lock_owner(lck) != -1) { 835 KMP_FATAL(LockStillOwned, func); 836 } 837 __kmp_destroy_ticket_lock(lck); 838 } 839 840 // nested ticket locks 841 842 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 843 KMP_DEBUG_ASSERT(gtid >= 0); 844 845 if (__kmp_get_ticket_lock_owner(lck) == gtid) { 846 std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, 847 std::memory_order_relaxed); 848 return KMP_LOCK_ACQUIRED_NEXT; 849 } else { 850 __kmp_acquire_ticket_lock_timed_template(lck, gtid); 851 ANNOTATE_TICKET_ACQUIRED(lck); 852 std::atomic_store_explicit(&lck->lk.depth_locked, 1, 853 std::memory_order_relaxed); 854 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 855 std::memory_order_relaxed); 856 
return KMP_LOCK_ACQUIRED_FIRST; 857 } 858 } 859 860 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 861 kmp_int32 gtid) { 862 char const *const func = "omp_set_nest_lock"; 863 864 if (!std::atomic_load_explicit(&lck->lk.initialized, 865 std::memory_order_relaxed)) { 866 KMP_FATAL(LockIsUninitialized, func); 867 } 868 if (lck->lk.self != lck) { 869 KMP_FATAL(LockIsUninitialized, func); 870 } 871 if (!__kmp_is_ticket_lock_nestable(lck)) { 872 KMP_FATAL(LockSimpleUsedAsNestable, func); 873 } 874 return __kmp_acquire_nested_ticket_lock(lck, gtid); 875 } 876 877 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 878 int retval; 879 880 KMP_DEBUG_ASSERT(gtid >= 0); 881 882 if (__kmp_get_ticket_lock_owner(lck) == gtid) { 883 retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, 884 std::memory_order_relaxed) + 885 1; 886 } else if (!__kmp_test_ticket_lock(lck, gtid)) { 887 retval = 0; 888 } else { 889 std::atomic_store_explicit(&lck->lk.depth_locked, 1, 890 std::memory_order_relaxed); 891 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 892 std::memory_order_relaxed); 893 retval = 1; 894 } 895 return retval; 896 } 897 898 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 899 kmp_int32 gtid) { 900 char const *const func = "omp_test_nest_lock"; 901 902 if (!std::atomic_load_explicit(&lck->lk.initialized, 903 std::memory_order_relaxed)) { 904 KMP_FATAL(LockIsUninitialized, func); 905 } 906 if (lck->lk.self != lck) { 907 KMP_FATAL(LockIsUninitialized, func); 908 } 909 if (!__kmp_is_ticket_lock_nestable(lck)) { 910 KMP_FATAL(LockSimpleUsedAsNestable, func); 911 } 912 return __kmp_test_nested_ticket_lock(lck, gtid); 913 } 914 915 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 916 KMP_DEBUG_ASSERT(gtid >= 0); 917 918 if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1, 919 std::memory_order_relaxed) - 920 1) == 0) { 921 
std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 922 __kmp_release_ticket_lock(lck, gtid); 923 return KMP_LOCK_RELEASED; 924 } 925 return KMP_LOCK_STILL_HELD; 926 } 927 928 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 929 kmp_int32 gtid) { 930 char const *const func = "omp_unset_nest_lock"; 931 932 if (!std::atomic_load_explicit(&lck->lk.initialized, 933 std::memory_order_relaxed)) { 934 KMP_FATAL(LockIsUninitialized, func); 935 } 936 if (lck->lk.self != lck) { 937 KMP_FATAL(LockIsUninitialized, func); 938 } 939 if (!__kmp_is_ticket_lock_nestable(lck)) { 940 KMP_FATAL(LockSimpleUsedAsNestable, func); 941 } 942 if (__kmp_get_ticket_lock_owner(lck) == -1) { 943 KMP_FATAL(LockUnsettingFree, func); 944 } 945 if (__kmp_get_ticket_lock_owner(lck) != gtid) { 946 KMP_FATAL(LockUnsettingSetByAnother, func); 947 } 948 return __kmp_release_nested_ticket_lock(lck, gtid); 949 } 950 951 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) { 952 __kmp_init_ticket_lock(lck); 953 std::atomic_store_explicit(&lck->lk.depth_locked, 0, 954 std::memory_order_relaxed); 955 // >= 0 for nestable locks, -1 for simple locks 956 } 957 958 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 959 __kmp_init_nested_ticket_lock(lck); 960 } 961 962 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) { 963 __kmp_destroy_ticket_lock(lck); 964 std::atomic_store_explicit(&lck->lk.depth_locked, 0, 965 std::memory_order_relaxed); 966 } 967 968 static void 969 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 970 char const *const func = "omp_destroy_nest_lock"; 971 972 if (!std::atomic_load_explicit(&lck->lk.initialized, 973 std::memory_order_relaxed)) { 974 KMP_FATAL(LockIsUninitialized, func); 975 } 976 if (lck->lk.self != lck) { 977 KMP_FATAL(LockIsUninitialized, func); 978 } 979 if (!__kmp_is_ticket_lock_nestable(lck)) { 980 KMP_FATAL(LockSimpleUsedAsNestable, func); 981 } 
982 if (__kmp_get_ticket_lock_owner(lck) != -1) { 983 KMP_FATAL(LockStillOwned, func); 984 } 985 __kmp_destroy_nested_ticket_lock(lck); 986 } 987 988 // access functions to fields which don't exist for all lock kinds. 989 990 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) { 991 return std::atomic_load_explicit(&lck->lk.initialized, 992 std::memory_order_relaxed) && 993 (lck->lk.self == lck); 994 } 995 996 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) { 997 return lck->lk.location; 998 } 999 1000 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck, 1001 const ident_t *loc) { 1002 lck->lk.location = loc; 1003 } 1004 1005 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) { 1006 return lck->lk.flags; 1007 } 1008 1009 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck, 1010 kmp_lock_flags_t flags) { 1011 lck->lk.flags = flags; 1012 } 1013 1014 /* ------------------------------------------------------------------------ */ 1015 /* queuing locks */ 1016 1017 /* First the states 1018 (head,tail) = 0, 0 means lock is unheld, nobody on queue 1019 UINT_MAX or -1, 0 means lock is held, nobody on queue 1020 h, h means lock held or about to transition, 1021 1 element on queue 1022 h, t h <> t, means lock is held or about to 1023 transition, >1 elements on queue 1024 1025 Now the transitions 1026 Acquire(0,0) = -1 ,0 1027 Release(0,0) = Error 1028 Acquire(-1,0) = h ,h h > 0 1029 Release(-1,0) = 0 ,0 1030 Acquire(h,h) = h ,t h > 0, t > 0, h <> t 1031 Release(h,h) = -1 ,0 h > 0 1032 Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' 1033 Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t 1034 1035 And pictorially 1036 1037 +-----+ 1038 | 0, 0|------- release -------> Error 1039 +-----+ 1040 | ^ 1041 acquire| |release 1042 | | 1043 | | 1044 v | 1045 +-----+ 1046 |-1, 0| 1047 +-----+ 1048 | ^ 1049 acquire| |release 1050 | | 1051 | | 1052 v | 1053 
+-----+ 1054 | h, h| 1055 +-----+ 1056 | ^ 1057 acquire| |release 1058 | | 1059 | | 1060 v | 1061 +-----+ 1062 | h, t|----- acquire, release loopback ---+ 1063 +-----+ | 1064 ^ | 1065 | | 1066 +------------------------------------+ 1067 */ 1068 1069 #ifdef DEBUG_QUEUING_LOCKS 1070 1071 /* Stuff for circular trace buffer */ 1072 #define TRACE_BUF_ELE 1024 1073 static char traces[TRACE_BUF_ELE][128] = {0}; 1074 static int tc = 0; 1075 #define TRACE_LOCK(X, Y) \ 1076 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y); 1077 #define TRACE_LOCK_T(X, Y, Z) \ 1078 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z); 1079 #define TRACE_LOCK_HT(X, Y, Z, Q) \ 1080 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \ 1081 Z, Q); 1082 1083 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid, 1084 kmp_queuing_lock_t *lck, kmp_int32 head_id, 1085 kmp_int32 tail_id) { 1086 kmp_int32 t, i; 1087 1088 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! 
\n"); 1089 1090 i = tc % TRACE_BUF_ELE; 1091 __kmp_printf_no_lock("%s\n", traces[i]); 1092 i = (i + 1) % TRACE_BUF_ELE; 1093 while (i != (tc % TRACE_BUF_ELE)) { 1094 __kmp_printf_no_lock("%s", traces[i]); 1095 i = (i + 1) % TRACE_BUF_ELE; 1096 } 1097 __kmp_printf_no_lock("\n"); 1098 1099 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, " 1100 "next_wait:%d, head_id:%d, tail_id:%d\n", 1101 gtid + 1, this_thr->th.th_spin_here, 1102 this_thr->th.th_next_waiting, head_id, tail_id); 1103 1104 __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id); 1105 1106 if (lck->lk.head_id >= 1) { 1107 t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting; 1108 while (t > 0) { 1109 __kmp_printf_no_lock("-> %d ", t); 1110 t = __kmp_threads[t - 1]->th.th_next_waiting; 1111 } 1112 } 1113 __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id); 1114 __kmp_printf_no_lock("\n\n"); 1115 } 1116 1117 #endif /* DEBUG_QUEUING_LOCKS */ 1118 1119 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) { 1120 return TCR_4(lck->lk.owner_id) - 1; 1121 } 1122 1123 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) { 1124 return lck->lk.depth_locked != -1; 1125 } 1126 1127 /* Acquire a lock using a the queuing lock implementation */ 1128 template <bool takeTime> 1129 /* [TLW] The unused template above is left behind because of what BEB believes 1130 is a potential compiler problem with __forceinline. 
*/ 1131 __forceinline static int 1132 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, 1133 kmp_int32 gtid) { 1134 kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid); 1135 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1136 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; 1137 volatile kmp_uint32 *spin_here_p; 1138 kmp_int32 need_mf = 1; 1139 1140 #if OMPT_SUPPORT 1141 omp_state_t prev_state = omp_state_undefined; 1142 #endif 1143 1144 KA_TRACE(1000, 1145 ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); 1146 1147 KMP_FSYNC_PREPARE(lck); 1148 KMP_DEBUG_ASSERT(this_thr != NULL); 1149 spin_here_p = &this_thr->th.th_spin_here; 1150 1151 #ifdef DEBUG_QUEUING_LOCKS 1152 TRACE_LOCK(gtid + 1, "acq ent"); 1153 if (*spin_here_p) 1154 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1155 if (this_thr->th.th_next_waiting != 0) 1156 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1157 #endif 1158 KMP_DEBUG_ASSERT(!*spin_here_p); 1159 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1160 1161 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to 1162 head_id_p that may follow, not just in execution order, but also in 1163 visibility order. This way, when a releasing thread observes the changes to 1164 the queue by this thread, it can rightly assume that spin_here_p has 1165 already been set to TRUE, so that when it sets spin_here_p to FALSE, it is 1166 not premature. If the releasing thread sets spin_here_p to FALSE before 1167 this thread sets it to TRUE, this thread will hang. 
*/ 1168 *spin_here_p = TRUE; /* before enqueuing to prevent race */ 1169 1170 while (1) { 1171 kmp_int32 enqueued; 1172 kmp_int32 head; 1173 kmp_int32 tail; 1174 1175 head = *head_id_p; 1176 1177 switch (head) { 1178 1179 case -1: { 1180 #ifdef DEBUG_QUEUING_LOCKS 1181 tail = *tail_id_p; 1182 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1183 #endif 1184 tail = 0; /* to make sure next link asynchronously read is not set 1185 accidentally; this assignment prevents us from entering the 1186 if ( t > 0 ) condition in the enqueued case below, which is not 1187 necessary for this state transition */ 1188 1189 need_mf = 0; 1190 /* try (-1,0)->(tid,tid) */ 1191 enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p, 1192 KMP_PACK_64(-1, 0), 1193 KMP_PACK_64(gtid + 1, gtid + 1)); 1194 #ifdef DEBUG_QUEUING_LOCKS 1195 if (enqueued) 1196 TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)"); 1197 #endif 1198 } break; 1199 1200 default: { 1201 tail = *tail_id_p; 1202 KMP_DEBUG_ASSERT(tail != gtid + 1); 1203 1204 #ifdef DEBUG_QUEUING_LOCKS 1205 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1206 #endif 1207 1208 if (tail == 0) { 1209 enqueued = FALSE; 1210 } else { 1211 need_mf = 0; 1212 /* try (h,t) or (h,h)->(h,tid) */ 1213 enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1); 1214 1215 #ifdef DEBUG_QUEUING_LOCKS 1216 if (enqueued) 1217 TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)"); 1218 #endif 1219 } 1220 } break; 1221 1222 case 0: /* empty queue */ 1223 { 1224 kmp_int32 grabbed_lock; 1225 1226 #ifdef DEBUG_QUEUING_LOCKS 1227 tail = *tail_id_p; 1228 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1229 #endif 1230 /* try (0,0)->(-1,0) */ 1231 1232 /* only legal transition out of head = 0 is head = -1 with no change to 1233 * tail */ 1234 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1); 1235 1236 if (grabbed_lock) { 1237 1238 *spin_here_p = FALSE; 1239 1240 KA_TRACE( 1241 1000, 1242 ("__kmp_acquire_queuing_lock: lck:%p, T#%d 
exiting: no queuing\n", 1243 lck, gtid)); 1244 #ifdef DEBUG_QUEUING_LOCKS 1245 TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0); 1246 #endif 1247 1248 #if OMPT_SUPPORT 1249 if (ompt_enabled.enabled && prev_state != omp_state_undefined) { 1250 /* change the state before clearing wait_id */ 1251 this_thr->th.ompt_thread_info.state = prev_state; 1252 this_thr->th.ompt_thread_info.wait_id = 0; 1253 } 1254 #endif 1255 1256 KMP_FSYNC_ACQUIRED(lck); 1257 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ 1258 } 1259 enqueued = FALSE; 1260 } break; 1261 } 1262 1263 #if OMPT_SUPPORT 1264 if (ompt_enabled.enabled && prev_state == omp_state_undefined) { 1265 /* this thread will spin; set wait_id before entering wait state */ 1266 prev_state = this_thr->th.ompt_thread_info.state; 1267 this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; 1268 this_thr->th.ompt_thread_info.state = omp_state_wait_lock; 1269 } 1270 #endif 1271 1272 if (enqueued) { 1273 if (tail > 0) { 1274 kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1); 1275 KMP_ASSERT(tail_thr != NULL); 1276 tail_thr->th.th_next_waiting = gtid + 1; 1277 /* corresponding wait for this write in release code */ 1278 } 1279 KA_TRACE(1000, 1280 ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", 1281 lck, gtid)); 1282 1283 /* ToDo: May want to consider using __kmp_wait_sleep or something that 1284 sleeps for throughput only here. 
*/ 1285 KMP_MB(); 1286 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); 1287 1288 #ifdef DEBUG_QUEUING_LOCKS 1289 TRACE_LOCK(gtid + 1, "acq spin"); 1290 1291 if (this_thr->th.th_next_waiting != 0) 1292 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1293 #endif 1294 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1295 KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after " 1296 "waiting on queue\n", 1297 lck, gtid)); 1298 1299 #ifdef DEBUG_QUEUING_LOCKS 1300 TRACE_LOCK(gtid + 1, "acq exit 2"); 1301 #endif 1302 1303 #if OMPT_SUPPORT 1304 /* change the state before clearing wait_id */ 1305 this_thr->th.ompt_thread_info.state = prev_state; 1306 this_thr->th.ompt_thread_info.wait_id = 0; 1307 #endif 1308 1309 /* got lock, we were dequeued by the thread that released lock */ 1310 return KMP_LOCK_ACQUIRED_FIRST; 1311 } 1312 1313 /* Yield if number of threads > number of logical processors */ 1314 /* ToDo: Not sure why this should only be in oversubscription case, 1315 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ 1316 KMP_YIELD(TCR_4(__kmp_nth) > 1317 (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); 1318 #ifdef DEBUG_QUEUING_LOCKS 1319 TRACE_LOCK(gtid + 1, "acq retry"); 1320 #endif 1321 } 1322 KMP_ASSERT2(0, "should not get here"); 1323 return KMP_LOCK_ACQUIRED_FIRST; 1324 } 1325 1326 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1327 KMP_DEBUG_ASSERT(gtid >= 0); 1328 1329 int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1330 ANNOTATE_QUEUING_ACQUIRED(lck); 1331 return retval; 1332 } 1333 1334 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1335 kmp_int32 gtid) { 1336 char const *const func = "omp_set_lock"; 1337 if (lck->lk.initialized != lck) { 1338 KMP_FATAL(LockIsUninitialized, func); 1339 } 1340 if (__kmp_is_queuing_lock_nestable(lck)) { 1341 KMP_FATAL(LockNestableUsedAsSimple, func); 1342 } 1343 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1344 KMP_FATAL(LockIsAlreadyOwned, func); 1345 } 1346 1347 __kmp_acquire_queuing_lock(lck, gtid); 1348 1349 lck->lk.owner_id = gtid + 1; 1350 return KMP_LOCK_ACQUIRED_FIRST; 1351 } 1352 1353 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1354 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1355 kmp_int32 head; 1356 #ifdef KMP_DEBUG 1357 kmp_info_t *this_thr; 1358 #endif 1359 1360 KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid)); 1361 KMP_DEBUG_ASSERT(gtid >= 0); 1362 #ifdef KMP_DEBUG 1363 this_thr = __kmp_thread_from_gtid(gtid); 1364 KMP_DEBUG_ASSERT(this_thr != NULL); 1365 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 1366 #endif 1367 1368 head = *head_id_p; 1369 1370 if (head == 0) { /* nobody on queue, nobody holding */ 1371 /* try (0,0)->(-1,0) */ 1372 if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) { 1373 KA_TRACE(1000, 1374 ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid)); 1375 KMP_FSYNC_ACQUIRED(lck); 1376 ANNOTATE_QUEUING_ACQUIRED(lck); 1377 return TRUE; 1378 } 1379 } 1380 1381 KA_TRACE(1000, 1382 ("__kmp_test_queuing_lock: T#%d 
exiting: without lock\n", gtid)); 1383 return FALSE; 1384 } 1385 1386 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1387 kmp_int32 gtid) { 1388 char const *const func = "omp_test_lock"; 1389 if (lck->lk.initialized != lck) { 1390 KMP_FATAL(LockIsUninitialized, func); 1391 } 1392 if (__kmp_is_queuing_lock_nestable(lck)) { 1393 KMP_FATAL(LockNestableUsedAsSimple, func); 1394 } 1395 1396 int retval = __kmp_test_queuing_lock(lck, gtid); 1397 1398 if (retval) { 1399 lck->lk.owner_id = gtid + 1; 1400 } 1401 return retval; 1402 } 1403 1404 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1405 kmp_info_t *this_thr; 1406 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1407 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; 1408 1409 KA_TRACE(1000, 1410 ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); 1411 KMP_DEBUG_ASSERT(gtid >= 0); 1412 this_thr = __kmp_thread_from_gtid(gtid); 1413 KMP_DEBUG_ASSERT(this_thr != NULL); 1414 #ifdef DEBUG_QUEUING_LOCKS 1415 TRACE_LOCK(gtid + 1, "rel ent"); 1416 1417 if (this_thr->th.th_spin_here) 1418 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1419 if (this_thr->th.th_next_waiting != 0) 1420 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1421 #endif 1422 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 1423 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1424 1425 KMP_FSYNC_RELEASING(lck); 1426 ANNOTATE_QUEUING_RELEASED(lck); 1427 1428 while (1) { 1429 kmp_int32 dequeued; 1430 kmp_int32 head; 1431 kmp_int32 tail; 1432 1433 head = *head_id_p; 1434 1435 #ifdef DEBUG_QUEUING_LOCKS 1436 tail = *tail_id_p; 1437 TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail); 1438 if (head == 0) 1439 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1440 #endif 1441 KMP_DEBUG_ASSERT(head != 1442 0); /* holding the lock, head must be -1 or queue head */ 1443 1444 if (head == -1) { /* nobody on queue */ 1445 /* try (-1,0)->(0,0) */ 
1446 if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) { 1447 KA_TRACE( 1448 1000, 1449 ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1450 lck, gtid)); 1451 #ifdef DEBUG_QUEUING_LOCKS 1452 TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0); 1453 #endif 1454 1455 #if OMPT_SUPPORT 1456 /* nothing to do - no other thread is trying to shift blame */ 1457 #endif 1458 return KMP_LOCK_RELEASED; 1459 } 1460 dequeued = FALSE; 1461 } else { 1462 KMP_MB(); 1463 tail = *tail_id_p; 1464 if (head == tail) { /* only one thread on the queue */ 1465 #ifdef DEBUG_QUEUING_LOCKS 1466 if (head <= 0) 1467 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1468 #endif 1469 KMP_DEBUG_ASSERT(head > 0); 1470 1471 /* try (h,h)->(-1,0) */ 1472 dequeued = KMP_COMPARE_AND_STORE_REL64( 1473 RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head), 1474 KMP_PACK_64(-1, 0)); 1475 #ifdef DEBUG_QUEUING_LOCKS 1476 TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); 1477 #endif 1478 1479 } else { 1480 volatile kmp_int32 *waiting_id_p; 1481 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1482 KMP_DEBUG_ASSERT(head_thr != NULL); 1483 waiting_id_p = &head_thr->th.th_next_waiting; 1484 1485 /* Does this require synchronous reads? */ 1486 #ifdef DEBUG_QUEUING_LOCKS 1487 if (head <= 0 || tail <= 0) 1488 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1489 #endif 1490 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1491 1492 /* try (h,t)->(h',t) or (t,t) */ 1493 KMP_MB(); 1494 /* make sure enqueuing thread has time to update next waiting thread 1495 * field */ 1496 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0, 1497 KMP_NEQ, NULL); 1498 #ifdef DEBUG_QUEUING_LOCKS 1499 TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)"); 1500 #endif 1501 dequeued = TRUE; 1502 } 1503 } 1504 1505 if (dequeued) { 1506 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1507 KMP_DEBUG_ASSERT(head_thr != NULL); 1508 1509 /* Does this require synchronous reads? 
*/ 1510 #ifdef DEBUG_QUEUING_LOCKS 1511 if (head <= 0 || tail <= 0) 1512 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1513 #endif 1514 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1515 1516 /* For clean code only. Thread not released until next statement prevents 1517 race with acquire code. */ 1518 head_thr->th.th_next_waiting = 0; 1519 #ifdef DEBUG_QUEUING_LOCKS 1520 TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head); 1521 #endif 1522 1523 KMP_MB(); 1524 /* reset spin value */ 1525 head_thr->th.th_spin_here = FALSE; 1526 1527 KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after " 1528 "dequeuing\n", 1529 lck, gtid)); 1530 #ifdef DEBUG_QUEUING_LOCKS 1531 TRACE_LOCK(gtid + 1, "rel exit 2"); 1532 #endif 1533 return KMP_LOCK_RELEASED; 1534 } 1535 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring 1536 threads */ 1537 1538 #ifdef DEBUG_QUEUING_LOCKS 1539 TRACE_LOCK(gtid + 1, "rel retry"); 1540 #endif 1541 1542 } /* while */ 1543 KMP_ASSERT2(0, "should not get here"); 1544 return KMP_LOCK_RELEASED; 1545 } 1546 1547 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1548 kmp_int32 gtid) { 1549 char const *const func = "omp_unset_lock"; 1550 KMP_MB(); /* in case another processor initialized lock */ 1551 if (lck->lk.initialized != lck) { 1552 KMP_FATAL(LockIsUninitialized, func); 1553 } 1554 if (__kmp_is_queuing_lock_nestable(lck)) { 1555 KMP_FATAL(LockNestableUsedAsSimple, func); 1556 } 1557 if (__kmp_get_queuing_lock_owner(lck) == -1) { 1558 KMP_FATAL(LockUnsettingFree, func); 1559 } 1560 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1561 KMP_FATAL(LockUnsettingSetByAnother, func); 1562 } 1563 lck->lk.owner_id = 0; 1564 return __kmp_release_queuing_lock(lck, gtid); 1565 } 1566 1567 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) { 1568 lck->lk.location = NULL; 1569 lck->lk.head_id = 0; 1570 lck->lk.tail_id = 0; 1571 lck->lk.next_ticket = 0; 1572 lck->lk.now_serving = 0; 1573 
lck->lk.owner_id = 0; // no thread owns the lock. 1574 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 1575 lck->lk.initialized = lck; 1576 1577 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1578 } 1579 1580 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1581 __kmp_init_queuing_lock(lck); 1582 } 1583 1584 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) { 1585 lck->lk.initialized = NULL; 1586 lck->lk.location = NULL; 1587 lck->lk.head_id = 0; 1588 lck->lk.tail_id = 0; 1589 lck->lk.next_ticket = 0; 1590 lck->lk.now_serving = 0; 1591 lck->lk.owner_id = 0; 1592 lck->lk.depth_locked = -1; 1593 } 1594 1595 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1596 char const *const func = "omp_destroy_lock"; 1597 if (lck->lk.initialized != lck) { 1598 KMP_FATAL(LockIsUninitialized, func); 1599 } 1600 if (__kmp_is_queuing_lock_nestable(lck)) { 1601 KMP_FATAL(LockNestableUsedAsSimple, func); 1602 } 1603 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1604 KMP_FATAL(LockStillOwned, func); 1605 } 1606 __kmp_destroy_queuing_lock(lck); 1607 } 1608 1609 // nested queuing locks 1610 1611 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1612 KMP_DEBUG_ASSERT(gtid >= 0); 1613 1614 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1615 lck->lk.depth_locked += 1; 1616 return KMP_LOCK_ACQUIRED_NEXT; 1617 } else { 1618 __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1619 ANNOTATE_QUEUING_ACQUIRED(lck); 1620 KMP_MB(); 1621 lck->lk.depth_locked = 1; 1622 KMP_MB(); 1623 lck->lk.owner_id = gtid + 1; 1624 return KMP_LOCK_ACQUIRED_FIRST; 1625 } 1626 } 1627 1628 static int 1629 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1630 kmp_int32 gtid) { 1631 char const *const func = "omp_set_nest_lock"; 1632 if (lck->lk.initialized != lck) { 1633 KMP_FATAL(LockIsUninitialized, func); 1634 } 1635 if 
(!__kmp_is_queuing_lock_nestable(lck)) { 1636 KMP_FATAL(LockSimpleUsedAsNestable, func); 1637 } 1638 return __kmp_acquire_nested_queuing_lock(lck, gtid); 1639 } 1640 1641 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1642 int retval; 1643 1644 KMP_DEBUG_ASSERT(gtid >= 0); 1645 1646 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1647 retval = ++lck->lk.depth_locked; 1648 } else if (!__kmp_test_queuing_lock(lck, gtid)) { 1649 retval = 0; 1650 } else { 1651 KMP_MB(); 1652 retval = lck->lk.depth_locked = 1; 1653 KMP_MB(); 1654 lck->lk.owner_id = gtid + 1; 1655 } 1656 return retval; 1657 } 1658 1659 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1660 kmp_int32 gtid) { 1661 char const *const func = "omp_test_nest_lock"; 1662 if (lck->lk.initialized != lck) { 1663 KMP_FATAL(LockIsUninitialized, func); 1664 } 1665 if (!__kmp_is_queuing_lock_nestable(lck)) { 1666 KMP_FATAL(LockSimpleUsedAsNestable, func); 1667 } 1668 return __kmp_test_nested_queuing_lock(lck, gtid); 1669 } 1670 1671 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1672 KMP_DEBUG_ASSERT(gtid >= 0); 1673 1674 KMP_MB(); 1675 if (--(lck->lk.depth_locked) == 0) { 1676 KMP_MB(); 1677 lck->lk.owner_id = 0; 1678 __kmp_release_queuing_lock(lck, gtid); 1679 return KMP_LOCK_RELEASED; 1680 } 1681 return KMP_LOCK_STILL_HELD; 1682 } 1683 1684 static int 1685 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1686 kmp_int32 gtid) { 1687 char const *const func = "omp_unset_nest_lock"; 1688 KMP_MB(); /* in case another processor initialized lock */ 1689 if (lck->lk.initialized != lck) { 1690 KMP_FATAL(LockIsUninitialized, func); 1691 } 1692 if (!__kmp_is_queuing_lock_nestable(lck)) { 1693 KMP_FATAL(LockSimpleUsedAsNestable, func); 1694 } 1695 if (__kmp_get_queuing_lock_owner(lck) == -1) { 1696 KMP_FATAL(LockUnsettingFree, func); 1697 } 1698 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1699 
KMP_FATAL(LockUnsettingSetByAnother, func); 1700 } 1701 return __kmp_release_nested_queuing_lock(lck, gtid); 1702 } 1703 1704 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1705 __kmp_init_queuing_lock(lck); 1706 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1707 } 1708 1709 static void 1710 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1711 __kmp_init_nested_queuing_lock(lck); 1712 } 1713 1714 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1715 __kmp_destroy_queuing_lock(lck); 1716 lck->lk.depth_locked = 0; 1717 } 1718 1719 static void 1720 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1721 char const *const func = "omp_destroy_nest_lock"; 1722 if (lck->lk.initialized != lck) { 1723 KMP_FATAL(LockIsUninitialized, func); 1724 } 1725 if (!__kmp_is_queuing_lock_nestable(lck)) { 1726 KMP_FATAL(LockSimpleUsedAsNestable, func); 1727 } 1728 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1729 KMP_FATAL(LockStillOwned, func); 1730 } 1731 __kmp_destroy_nested_queuing_lock(lck); 1732 } 1733 1734 // access functions to fields which don't exist for all lock kinds. 
1735 1736 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) { 1737 return lck == lck->lk.initialized; 1738 } 1739 1740 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) { 1741 return lck->lk.location; 1742 } 1743 1744 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck, 1745 const ident_t *loc) { 1746 lck->lk.location = loc; 1747 } 1748 1749 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) { 1750 return lck->lk.flags; 1751 } 1752 1753 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, 1754 kmp_lock_flags_t flags) { 1755 lck->lk.flags = flags; 1756 } 1757 1758 #if KMP_USE_ADAPTIVE_LOCKS 1759 1760 /* RTM Adaptive locks */ 1761 1762 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1763 1764 #include <immintrin.h> 1765 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1766 1767 #else 1768 1769 // Values from the status register after failed speculation. 1770 #define _XBEGIN_STARTED (~0u) 1771 #define _XABORT_EXPLICIT (1 << 0) 1772 #define _XABORT_RETRY (1 << 1) 1773 #define _XABORT_CONFLICT (1 << 2) 1774 #define _XABORT_CAPACITY (1 << 3) 1775 #define _XABORT_DEBUG (1 << 4) 1776 #define _XABORT_NESTED (1 << 5) 1777 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1778 1779 // Aborts for which it's worth trying again immediately 1780 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1781 1782 #define STRINGIZE_INTERNAL(arg) #arg 1783 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1784 1785 // Access to RTM instructions 1786 /*A version of XBegin which returns -1 on speculation, and the value of EAX on 1787 an abort. This is the same definition as the compiler intrinsic that will be 1788 supported at some point. 
*/ 1789 static __inline int _xbegin() { 1790 int res = -1; 1791 1792 #if KMP_OS_WINDOWS 1793 #if KMP_ARCH_X86_64 1794 _asm { 1795 _emit 0xC7 1796 _emit 0xF8 1797 _emit 2 1798 _emit 0 1799 _emit 0 1800 _emit 0 1801 jmp L2 1802 mov res, eax 1803 L2: 1804 } 1805 #else /* IA32 */ 1806 _asm { 1807 _emit 0xC7 1808 _emit 0xF8 1809 _emit 2 1810 _emit 0 1811 _emit 0 1812 _emit 0 1813 jmp L2 1814 mov res, eax 1815 L2: 1816 } 1817 #endif // KMP_ARCH_X86_64 1818 #else 1819 /* Note that %eax must be noted as killed (clobbered), because the XSR is 1820 returned in %eax(%rax) on abort. Other register values are restored, so 1821 don't need to be killed. 1822 1823 We must also mark 'res' as an input and an output, since otherwise 1824 'res=-1' may be dropped as being dead, whereas we do need the assignment on 1825 the successful (i.e., non-abort) path. */ 1826 __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" 1827 " .long 1f-1b-6\n" 1828 " jmp 2f\n" 1829 "1: movl %%eax,%0\n" 1830 "2:" 1831 : "+r"(res)::"memory", "%eax"); 1832 #endif // KMP_OS_WINDOWS 1833 return res; 1834 } 1835 1836 /* Transaction end */ 1837 static __inline void _xend() { 1838 #if KMP_OS_WINDOWS 1839 __asm { 1840 _emit 0x0f 1841 _emit 0x01 1842 _emit 0xd5 1843 } 1844 #else 1845 __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); 1846 #endif 1847 } 1848 1849 /* This is a macro, the argument must be a single byte constant which can be 1850 evaluated by the inline assembler, since it is emitted as a byte into the 1851 assembly code. 
*/ 1852 // clang-format off 1853 #if KMP_OS_WINDOWS 1854 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG 1855 #else 1856 #define _xabort(ARG) \ 1857 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); 1858 #endif 1859 // clang-format on 1860 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1861 1862 // Statistics is collected for testing purpose 1863 #if KMP_DEBUG_ADAPTIVE_LOCKS 1864 1865 // We accumulate speculative lock statistics when the lock is destroyed. We 1866 // keep locks that haven't been destroyed in the liveLocks list so that we can 1867 // grab their statistics too. 1868 static kmp_adaptive_lock_statistics_t destroyedStats; 1869 1870 // To hold the list of live locks. 1871 static kmp_adaptive_lock_info_t liveLocks; 1872 1873 // A lock so we can safely update the list of locks. 1874 static kmp_bootstrap_lock_t chain_lock = 1875 KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock); 1876 1877 // Initialize the list of stats. 1878 void __kmp_init_speculative_stats() { 1879 kmp_adaptive_lock_info_t *lck = &liveLocks; 1880 1881 memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0, 1882 sizeof(lck->stats)); 1883 lck->stats.next = lck; 1884 lck->stats.prev = lck; 1885 1886 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1887 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1888 1889 __kmp_init_bootstrap_lock(&chain_lock); 1890 } 1891 1892 // Insert the lock into the circular list 1893 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { 1894 __kmp_acquire_bootstrap_lock(&chain_lock); 1895 1896 lck->stats.next = liveLocks.stats.next; 1897 lck->stats.prev = &liveLocks; 1898 1899 liveLocks.stats.next = lck; 1900 lck->stats.next->stats.prev = lck; 1901 1902 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1903 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1904 1905 __kmp_release_bootstrap_lock(&chain_lock); 1906 } 1907 1908 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { 1909 
KMP_ASSERT(lck->stats.next->stats.prev == lck); 1910 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1911 1912 kmp_adaptive_lock_info_t *n = lck->stats.next; 1913 kmp_adaptive_lock_info_t *p = lck->stats.prev; 1914 1915 n->stats.prev = p; 1916 p->stats.next = n; 1917 } 1918 1919 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1920 memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0, 1921 sizeof(lck->stats)); 1922 __kmp_remember_lock(lck); 1923 } 1924 1925 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, 1926 kmp_adaptive_lock_info_t *lck) { 1927 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 1928 1929 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 1930 t->successfulSpeculations += s->successfulSpeculations; 1931 t->hardFailedSpeculations += s->hardFailedSpeculations; 1932 t->softFailedSpeculations += s->softFailedSpeculations; 1933 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 1934 t->lemmingYields += s->lemmingYields; 1935 } 1936 1937 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1938 __kmp_acquire_bootstrap_lock(&chain_lock); 1939 1940 __kmp_add_stats(&destroyedStats, lck); 1941 __kmp_forget_lock(lck); 1942 1943 __kmp_release_bootstrap_lock(&chain_lock); 1944 } 1945 1946 static float percent(kmp_uint32 count, kmp_uint32 total) { 1947 return (total == 0) ? 0.0 : (100.0 * count) / total; 1948 } 1949 1950 static FILE *__kmp_open_stats_file() { 1951 if (strcmp(__kmp_speculative_statsfile, "-") == 0) 1952 return stdout; 1953 1954 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; 1955 char buffer[buffLen]; 1956 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, 1957 (kmp_int32)getpid()); 1958 FILE *result = fopen(&buffer[0], "w"); 1959 1960 // Maybe we should issue a warning here... 1961 return result ? 
result : stdout; 1962 } 1963 1964 void __kmp_print_speculative_stats() { 1965 kmp_adaptive_lock_statistics_t total = destroyedStats; 1966 kmp_adaptive_lock_info_t *lck; 1967 1968 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 1969 __kmp_add_stats(&total, lck); 1970 } 1971 kmp_adaptive_lock_statistics_t *t = &total; 1972 kmp_uint32 totalSections = 1973 t->nonSpeculativeAcquires + t->successfulSpeculations; 1974 kmp_uint32 totalSpeculations = t->successfulSpeculations + 1975 t->hardFailedSpeculations + 1976 t->softFailedSpeculations; 1977 if (totalSections <= 0) 1978 return; 1979 1980 FILE *statsFile = __kmp_open_stats_file(); 1981 1982 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); 1983 fprintf(statsFile, " Lock parameters: \n" 1984 " max_soft_retries : %10d\n" 1985 " max_badness : %10d\n", 1986 __kmp_adaptive_backoff_params.max_soft_retries, 1987 __kmp_adaptive_backoff_params.max_badness); 1988 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", 1989 t->nonSpeculativeAcquireAttempts); 1990 fprintf(statsFile, " Total critical sections : %10d\n", 1991 totalSections); 1992 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", 1993 t->successfulSpeculations, 1994 percent(t->successfulSpeculations, totalSections)); 1995 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 1996 t->nonSpeculativeAcquires, 1997 percent(t->nonSpeculativeAcquires, totalSections)); 1998 fprintf(statsFile, " Lemming yields : %10d\n\n", 1999 t->lemmingYields); 2000 2001 fprintf(statsFile, " Speculative acquire attempts : %10d\n", 2002 totalSpeculations); 2003 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", 2004 t->successfulSpeculations, 2005 percent(t->successfulSpeculations, totalSpeculations)); 2006 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", 2007 t->softFailedSpeculations, 2008 percent(t->softFailedSpeculations, totalSpeculations)); 2009 fprintf(statsFile, " Hard failures : %10d 
(%5.1f%%)\n", 2010 t->hardFailedSpeculations, 2011 percent(t->hardFailedSpeculations, totalSpeculations)); 2012 2013 if (statsFile != stdout) 2014 fclose(statsFile); 2015 } 2016 2017 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) 2018 #else 2019 #define KMP_INC_STAT(lck, stat) 2020 2021 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2022 2023 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { 2024 // It is enough to check that the head_id is zero. 2025 // We don't also need to check the tail. 2026 bool res = lck->lk.head_id == 0; 2027 2028 // We need a fence here, since we must ensure that no memory operations 2029 // from later in this thread float above that read. 2030 #if KMP_COMPILER_ICC 2031 _mm_mfence(); 2032 #else 2033 __sync_synchronize(); 2034 #endif 2035 2036 return res; 2037 } 2038 2039 // Functions for manipulating the badness 2040 static __inline void 2041 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { 2042 // Reset the badness to zero so we eagerly try to speculate again 2043 lck->lk.adaptive.badness = 0; 2044 KMP_INC_STAT(lck, successfulSpeculations); 2045 } 2046 2047 // Create a bit mask with one more set bit. 2048 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { 2049 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; 2050 if (newBadness > lck->lk.adaptive.max_badness) { 2051 return; 2052 } else { 2053 lck->lk.adaptive.badness = newBadness; 2054 } 2055 } 2056 2057 // Check whether speculation should be attempted. 2058 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, 2059 kmp_int32 gtid) { 2060 kmp_uint32 badness = lck->lk.adaptive.badness; 2061 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; 2062 int res = (attempts & badness) == 0; 2063 return res; 2064 } 2065 2066 // Attempt to acquire only the speculative lock. 2067 // Does not back off to the non-speculative lock. 
2068 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, 2069 kmp_int32 gtid) { 2070 int retries = lck->lk.adaptive.max_soft_retries; 2071 2072 // We don't explicitly count the start of speculation, rather we record the 2073 // results (success, hard fail, soft fail). The sum of all of those is the 2074 // total number of times we started speculation since all speculations must 2075 // end one of those ways. 2076 do { 2077 kmp_uint32 status = _xbegin(); 2078 // Switch this in to disable actual speculation but exercise at least some 2079 // of the rest of the code. Useful for debugging... 2080 // kmp_uint32 status = _XABORT_NESTED; 2081 2082 if (status == _XBEGIN_STARTED) { 2083 /* We have successfully started speculation. Check that no-one acquired 2084 the lock for real between when we last looked and now. This also gets 2085 the lock cache line into our read-set, which we need so that we'll 2086 abort if anyone later claims it for real. */ 2087 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2088 // Lock is now visibly acquired, so someone beat us to it. Abort the 2089 // transaction so we'll restart from _xbegin with the failure status. 2090 _xabort(0x01); 2091 KMP_ASSERT2(0, "should not get here"); 2092 } 2093 return 1; // Lock has been acquired (speculatively) 2094 } else { 2095 // We have aborted, update the statistics 2096 if (status & SOFT_ABORT_MASK) { 2097 KMP_INC_STAT(lck, softFailedSpeculations); 2098 // and loop round to retry. 2099 } else { 2100 KMP_INC_STAT(lck, hardFailedSpeculations); 2101 // Give up if we had a hard failure. 2102 break; 2103 } 2104 } 2105 } while (retries--); // Loop while we have retries, and didn't fail hard. 2106 2107 // Either we had a hard failure or we didn't succeed softly after 2108 // the full set of attempts, so back off the badness. 
2109 __kmp_step_badness(lck); 2110 return 0; 2111 } 2112 2113 // Attempt to acquire the speculative lock, or back off to the non-speculative 2114 // one if the speculative lock cannot be acquired. 2115 // We can succeed speculatively, non-speculatively, or fail. 2116 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { 2117 // First try to acquire the lock speculatively 2118 if (__kmp_should_speculate(lck, gtid) && 2119 __kmp_test_adaptive_lock_only(lck, gtid)) 2120 return 1; 2121 2122 // Speculative acquisition failed, so try to acquire it non-speculatively. 2123 // Count the non-speculative acquire attempt 2124 lck->lk.adaptive.acquire_attempts++; 2125 2126 // Use base, non-speculative lock. 2127 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { 2128 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2129 return 1; // Lock is acquired (non-speculatively) 2130 } else { 2131 return 0; // Failed to acquire the lock, it's already visibly locked. 2132 } 2133 } 2134 2135 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2136 kmp_int32 gtid) { 2137 char const *const func = "omp_test_lock"; 2138 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2139 KMP_FATAL(LockIsUninitialized, func); 2140 } 2141 2142 int retval = __kmp_test_adaptive_lock(lck, gtid); 2143 2144 if (retval) { 2145 lck->lk.qlk.owner_id = gtid + 1; 2146 } 2147 return retval; 2148 } 2149 2150 // Block until we can acquire a speculative, adaptive lock. We check whether we 2151 // should be trying to speculate. If we should be, we check the real lock to see 2152 // if it is free, and, if not, pause without attempting to acquire it until it 2153 // is. Then we try the speculative acquire. 
// This means that although we suffer
// from lemmings a little (because we can't acquire the lock speculatively
// until the queue of threads waiting has cleared), we don't get into a state
// where we can never acquire the lock speculatively (because we force the queue
// to clear by preventing new arrivals from entering the queue). This does mean
// that when we're trying to break lemmings, the lock is no longer fair. However
// OpenMP makes no guarantee that its locks are fair, so this isn't a real
// problem.
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
                                        kmp_int32 gtid) {
  if (__kmp_should_speculate(lck, gtid)) {
    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
      // We tried speculation and failed, so give up.
    } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queuing lock, to allow it to drain), then
      // try again. All other threads will also see the same result for
      // shouldSpeculate, so will be doing the same if they try to claim the
      // lock from now on.
      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        KMP_INC_STAT(lck, lemmingYields);
        __kmp_yield(TRUE);
      }

      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
    }
  }

  // Speculative acquisition failed, so acquire it non-speculatively.
  // Count the non-speculative acquire attempt
  lck->lk.adaptive.acquire_attempts++;

  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
  // We have acquired the base lock, so count that.
  KMP_INC_STAT(lck, nonSpeculativeAcquires);
  ANNOTATE_QUEUING_ACQUIRED(lck);
}

// Checked acquire used for omp_set_lock: fatal error if the lock is
// uninitialized or already owned by gtid; otherwise acquires the lock and
// records gtid + 1 as the owner.
static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_adaptive_lock(lck, gtid);

  lck->lk.qlk.owner_id = gtid + 1;
}

// Release an adaptive lock. Whether we are speculating is inferred from the
// visible state of the queuing lock: if it looks free we end the transaction,
// otherwise we release the queuing lock. Always returns KMP_LOCK_RELEASED.
static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
                                       kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
          lck))) { // If the lock doesn't look claimed we must be speculating.
    // (Or the user's code is buggy and they're releasing without locking;
    // if we had XTEST we'd be able to check that case...)
    _xend(); // Exit speculation
    __kmp_update_badness_after_success(lck);
  } else { // Since the lock *is* visibly locked we're not speculating,
    // so should use the underlying lock's release scheme.
    __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
  }
  return KMP_LOCK_RELEASED;
}

// Checked release used for omp_unset_lock: fatal error if the lock is
// uninitialized, has no recorded owner, or is owned by a thread other than
// gtid. Clears the owner before releasing.
static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                   kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  lck->lk.qlk.owner_id = 0;
  __kmp_release_adaptive_lock(lck, gtid);
  return KMP_LOCK_RELEASED;
}

// Initialize the underlying queuing lock, reset the speculation state
// (badness and attempt counter) and copy the retry/badness limits from the
// global __kmp_adaptive_backoff_params.
static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
  lck->lk.adaptive.badness = 0;
  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
  lck->lk.adaptive.max_soft_retries =
      __kmp_adaptive_backoff_params.max_soft_retries;
  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_zero_speculative_stats(&lck->lk.adaptive);
#endif
  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
}

// Checked init: performs no extra validation, simply forwards.
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}

// Destroy an adaptive lock: in debug-statistics builds fold this lock's
// counters into the global totals first, then destroy the queuing lock.
static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
#endif
  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
  // Nothing needed for the speculative part.
}

// Checked destroy used for omp_destroy_lock: fatal error if the lock is
// uninitialized or still has a recorded owner.
static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_adaptive_lock(lck);
}

#endif // KMP_USE_ADAPTIVE_LOCKS

/* ------------------------------------------------------------------------ */
/* DRDPA ticket locks                                                */
/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */

// owner_id holds gtid + 1 (0 means "no owner"), so this yields the owning
// gtid, or -1 when the lock has no recorded owner.
static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
  return lck->lk.owner_id - 1;
}

// depth_locked is -1 for simple locks and >= 0 for nestable locks.
static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

// Core DRDPA acquire: take a ticket, spin on polls[ticket & mask] until it
// reaches our ticket, then (as lock holder) free any retired polling area
// that is no longer reachable and possibly shrink/grow the polling area.
// Always returns KMP_LOCK_ACQUIRED_FIRST. gtid is not used in this routine.
__forceinline static int
__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
  kmp_uint64 mask = lck->lk.mask; // atomic load
  std::atomic<kmp_uint64> *polls = lck->lk.polls;

#ifdef USE_LOCK_PROFILE
  if (polls[ticket & mask] != ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  // Now spin-wait, but reload the polls pointer and mask, in case the
  // polling area has been reconfigured.  Unless it is reconfigured, the
  // reloads stay in L1 cache and are cheap.
  //
  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
  //
  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
  // and poll to be re-read every spin iteration.
  kmp_uint32 spins;

  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  while (polls[ticket & mask] < ticket) { // atomic load
    // If we are oversubscribed,
    // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
    // CPU Pause is in the macros for yield.
    //
    KMP_YIELD(TCR_4(__kmp_nth) >
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
    KMP_YIELD_SPIN(spins);

    // Re-read the mask and the poll pointer from the lock structure.
    //
    // Make certain that "mask" is read before "polls" !!!
    //
    // If another thread reconfigures the polling area and updates their
    // values, and we get the new value of mask and the old polls pointer, we
    // could access memory beyond the end of the old polling area.
    mask = lck->lk.mask; // atomic load
    polls = lck->lk.polls; // atomic load
  }

  // Critical section starts here
  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
                  ticket, lck));
  lck->lk.now_serving = ticket; // non-volatile store

  // Deallocate a garbage polling area if we know that we are the last
  // thread that could possibly access it.
  //
  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
  // ticket.
  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
    __kmp_free(lck->lk.old_polls);
    lck->lk.old_polls = NULL;
    lck->lk.cleanup_ticket = 0;
  }

  // Check to see if we should reconfigure the polling area.
  // If there is still a garbage polling area to be deallocated from a
  // previous reconfiguration, let a later thread reconfigure it.
  if (lck->lk.old_polls == NULL) {
    bool reconfigure = false;
    std::atomic<kmp_uint64> *old_polls = polls;
    kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);

    if (TCR_4(__kmp_nth) >
        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
      // We are in oversubscription mode.  Contract the polling area
      // down to a single location, if that hasn't been done already.
      if (num_polls > 1) {
        reconfigure = true;
        // NOTE(review): this reload is overwritten two lines below and looks
        // redundant - confirm before removing.
        num_polls = TCR_4(lck->lk.num_polls);
        mask = 0;
        num_polls = 1;
        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
                                                          sizeof(*polls));
        // With mask == 0 every ticket polls slot 0; seed it with our ticket.
        polls[0] = ticket;
      }
    } else {
      // We are in under/fully subscribed mode.  Check the number of
      // threads waiting on the lock.  The size of the polling area
      // should be at least the number of threads waiting.
      kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
      if (num_waiting > num_polls) {
        kmp_uint32 old_num_polls = num_polls;
        reconfigure = true;
        // Double the area (and widen the mask by one bit) until it exceeds
        // the number of waiters.
        do {
          mask = (mask << 1) | 1;
          num_polls *= 2;
        } while (num_polls <= num_waiting);

        // Allocate the new polling area, and copy the relevant portion
        // of the old polling area to the new area.  __kmp_allocate()
        // zeroes the memory it allocates, and most of the old area is
        // just zero padding, so we only copy the release counters.
        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
                                                          sizeof(*polls));
        kmp_uint32 i;
        for (i = 0; i < old_num_polls; i++) {
          polls[i].store(old_polls[i]);
        }
      }
    }

    if (reconfigure) {
      // Now write the updated fields back to the lock structure.
      //
      // Make certain that "polls" is written before "mask" !!!
      //
      // If another thread picks up the new value of mask and the old polls
      // pointer, it could access memory beyond the end of the old polling
      // area.
      //
      // On x86, we need memory fences.
      KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
                      "lock %p to %d polls\n",
                      ticket, lck, num_polls));

      lck->lk.old_polls = old_polls;
      lck->lk.polls = polls; // atomic store

      KMP_MB();

      lck->lk.num_polls = num_polls;
      lck->lk.mask = mask; // atomic store

      KMP_MB();

      // Only after the new polling area and mask have been flushed
      // to main memory can we update the cleanup ticket field.
      //
      // volatile load / non-volatile store
      lck->lk.cleanup_ticket = lck->lk.next_ticket;
    }
  }
  return KMP_LOCK_ACQUIRED_FIRST;
}

// Public acquire: runs the core acquire and then emits the race-detector
// annotation. Returns the core routine's result.
int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
  ANNOTATE_DRDPA_ACQUIRED(lck);
  return retval;
}

// Checked acquire used for omp_set_lock: fatal error if the lock is
// uninitialized, is a nestable lock, or (for gtid >= 0) is already owned by
// gtid. Records gtid + 1 as owner after acquiring.
static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_drdpa_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_drdpa_lock(lck, gtid);

  lck->lk.owner_id = gtid + 1;
  return KMP_LOCK_ACQUIRED_FIRST;
}

// Non-blocking acquire: succeeds (TRUE) only if the slot for the next ticket
// already shows that ticket being served and we win the compare-and-store
// that advances next_ticket; otherwise returns FALSE.
int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  // First get a ticket, then read the polls pointer and the mask.
  // The polls pointer must be read before the mask!!! (See above)
  // NOTE(review): the spin loop in the acquire routine states the opposite
  // order ("mask" before "polls") - confirm which ordering is intended here.
  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
  std::atomic<kmp_uint64> *polls = lck->lk.polls;
  kmp_uint64 mask = lck->lk.mask; // atomic load
  if (polls[ticket & mask] == ticket) {
    kmp_uint64 next_ticket = ticket + 1;
    if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
                                       next_ticket)) {
      KMP_FSYNC_ACQUIRED(lck);
      KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
                      ticket, lck));
      lck->lk.now_serving = ticket; // non-volatile store

      // Since no threads are waiting, there is no possibility that we would
      // want to reconfigure the polling area.  We might have the cleanup ticket
      // value (which says that it is now safe to deallocate old_polls), but
      // we'll let a later thread which calls __kmp_acquire_lock do that - this
      // routine isn't supposed to block, and we would risk blocks if we called
      // __kmp_free() to do the deallocation.
      return TRUE;
    }
  }
  return FALSE;
}

// Checked test used for omp_test_lock: fatal error if the lock is
// uninitialized or nestable; records gtid + 1 as owner on success.
static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_drdpa_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_drdpa_lock(lck, gtid);

  if (retval) {
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}

// Release: publish ticket now_serving + 1 into its polling slot, which is the
// value the next waiter is spinning for. Always returns KMP_LOCK_RELEASED.
int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  // Read the ticket value from the lock data struct, then the polls pointer and
  // the mask.  The polls pointer must be read before the mask!!! (See above)
  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
  kmp_uint64 mask = lck->lk.mask; // atomic load
  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
                  ticket - 1, lck));
  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_DRDPA_RELEASED(lck);
  polls[ticket & mask] = ticket; // atomic store
  return KMP_LOCK_RELEASED;
}

// Checked release used for omp_unset_lock: fatal error if the lock is
// uninitialized, nestable, unowned, or (when both gtid and the recorded owner
// are >= 0) owned by a different thread. Clears the owner before releasing.
static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_drdpa_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
      (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  lck->lk.owner_id = 0;
  return __kmp_release_drdpa_lock(lck, gtid);
}

// Initialize a simple DRDPA lock with a single-slot polling area, no retired
// area, tickets at 0 and no owner. "initialized" is set last and points at
// the lock itself, which is what the *_with_checks routines test.
void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.mask = 0;
  lck->lk.num_polls = 1;
  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
      lck->lk.num_polls * sizeof(*(lck->lk.polls)));
  lck->lk.cleanup_ticket = 0;
  lck->lk.old_polls = NULL;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0; // no thread owns the lock.
  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
  lck->lk.initialized = lck;

  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
}

// Checked init: performs no extra validation, simply forwards.
static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
}

// Destroy a DRDPA lock: mark it uninitialized, free the current and any
// retired polling area, and reset the remaining fields.
void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
  lck->lk.initialized = NULL;
  lck->lk.location = NULL;
  if (lck->lk.polls.load() != NULL) {
    __kmp_free(lck->lk.polls.load());
    lck->lk.polls = NULL;
  }
  if (lck->lk.old_polls != NULL) {
    __kmp_free(lck->lk.old_polls);
    lck->lk.old_polls = NULL;
  }
  lck->lk.mask = 0;
  lck->lk.num_polls = 0;
  lck->lk.cleanup_ticket = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0;
  lck->lk.depth_locked = -1;
}

// Checked destroy used for omp_destroy_lock: fatal error if the lock is
// uninitialized, nestable, or still has a recorded owner.
static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_drdpa_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_drdpa_lock(lck);
}

// nested drdpa ticket locks

// Nested acquire: if gtid already owns the lock, bump the nesting depth and
// return KMP_LOCK_ACQUIRED_NEXT; otherwise acquire it, set depth to 1, record
// the owner and return KMP_LOCK_ACQUIRED_FIRST.
int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
    ANNOTATE_DRDPA_ACQUIRED(lck);
    KMP_MB();
    lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

// Checked nested acquire used for omp_set_nest_lock: fatal error if the lock
// is uninitialized or is a simple (non-nestable) lock.
static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_drdpa_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  __kmp_acquire_nested_drdpa_lock(lck, gtid);
}

// Nested test: returns the new nesting depth when gtid already owns the lock,
// 1 when the lock was free and has now been taken, and 0 when it could not be
// taken.
int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}

// Checked nested test used for omp_test_nest_lock: fatal error if the lock is
// uninitialized or is a simple (non-nestable) lock.
static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_drdpa_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_drdpa_lock(lck, gtid);
}

// Nested release: decrement the nesting depth; when it reaches zero clear the
// owner, release the underlying lock and return KMP_LOCK_RELEASED, otherwise
// return KMP_LOCK_STILL_HELD.
int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    KMP_MB();
    lck->lk.owner_id = 0;
    __kmp_release_drdpa_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

// Checked nested release used for omp_unset_nest_lock: fatal error if the
// lock is uninitialized, is a simple lock, is unowned, or is owned by a
// thread other than gtid.
static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_drdpa_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_drdpa_lock(lck, gtid);
}

// Initialize a nestable DRDPA lock: same as the simple lock except that
// depth_locked starts at 0 rather than -1.
void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

// Checked nested init: performs no extra validation, simply forwards.
static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_nested_drdpa_lock(lck);
}

// Destroy a nestable DRDPA lock; depth_locked is left at 0 afterwards.
void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
  __kmp_destroy_drdpa_lock(lck);
  lck->lk.depth_locked = 0;
}

// Checked nested destroy used for omp_destroy_nest_lock: fatal error if the
// lock is uninitialized, is a simple lock, or still has a recorded owner.
static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_drdpa_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_drdpa_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.
2716 2717 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { 2718 return lck == lck->lk.initialized; 2719 } 2720 2721 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { 2722 return lck->lk.location; 2723 } 2724 2725 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, 2726 const ident_t *loc) { 2727 lck->lk.location = loc; 2728 } 2729 2730 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { 2731 return lck->lk.flags; 2732 } 2733 2734 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, 2735 kmp_lock_flags_t flags) { 2736 lck->lk.flags = flags; 2737 } 2738 2739 // Time stamp counter 2740 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2741 #define __kmp_tsc() __kmp_hardware_timestamp() 2742 // Runtime's default backoff parameters 2743 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; 2744 #else 2745 // Use nanoseconds for other platforms 2746 extern kmp_uint64 __kmp_now_nsec(); 2747 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; 2748 #define __kmp_tsc() __kmp_now_nsec() 2749 #endif 2750 2751 // A useful predicate for dealing with timestamps that may wrap. 2752 // Is a before b? Since the timestamps may wrap, this is asking whether it's 2753 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 2754 // Times where going clockwise is less distance than going anti-clockwise 2755 // are in the future, others are in the past. e.g. 
a = MAX-1, b = MAX+1 (=0), 2756 // then a > b (true) does not mean a reached b; whereas signed(a) = -2, 2757 // signed(b) = 0 captures the actual difference 2758 static inline bool before(kmp_uint64 a, kmp_uint64 b) { 2759 return ((kmp_int64)b - (kmp_int64)a) > 0; 2760 } 2761 2762 // Truncated binary exponential backoff function 2763 void __kmp_spin_backoff(kmp_backoff_t *boff) { 2764 // We could flatten this loop, but making it a nested loop gives better result 2765 kmp_uint32 i; 2766 for (i = boff->step; i > 0; i--) { 2767 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 2768 do { 2769 KMP_CPU_PAUSE(); 2770 } while (before(__kmp_tsc(), goal)); 2771 } 2772 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); 2773 } 2774 2775 #if KMP_USE_DYNAMIC_LOCK 2776 2777 // Direct lock initializers. It simply writes a tag to the low 8 bits of the 2778 // lock word. 2779 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, 2780 kmp_dyna_lockseq_t seq) { 2781 TCW_4(*lck, KMP_GET_D_TAG(seq)); 2782 KA_TRACE( 2783 20, 2784 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 2785 } 2786 2787 #if KMP_USE_TSX 2788 2789 // HLE lock functions - imported from the testbed runtime. 
2790 #define HLE_ACQUIRE ".byte 0xf2;" 2791 #define HLE_RELEASE ".byte 0xf3;" 2792 2793 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { 2794 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); 2795 return v; 2796 } 2797 2798 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } 2799 2800 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2801 // Use gtid for KMP_LOCK_BUSY if necessary 2802 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 2803 int delay = 1; 2804 do { 2805 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 2806 for (int i = delay; i != 0; --i) 2807 KMP_CPU_PAUSE(); 2808 delay = ((delay << 1) | 1) & 7; 2809 } 2810 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 2811 } 2812 } 2813 2814 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2815 kmp_int32 gtid) { 2816 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 2817 } 2818 2819 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2820 __asm__ volatile(HLE_RELEASE "movl %1,%0" 2821 : "=m"(*lck) 2822 : "r"(KMP_LOCK_FREE(hle)) 2823 : "memory"); 2824 return KMP_LOCK_RELEASED; 2825 } 2826 2827 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2828 kmp_int32 gtid) { 2829 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 2830 } 2831 2832 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2833 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 2834 } 2835 2836 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2837 kmp_int32 gtid) { 2838 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 2839 } 2840 2841 static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { 2842 __kmp_init_queuing_lock(lck); 2843 } 2844 2845 static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { 2846 __kmp_destroy_queuing_lock(lck); 2847 } 2848 2849 static void 
__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2850 unsigned retries = 3, status; 2851 do { 2852 status = _xbegin(); 2853 if (status == _XBEGIN_STARTED) { 2854 if (__kmp_is_unlocked_queuing_lock(lck)) 2855 return; 2856 _xabort(0xff); 2857 } 2858 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 2859 // Wait until lock becomes free 2860 while (!__kmp_is_unlocked_queuing_lock(lck)) 2861 __kmp_yield(TRUE); 2862 } else if (!(status & _XABORT_RETRY)) 2863 break; 2864 } while (retries--); 2865 2866 // Fall-back non-speculative lock (xchg) 2867 __kmp_acquire_queuing_lock(lck, gtid); 2868 } 2869 2870 static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2871 kmp_int32 gtid) { 2872 __kmp_acquire_rtm_lock(lck, gtid); 2873 } 2874 2875 static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2876 if (__kmp_is_unlocked_queuing_lock(lck)) { 2877 // Releasing from speculation 2878 _xend(); 2879 } else { 2880 // Releasing from a real lock 2881 __kmp_release_queuing_lock(lck, gtid); 2882 } 2883 return KMP_LOCK_RELEASED; 2884 } 2885 2886 static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2887 kmp_int32 gtid) { 2888 return __kmp_release_rtm_lock(lck, gtid); 2889 } 2890 2891 static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2892 unsigned retries = 3, status; 2893 do { 2894 status = _xbegin(); 2895 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 2896 return 1; 2897 } 2898 if (!(status & _XABORT_RETRY)) 2899 break; 2900 } while (retries--); 2901 2902 return (__kmp_is_unlocked_queuing_lock(lck)) ? 
1 : 0; 2903 } 2904 2905 static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2906 kmp_int32 gtid) { 2907 return __kmp_test_rtm_lock(lck, gtid); 2908 } 2909 2910 #endif // KMP_USE_TSX 2911 2912 // Entry functions for indirect locks (first element of direct lock jump tables) 2913 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, 2914 kmp_dyna_lockseq_t tag); 2915 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); 2916 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2917 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2918 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2919 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2920 kmp_int32); 2921 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2922 kmp_int32); 2923 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2924 kmp_int32); 2925 2926 // Jump tables for the indirect lock functions 2927 // Only fill in the odd entries, that avoids the need to shift out the low bit 2928 2929 // init functions 2930 #define expand(l, op) 0, __kmp_init_direct_lock, 2931 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { 2932 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; 2933 #undef expand 2934 2935 // destroy functions 2936 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 2937 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) = { 2938 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 2939 #undef expand 2940 2941 // set/acquire functions 2942 #define expand(l, op) \ 2943 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2944 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { 2945 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; 2946 #undef expand 2947 #define expand(l, op) \ 2948 0, (int (*)(kmp_dyna_lock_t *, 
kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2949 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2950 __kmp_set_indirect_lock_with_checks, 0, 2951 KMP_FOREACH_D_LOCK(expand, acquire)}; 2952 #undef expand 2953 2954 // unset/release and test functions 2955 #define expand(l, op) \ 2956 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2957 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { 2958 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; 2959 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { 2960 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2961 #undef expand 2962 #define expand(l, op) \ 2963 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2964 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2965 __kmp_unset_indirect_lock_with_checks, 0, 2966 KMP_FOREACH_D_LOCK(expand, release)}; 2967 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2968 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2969 #undef expand 2970 2971 // Exposes only one set of jump tables (*lock or *lock_with_checks). 
// Active direct-lock jump tables. They start out null and are pointed at
// either the plain or the *_check tables by __kmp_init_dynamic_user_locks(),
// depending on __kmp_env_consistency_check.
int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;

// Jump tables for the indirect lock functions. Each table has one entry per
// lock kind listed by KMP_FOREACH_I_LOCK; the entry is the kind-specific
// function cast to the common kmp_user_lock_p signature.
#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, init)};
void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, destroy)};
#undef expand

// set/acquire functions
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static int (*indirect_set[])(kmp_user_lock_p,
                             kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand

// unset/release and test functions
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, release)};
static int (*indirect_test[])(kmp_user_lock_p,
                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
#undef expand
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, release)};
static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, test)};
#undef expand

// Exposes only one set of jump tables (*lock or *lock_with_checks).
// Active indirect-lock jump tables. They start out null and are pointed at
// either the plain or the *_check tables by __kmp_init_dynamic_user_locks().
int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;

// Lock index table.
kmp_indirect_lock_table_t __kmp_i_lock_table;

// Size of indirect locks, indexed by lock tag. Filled in by
// __kmp_init_dynamic_user_locks().
static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};

// Jump tables for lock accessor/modifier, indexed by lock tag. Entries that
// are never filled in stay null.
void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                     const ident_t *) = {0};
void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                  kmp_lock_flags_t) = {0};
const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) = {0};
kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) = {0};

// Use different lock pools for different lock types.
static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};

// User lock allocator for dynamically dispatched indirect locks. Every entry of
// the indirect lock table holds the address and type of the allocated indirect
// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
// full. A destroyed indirect lock object is returned to the reusable pool of
// locks, unique to each lock type.
3041 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, 3042 kmp_int32 gtid, 3043 kmp_indirect_locktag_t tag) { 3044 kmp_indirect_lock_t *lck; 3045 kmp_lock_index_t idx; 3046 3047 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3048 3049 if (__kmp_indirect_lock_pool[tag] != NULL) { 3050 // Reuse the allocated and destroyed lock object 3051 lck = __kmp_indirect_lock_pool[tag]; 3052 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3053 idx = lck->lock->pool.index; 3054 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3055 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", 3056 lck)); 3057 } else { 3058 idx = __kmp_i_lock_table.next; 3059 // Check capacity and double the size if it is full 3060 if (idx == __kmp_i_lock_table.size) { 3061 // Double up the space for block pointers 3062 int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; 3063 kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate( 3064 2 * row * sizeof(kmp_indirect_lock_t *)); 3065 KMP_MEMCPY(new_table, __kmp_i_lock_table.table, 3066 row * sizeof(kmp_indirect_lock_t *)); 3067 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; 3068 __kmp_i_lock_table.table = new_table; 3069 __kmp_free(old_table); 3070 // Allocate new objects in the new blocks 3071 for (int i = row; i < 2 * row; ++i) 3072 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate( 3073 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3074 __kmp_i_lock_table.size = 2 * idx; 3075 } 3076 __kmp_i_lock_table.next++; 3077 lck = KMP_GET_I_LOCK(idx); 3078 // Allocate a new base lock object 3079 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3080 KA_TRACE(20, 3081 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); 3082 } 3083 3084 __kmp_release_lock(&__kmp_global_lock, gtid); 3085 3086 lck->type = tag; 3087 3088 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3089 *((kmp_lock_index_t *)user_lock) = idx 3090 << 1; 
// indirect lock word must be even 3091 } else { 3092 *((kmp_indirect_lock_t **)user_lock) = lck; 3093 } 3094 3095 return lck; 3096 } 3097 3098 // User lock lookup for dynamically dispatched locks. 3099 static __forceinline kmp_indirect_lock_t * 3100 __kmp_lookup_indirect_lock(void **user_lock, const char *func) { 3101 if (__kmp_env_consistency_check) { 3102 kmp_indirect_lock_t *lck = NULL; 3103 if (user_lock == NULL) { 3104 KMP_FATAL(LockIsUninitialized, func); 3105 } 3106 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3107 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3108 if (idx >= __kmp_i_lock_table.size) { 3109 KMP_FATAL(LockIsUninitialized, func); 3110 } 3111 lck = KMP_GET_I_LOCK(idx); 3112 } else { 3113 lck = *((kmp_indirect_lock_t **)user_lock); 3114 } 3115 if (lck == NULL) { 3116 KMP_FATAL(LockIsUninitialized, func); 3117 } 3118 return lck; 3119 } else { 3120 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3121 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3122 } else { 3123 return *((kmp_indirect_lock_t **)user_lock); 3124 } 3125 } 3126 } 3127 3128 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, 3129 kmp_dyna_lockseq_t seq) { 3130 #if KMP_USE_ADAPTIVE_LOCKS 3131 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3132 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3133 seq = lockseq_queuing; 3134 } 3135 #endif 3136 #if KMP_USE_TSX 3137 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3138 seq = lockseq_queuing; 3139 } 3140 #endif 3141 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3142 kmp_indirect_lock_t *l = 3143 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3144 KMP_I_LOCK_FUNC(l, init)(l->lock); 3145 KA_TRACE( 3146 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", 3147 seq)); 3148 } 3149 3150 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { 3151 kmp_uint32 gtid = __kmp_entry_gtid(); 3152 kmp_indirect_lock_t *l = 3153 __kmp_lookup_indirect_lock((void 
**)lock, "omp_destroy_lock"); 3154 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3155 kmp_indirect_locktag_t tag = l->type; 3156 3157 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3158 3159 // Use the base lock's space to keep the pool chain. 3160 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3161 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3162 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3163 } 3164 __kmp_indirect_lock_pool[tag] = l; 3165 3166 __kmp_release_lock(&__kmp_global_lock, gtid); 3167 } 3168 3169 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3170 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3171 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3172 } 3173 3174 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3175 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3176 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3177 } 3178 3179 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3180 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3181 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3182 } 3183 3184 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3185 kmp_int32 gtid) { 3186 kmp_indirect_lock_t *l = 3187 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3188 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3189 } 3190 3191 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3192 kmp_int32 gtid) { 3193 kmp_indirect_lock_t *l = 3194 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3195 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3196 } 3197 3198 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3199 kmp_int32 gtid) { 3200 kmp_indirect_lock_t *l = 3201 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3202 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3203 } 3204 3205 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3206 3207 // This is used only 
in kmp_error.cpp when consistency checking is on. 3208 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { 3209 switch (seq) { 3210 case lockseq_tas: 3211 case lockseq_nested_tas: 3212 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3213 #if KMP_USE_FUTEX 3214 case lockseq_futex: 3215 case lockseq_nested_futex: 3216 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3217 #endif 3218 case lockseq_ticket: 3219 case lockseq_nested_ticket: 3220 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3221 case lockseq_queuing: 3222 case lockseq_nested_queuing: 3223 #if KMP_USE_ADAPTIVE_LOCKS 3224 case lockseq_adaptive: 3225 #endif 3226 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3227 case lockseq_drdpa: 3228 case lockseq_nested_drdpa: 3229 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3230 default: 3231 return 0; 3232 } 3233 } 3234 3235 // Initializes data for dynamic user locks. 3236 void __kmp_init_dynamic_user_locks() { 3237 // Initialize jump table for the lock functions 3238 if (__kmp_env_consistency_check) { 3239 __kmp_direct_set = direct_set_check; 3240 __kmp_direct_unset = direct_unset_check; 3241 __kmp_direct_test = direct_test_check; 3242 __kmp_indirect_set = indirect_set_check; 3243 __kmp_indirect_unset = indirect_unset_check; 3244 __kmp_indirect_test = indirect_test_check; 3245 } else { 3246 __kmp_direct_set = direct_set; 3247 __kmp_direct_unset = direct_unset; 3248 __kmp_direct_test = direct_test; 3249 __kmp_indirect_set = indirect_set; 3250 __kmp_indirect_unset = indirect_unset; 3251 __kmp_indirect_test = indirect_test; 3252 } 3253 // If the user locks have already been initialized, then return. Allow the 3254 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate 3255 // new lock tables if they have already been allocated. 
3256 if (__kmp_init_user_locks) 3257 return; 3258 3259 // Initialize lock index table 3260 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3261 __kmp_i_lock_table.table = 3262 (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3263 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( 3264 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3265 __kmp_i_lock_table.next = 0; 3266 3267 // Indirect lock size 3268 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3269 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3270 #if KMP_USE_ADAPTIVE_LOCKS 3271 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3272 #endif 3273 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3274 #if KMP_USE_TSX 3275 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3276 #endif 3277 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3278 #if KMP_USE_FUTEX 3279 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3280 #endif 3281 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3282 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3283 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3284 3285 // Initialize lock accessor/modifier 3286 #define fill_jumps(table, expand, sep) \ 3287 { \ 3288 table[locktag##sep##ticket] = expand(ticket); \ 3289 table[locktag##sep##queuing] = expand(queuing); \ 3290 table[locktag##sep##drdpa] = expand(drdpa); \ 3291 } 3292 3293 #if KMP_USE_ADAPTIVE_LOCKS 3294 #define fill_table(table, expand) \ 3295 { \ 3296 fill_jumps(table, expand, _); \ 3297 table[locktag_adaptive] = expand(queuing); \ 3298 fill_jumps(table, expand, _nested_); \ 3299 } 3300 #else 3301 #define fill_table(table, expand) \ 3302 { \ 3303 fill_jumps(table, expand, _); \ 3304 fill_jumps(table, expand, _nested_); \ 3305 } 3306 #endif // 
KMP_USE_ADAPTIVE_LOCKS 3307 3308 #define expand(l) \ 3309 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location 3310 fill_table(__kmp_indirect_set_location, expand); 3311 #undef expand 3312 #define expand(l) \ 3313 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags 3314 fill_table(__kmp_indirect_set_flags, expand); 3315 #undef expand 3316 #define expand(l) \ 3317 (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location 3318 fill_table(__kmp_indirect_get_location, expand); 3319 #undef expand 3320 #define expand(l) \ 3321 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags 3322 fill_table(__kmp_indirect_get_flags, expand); 3323 #undef expand 3324 3325 __kmp_init_user_locks = TRUE; 3326 } 3327 3328 // Clean up the lock table. 3329 void __kmp_cleanup_indirect_user_locks() { 3330 kmp_lock_index_t i; 3331 int k; 3332 3333 // Clean up locks in the pools first (they were already destroyed before going 3334 // into the pools). 3335 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { 3336 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; 3337 while (l != NULL) { 3338 kmp_indirect_lock_t *ll = l; 3339 l = (kmp_indirect_lock_t *)l->lock->pool.next; 3340 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", 3341 ll)); 3342 __kmp_free(ll->lock); 3343 ll->lock = NULL; 3344 } 3345 __kmp_indirect_lock_pool[k] = NULL; 3346 } 3347 // Clean up the remaining undestroyed locks. 3348 for (i = 0; i < __kmp_i_lock_table.next; i++) { 3349 kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); 3350 if (l->lock != NULL) { 3351 // Locks not destroyed explicitly need to be destroyed here. 
3352 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3353 KA_TRACE( 3354 20, 3355 ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", 3356 l)); 3357 __kmp_free(l->lock); 3358 } 3359 } 3360 // Free the table 3361 for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) 3362 __kmp_free(__kmp_i_lock_table.table[i]); 3363 __kmp_free(__kmp_i_lock_table.table); 3364 3365 __kmp_init_user_locks = FALSE; 3366 } 3367 3368 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3369 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3370 3371 #else // KMP_USE_DYNAMIC_LOCK 3372 3373 /* user locks 3374 * They are implemented as a table of function pointers which are set to the 3375 * lock functions of the appropriate kind, once that has been determined. */ 3376 3377 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3378 3379 size_t __kmp_base_user_lock_size = 0; 3380 size_t __kmp_user_lock_size = 0; 3381 3382 kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL; 3383 int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, 3384 kmp_int32 gtid) = NULL; 3385 3386 int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck, 3387 kmp_int32 gtid) = NULL; 3388 int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck, 3389 kmp_int32 gtid) = NULL; 3390 void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; 3391 void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL; 3392 void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; 3393 int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, 3394 kmp_int32 gtid) = NULL; 3395 3396 int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, 3397 kmp_int32 gtid) = NULL; 3398 int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck, 3399 kmp_int32 gtid) = NULL; 3400 void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; 3401 void 
(*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; 3402 3403 int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL; 3404 const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL; 3405 void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck, 3406 const ident_t *loc) = NULL; 3407 kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL; 3408 void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck, 3409 kmp_lock_flags_t flags) = NULL; 3410 3411 void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) { 3412 switch (user_lock_kind) { 3413 case lk_default: 3414 default: 3415 KMP_ASSERT(0); 3416 3417 case lk_tas: { 3418 __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t); 3419 __kmp_user_lock_size = sizeof(kmp_tas_lock_t); 3420 3421 __kmp_get_user_lock_owner_ = 3422 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner); 3423 3424 if (__kmp_env_consistency_check) { 3425 KMP_BIND_USER_LOCK_WITH_CHECKS(tas); 3426 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); 3427 } else { 3428 KMP_BIND_USER_LOCK(tas); 3429 KMP_BIND_NESTED_USER_LOCK(tas); 3430 } 3431 3432 __kmp_destroy_user_lock_ = 3433 (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock); 3434 3435 __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; 3436 3437 __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; 3438 3439 __kmp_set_user_lock_location_ = 3440 (void (*)(kmp_user_lock_p, const ident_t *))NULL; 3441 3442 __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; 3443 3444 __kmp_set_user_lock_flags_ = 3445 (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL; 3446 } break; 3447 3448 #if KMP_USE_FUTEX 3449 3450 case lk_futex: { 3451 __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t); 3452 __kmp_user_lock_size = sizeof(kmp_futex_lock_t); 3453 3454 __kmp_get_user_lock_owner_ = 3455 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner); 3456 3457 if 
(__kmp_env_consistency_check) { 3458 KMP_BIND_USER_LOCK_WITH_CHECKS(futex); 3459 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); 3460 } else { 3461 KMP_BIND_USER_LOCK(futex); 3462 KMP_BIND_NESTED_USER_LOCK(futex); 3463 } 3464 3465 __kmp_destroy_user_lock_ = 3466 (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock); 3467 3468 __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; 3469 3470 __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; 3471 3472 __kmp_set_user_lock_location_ = 3473 (void (*)(kmp_user_lock_p, const ident_t *))NULL; 3474 3475 __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; 3476 3477 __kmp_set_user_lock_flags_ = 3478 (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL; 3479 } break; 3480 3481 #endif // KMP_USE_FUTEX 3482 3483 case lk_ticket: { 3484 __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t); 3485 __kmp_user_lock_size = sizeof(kmp_ticket_lock_t); 3486 3487 __kmp_get_user_lock_owner_ = 3488 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner); 3489 3490 if (__kmp_env_consistency_check) { 3491 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); 3492 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); 3493 } else { 3494 KMP_BIND_USER_LOCK(ticket); 3495 KMP_BIND_NESTED_USER_LOCK(ticket); 3496 } 3497 3498 __kmp_destroy_user_lock_ = 3499 (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock); 3500 3501 __kmp_is_user_lock_initialized_ = 3502 (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized); 3503 3504 __kmp_get_user_lock_location_ = 3505 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location); 3506 3507 __kmp_set_user_lock_location_ = (void (*)( 3508 kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location); 3509 3510 __kmp_get_user_lock_flags_ = 3511 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags); 3512 3513 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( 3514 &__kmp_set_ticket_lock_flags); 3515 } break; 
3516 3517 case lk_queuing: { 3518 __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t); 3519 __kmp_user_lock_size = sizeof(kmp_queuing_lock_t); 3520 3521 __kmp_get_user_lock_owner_ = 3522 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); 3523 3524 if (__kmp_env_consistency_check) { 3525 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); 3526 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); 3527 } else { 3528 KMP_BIND_USER_LOCK(queuing); 3529 KMP_BIND_NESTED_USER_LOCK(queuing); 3530 } 3531 3532 __kmp_destroy_user_lock_ = 3533 (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock); 3534 3535 __kmp_is_user_lock_initialized_ = 3536 (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); 3537 3538 __kmp_get_user_lock_location_ = 3539 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); 3540 3541 __kmp_set_user_lock_location_ = (void (*)( 3542 kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); 3543 3544 __kmp_get_user_lock_flags_ = 3545 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); 3546 3547 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( 3548 &__kmp_set_queuing_lock_flags); 3549 } break; 3550 3551 #if KMP_USE_ADAPTIVE_LOCKS 3552 case lk_adaptive: { 3553 __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t); 3554 __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t); 3555 3556 __kmp_get_user_lock_owner_ = 3557 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); 3558 3559 if (__kmp_env_consistency_check) { 3560 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); 3561 } else { 3562 KMP_BIND_USER_LOCK(adaptive); 3563 } 3564 3565 __kmp_destroy_user_lock_ = 3566 (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock); 3567 3568 __kmp_is_user_lock_initialized_ = 3569 (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); 3570 3571 __kmp_get_user_lock_location_ = 3572 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); 3573 3574 
__kmp_set_user_lock_location_ = (void (*)( 3575 kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); 3576 3577 __kmp_get_user_lock_flags_ = 3578 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); 3579 3580 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( 3581 &__kmp_set_queuing_lock_flags); 3582 3583 } break; 3584 #endif // KMP_USE_ADAPTIVE_LOCKS 3585 3586 case lk_drdpa: { 3587 __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t); 3588 __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t); 3589 3590 __kmp_get_user_lock_owner_ = 3591 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner); 3592 3593 if (__kmp_env_consistency_check) { 3594 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa); 3595 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa); 3596 } else { 3597 KMP_BIND_USER_LOCK(drdpa); 3598 KMP_BIND_NESTED_USER_LOCK(drdpa); 3599 } 3600 3601 __kmp_destroy_user_lock_ = 3602 (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock); 3603 3604 __kmp_is_user_lock_initialized_ = 3605 (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized); 3606 3607 __kmp_get_user_lock_location_ = 3608 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location); 3609 3610 __kmp_set_user_lock_location_ = (void (*)( 3611 kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location); 3612 3613 __kmp_get_user_lock_flags_ = 3614 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags); 3615 3616 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( 3617 &__kmp_set_drdpa_lock_flags); 3618 } break; 3619 } 3620 } 3621 3622 // ---------------------------------------------------------------------------- 3623 // User lock table & lock allocation 3624 3625 kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL}; 3626 kmp_user_lock_p __kmp_lock_pool = NULL; 3627 3628 // Lock block-allocation support. 
3629 kmp_block_of_locks *__kmp_lock_blocks = NULL; 3630 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3631 3632 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) { 3633 // Assume that kmp_global_lock is held upon entry/exit. 3634 kmp_lock_index_t index; 3635 if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) { 3636 kmp_lock_index_t size; 3637 kmp_user_lock_p *table; 3638 // Reallocate lock table. 3639 if (__kmp_user_lock_table.allocated == 0) { 3640 size = 1024; 3641 } else { 3642 size = __kmp_user_lock_table.allocated * 2; 3643 } 3644 table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size); 3645 KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1, 3646 sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1)); 3647 table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table; 3648 // We cannot free the previous table now, since it may be in use by other 3649 // threads. So save the pointer to the previous table in in the first 3650 // element of the new table. All the tables will be organized into a list, 3651 // and could be freed when library shutting down. 3652 __kmp_user_lock_table.table = table; 3653 __kmp_user_lock_table.allocated = size; 3654 } 3655 KMP_DEBUG_ASSERT(__kmp_user_lock_table.used < 3656 __kmp_user_lock_table.allocated); 3657 index = __kmp_user_lock_table.used; 3658 __kmp_user_lock_table.table[index] = lck; 3659 ++__kmp_user_lock_table.used; 3660 return index; 3661 } 3662 3663 static kmp_user_lock_p __kmp_lock_block_allocate() { 3664 // Assume that kmp_global_lock is held upon entry/exit. 3665 static int last_index = 0; 3666 if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) { 3667 // Restart the index. 3668 last_index = 0; 3669 // Need to allocate a new block. 
3670 KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0); 3671 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; 3672 char *buffer = 3673 (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks)); 3674 // Set up the new block. 3675 kmp_block_of_locks *new_block = 3676 (kmp_block_of_locks *)(&buffer[space_for_locks]); 3677 new_block->next_block = __kmp_lock_blocks; 3678 new_block->locks = (void *)buffer; 3679 // Publish the new block. 3680 KMP_MB(); 3681 __kmp_lock_blocks = new_block; 3682 } 3683 kmp_user_lock_p ret = (kmp_user_lock_p)(&( 3684 ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size])); 3685 last_index++; 3686 return ret; 3687 } 3688 3689 // Get memory for a lock. It may be freshly allocated memory or reused memory 3690 // from lock pool. 3691 kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid, 3692 kmp_lock_flags_t flags) { 3693 kmp_user_lock_p lck; 3694 kmp_lock_index_t index; 3695 KMP_DEBUG_ASSERT(user_lock); 3696 3697 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3698 3699 if (__kmp_lock_pool == NULL) { 3700 // Lock pool is empty. Allocate new memory. 3701 3702 // ANNOTATION: Found no good way to express the syncronisation 3703 // between allocation and usage, so ignore the allocation 3704 ANNOTATE_IGNORE_WRITES_BEGIN(); 3705 if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point. 3706 lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size); 3707 } else { 3708 lck = __kmp_lock_block_allocate(); 3709 } 3710 ANNOTATE_IGNORE_WRITES_END(); 3711 3712 // Insert lock in the table so that it can be freed in __kmp_cleanup, 3713 // and debugger has info on all allocated locks. 3714 index = __kmp_lock_table_insert(lck); 3715 } else { 3716 // Pick up lock from pool. 
3717 lck = __kmp_lock_pool; 3718 index = __kmp_lock_pool->pool.index; 3719 __kmp_lock_pool = __kmp_lock_pool->pool.next; 3720 } 3721 3722 // We could potentially differentiate between nested and regular locks 3723 // here, and do the lock table lookup for regular locks only. 3724 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3725 *((kmp_lock_index_t *)user_lock) = index; 3726 } else { 3727 *((kmp_user_lock_p *)user_lock) = lck; 3728 } 3729 3730 // mark the lock if it is critical section lock. 3731 __kmp_set_user_lock_flags(lck, flags); 3732 3733 __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper 3734 3735 return lck; 3736 } 3737 3738 // Put lock's memory to pool for reusing. 3739 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid, 3740 kmp_user_lock_p lck) { 3741 KMP_DEBUG_ASSERT(user_lock != NULL); 3742 KMP_DEBUG_ASSERT(lck != NULL); 3743 3744 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3745 3746 lck->pool.next = __kmp_lock_pool; 3747 __kmp_lock_pool = lck; 3748 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3749 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock); 3750 KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used); 3751 lck->pool.index = index; 3752 } 3753 3754 __kmp_release_lock(&__kmp_global_lock, gtid); 3755 } 3756 3757 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) { 3758 kmp_user_lock_p lck = NULL; 3759 3760 if (__kmp_env_consistency_check) { 3761 if (user_lock == NULL) { 3762 KMP_FATAL(LockIsUninitialized, func); 3763 } 3764 } 3765 3766 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3767 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock); 3768 if (__kmp_env_consistency_check) { 3769 if (!(0 < index && index < __kmp_user_lock_table.used)) { 3770 KMP_FATAL(LockIsUninitialized, func); 3771 } 3772 } 3773 KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used); 3774 KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0); 3775 lck = __kmp_user_lock_table.table[index]; 3776 } else { 3777 
lck = *((kmp_user_lock_p *)user_lock); 3778 } 3779 3780 if (__kmp_env_consistency_check) { 3781 if (lck == NULL) { 3782 KMP_FATAL(LockIsUninitialized, func); 3783 } 3784 } 3785 3786 return lck; 3787 } 3788 3789 void __kmp_cleanup_user_locks(void) { 3790 // Reset lock pool. Don't worry about lock in the pool--we will free them when 3791 // iterating through lock table (it includes all the locks, dead or alive). 3792 __kmp_lock_pool = NULL; 3793 3794 #define IS_CRITICAL(lck) \ 3795 ((__kmp_get_user_lock_flags_ != NULL) && \ 3796 ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section)) 3797 3798 // Loop through lock table, free all locks. 3799 // Do not free item [0], it is reserved for lock tables list. 3800 // 3801 // FIXME - we are iterating through a list of (pointers to) objects of type 3802 // union kmp_user_lock, but we have no way of knowing whether the base type is 3803 // currently "pool" or whatever the global user lock type is. 3804 // 3805 // We are relying on the fact that for all of the user lock types 3806 // (except "tas"), the first field in the lock struct is the "initialized" 3807 // field, which is set to the address of the lock object itself when 3808 // the lock is initialized. When the union is of type "pool", the 3809 // first field is a pointer to the next object in the free list, which 3810 // will not be the same address as the object itself. 3811 // 3812 // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail 3813 // for "pool" objects on the free list. This must happen as the "location" 3814 // field of real user locks overlaps the "index" field of "pool" objects. 3815 // 3816 // It would be better to run through the free list, and remove all "pool" 3817 // objects from the lock table before executing this loop. 
However, 3818 // "pool" objects do not always have their index field set (only on 3819 // lin_32e), and I don't want to search the lock table for the address 3820 // of every "pool" object on the free list. 3821 while (__kmp_user_lock_table.used > 1) { 3822 const ident *loc; 3823 3824 // reduce __kmp_user_lock_table.used before freeing the lock, 3825 // so that state of locks is consistent 3826 kmp_user_lock_p lck = 3827 __kmp_user_lock_table.table[--__kmp_user_lock_table.used]; 3828 3829 if ((__kmp_is_user_lock_initialized_ != NULL) && 3830 (*__kmp_is_user_lock_initialized_)(lck)) { 3831 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND 3832 // it is NOT a critical section (user is not responsible for destroying 3833 // criticals) AND we know source location to report. 3834 if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) && 3835 ((loc = __kmp_get_user_lock_location(lck)) != NULL) && 3836 (loc->psource != NULL)) { 3837 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0); 3838 KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line); 3839 __kmp_str_loc_free(&str_loc); 3840 } 3841 3842 #ifdef KMP_DEBUG 3843 if (IS_CRITICAL(lck)) { 3844 KA_TRACE( 3845 20, 3846 ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", 3847 lck, *(void **)lck)); 3848 } else { 3849 KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, 3850 *(void **)lck)); 3851 } 3852 #endif // KMP_DEBUG 3853 3854 // Cleanup internal lock dynamic resources (for drdpa locks particularly). 3855 __kmp_destroy_user_lock(lck); 3856 } 3857 3858 // Free the lock if block allocation of locks is not used. 3859 if (__kmp_lock_blocks == NULL) { 3860 __kmp_free(lck); 3861 } 3862 } 3863 3864 #undef IS_CRITICAL 3865 3866 // delete lock table(s). 
3867 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; 3868 __kmp_user_lock_table.table = NULL; 3869 __kmp_user_lock_table.allocated = 0; 3870 3871 while (table_ptr != NULL) { 3872 // In the first element we saved the pointer to the previous 3873 // (smaller) lock table. 3874 kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]); 3875 __kmp_free(table_ptr); 3876 table_ptr = next; 3877 } 3878 3879 // Free buffers allocated for blocks of locks. 3880 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; 3881 __kmp_lock_blocks = NULL; 3882 3883 while (block_ptr != NULL) { 3884 kmp_block_of_locks_t *next = block_ptr->next_block; 3885 __kmp_free(block_ptr->locks); 3886 // *block_ptr itself was allocated at the end of the locks vector. 3887 block_ptr = next; 3888 } 3889 3890 TCW_4(__kmp_init_user_locks, FALSE); 3891 } 3892 3893 #endif // KMP_USE_DYNAMIC_LOCK 3894