/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but it causes compatibility problems on
// different Linux* OS distributions: some require that you include
// <pci/types.h> first, and others break when you try to include it. Since all
// we need is the two macros below (which are part of the kernel ABI, so can't
// change), we just define the constants here and don't include <futex.h>.
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic wraps properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);

  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ?
__kmp_avail_proc : __kmp_xproc)) { 105 KMP_YIELD(TRUE); 106 } else { 107 KMP_YIELD_SPIN(spins); 108 } 109 110 kmp_backoff_t backoff = __kmp_spin_backoff_params; 111 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free || 112 !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { 113 __kmp_spin_backoff(&backoff); 114 if (TCR_4(__kmp_nth) > 115 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 116 KMP_YIELD(TRUE); 117 } else { 118 KMP_YIELD_SPIN(spins); 119 } 120 } 121 KMP_FSYNC_ACQUIRED(lck); 122 return KMP_LOCK_ACQUIRED_FIRST; 123 } 124 125 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 126 int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid); 127 ANNOTATE_TAS_ACQUIRED(lck); 128 return retval; 129 } 130 131 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck, 132 kmp_int32 gtid) { 133 char const *const func = "omp_set_lock"; 134 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 135 __kmp_is_tas_lock_nestable(lck)) { 136 KMP_FATAL(LockNestableUsedAsSimple, func); 137 } 138 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) { 139 KMP_FATAL(LockIsAlreadyOwned, func); 140 } 141 return __kmp_acquire_tas_lock(lck, gtid); 142 } 143 144 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 145 kmp_int32 tas_free = KMP_LOCK_FREE(tas); 146 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); 147 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free && 148 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) { 149 KMP_FSYNC_ACQUIRED(lck); 150 return TRUE; 151 } 152 return FALSE; 153 } 154 155 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck, 156 kmp_int32 gtid) { 157 char const *const func = "omp_test_lock"; 158 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 159 __kmp_is_tas_lock_nestable(lck)) { 160 KMP_FATAL(LockNestableUsedAsSimple, func); 161 } 162 return __kmp_test_tas_lock(lck, gtid); 163 } 164 165 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 166 KMP_MB(); /* Flush all pending memory write invalidates. */ 167 168 KMP_FSYNC_RELEASING(lck); 169 ANNOTATE_TAS_RELEASED(lck); 170 KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas)); 171 KMP_MB(); /* Flush all pending memory write invalidates. */ 172 173 KMP_YIELD(TCR_4(__kmp_nth) > 174 (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); 175 return KMP_LOCK_RELEASED; 176 } 177 178 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck, 179 kmp_int32 gtid) { 180 char const *const func = "omp_unset_lock"; 181 KMP_MB(); /* in case another processor initialized lock */ 182 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 183 __kmp_is_tas_lock_nestable(lck)) { 184 KMP_FATAL(LockNestableUsedAsSimple, func); 185 } 186 if (__kmp_get_tas_lock_owner(lck) == -1) { 187 KMP_FATAL(LockUnsettingFree, func); 188 } 189 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) && 190 (__kmp_get_tas_lock_owner(lck) != gtid)) { 191 KMP_FATAL(LockUnsettingSetByAnother, func); 192 } 193 return __kmp_release_tas_lock(lck, gtid); 194 } 195 196 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) { 197 lck->lk.poll = KMP_LOCK_FREE(tas); 198 } 199 200 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; } 201 202 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) { 203 char const *const func = "omp_destroy_lock"; 204 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 205 __kmp_is_tas_lock_nestable(lck)) { 206 KMP_FATAL(LockNestableUsedAsSimple, func); 207 } 208 if (__kmp_get_tas_lock_owner(lck) != -1) { 209 KMP_FATAL(LockStillOwned, func); 210 } 211 __kmp_destroy_tas_lock(lck); 212 } 213 214 // nested test and set locks 215 216 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 217 KMP_DEBUG_ASSERT(gtid >= 0); 218 219 if (__kmp_get_tas_lock_owner(lck) == gtid) { 220 lck->lk.depth_locked += 1; 221 return KMP_LOCK_ACQUIRED_NEXT; 222 } else { 223 __kmp_acquire_tas_lock_timed_template(lck, gtid); 224 ANNOTATE_TAS_ACQUIRED(lck); 225 lck->lk.depth_locked = 1; 226 return KMP_LOCK_ACQUIRED_FIRST; 227 } 228 } 229 230 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 231 kmp_int32 gtid) { 232 char const *const func = "omp_set_nest_lock"; 233 if (!__kmp_is_tas_lock_nestable(lck)) { 234 KMP_FATAL(LockSimpleUsedAsNestable, func); 235 } 236 return __kmp_acquire_nested_tas_lock(lck, gtid); 237 } 238 239 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 240 int retval; 241 242 KMP_DEBUG_ASSERT(gtid >= 0); 243 244 if (__kmp_get_tas_lock_owner(lck) == gtid) { 245 retval = ++lck->lk.depth_locked; 246 } else if (!__kmp_test_tas_lock(lck, gtid)) { 247 retval = 0; 248 } else { 249 KMP_MB(); 250 retval = lck->lk.depth_locked = 1; 251 } 252 return retval; 253 } 254 255 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 256 kmp_int32 gtid) { 257 char const *const func = "omp_test_nest_lock"; 258 if (!__kmp_is_tas_lock_nestable(lck)) { 259 KMP_FATAL(LockSimpleUsedAsNestable, func); 260 } 261 return __kmp_test_nested_tas_lock(lck, gtid); 262 } 263 264 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 265 KMP_DEBUG_ASSERT(gtid >= 0); 266 267 KMP_MB(); 268 if (--(lck->lk.depth_locked) == 0) { 269 __kmp_release_tas_lock(lck, gtid); 270 return KMP_LOCK_RELEASED; 271 } 272 return KMP_LOCK_STILL_HELD; 273 } 274 275 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 276 kmp_int32 gtid) { 277 char const *const func = "omp_unset_nest_lock"; 278 KMP_MB(); /* in case another processor initialized lock */ 279 if (!__kmp_is_tas_lock_nestable(lck)) { 280 KMP_FATAL(LockSimpleUsedAsNestable, func); 281 } 282 if (__kmp_get_tas_lock_owner(lck) == -1) { 283 KMP_FATAL(LockUnsettingFree, func); 284 } 285 if (__kmp_get_tas_lock_owner(lck) != gtid) { 286 KMP_FATAL(LockUnsettingSetByAnother, func); 
287 } 288 return __kmp_release_nested_tas_lock(lck, gtid); 289 } 290 291 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) { 292 __kmp_init_tas_lock(lck); 293 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 294 } 295 296 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) { 297 __kmp_destroy_tas_lock(lck); 298 lck->lk.depth_locked = 0; 299 } 300 301 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { 302 char const *const func = "omp_destroy_nest_lock"; 303 if (!__kmp_is_tas_lock_nestable(lck)) { 304 KMP_FATAL(LockSimpleUsedAsNestable, func); 305 } 306 if (__kmp_get_tas_lock_owner(lck) != -1) { 307 KMP_FATAL(LockStillOwned, func); 308 } 309 __kmp_destroy_nested_tas_lock(lck); 310 } 311 312 #if KMP_USE_FUTEX 313 314 /* ------------------------------------------------------------------------ */ 315 /* futex locks */ 316 317 // futex locks are really just test and set locks, with a different method 318 // of handling contention. They take the same amount of space as test and 319 // set locks, and are allocated the same way (i.e. use the area allocated by 320 // the compiler for non-nested locks / allocate nested locks on the heap). 321 322 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) { 323 return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1; 324 } 325 326 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) { 327 return lck->lk.depth_locked != -1; 328 } 329 330 __forceinline static int 331 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) { 332 kmp_int32 gtid_code = (gtid + 1) << 1; 333 334 KMP_MB(); 335 336 #ifdef USE_LOCK_PROFILE 337 kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll)); 338 if ((curr != 0) && (curr != gtid_code)) 339 __kmp_printf("LOCK CONTENTION: %p\n", lck); 340 /* else __kmp_printf( "." );*/ 341 #endif /* USE_LOCK_PROFILE */ 342 343 KMP_FSYNC_PREPARE(lck); 344 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", 345 lck, lck->lk.poll, gtid)); 346 347 kmp_int32 poll_val; 348 349 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( 350 &(lck->lk.poll), KMP_LOCK_FREE(futex), 351 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { 352 353 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; 354 KA_TRACE( 355 1000, 356 ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", 357 lck, gtid, poll_val, cond)); 358 359 // NOTE: if you try to use the following condition for this branch 360 // 361 // if ( poll_val & 1 == 0 ) 362 // 363 // Then the 12.0 compiler has a bug where the following block will 364 // always be skipped, regardless of the value of the LSB of poll_val. 365 if (!cond) { 366 // Try to set the lsb in the poll to indicate to the owner 367 // thread that they need to wake this thread up. 
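    // (If the compare-and-store below fails, the poll word changed under us --
    //  e.g. the owner released the lock -- so re-read it and retry the outer
    //  loop rather than going to sleep on a stale value.)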
368 if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val, 369 poll_val | KMP_LOCK_BUSY(1, futex))) { 370 KA_TRACE( 371 1000, 372 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", 373 lck, lck->lk.poll, gtid)); 374 continue; 375 } 376 poll_val |= KMP_LOCK_BUSY(1, futex); 377 378 KA_TRACE(1000, 379 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck, 380 lck->lk.poll, gtid)); 381 } 382 383 KA_TRACE( 384 1000, 385 ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", 386 lck, gtid, poll_val)); 387 388 kmp_int32 rc; 389 if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL, 390 NULL, 0)) != 0) { 391 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) " 392 "failed (rc=%d errno=%d)\n", 393 lck, gtid, poll_val, rc, errno)); 394 continue; 395 } 396 397 KA_TRACE(1000, 398 ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", 399 lck, gtid, poll_val)); 400 // This thread has now done a successful futex wait call and was entered on 401 // the OS futex queue. We must now perform a futex wake call when releasing 402 // the lock, as we have no idea how many other threads are in the queue. 403 gtid_code |= 1; 404 } 405 406 KMP_FSYNC_ACQUIRED(lck); 407 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, 408 lck->lk.poll, gtid)); 409 return KMP_LOCK_ACQUIRED_FIRST; 410 } 411 412 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 413 int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid); 414 ANNOTATE_FUTEX_ACQUIRED(lck); 415 return retval; 416 } 417 418 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck, 419 kmp_int32 gtid) { 420 char const *const func = "omp_set_lock"; 421 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 422 __kmp_is_futex_lock_nestable(lck)) { 423 KMP_FATAL(LockNestableUsedAsSimple, func); 424 } 425 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) { 426 KMP_FATAL(LockIsAlreadyOwned, func); 427 } 428 return __kmp_acquire_futex_lock(lck, gtid); 429 } 430 431 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 432 if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex), 433 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { 434 KMP_FSYNC_ACQUIRED(lck); 435 return TRUE; 436 } 437 return FALSE; 438 } 439 440 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck, 441 kmp_int32 gtid) { 442 char const *const func = "omp_test_lock"; 443 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 444 __kmp_is_futex_lock_nestable(lck)) { 445 KMP_FATAL(LockNestableUsedAsSimple, func); 446 } 447 return __kmp_test_futex_lock(lck, gtid); 448 } 449 450 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 451 KMP_MB(); /* Flush all pending memory write invalidates. */ 452 453 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", 454 lck, lck->lk.poll, gtid)); 455 456 KMP_FSYNC_RELEASING(lck); 457 ANNOTATE_FUTEX_RELEASED(lck); 458 459 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex)); 460 461 KA_TRACE(1000, 462 ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", 463 lck, gtid, poll_val)); 464 465 if (KMP_LOCK_STRIP(poll_val) & 1) { 466 KA_TRACE(1000, 467 ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", 468 lck, gtid)); 469 syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), 470 NULL, NULL, 0); 471 } 472 473 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 474 475 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, 476 lck->lk.poll, gtid)); 477 478 KMP_YIELD(TCR_4(__kmp_nth) > 479 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 480 return KMP_LOCK_RELEASED; 481 } 482 483 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck, 484 kmp_int32 gtid) { 485 char const *const func = "omp_unset_lock"; 486 KMP_MB(); /* in case another processor initialized lock */ 487 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 488 __kmp_is_futex_lock_nestable(lck)) { 489 KMP_FATAL(LockNestableUsedAsSimple, func); 490 } 491 if (__kmp_get_futex_lock_owner(lck) == -1) { 492 KMP_FATAL(LockUnsettingFree, func); 493 } 494 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) && 495 (__kmp_get_futex_lock_owner(lck) != gtid)) { 496 KMP_FATAL(LockUnsettingSetByAnother, func); 497 } 498 return __kmp_release_futex_lock(lck, gtid); 499 } 500 501 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) { 502 TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex)); 503 } 504 505 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; } 506 507 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) { 508 char const *const func = "omp_destroy_lock"; 509 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 510 __kmp_is_futex_lock_nestable(lck)) { 511 KMP_FATAL(LockNestableUsedAsSimple, func); 512 } 513 if (__kmp_get_futex_lock_owner(lck) != -1) { 514 KMP_FATAL(LockStillOwned, func); 515 } 516 __kmp_destroy_futex_lock(lck); 517 } 518 519 // nested futex locks 520 521 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 522 KMP_DEBUG_ASSERT(gtid >= 0); 523 524 if (__kmp_get_futex_lock_owner(lck) == gtid) { 525 lck->lk.depth_locked += 1; 526 return KMP_LOCK_ACQUIRED_NEXT; 527 } else { 528 __kmp_acquire_futex_lock_timed_template(lck, gtid); 529 ANNOTATE_FUTEX_ACQUIRED(lck); 530 lck->lk.depth_locked = 1; 531 return KMP_LOCK_ACQUIRED_FIRST; 532 } 533 } 534 535 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 536 kmp_int32 gtid) { 537 char const *const func = "omp_set_nest_lock"; 538 if (!__kmp_is_futex_lock_nestable(lck)) { 539 KMP_FATAL(LockSimpleUsedAsNestable, func); 540 } 541 return __kmp_acquire_nested_futex_lock(lck, gtid); 542 } 543 544 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 545 int retval; 546 547 KMP_DEBUG_ASSERT(gtid >= 0); 548 549 if (__kmp_get_futex_lock_owner(lck) == gtid) { 550 retval = ++lck->lk.depth_locked; 551 } else if (!__kmp_test_futex_lock(lck, gtid)) { 552 retval = 0; 553 } else { 554 KMP_MB(); 555 retval = lck->lk.depth_locked = 1; 556 } 557 return retval; 558 } 559 560 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 561 kmp_int32 gtid) { 562 char const *const func = "omp_test_nest_lock"; 563 if (!__kmp_is_futex_lock_nestable(lck)) { 564 KMP_FATAL(LockSimpleUsedAsNestable, func); 565 } 566 return __kmp_test_nested_futex_lock(lck, gtid); 567 } 568 569 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 570 KMP_DEBUG_ASSERT(gtid >= 0); 571 572 KMP_MB(); 573 if (--(lck->lk.depth_locked) == 0) { 574 __kmp_release_futex_lock(lck, gtid); 575 return KMP_LOCK_RELEASED; 576 } 577 return KMP_LOCK_STILL_HELD; 578 } 579 580 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 581 kmp_int32 gtid) { 582 char const *const func = "omp_unset_nest_lock"; 583 KMP_MB(); /* in case another processor initialized lock */ 584 if 
(!__kmp_is_futex_lock_nestable(lck)) { 585 KMP_FATAL(LockSimpleUsedAsNestable, func); 586 } 587 if (__kmp_get_futex_lock_owner(lck) == -1) { 588 KMP_FATAL(LockUnsettingFree, func); 589 } 590 if (__kmp_get_futex_lock_owner(lck) != gtid) { 591 KMP_FATAL(LockUnsettingSetByAnother, func); 592 } 593 return __kmp_release_nested_futex_lock(lck, gtid); 594 } 595 596 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) { 597 __kmp_init_futex_lock(lck); 598 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 599 } 600 601 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) { 602 __kmp_destroy_futex_lock(lck); 603 lck->lk.depth_locked = 0; 604 } 605 606 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { 607 char const *const func = "omp_destroy_nest_lock"; 608 if (!__kmp_is_futex_lock_nestable(lck)) { 609 KMP_FATAL(LockSimpleUsedAsNestable, func); 610 } 611 if (__kmp_get_futex_lock_owner(lck) != -1) { 612 KMP_FATAL(LockStillOwned, func); 613 } 614 __kmp_destroy_nested_futex_lock(lck); 615 } 616 617 #endif // KMP_USE_FUTEX 618 619 /* ------------------------------------------------------------------------ */ 620 /* ticket (bakery) locks */ 621 622 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) { 623 return std::atomic_load_explicit(&lck->lk.owner_id, 624 std::memory_order_relaxed) - 625 1; 626 } 627 628 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) { 629 return std::atomic_load_explicit(&lck->lk.depth_locked, 630 std::memory_order_relaxed) != -1; 631 } 632 633 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) { 634 return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving, 635 std::memory_order_acquire) == my_ticket; 636 } 637 638 __forceinline static int 639 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck, 640 kmp_int32 gtid) { 641 kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( 642 &lck->lk.next_ticket, 1U, std::memory_order_relaxed); 643 644 #ifdef USE_LOCK_PROFILE 645 if (std::atomic_load_explicit(&lck->lk.now_serving, 646 std::memory_order_relaxed) != my_ticket) 647 __kmp_printf("LOCK CONTENTION: %p\n", lck); 648 /* else __kmp_printf( "." 
);*/ 649 #endif /* USE_LOCK_PROFILE */ 650 651 if (std::atomic_load_explicit(&lck->lk.now_serving, 652 std::memory_order_acquire) == my_ticket) { 653 return KMP_LOCK_ACQUIRED_FIRST; 654 } 655 KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck); 656 return KMP_LOCK_ACQUIRED_FIRST; 657 } 658 659 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 660 int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid); 661 ANNOTATE_TICKET_ACQUIRED(lck); 662 return retval; 663 } 664 665 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 666 kmp_int32 gtid) { 667 char const *const func = "omp_set_lock"; 668 669 if (!std::atomic_load_explicit(&lck->lk.initialized, 670 std::memory_order_relaxed)) { 671 KMP_FATAL(LockIsUninitialized, func); 672 } 673 if (lck->lk.self != lck) { 674 KMP_FATAL(LockIsUninitialized, func); 675 } 676 if (__kmp_is_ticket_lock_nestable(lck)) { 677 KMP_FATAL(LockNestableUsedAsSimple, func); 678 } 679 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) { 680 KMP_FATAL(LockIsAlreadyOwned, func); 681 } 682 683 __kmp_acquire_ticket_lock(lck, gtid); 684 685 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 686 std::memory_order_relaxed); 687 return KMP_LOCK_ACQUIRED_FIRST; 688 } 689 690 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 691 kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket, 692 std::memory_order_relaxed); 693 694 if (std::atomic_load_explicit(&lck->lk.now_serving, 695 std::memory_order_relaxed) == my_ticket) { 696 kmp_uint32 next_ticket = my_ticket + 1; 697 if (std::atomic_compare_exchange_strong_explicit( 698 &lck->lk.next_ticket, &my_ticket, next_ticket, 699 std::memory_order_acquire, std::memory_order_acquire)) { 700 return TRUE; 701 } 702 } 703 return FALSE; 704 } 705 706 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 707 kmp_int32 gtid) { 708 char const *const func = "omp_test_lock"; 709 710 if (!std::atomic_load_explicit(&lck->lk.initialized, 711 std::memory_order_relaxed)) { 712 KMP_FATAL(LockIsUninitialized, func); 713 } 714 if (lck->lk.self != lck) { 715 KMP_FATAL(LockIsUninitialized, func); 716 } 717 if (__kmp_is_ticket_lock_nestable(lck)) { 718 KMP_FATAL(LockNestableUsedAsSimple, func); 719 } 720 721 int retval = __kmp_test_ticket_lock(lck, gtid); 722 723 if (retval) { 724 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 725 std::memory_order_relaxed); 726 } 727 return retval; 728 } 729 730 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 731 kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket, 732 std::memory_order_relaxed) - 733 std::atomic_load_explicit(&lck->lk.now_serving, 734 std::memory_order_relaxed); 735 736 ANNOTATE_TICKET_RELEASED(lck); 737 std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U, 738 std::memory_order_release); 739 740 KMP_YIELD(distance > 741 (kmp_uint32)(__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); 742 return KMP_LOCK_RELEASED; 743 } 744 745 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 746 kmp_int32 gtid) { 747 char const *const func = "omp_unset_lock"; 748 749 if (!std::atomic_load_explicit(&lck->lk.initialized, 750 std::memory_order_relaxed)) { 751 KMP_FATAL(LockIsUninitialized, func); 752 } 753 if (lck->lk.self != lck) { 754 KMP_FATAL(LockIsUninitialized, func); 755 } 756 if (__kmp_is_ticket_lock_nestable(lck)) { 757 KMP_FATAL(LockNestableUsedAsSimple, func); 758 } 759 if (__kmp_get_ticket_lock_owner(lck) == -1) { 760 KMP_FATAL(LockUnsettingFree, func); 761 } 762 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) && 763 (__kmp_get_ticket_lock_owner(lck) != gtid)) { 764 KMP_FATAL(LockUnsettingSetByAnother, func); 765 } 766 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 767 return __kmp_release_ticket_lock(lck, gtid); 768 } 769 770 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) { 771 lck->lk.location = NULL; 772 lck->lk.self = lck; 773 std::atomic_store_explicit(&lck->lk.next_ticket, 0U, 774 std::memory_order_relaxed); 775 std::atomic_store_explicit(&lck->lk.now_serving, 0U, 776 std::memory_order_relaxed); 777 std::atomic_store_explicit( 778 &lck->lk.owner_id, 0, 779 std::memory_order_relaxed); // no thread owns the lock. 780 std::atomic_store_explicit( 781 &lck->lk.depth_locked, -1, 782 std::memory_order_relaxed); // -1 => not a nested lock. 783 std::atomic_store_explicit(&lck->lk.initialized, true, 784 std::memory_order_release); 785 } 786 787 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) { 788 std::atomic_store_explicit(&lck->lk.initialized, false, 789 std::memory_order_release); 790 lck->lk.self = NULL; 791 lck->lk.location = NULL; 792 std::atomic_store_explicit(&lck->lk.next_ticket, 0U, 793 std::memory_order_relaxed); 794 std::atomic_store_explicit(&lck->lk.now_serving, 0U, 795 std::memory_order_relaxed); 796 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 797 std::atomic_store_explicit(&lck->lk.depth_locked, -1, 798 std::memory_order_relaxed); 799 } 800 801 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 802 char const *const func = "omp_destroy_lock"; 803 804 if (!std::atomic_load_explicit(&lck->lk.initialized, 805 std::memory_order_relaxed)) { 806 KMP_FATAL(LockIsUninitialized, func); 807 } 808 if (lck->lk.self != lck) { 809 KMP_FATAL(LockIsUninitialized, func); 810 } 811 if (__kmp_is_ticket_lock_nestable(lck)) { 812 KMP_FATAL(LockNestableUsedAsSimple, func); 813 } 814 if (__kmp_get_ticket_lock_owner(lck) != -1) { 815 KMP_FATAL(LockStillOwned, func); 816 } 817 __kmp_destroy_ticket_lock(lck); 818 } 819 820 // nested ticket locks 821 822 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 823 KMP_DEBUG_ASSERT(gtid >= 0); 824 825 if (__kmp_get_ticket_lock_owner(lck) == gtid) { 826 std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, 827 std::memory_order_relaxed); 828 return KMP_LOCK_ACQUIRED_NEXT; 829 } else { 830 __kmp_acquire_ticket_lock_timed_template(lck, gtid); 831 ANNOTATE_TICKET_ACQUIRED(lck); 832 std::atomic_store_explicit(&lck->lk.depth_locked, 1, 833 std::memory_order_relaxed); 834 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 835 std::memory_order_relaxed); 836 return KMP_LOCK_ACQUIRED_FIRST; 837 } 838 } 839 840 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 841 kmp_int32 gtid) { 842 char const *const func = 
"omp_set_nest_lock"; 843 844 if (!std::atomic_load_explicit(&lck->lk.initialized, 845 std::memory_order_relaxed)) { 846 KMP_FATAL(LockIsUninitialized, func); 847 } 848 if (lck->lk.self != lck) { 849 KMP_FATAL(LockIsUninitialized, func); 850 } 851 if (!__kmp_is_ticket_lock_nestable(lck)) { 852 KMP_FATAL(LockSimpleUsedAsNestable, func); 853 } 854 return __kmp_acquire_nested_ticket_lock(lck, gtid); 855 } 856 857 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 858 int retval; 859 860 KMP_DEBUG_ASSERT(gtid >= 0); 861 862 if (__kmp_get_ticket_lock_owner(lck) == gtid) { 863 retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, 864 std::memory_order_relaxed) + 865 1; 866 } else if (!__kmp_test_ticket_lock(lck, gtid)) { 867 retval = 0; 868 } else { 869 std::atomic_store_explicit(&lck->lk.depth_locked, 1, 870 std::memory_order_relaxed); 871 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 872 std::memory_order_relaxed); 873 retval = 1; 874 } 875 return retval; 876 } 877 878 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 879 kmp_int32 gtid) { 880 char const *const func = "omp_test_nest_lock"; 881 882 if (!std::atomic_load_explicit(&lck->lk.initialized, 883 std::memory_order_relaxed)) { 884 KMP_FATAL(LockIsUninitialized, func); 885 } 886 if (lck->lk.self != lck) { 887 KMP_FATAL(LockIsUninitialized, func); 888 } 889 if (!__kmp_is_ticket_lock_nestable(lck)) { 890 KMP_FATAL(LockSimpleUsedAsNestable, func); 891 } 892 return __kmp_test_nested_ticket_lock(lck, gtid); 893 } 894 895 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 896 KMP_DEBUG_ASSERT(gtid >= 0); 897 898 if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1, 899 std::memory_order_relaxed) - 900 1) == 0) { 901 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 902 __kmp_release_ticket_lock(lck, gtid); 903 return KMP_LOCK_RELEASED; 904 } 905 return KMP_LOCK_STILL_HELD; 906 } 907 908 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 909 kmp_int32 gtid) { 910 char const *const func = "omp_unset_nest_lock"; 911 912 if (!std::atomic_load_explicit(&lck->lk.initialized, 913 std::memory_order_relaxed)) { 914 KMP_FATAL(LockIsUninitialized, func); 915 } 916 if (lck->lk.self != lck) { 917 KMP_FATAL(LockIsUninitialized, func); 918 } 919 if (!__kmp_is_ticket_lock_nestable(lck)) { 920 KMP_FATAL(LockSimpleUsedAsNestable, func); 921 } 922 if (__kmp_get_ticket_lock_owner(lck) == -1) { 923 KMP_FATAL(LockUnsettingFree, func); 924 } 925 if (__kmp_get_ticket_lock_owner(lck) != gtid) { 926 KMP_FATAL(LockUnsettingSetByAnother, func); 927 } 928 return __kmp_release_nested_ticket_lock(lck, gtid); 929 } 930 931 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) { 932 __kmp_init_ticket_lock(lck); 933 std::atomic_store_explicit(&lck->lk.depth_locked, 0, 934 std::memory_order_relaxed); 935 // >= 0 for nestable locks, -1 for simple locks 936 } 937 938 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) { 939 __kmp_destroy_ticket_lock(lck); 940 std::atomic_store_explicit(&lck->lk.depth_locked, 0, 941 std::memory_order_relaxed); 942 } 943 944 static void 945 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 946 char const *const func = "omp_destroy_nest_lock"; 947 948 if (!std::atomic_load_explicit(&lck->lk.initialized, 949 std::memory_order_relaxed)) { 950 KMP_FATAL(LockIsUninitialized, func); 951 } 952 if (lck->lk.self != lck) { 953 KMP_FATAL(LockIsUninitialized, 
              func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_ticket_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
 */

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = {0};
static int tc = 0;
#define TRACE_LOCK(X, Y)                                                       \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
#define TRACE_LOCK_T(X, Y, Z)                                                  \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
               Z, Q);

static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
\n"); 1059 1060 i = tc % TRACE_BUF_ELE; 1061 __kmp_printf_no_lock("%s\n", traces[i]); 1062 i = (i + 1) % TRACE_BUF_ELE; 1063 while (i != (tc % TRACE_BUF_ELE)) { 1064 __kmp_printf_no_lock("%s", traces[i]); 1065 i = (i + 1) % TRACE_BUF_ELE; 1066 } 1067 __kmp_printf_no_lock("\n"); 1068 1069 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, " 1070 "next_wait:%d, head_id:%d, tail_id:%d\n", 1071 gtid + 1, this_thr->th.th_spin_here, 1072 this_thr->th.th_next_waiting, head_id, tail_id); 1073 1074 __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id); 1075 1076 if (lck->lk.head_id >= 1) { 1077 t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting; 1078 while (t > 0) { 1079 __kmp_printf_no_lock("-> %d ", t); 1080 t = __kmp_threads[t - 1]->th.th_next_waiting; 1081 } 1082 } 1083 __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id); 1084 __kmp_printf_no_lock("\n\n"); 1085 } 1086 1087 #endif /* DEBUG_QUEUING_LOCKS */ 1088 1089 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) { 1090 return TCR_4(lck->lk.owner_id) - 1; 1091 } 1092 1093 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) { 1094 return lck->lk.depth_locked != -1; 1095 } 1096 1097 /* Acquire a lock using a the queuing lock implementation */ 1098 template <bool takeTime> 1099 /* [TLW] The unused template above is left behind because of what BEB believes 1100 is a potential compiler problem with __forceinline. */ 1101 __forceinline static int 1102 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, 1103 kmp_int32 gtid) { 1104 kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid); 1105 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1106 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; 1107 volatile kmp_uint32 *spin_here_p; 1108 kmp_int32 need_mf = 1; 1109 1110 #if OMPT_SUPPORT 1111 omp_state_t prev_state = omp_state_undefined; 1112 #endif 1113 1114 KA_TRACE(1000, 1115 ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); 1116 1117 KMP_FSYNC_PREPARE(lck); 1118 KMP_DEBUG_ASSERT(this_thr != NULL); 1119 spin_here_p = &this_thr->th.th_spin_here; 1120 1121 #ifdef DEBUG_QUEUING_LOCKS 1122 TRACE_LOCK(gtid + 1, "acq ent"); 1123 if (*spin_here_p) 1124 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1125 if (this_thr->th.th_next_waiting != 0) 1126 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1127 #endif 1128 KMP_DEBUG_ASSERT(!*spin_here_p); 1129 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1130 1131 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to 1132 head_id_p that may follow, not just in execution order, but also in 1133 visibility order. This way, when a releasing thread observes the changes to 1134 the queue by this thread, it can rightly assume that spin_here_p has 1135 already been set to TRUE, so that when it sets spin_here_p to FALSE, it is 1136 not premature. If the releasing thread sets spin_here_p to FALSE before 1137 this thread sets it to TRUE, this thread will hang. 
*/ 1138 *spin_here_p = TRUE; /* before enqueuing to prevent race */ 1139 1140 while (1) { 1141 kmp_int32 enqueued; 1142 kmp_int32 head; 1143 kmp_int32 tail; 1144 1145 head = *head_id_p; 1146 1147 switch (head) { 1148 1149 case -1: { 1150 #ifdef DEBUG_QUEUING_LOCKS 1151 tail = *tail_id_p; 1152 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1153 #endif 1154 tail = 0; /* to make sure next link asynchronously read is not set 1155 accidentally; this assignment prevents us from entering the 1156 if ( t > 0 ) condition in the enqueued case below, which is not 1157 necessary for this state transition */ 1158 1159 need_mf = 0; 1160 /* try (-1,0)->(tid,tid) */ 1161 enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p, 1162 KMP_PACK_64(-1, 0), 1163 KMP_PACK_64(gtid + 1, gtid + 1)); 1164 #ifdef DEBUG_QUEUING_LOCKS 1165 if (enqueued) 1166 TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)"); 1167 #endif 1168 } break; 1169 1170 default: { 1171 tail = *tail_id_p; 1172 KMP_DEBUG_ASSERT(tail != gtid + 1); 1173 1174 #ifdef DEBUG_QUEUING_LOCKS 1175 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1176 #endif 1177 1178 if (tail == 0) { 1179 enqueued = FALSE; 1180 } else { 1181 need_mf = 0; 1182 /* try (h,t) or (h,h)->(h,tid) */ 1183 enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1); 1184 1185 #ifdef DEBUG_QUEUING_LOCKS 1186 if (enqueued) 1187 TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)"); 1188 #endif 1189 } 1190 } break; 1191 1192 case 0: /* empty queue */ 1193 { 1194 kmp_int32 grabbed_lock; 1195 1196 #ifdef DEBUG_QUEUING_LOCKS 1197 tail = *tail_id_p; 1198 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1199 #endif 1200 /* try (0,0)->(-1,0) */ 1201 1202 /* only legal transition out of head = 0 is head = -1 with no change to 1203 * tail */ 1204 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1); 1205 1206 if (grabbed_lock) { 1207 1208 *spin_here_p = FALSE; 1209 1210 KA_TRACE( 1211 1000, 1212 ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", 1213 lck, gtid)); 1214 #ifdef DEBUG_QUEUING_LOCKS 1215 TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0); 1216 #endif 1217 1218 #if OMPT_SUPPORT 1219 if (ompt_enabled.enabled && prev_state != omp_state_undefined) { 1220 /* change the state before clearing wait_id */ 1221 this_thr->th.ompt_thread_info.state = prev_state; 1222 this_thr->th.ompt_thread_info.wait_id = 0; 1223 } 1224 #endif 1225 1226 KMP_FSYNC_ACQUIRED(lck); 1227 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ 1228 } 1229 enqueued = FALSE; 1230 } break; 1231 } 1232 1233 #if OMPT_SUPPORT 1234 if (ompt_enabled.enabled && prev_state == omp_state_undefined) { 1235 /* this thread will spin; set wait_id before entering wait state */ 1236 prev_state = this_thr->th.ompt_thread_info.state; 1237 this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; 1238 this_thr->th.ompt_thread_info.state = omp_state_wait_lock; 1239 } 1240 #endif 1241 1242 if (enqueued) { 1243 if (tail > 0) { 1244 kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1); 1245 KMP_ASSERT(tail_thr != NULL); 1246 tail_thr->th.th_next_waiting = gtid + 1; 1247 /* corresponding wait for this write in release code */ 1248 } 1249 KA_TRACE(1000, 1250 ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", 1251 lck, gtid)); 1252 1253 /* ToDo: May want to consider using __kmp_wait_sleep or something that 1254 sleeps for throughput only here. 
*/ 1255 KMP_MB(); 1256 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); 1257 1258 #ifdef DEBUG_QUEUING_LOCKS 1259 TRACE_LOCK(gtid + 1, "acq spin"); 1260 1261 if (this_thr->th.th_next_waiting != 0) 1262 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1263 #endif 1264 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1265 KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after " 1266 "waiting on queue\n", 1267 lck, gtid)); 1268 1269 #ifdef DEBUG_QUEUING_LOCKS 1270 TRACE_LOCK(gtid + 1, "acq exit 2"); 1271 #endif 1272 1273 #if OMPT_SUPPORT 1274 /* change the state before clearing wait_id */ 1275 this_thr->th.ompt_thread_info.state = prev_state; 1276 this_thr->th.ompt_thread_info.wait_id = 0; 1277 #endif 1278 1279 /* got lock, we were dequeued by the thread that released lock */ 1280 return KMP_LOCK_ACQUIRED_FIRST; 1281 } 1282 1283 /* Yield if number of threads > number of logical processors */ 1284 /* ToDo: Not sure why this should only be in oversubscription case, 1285 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ 1286 KMP_YIELD(TCR_4(__kmp_nth) > 1287 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 1288 #ifdef DEBUG_QUEUING_LOCKS 1289 TRACE_LOCK(gtid + 1, "acq retry"); 1290 #endif 1291 } 1292 KMP_ASSERT2(0, "should not get here"); 1293 return KMP_LOCK_ACQUIRED_FIRST; 1294 } 1295 1296 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1297 KMP_DEBUG_ASSERT(gtid >= 0); 1298 1299 int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1300 ANNOTATE_QUEUING_ACQUIRED(lck); 1301 return retval; 1302 } 1303 1304 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1305 kmp_int32 gtid) { 1306 char const *const func = "omp_set_lock"; 1307 if (lck->lk.initialized != lck) { 1308 KMP_FATAL(LockIsUninitialized, func); 1309 } 1310 if (__kmp_is_queuing_lock_nestable(lck)) { 1311 KMP_FATAL(LockNestableUsedAsSimple, func); 1312 } 1313 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1314 KMP_FATAL(LockIsAlreadyOwned, func); 1315 } 1316 1317 __kmp_acquire_queuing_lock(lck, gtid); 1318 1319 lck->lk.owner_id = gtid + 1; 1320 return KMP_LOCK_ACQUIRED_FIRST; 1321 } 1322 1323 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1324 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1325 kmp_int32 head; 1326 #ifdef KMP_DEBUG 1327 kmp_info_t *this_thr; 1328 #endif 1329 1330 KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid)); 1331 KMP_DEBUG_ASSERT(gtid >= 0); 1332 #ifdef KMP_DEBUG 1333 this_thr = __kmp_thread_from_gtid(gtid); 1334 KMP_DEBUG_ASSERT(this_thr != NULL); 1335 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 1336 #endif 1337 1338 head = *head_id_p; 1339 1340 if (head == 0) { /* nobody on queue, nobody holding */ 1341 /* try (0,0)->(-1,0) */ 1342 if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) { 1343 KA_TRACE(1000, 1344 ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid)); 1345 KMP_FSYNC_ACQUIRED(lck); 1346 ANNOTATE_QUEUING_ACQUIRED(lck); 1347 return TRUE; 1348 } 1349 } 1350 1351 KA_TRACE(1000, 1352 ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid)); 1353 return FALSE; 1354 } 1355 1356 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1357 kmp_int32 gtid) { 1358 char const *const func = "omp_test_lock"; 1359 if (lck->lk.initialized != lck) { 1360 KMP_FATAL(LockIsUninitialized, func); 1361 } 1362 if (__kmp_is_queuing_lock_nestable(lck)) { 1363 KMP_FATAL(LockNestableUsedAsSimple, func); 1364 } 1365 1366 int 
retval = __kmp_test_queuing_lock(lck, gtid); 1367 1368 if (retval) { 1369 lck->lk.owner_id = gtid + 1; 1370 } 1371 return retval; 1372 } 1373 1374 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1375 kmp_info_t *this_thr; 1376 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1377 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; 1378 1379 KA_TRACE(1000, 1380 ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); 1381 KMP_DEBUG_ASSERT(gtid >= 0); 1382 this_thr = __kmp_thread_from_gtid(gtid); 1383 KMP_DEBUG_ASSERT(this_thr != NULL); 1384 #ifdef DEBUG_QUEUING_LOCKS 1385 TRACE_LOCK(gtid + 1, "rel ent"); 1386 1387 if (this_thr->th.th_spin_here) 1388 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1389 if (this_thr->th.th_next_waiting != 0) 1390 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1391 #endif 1392 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 1393 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1394 1395 KMP_FSYNC_RELEASING(lck); 1396 ANNOTATE_QUEUING_RELEASED(lck); 1397 1398 while (1) { 1399 kmp_int32 dequeued; 1400 kmp_int32 head; 1401 kmp_int32 tail; 1402 1403 head = *head_id_p; 1404 1405 #ifdef DEBUG_QUEUING_LOCKS 1406 tail = *tail_id_p; 1407 TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail); 1408 if (head == 0) 1409 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1410 #endif 1411 KMP_DEBUG_ASSERT(head != 1412 0); /* holding the lock, head must be -1 or queue head */ 1413 1414 if (head == -1) { /* nobody on queue */ 1415 /* try (-1,0)->(0,0) */ 1416 if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) { 1417 KA_TRACE( 1418 1000, 1419 ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1420 lck, gtid)); 1421 #ifdef DEBUG_QUEUING_LOCKS 1422 TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0); 1423 #endif 1424 1425 #if OMPT_SUPPORT 1426 /* nothing to do - no other thread is trying to shift blame */ 1427 #endif 1428 return KMP_LOCK_RELEASED; 1429 } 1430 dequeued = FALSE; 1431 } else { 1432 KMP_MB(); 1433 tail = *tail_id_p; 1434 if (head == tail) { /* only one thread on the queue */ 1435 #ifdef DEBUG_QUEUING_LOCKS 1436 if (head <= 0) 1437 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1438 #endif 1439 KMP_DEBUG_ASSERT(head > 0); 1440 1441 /* try (h,h)->(-1,0) */ 1442 dequeued = KMP_COMPARE_AND_STORE_REL64( 1443 RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head), 1444 KMP_PACK_64(-1, 0)); 1445 #ifdef DEBUG_QUEUING_LOCKS 1446 TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); 1447 #endif 1448 1449 } else { 1450 volatile kmp_int32 *waiting_id_p; 1451 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1452 KMP_DEBUG_ASSERT(head_thr != NULL); 1453 waiting_id_p = &head_thr->th.th_next_waiting; 1454 1455 /* Does this require synchronous reads? 
*/ 1456 #ifdef DEBUG_QUEUING_LOCKS 1457 if (head <= 0 || tail <= 0) 1458 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1459 #endif 1460 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1461 1462 /* try (h,t)->(h',t) or (t,t) */ 1463 KMP_MB(); 1464 /* make sure enqueuing thread has time to update next waiting thread 1465 * field */ 1466 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0, 1467 KMP_NEQ, NULL); 1468 #ifdef DEBUG_QUEUING_LOCKS 1469 TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)"); 1470 #endif 1471 dequeued = TRUE; 1472 } 1473 } 1474 1475 if (dequeued) { 1476 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1477 KMP_DEBUG_ASSERT(head_thr != NULL); 1478 1479 /* Does this require synchronous reads? */ 1480 #ifdef DEBUG_QUEUING_LOCKS 1481 if (head <= 0 || tail <= 0) 1482 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1483 #endif 1484 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1485 1486 /* For clean code only. Thread not released until next statement prevents 1487 race with acquire code. */ 1488 head_thr->th.th_next_waiting = 0; 1489 #ifdef DEBUG_QUEUING_LOCKS 1490 TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head); 1491 #endif 1492 1493 KMP_MB(); 1494 /* reset spin value */ 1495 head_thr->th.th_spin_here = FALSE; 1496 1497 KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after " 1498 "dequeuing\n", 1499 lck, gtid)); 1500 #ifdef DEBUG_QUEUING_LOCKS 1501 TRACE_LOCK(gtid + 1, "rel exit 2"); 1502 #endif 1503 return KMP_LOCK_RELEASED; 1504 } 1505 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring 1506 threads */ 1507 1508 #ifdef DEBUG_QUEUING_LOCKS 1509 TRACE_LOCK(gtid + 1, "rel retry"); 1510 #endif 1511 1512 } /* while */ 1513 KMP_ASSERT2(0, "should not get here"); 1514 return KMP_LOCK_RELEASED; 1515 } 1516 1517 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1518 kmp_int32 gtid) { 1519 char const *const func = "omp_unset_lock"; 1520 KMP_MB(); /* in case another processor initialized lock */ 1521 if (lck->lk.initialized != lck) { 1522 KMP_FATAL(LockIsUninitialized, func); 1523 } 1524 if (__kmp_is_queuing_lock_nestable(lck)) { 1525 KMP_FATAL(LockNestableUsedAsSimple, func); 1526 } 1527 if (__kmp_get_queuing_lock_owner(lck) == -1) { 1528 KMP_FATAL(LockUnsettingFree, func); 1529 } 1530 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1531 KMP_FATAL(LockUnsettingSetByAnother, func); 1532 } 1533 lck->lk.owner_id = 0; 1534 return __kmp_release_queuing_lock(lck, gtid); 1535 } 1536 1537 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) { 1538 lck->lk.location = NULL; 1539 lck->lk.head_id = 0; 1540 lck->lk.tail_id = 0; 1541 lck->lk.next_ticket = 0; 1542 lck->lk.now_serving = 0; 1543 lck->lk.owner_id = 0; // no thread owns the lock. 1544 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
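  // Note: the checked ("_with_checks") variants treat lk.initialized == lck as
  // the signature of a properly initialized queuing lock.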
1545 lck->lk.initialized = lck; 1546 1547 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1548 } 1549 1550 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) { 1551 lck->lk.initialized = NULL; 1552 lck->lk.location = NULL; 1553 lck->lk.head_id = 0; 1554 lck->lk.tail_id = 0; 1555 lck->lk.next_ticket = 0; 1556 lck->lk.now_serving = 0; 1557 lck->lk.owner_id = 0; 1558 lck->lk.depth_locked = -1; 1559 } 1560 1561 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1562 char const *const func = "omp_destroy_lock"; 1563 if (lck->lk.initialized != lck) { 1564 KMP_FATAL(LockIsUninitialized, func); 1565 } 1566 if (__kmp_is_queuing_lock_nestable(lck)) { 1567 KMP_FATAL(LockNestableUsedAsSimple, func); 1568 } 1569 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1570 KMP_FATAL(LockStillOwned, func); 1571 } 1572 __kmp_destroy_queuing_lock(lck); 1573 } 1574 1575 // nested queuing locks 1576 1577 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1578 KMP_DEBUG_ASSERT(gtid >= 0); 1579 1580 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1581 lck->lk.depth_locked += 1; 1582 return KMP_LOCK_ACQUIRED_NEXT; 1583 } else { 1584 __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1585 ANNOTATE_QUEUING_ACQUIRED(lck); 1586 KMP_MB(); 1587 lck->lk.depth_locked = 1; 1588 KMP_MB(); 1589 lck->lk.owner_id = gtid + 1; 1590 return KMP_LOCK_ACQUIRED_FIRST; 1591 } 1592 } 1593 1594 static int 1595 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1596 kmp_int32 gtid) { 1597 char const *const func = "omp_set_nest_lock"; 1598 if (lck->lk.initialized != lck) { 1599 KMP_FATAL(LockIsUninitialized, func); 1600 } 1601 if (!__kmp_is_queuing_lock_nestable(lck)) { 1602 KMP_FATAL(LockSimpleUsedAsNestable, func); 1603 } 1604 return __kmp_acquire_nested_queuing_lock(lck, gtid); 1605 } 1606 1607 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1608 int retval; 1609 1610 KMP_DEBUG_ASSERT(gtid >= 0); 1611 1612 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1613 retval = ++lck->lk.depth_locked; 1614 } else if (!__kmp_test_queuing_lock(lck, gtid)) { 1615 retval = 0; 1616 } else { 1617 KMP_MB(); 1618 retval = lck->lk.depth_locked = 1; 1619 KMP_MB(); 1620 lck->lk.owner_id = gtid + 1; 1621 } 1622 return retval; 1623 } 1624 1625 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1626 kmp_int32 gtid) { 1627 char const *const func = "omp_test_nest_lock"; 1628 if (lck->lk.initialized != lck) { 1629 KMP_FATAL(LockIsUninitialized, func); 1630 } 1631 if (!__kmp_is_queuing_lock_nestable(lck)) { 1632 KMP_FATAL(LockSimpleUsedAsNestable, func); 1633 } 1634 return __kmp_test_nested_queuing_lock(lck, gtid); 1635 } 1636 1637 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1638 KMP_DEBUG_ASSERT(gtid >= 0); 1639 1640 KMP_MB(); 1641 if (--(lck->lk.depth_locked) == 0) { 1642 KMP_MB(); 1643 lck->lk.owner_id = 0; 1644 __kmp_release_queuing_lock(lck, gtid); 1645 return KMP_LOCK_RELEASED; 1646 } 1647 return KMP_LOCK_STILL_HELD; 1648 } 1649 1650 static int 1651 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1652 kmp_int32 gtid) { 1653 char const *const func = "omp_unset_nest_lock"; 1654 KMP_MB(); /* in case another processor initialized lock */ 1655 if (lck->lk.initialized != lck) { 1656 KMP_FATAL(LockIsUninitialized, func); 1657 } 1658 if (!__kmp_is_queuing_lock_nestable(lck)) { 1659 KMP_FATAL(LockSimpleUsedAsNestable, func); 1660 } 1661 if 
(__kmp_get_queuing_lock_owner(lck) == -1) { 1662 KMP_FATAL(LockUnsettingFree, func); 1663 } 1664 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1665 KMP_FATAL(LockUnsettingSetByAnother, func); 1666 } 1667 return __kmp_release_nested_queuing_lock(lck, gtid); 1668 } 1669 1670 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1671 __kmp_init_queuing_lock(lck); 1672 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1673 } 1674 1675 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1676 __kmp_destroy_queuing_lock(lck); 1677 lck->lk.depth_locked = 0; 1678 } 1679 1680 static void 1681 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1682 char const *const func = "omp_destroy_nest_lock"; 1683 if (lck->lk.initialized != lck) { 1684 KMP_FATAL(LockIsUninitialized, func); 1685 } 1686 if (!__kmp_is_queuing_lock_nestable(lck)) { 1687 KMP_FATAL(LockSimpleUsedAsNestable, func); 1688 } 1689 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1690 KMP_FATAL(LockStillOwned, func); 1691 } 1692 __kmp_destroy_nested_queuing_lock(lck); 1693 } 1694 1695 // access functions to fields which don't exist for all lock kinds. 1696 1697 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) { 1698 return lck->lk.location; 1699 } 1700 1701 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck, 1702 const ident_t *loc) { 1703 lck->lk.location = loc; 1704 } 1705 1706 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) { 1707 return lck->lk.flags; 1708 } 1709 1710 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, 1711 kmp_lock_flags_t flags) { 1712 lck->lk.flags = flags; 1713 } 1714 1715 #if KMP_USE_ADAPTIVE_LOCKS 1716 1717 /* RTM Adaptive locks */ 1718 1719 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1720 1721 #include <immintrin.h> 1722 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1723 1724 #else 1725 1726 // Values from the status register after failed speculation. 1727 #define _XBEGIN_STARTED (~0u) 1728 #define _XABORT_EXPLICIT (1 << 0) 1729 #define _XABORT_RETRY (1 << 1) 1730 #define _XABORT_CONFLICT (1 << 2) 1731 #define _XABORT_CAPACITY (1 << 3) 1732 #define _XABORT_DEBUG (1 << 4) 1733 #define _XABORT_NESTED (1 << 5) 1734 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1735 1736 // Aborts for which it's worth trying again immediately 1737 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1738 1739 #define STRINGIZE_INTERNAL(arg) #arg 1740 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1741 1742 // Access to RTM instructions 1743 /*A version of XBegin which returns -1 on speculation, and the value of EAX on 1744 an abort. This is the same definition as the compiler intrinsic that will be 1745 supported at some point. */ 1746 static __inline int _xbegin() { 1747 int res = -1; 1748 1749 #if KMP_OS_WINDOWS 1750 #if KMP_ARCH_X86_64 1751 _asm { 1752 _emit 0xC7 1753 _emit 0xF8 1754 _emit 2 1755 _emit 0 1756 _emit 0 1757 _emit 0 1758 jmp L2 1759 mov res, eax 1760 L2: 1761 } 1762 #else /* IA32 */ 1763 _asm { 1764 _emit 0xC7 1765 _emit 0xF8 1766 _emit 2 1767 _emit 0 1768 _emit 0 1769 _emit 0 1770 jmp L2 1771 mov res, eax 1772 L2: 1773 } 1774 #endif // KMP_ARCH_X86_64 1775 #else 1776 /* Note that %eax must be noted as killed (clobbered), because the XSR is 1777 returned in %eax(%rax) on abort. Other register values are restored, so 1778 don't need to be killed. 
1779 1780 We must also mark 'res' as an input and an output, since otherwise 1781 'res=-1' may be dropped as being dead, whereas we do need the assignment on 1782 the successful (i.e., non-abort) path. */ 1783 __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" 1784 " .long 1f-1b-6\n" 1785 " jmp 2f\n" 1786 "1: movl %%eax,%0\n" 1787 "2:" 1788 : "+r"(res)::"memory", "%eax"); 1789 #endif // KMP_OS_WINDOWS 1790 return res; 1791 } 1792 1793 /* Transaction end */ 1794 static __inline void _xend() { 1795 #if KMP_OS_WINDOWS 1796 __asm { 1797 _emit 0x0f 1798 _emit 0x01 1799 _emit 0xd5 1800 } 1801 #else 1802 __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); 1803 #endif 1804 } 1805 1806 /* This is a macro, the argument must be a single byte constant which can be 1807 evaluated by the inline assembler, since it is emitted as a byte into the 1808 assembly code. */ 1809 // clang-format off 1810 #if KMP_OS_WINDOWS 1811 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG 1812 #else 1813 #define _xabort(ARG) \ 1814 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); 1815 #endif 1816 // clang-format on 1817 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1818 1819 // Statistics is collected for testing purpose 1820 #if KMP_DEBUG_ADAPTIVE_LOCKS 1821 1822 // We accumulate speculative lock statistics when the lock is destroyed. We 1823 // keep locks that haven't been destroyed in the liveLocks list so that we can 1824 // grab their statistics too. 1825 static kmp_adaptive_lock_statistics_t destroyedStats; 1826 1827 // To hold the list of live locks. 1828 static kmp_adaptive_lock_info_t liveLocks; 1829 1830 // A lock so we can safely update the list of locks. 1831 static kmp_bootstrap_lock_t chain_lock = 1832 KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock); 1833 1834 // Initialize the list of stats. 
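// Added for clarity (an illustrative sketch, not part of the runtime logic):
// liveLocks is the sentinel of a circular doubly linked list threaded through
// the stats of every live adaptive lock. For two hypothetical locks a and b,
// calling __kmp_remember_lock(&a) and then __kmp_remember_lock(&b) yields the
// next-chain liveLocks -> b -> a -> liveLocks, and every node satisfies
// node->stats.next->stats.prev == node, which is what the KMP_ASSERTs below
// verify.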
1835 void __kmp_init_speculative_stats() { 1836 kmp_adaptive_lock_info_t *lck = &liveLocks; 1837 1838 memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0, 1839 sizeof(lck->stats)); 1840 lck->stats.next = lck; 1841 lck->stats.prev = lck; 1842 1843 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1844 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1845 1846 __kmp_init_bootstrap_lock(&chain_lock); 1847 } 1848 1849 // Insert the lock into the circular list 1850 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { 1851 __kmp_acquire_bootstrap_lock(&chain_lock); 1852 1853 lck->stats.next = liveLocks.stats.next; 1854 lck->stats.prev = &liveLocks; 1855 1856 liveLocks.stats.next = lck; 1857 lck->stats.next->stats.prev = lck; 1858 1859 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1860 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1861 1862 __kmp_release_bootstrap_lock(&chain_lock); 1863 } 1864 1865 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { 1866 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1867 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1868 1869 kmp_adaptive_lock_info_t *n = lck->stats.next; 1870 kmp_adaptive_lock_info_t *p = lck->stats.prev; 1871 1872 n->stats.prev = p; 1873 p->stats.next = n; 1874 } 1875 1876 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1877 memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0, 1878 sizeof(lck->stats)); 1879 __kmp_remember_lock(lck); 1880 } 1881 1882 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, 1883 kmp_adaptive_lock_info_t *lck) { 1884 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 1885 1886 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 1887 t->successfulSpeculations += s->successfulSpeculations; 1888 t->hardFailedSpeculations += s->hardFailedSpeculations; 1889 t->softFailedSpeculations += s->softFailedSpeculations; 1890 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 1891 t->lemmingYields += s->lemmingYields; 1892 } 1893 1894 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1895 __kmp_acquire_bootstrap_lock(&chain_lock); 1896 1897 __kmp_add_stats(&destroyedStats, lck); 1898 __kmp_forget_lock(lck); 1899 1900 __kmp_release_bootstrap_lock(&chain_lock); 1901 } 1902 1903 static float percent(kmp_uint32 count, kmp_uint32 total) { 1904 return (total == 0) ? 0.0 : (100.0 * count) / total; 1905 } 1906 1907 static FILE *__kmp_open_stats_file() { 1908 if (strcmp(__kmp_speculative_statsfile, "-") == 0) 1909 return stdout; 1910 1911 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; 1912 char buffer[buffLen]; 1913 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, 1914 (kmp_int32)getpid()); 1915 FILE *result = fopen(&buffer[0], "w"); 1916 1917 // Maybe we should issue a warning here... 1918 return result ? 
result : stdout; 1919 } 1920 1921 void __kmp_print_speculative_stats() { 1922 kmp_adaptive_lock_statistics_t total = destroyedStats; 1923 kmp_adaptive_lock_info_t *lck; 1924 1925 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 1926 __kmp_add_stats(&total, lck); 1927 } 1928 kmp_adaptive_lock_statistics_t *t = &total; 1929 kmp_uint32 totalSections = 1930 t->nonSpeculativeAcquires + t->successfulSpeculations; 1931 kmp_uint32 totalSpeculations = t->successfulSpeculations + 1932 t->hardFailedSpeculations + 1933 t->softFailedSpeculations; 1934 if (totalSections <= 0) 1935 return; 1936 1937 FILE *statsFile = __kmp_open_stats_file(); 1938 1939 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); 1940 fprintf(statsFile, " Lock parameters: \n" 1941 " max_soft_retries : %10d\n" 1942 " max_badness : %10d\n", 1943 __kmp_adaptive_backoff_params.max_soft_retries, 1944 __kmp_adaptive_backoff_params.max_badness); 1945 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", 1946 t->nonSpeculativeAcquireAttempts); 1947 fprintf(statsFile, " Total critical sections : %10d\n", 1948 totalSections); 1949 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", 1950 t->successfulSpeculations, 1951 percent(t->successfulSpeculations, totalSections)); 1952 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 1953 t->nonSpeculativeAcquires, 1954 percent(t->nonSpeculativeAcquires, totalSections)); 1955 fprintf(statsFile, " Lemming yields : %10d\n\n", 1956 t->lemmingYields); 1957 1958 fprintf(statsFile, " Speculative acquire attempts : %10d\n", 1959 totalSpeculations); 1960 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", 1961 t->successfulSpeculations, 1962 percent(t->successfulSpeculations, totalSpeculations)); 1963 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", 1964 t->softFailedSpeculations, 1965 percent(t->softFailedSpeculations, totalSpeculations)); 1966 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", 1967 t->hardFailedSpeculations, 1968 percent(t->hardFailedSpeculations, totalSpeculations)); 1969 1970 if (statsFile != stdout) 1971 fclose(statsFile); 1972 } 1973 1974 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) 1975 #else 1976 #define KMP_INC_STAT(lck, stat) 1977 1978 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 1979 1980 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { 1981 // It is enough to check that the head_id is zero. 1982 // We don't also need to check the tail. 1983 bool res = lck->lk.head_id == 0; 1984 1985 // We need a fence here, since we must ensure that no memory operations 1986 // from later in this thread float above that read. 1987 #if KMP_COMPILER_ICC 1988 _mm_mfence(); 1989 #else 1990 __sync_synchronize(); 1991 #endif 1992 1993 return res; 1994 } 1995 1996 // Functions for manipulating the badness 1997 static __inline void 1998 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { 1999 // Reset the badness to zero so we eagerly try to speculate again 2000 lck->lk.adaptive.badness = 0; 2001 KMP_INC_STAT(lck, successfulSpeculations); 2002 } 2003 2004 // Create a bit mask with one more set bit. 2005 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { 2006 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; 2007 if (newBadness > lck->lk.adaptive.max_badness) { 2008 return; 2009 } else { 2010 lck->lk.adaptive.badness = newBadness; 2011 } 2012 } 2013 2014 // Check whether speculation should be attempted. 
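// Added worked example (illustrative numbers): badness is a low-bit mask that
// gains one set bit each time a speculative acquisition gives up (a hard abort,
// or soft retries exhausted), growing 0 -> 1 -> 3 -> 7 -> ... until a step
// would exceed max_badness, and it is reset to 0 on success. Since speculation
// is tried only when (acquire_attempts & badness) == 0, a badness of 3 means
// roughly one in four non-speculative acquire attempts re-tries speculation,
// while a badness of 0 means every attempt does.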
2015 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, 2016 kmp_int32 gtid) { 2017 kmp_uint32 badness = lck->lk.adaptive.badness; 2018 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; 2019 int res = (attempts & badness) == 0; 2020 return res; 2021 } 2022 2023 // Attempt to acquire only the speculative lock. 2024 // Does not back off to the non-speculative lock. 2025 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, 2026 kmp_int32 gtid) { 2027 int retries = lck->lk.adaptive.max_soft_retries; 2028 2029 // We don't explicitly count the start of speculation, rather we record the 2030 // results (success, hard fail, soft fail). The sum of all of those is the 2031 // total number of times we started speculation since all speculations must 2032 // end one of those ways. 2033 do { 2034 kmp_uint32 status = _xbegin(); 2035 // Switch this in to disable actual speculation but exercise at least some 2036 // of the rest of the code. Useful for debugging... 2037 // kmp_uint32 status = _XABORT_NESTED; 2038 2039 if (status == _XBEGIN_STARTED) { 2040 /* We have successfully started speculation. Check that no-one acquired 2041 the lock for real between when we last looked and now. This also gets 2042 the lock cache line into our read-set, which we need so that we'll 2043 abort if anyone later claims it for real. */ 2044 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2045 // Lock is now visibly acquired, so someone beat us to it. Abort the 2046 // transaction so we'll restart from _xbegin with the failure status. 2047 _xabort(0x01); 2048 KMP_ASSERT2(0, "should not get here"); 2049 } 2050 return 1; // Lock has been acquired (speculatively) 2051 } else { 2052 // We have aborted, update the statistics 2053 if (status & SOFT_ABORT_MASK) { 2054 KMP_INC_STAT(lck, softFailedSpeculations); 2055 // and loop round to retry. 2056 } else { 2057 KMP_INC_STAT(lck, hardFailedSpeculations); 2058 // Give up if we had a hard failure. 2059 break; 2060 } 2061 } 2062 } while (retries--); // Loop while we have retries, and didn't fail hard. 2063 2064 // Either we had a hard failure or we didn't succeed softly after 2065 // the full set of attempts, so back off the badness. 2066 __kmp_step_badness(lck); 2067 return 0; 2068 } 2069 2070 // Attempt to acquire the speculative lock, or back off to the non-speculative 2071 // one if the speculative lock cannot be acquired. 2072 // We can succeed speculatively, non-speculatively, or fail. 2073 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { 2074 // First try to acquire the lock speculatively 2075 if (__kmp_should_speculate(lck, gtid) && 2076 __kmp_test_adaptive_lock_only(lck, gtid)) 2077 return 1; 2078 2079 // Speculative acquisition failed, so try to acquire it non-speculatively. 2080 // Count the non-speculative acquire attempt 2081 lck->lk.adaptive.acquire_attempts++; 2082 2083 // Use base, non-speculative lock. 2084 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { 2085 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2086 return 1; // Lock is acquired (non-speculatively) 2087 } else { 2088 return 0; // Failed to acquire the lock, it's already visibly locked. 
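    // Added note: a zero return here just reports failure to the caller
    // (e.g. omp_test_lock); no badness update is needed on this path because
    // __kmp_test_adaptive_lock_only has already stepped the badness if the
    // speculative attempt itself failed.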
2089 } 2090 } 2091 2092 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2093 kmp_int32 gtid) { 2094 char const *const func = "omp_test_lock"; 2095 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2096 KMP_FATAL(LockIsUninitialized, func); 2097 } 2098 2099 int retval = __kmp_test_adaptive_lock(lck, gtid); 2100 2101 if (retval) { 2102 lck->lk.qlk.owner_id = gtid + 1; 2103 } 2104 return retval; 2105 } 2106 2107 // Block until we can acquire a speculative, adaptive lock. We check whether we 2108 // should be trying to speculate. If we should be, we check the real lock to see 2109 // if it is free, and, if not, pause without attempting to acquire it until it 2110 // is. Then we try the speculative acquire. This means that although we suffer 2111 // from lemmings a little (because we can't acquire the lock speculatively 2112 // until the queue of threads waiting has cleared), we don't get into a state 2113 // where we can never acquire the lock speculatively (because we force the queue 2114 // to clear by preventing new arrivals from entering the queue). This does mean 2115 // that when we're trying to break lemmings, the lock is no longer fair. However, 2116 // OpenMP makes no guarantee that its locks are fair, so this isn't a real 2117 // problem. 2118 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck, 2119 kmp_int32 gtid) { 2120 if (__kmp_should_speculate(lck, gtid)) { 2121 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2122 if (__kmp_test_adaptive_lock_only(lck, gtid)) 2123 return; 2124 // We tried speculation and failed, so give up. 2125 } else { 2126 // We can't try speculation until the lock is free, so we pause here 2127 // (without suspending on the queuing lock) to allow it to drain, then 2128 // try again. All other threads will also see the same result from 2129 // __kmp_should_speculate, so they will be doing the same if they try to 2130 // claim the lock from now on. 2131 while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2132 KMP_INC_STAT(lck, lemmingYields); 2133 __kmp_yield(TRUE); 2134 } 2135 2136 if (__kmp_test_adaptive_lock_only(lck, gtid)) 2137 return; 2138 } 2139 } 2140 2141 // Speculative acquisition failed, so acquire it non-speculatively. 2142 // Count the non-speculative acquire attempt 2143 lck->lk.adaptive.acquire_attempts++; 2144 2145 __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid); 2146 // We have acquired the base lock, so count that. 2147 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2148 ANNOTATE_QUEUING_ACQUIRED(lck); 2149 } 2150 2151 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2152 kmp_int32 gtid) { 2153 char const *const func = "omp_set_lock"; 2154 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2155 KMP_FATAL(LockIsUninitialized, func); 2156 } 2157 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) { 2158 KMP_FATAL(LockIsAlreadyOwned, func); 2159 } 2160 2161 __kmp_acquire_adaptive_lock(lck, gtid); 2162 2163 lck->lk.qlk.owner_id = gtid + 1; 2164 } 2165 2166 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck, 2167 kmp_int32 gtid) { 2168 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR( 2169 lck))) { // If the lock doesn't look claimed we must be speculating. 2170 // (Or the user's code is buggy and they're releasing without locking; 2171 // if we had XTEST we'd be able to check that case...)
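    // Added note: if we really are inside a transaction, the read of the lock
    // word in __kmp_is_unlocked_queuing_lock() at acquire time put it in our
    // read-set, so any real acquisition by another thread would already have
    // aborted us; reaching this point therefore means the _xend below commits
    // the speculative critical section.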
2172 _xend(); // Exit speculation 2173 __kmp_update_badness_after_success(lck); 2174 } else { // Since the lock *is* visibly locked we're not speculating, 2175 // so should use the underlying lock's release scheme. 2176 __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); 2177 } 2178 return KMP_LOCK_RELEASED; 2179 } 2180 2181 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2182 kmp_int32 gtid) { 2183 char const *const func = "omp_unset_lock"; 2184 KMP_MB(); /* in case another processor initialized lock */ 2185 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2186 KMP_FATAL(LockIsUninitialized, func); 2187 } 2188 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { 2189 KMP_FATAL(LockUnsettingFree, func); 2190 } 2191 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { 2192 KMP_FATAL(LockUnsettingSetByAnother, func); 2193 } 2194 lck->lk.qlk.owner_id = 0; 2195 __kmp_release_adaptive_lock(lck, gtid); 2196 return KMP_LOCK_RELEASED; 2197 } 2198 2199 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { 2200 __kmp_init_queuing_lock(GET_QLK_PTR(lck)); 2201 lck->lk.adaptive.badness = 0; 2202 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; 2203 lck->lk.adaptive.max_soft_retries = 2204 __kmp_adaptive_backoff_params.max_soft_retries; 2205 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2206 #if KMP_DEBUG_ADAPTIVE_LOCKS 2207 __kmp_zero_speculative_stats(&lck->lk.adaptive); 2208 #endif 2209 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2210 } 2211 2212 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { 2213 #if KMP_DEBUG_ADAPTIVE_LOCKS 2214 __kmp_accumulate_speculative_stats(&lck->lk.adaptive); 2215 #endif 2216 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); 2217 // Nothing needed for the speculative part. 2218 } 2219 2220 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2221 char const *const func = "omp_destroy_lock"; 2222 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2223 KMP_FATAL(LockIsUninitialized, func); 2224 } 2225 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) { 2226 KMP_FATAL(LockStillOwned, func); 2227 } 2228 __kmp_destroy_adaptive_lock(lck); 2229 } 2230 2231 #endif // KMP_USE_ADAPTIVE_LOCKS 2232 2233 /* ------------------------------------------------------------------------ */ 2234 /* DRDPA ticket locks */ 2235 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2236 2237 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) { 2238 return lck->lk.owner_id - 1; 2239 } 2240 2241 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) { 2242 return lck->lk.depth_locked != -1; 2243 } 2244 2245 __forceinline static int 2246 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2247 kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket); 2248 kmp_uint64 mask = lck->lk.mask; // atomic load 2249 std::atomic<kmp_uint64> *polls = lck->lk.polls; 2250 2251 #ifdef USE_LOCK_PROFILE 2252 if (polls[ticket & mask] != ticket) 2253 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2254 /* else __kmp_printf( "." );*/ 2255 #endif /* USE_LOCK_PROFILE */ 2256 2257 // Now spin-wait, but reload the polls pointer and mask, in case the 2258 // polling area has been reconfigured. Unless it is reconfigured, the 2259 // reloads stay in L1 cache and are cheap. 2260 // 2261 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!! 
2262 // 2263 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2264 // and poll to be re-read every spin iteration. 2265 kmp_uint32 spins; 2266 2267 KMP_FSYNC_PREPARE(lck); 2268 KMP_INIT_YIELD(spins); 2269 while (polls[ticket & mask] < ticket) { // atomic load 2270 // If we are oversubscribed, 2271 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2272 // CPU Pause is in the macros for yield. 2273 // 2274 KMP_YIELD(TCR_4(__kmp_nth) > 2275 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2276 KMP_YIELD_SPIN(spins); 2277 2278 // Re-read the mask and the poll pointer from the lock structure. 2279 // 2280 // Make certain that "mask" is read before "polls" !!! 2281 // 2282 // If another thread reconfigures the polling area and updates its 2283 // values, and we get the new value of mask and the old polls pointer, we 2284 // could access memory beyond the end of the old polling area. 2285 mask = lck->lk.mask; // atomic load 2286 polls = lck->lk.polls; // atomic load 2287 } 2288 2289 // Critical section starts here 2290 KMP_FSYNC_ACQUIRED(lck); 2291 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2292 ticket, lck)); 2293 lck->lk.now_serving = ticket; // non-volatile store 2294 2295 // Deallocate a garbage polling area if we know that we are the last 2296 // thread that could possibly access it. 2297 // 2298 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2299 // ticket. 2300 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2301 __kmp_free(lck->lk.old_polls); 2302 lck->lk.old_polls = NULL; 2303 lck->lk.cleanup_ticket = 0; 2304 } 2305 2306 // Check to see if we should reconfigure the polling area. 2307 // If there is still a garbage polling area to be deallocated from a 2308 // previous reconfiguration, let a later thread reconfigure it. 2309 if (lck->lk.old_polls == NULL) { 2310 bool reconfigure = false; 2311 std::atomic<kmp_uint64> *old_polls = polls; 2312 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2313 2314 if (TCR_4(__kmp_nth) > 2315 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2316 // We are in oversubscription mode. Contract the polling area 2317 // down to a single location, if that hasn't been done already. 2318 if (num_polls > 1) { 2319 reconfigure = true; 2320 num_polls = TCR_4(lck->lk.num_polls); 2321 mask = 0; 2322 num_polls = 1; 2323 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls * 2324 sizeof(*polls)); 2325 polls[0] = ticket; 2326 } 2327 } else { 2328 // We are in under/fully subscribed mode. Check the number of 2329 // threads waiting on the lock. The size of the polling area 2330 // should be at least the number of threads waiting. 2331 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2332 if (num_waiting > num_polls) { 2333 kmp_uint32 old_num_polls = num_polls; 2334 reconfigure = true; 2335 do { 2336 mask = (mask << 1) | 1; 2337 num_polls *= 2; 2338 } while (num_polls <= num_waiting); 2339 2340 // Allocate the new polling area, and copy the relevant portion 2341 // of the old polling area to the new area. __kmp_allocate() 2342 // zeroes the memory it allocates, and most of the old area is 2343 // just zero padding, so we only copy the release counters.
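      // Added worked example (illustrative numbers): with num_polls == 4 and
      // mask == 3, tickets 8, 9, 10 and 11 spin on polls[0..3]. If 6 waiters
      // are observed, the doubling loop above grows the area to num_polls == 8,
      // mask == 7, and the copy below transfers only the 4 old release
      // counters; the rest of the new area stays zeroed until the
      // corresponding tickets are released.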
2344 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls * 2345 sizeof(*polls)); 2346 kmp_uint32 i; 2347 for (i = 0; i < old_num_polls; i++) { 2348 polls[i].store(old_polls[i]); 2349 } 2350 } 2351 } 2352 2353 if (reconfigure) { 2354 // Now write the updated fields back to the lock structure. 2355 // 2356 // Make certain that "polls" is written before "mask" !!! 2357 // 2358 // If another thread picks up the new value of mask and the old polls 2359 // pointer , it could access memory beyond the end of the old polling 2360 // area. 2361 // 2362 // On x86, we need memory fences. 2363 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " 2364 "lock %p to %d polls\n", 2365 ticket, lck, num_polls)); 2366 2367 lck->lk.old_polls = old_polls; 2368 lck->lk.polls = polls; // atomic store 2369 2370 KMP_MB(); 2371 2372 lck->lk.num_polls = num_polls; 2373 lck->lk.mask = mask; // atomic store 2374 2375 KMP_MB(); 2376 2377 // Only after the new polling area and mask have been flushed 2378 // to main memory can we update the cleanup ticket field. 2379 // 2380 // volatile load / non-volatile store 2381 lck->lk.cleanup_ticket = lck->lk.next_ticket; 2382 } 2383 } 2384 return KMP_LOCK_ACQUIRED_FIRST; 2385 } 2386 2387 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2388 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2389 ANNOTATE_DRDPA_ACQUIRED(lck); 2390 return retval; 2391 } 2392 2393 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2394 kmp_int32 gtid) { 2395 char const *const func = "omp_set_lock"; 2396 if (lck->lk.initialized != lck) { 2397 KMP_FATAL(LockIsUninitialized, func); 2398 } 2399 if (__kmp_is_drdpa_lock_nestable(lck)) { 2400 KMP_FATAL(LockNestableUsedAsSimple, func); 2401 } 2402 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { 2403 KMP_FATAL(LockIsAlreadyOwned, func); 2404 } 2405 2406 __kmp_acquire_drdpa_lock(lck, gtid); 2407 2408 lck->lk.owner_id = gtid + 1; 2409 return KMP_LOCK_ACQUIRED_FIRST; 2410 } 2411 2412 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2413 // First get a ticket, then read the polls pointer and the mask. 2414 // The polls pointer must be read before the mask!!! (See above) 2415 kmp_uint64 ticket = lck->lk.next_ticket; // atomic load 2416 std::atomic<kmp_uint64> *polls = lck->lk.polls; 2417 kmp_uint64 mask = lck->lk.mask; // atomic load 2418 if (polls[ticket & mask] == ticket) { 2419 kmp_uint64 next_ticket = ticket + 1; 2420 if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket, 2421 next_ticket)) { 2422 KMP_FSYNC_ACQUIRED(lck); 2423 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2424 ticket, lck)); 2425 lck->lk.now_serving = ticket; // non-volatile store 2426 2427 // Since no threads are waiting, there is no possibility that we would 2428 // want to reconfigure the polling area. We might have the cleanup ticket 2429 // value (which says that it is now safe to deallocate old_polls), but 2430 // we'll let a later thread which calls __kmp_acquire_lock do that - this 2431 // routine isn't supposed to block, and we would risk blocks if we called 2432 // __kmp_free() to do the deallocation. 
2433 return TRUE; 2434 } 2435 } 2436 return FALSE; 2437 } 2438 2439 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2440 kmp_int32 gtid) { 2441 char const *const func = "omp_test_lock"; 2442 if (lck->lk.initialized != lck) { 2443 KMP_FATAL(LockIsUninitialized, func); 2444 } 2445 if (__kmp_is_drdpa_lock_nestable(lck)) { 2446 KMP_FATAL(LockNestableUsedAsSimple, func); 2447 } 2448 2449 int retval = __kmp_test_drdpa_lock(lck, gtid); 2450 2451 if (retval) { 2452 lck->lk.owner_id = gtid + 1; 2453 } 2454 return retval; 2455 } 2456 2457 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2458 // Read the ticket value from the lock data struct, then the polls pointer and 2459 // the mask. The polls pointer must be read before the mask!!! (See above) 2460 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load 2461 std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load 2462 kmp_uint64 mask = lck->lk.mask; // atomic load 2463 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2464 ticket - 1, lck)); 2465 KMP_FSYNC_RELEASING(lck); 2466 ANNOTATE_DRDPA_RELEASED(lck); 2467 polls[ticket & mask] = ticket; // atomic store 2468 return KMP_LOCK_RELEASED; 2469 } 2470 2471 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2472 kmp_int32 gtid) { 2473 char const *const func = "omp_unset_lock"; 2474 KMP_MB(); /* in case another processor initialized lock */ 2475 if (lck->lk.initialized != lck) { 2476 KMP_FATAL(LockIsUninitialized, func); 2477 } 2478 if (__kmp_is_drdpa_lock_nestable(lck)) { 2479 KMP_FATAL(LockNestableUsedAsSimple, func); 2480 } 2481 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2482 KMP_FATAL(LockUnsettingFree, func); 2483 } 2484 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && 2485 (__kmp_get_drdpa_lock_owner(lck) != gtid)) { 2486 KMP_FATAL(LockUnsettingSetByAnother, func); 2487 } 2488 lck->lk.owner_id = 0; 2489 return __kmp_release_drdpa_lock(lck, gtid); 2490 } 2491 2492 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { 2493 lck->lk.location = NULL; 2494 lck->lk.mask = 0; 2495 lck->lk.num_polls = 1; 2496 lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate( 2497 lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2498 lck->lk.cleanup_ticket = 0; 2499 lck->lk.old_polls = NULL; 2500 lck->lk.next_ticket = 0; 2501 lck->lk.now_serving = 0; 2502 lck->lk.owner_id = 0; // no thread owns the lock. 2503 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
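  // Added note: a freshly initialized DRDPA lock polls a single slot
  // (num_polls is 1 and mask is 0, so every ticket maps to polls[0]); the
  // polling area only grows later, in the reconfiguration step of
  // __kmp_acquire_drdpa_lock_timed_template(), once enough waiters queue up.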
2504 lck->lk.initialized = lck; 2505 2506 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2507 } 2508 2509 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { 2510 lck->lk.initialized = NULL; 2511 lck->lk.location = NULL; 2512 if (lck->lk.polls.load() != NULL) { 2513 __kmp_free(lck->lk.polls.load()); 2514 lck->lk.polls = NULL; 2515 } 2516 if (lck->lk.old_polls != NULL) { 2517 __kmp_free(lck->lk.old_polls); 2518 lck->lk.old_polls = NULL; 2519 } 2520 lck->lk.mask = 0; 2521 lck->lk.num_polls = 0; 2522 lck->lk.cleanup_ticket = 0; 2523 lck->lk.next_ticket = 0; 2524 lck->lk.now_serving = 0; 2525 lck->lk.owner_id = 0; 2526 lck->lk.depth_locked = -1; 2527 } 2528 2529 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2530 char const *const func = "omp_destroy_lock"; 2531 if (lck->lk.initialized != lck) { 2532 KMP_FATAL(LockIsUninitialized, func); 2533 } 2534 if (__kmp_is_drdpa_lock_nestable(lck)) { 2535 KMP_FATAL(LockNestableUsedAsSimple, func); 2536 } 2537 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2538 KMP_FATAL(LockStillOwned, func); 2539 } 2540 __kmp_destroy_drdpa_lock(lck); 2541 } 2542 2543 // nested drdpa ticket locks 2544 2545 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2546 KMP_DEBUG_ASSERT(gtid >= 0); 2547 2548 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2549 lck->lk.depth_locked += 1; 2550 return KMP_LOCK_ACQUIRED_NEXT; 2551 } else { 2552 __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2553 ANNOTATE_DRDPA_ACQUIRED(lck); 2554 KMP_MB(); 2555 lck->lk.depth_locked = 1; 2556 KMP_MB(); 2557 lck->lk.owner_id = gtid + 1; 2558 return KMP_LOCK_ACQUIRED_FIRST; 2559 } 2560 } 2561 2562 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2563 kmp_int32 gtid) { 2564 char const *const func = "omp_set_nest_lock"; 2565 if (lck->lk.initialized != lck) { 2566 KMP_FATAL(LockIsUninitialized, func); 2567 } 2568 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2569 KMP_FATAL(LockSimpleUsedAsNestable, func); 2570 } 2571 __kmp_acquire_nested_drdpa_lock(lck, gtid); 2572 } 2573 2574 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2575 int retval; 2576 2577 KMP_DEBUG_ASSERT(gtid >= 0); 2578 2579 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2580 retval = ++lck->lk.depth_locked; 2581 } else if (!__kmp_test_drdpa_lock(lck, gtid)) { 2582 retval = 0; 2583 } else { 2584 KMP_MB(); 2585 retval = lck->lk.depth_locked = 1; 2586 KMP_MB(); 2587 lck->lk.owner_id = gtid + 1; 2588 } 2589 return retval; 2590 } 2591 2592 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2593 kmp_int32 gtid) { 2594 char const *const func = "omp_test_nest_lock"; 2595 if (lck->lk.initialized != lck) { 2596 KMP_FATAL(LockIsUninitialized, func); 2597 } 2598 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2599 KMP_FATAL(LockSimpleUsedAsNestable, func); 2600 } 2601 return __kmp_test_nested_drdpa_lock(lck, gtid); 2602 } 2603 2604 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2605 KMP_DEBUG_ASSERT(gtid >= 0); 2606 2607 KMP_MB(); 2608 if (--(lck->lk.depth_locked) == 0) { 2609 KMP_MB(); 2610 lck->lk.owner_id = 0; 2611 __kmp_release_drdpa_lock(lck, gtid); 2612 return KMP_LOCK_RELEASED; 2613 } 2614 return KMP_LOCK_STILL_HELD; 2615 } 2616 2617 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2618 kmp_int32 gtid) { 2619 char const *const func = "omp_unset_nest_lock"; 2620 KMP_MB(); /* in case another processor initialized lock */ 2621 if 
(lck->lk.initialized != lck) { 2622 KMP_FATAL(LockIsUninitialized, func); 2623 } 2624 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2625 KMP_FATAL(LockSimpleUsedAsNestable, func); 2626 } 2627 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2628 KMP_FATAL(LockUnsettingFree, func); 2629 } 2630 if (__kmp_get_drdpa_lock_owner(lck) != gtid) { 2631 KMP_FATAL(LockUnsettingSetByAnother, func); 2632 } 2633 return __kmp_release_nested_drdpa_lock(lck, gtid); 2634 } 2635 2636 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2637 __kmp_init_drdpa_lock(lck); 2638 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2639 } 2640 2641 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2642 __kmp_destroy_drdpa_lock(lck); 2643 lck->lk.depth_locked = 0; 2644 } 2645 2646 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2647 char const *const func = "omp_destroy_nest_lock"; 2648 if (lck->lk.initialized != lck) { 2649 KMP_FATAL(LockIsUninitialized, func); 2650 } 2651 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2652 KMP_FATAL(LockSimpleUsedAsNestable, func); 2653 } 2654 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2655 KMP_FATAL(LockStillOwned, func); 2656 } 2657 __kmp_destroy_nested_drdpa_lock(lck); 2658 } 2659 2660 // access functions to fields which don't exist for all lock kinds. 2661 2662 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { 2663 return lck->lk.location; 2664 } 2665 2666 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, 2667 const ident_t *loc) { 2668 lck->lk.location = loc; 2669 } 2670 2671 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { 2672 return lck->lk.flags; 2673 } 2674 2675 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, 2676 kmp_lock_flags_t flags) { 2677 lck->lk.flags = flags; 2678 } 2679 2680 // Time stamp counter 2681 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2682 #define __kmp_tsc() __kmp_hardware_timestamp() 2683 // Runtime's default backoff parameters 2684 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; 2685 #else 2686 // Use nanoseconds for other platforms 2687 extern kmp_uint64 __kmp_now_nsec(); 2688 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; 2689 #define __kmp_tsc() __kmp_now_nsec() 2690 #endif 2691 2692 // A useful predicate for dealing with timestamps that may wrap. 2693 // Is a before b? Since the timestamps may wrap, this is asking whether it's 2694 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 2695 // Times where going clockwise is less distance than going anti-clockwise 2696 // are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0), 2697 // then a > b (true) does not mean a reached b; whereas signed(a) = -2, 2698 // signed(b) = 0 captures the actual difference 2699 static inline bool before(kmp_uint64 a, kmp_uint64 b) { 2700 return ((kmp_int64)b - (kmp_int64)a) > 0; 2701 } 2702 2703 // Truncated binary exponential backoff function 2704 void __kmp_spin_backoff(kmp_backoff_t *boff) { 2705 // We could flatten this loop, but making it a nested loop gives better result 2706 kmp_uint32 i; 2707 for (i = boff->step; i > 0; i--) { 2708 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 2709 do { 2710 KMP_CPU_PAUSE(); 2711 } while (before(__kmp_tsc(), goal)); 2712 } 2713 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); 2714 } 2715 2716 #if KMP_USE_DYNAMIC_LOCK 2717 2718 // Direct lock initializers. 
It simply writes a tag to the low 8 bits of the 2719 // lock word. 2720 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, 2721 kmp_dyna_lockseq_t seq) { 2722 TCW_4(*lck, KMP_GET_D_TAG(seq)); 2723 KA_TRACE( 2724 20, 2725 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 2726 } 2727 2728 #if KMP_USE_TSX 2729 2730 // HLE lock functions - imported from the testbed runtime. 2731 #define HLE_ACQUIRE ".byte 0xf2;" 2732 #define HLE_RELEASE ".byte 0xf3;" 2733 2734 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { 2735 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); 2736 return v; 2737 } 2738 2739 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } 2740 2741 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) { 2742 TCW_4(*lck, 0); 2743 } 2744 2745 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2746 // Use gtid for KMP_LOCK_BUSY if necessary 2747 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 2748 int delay = 1; 2749 do { 2750 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 2751 for (int i = delay; i != 0; --i) 2752 KMP_CPU_PAUSE(); 2753 delay = ((delay << 1) | 1) & 7; 2754 } 2755 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 2756 } 2757 } 2758 2759 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2760 kmp_int32 gtid) { 2761 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 2762 } 2763 2764 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2765 __asm__ volatile(HLE_RELEASE "movl %1,%0" 2766 : "=m"(*lck) 2767 : "r"(KMP_LOCK_FREE(hle)) 2768 : "memory"); 2769 return KMP_LOCK_RELEASED; 2770 } 2771 2772 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2773 kmp_int32 gtid) { 2774 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 2775 } 2776 2777 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2778 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 2779 } 2780 2781 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2782 kmp_int32 gtid) { 2783 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 2784 } 2785 2786 static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { 2787 __kmp_init_queuing_lock(lck); 2788 } 2789 2790 static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { 2791 __kmp_destroy_queuing_lock(lck); 2792 } 2793 2794 static void __kmp_destroy_rtm_lock_with_checks(kmp_queuing_lock_t *lck) { 2795 __kmp_destroy_queuing_lock_with_checks(lck); 2796 } 2797 2798 static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2799 unsigned retries = 3, status; 2800 do { 2801 status = _xbegin(); 2802 if (status == _XBEGIN_STARTED) { 2803 if (__kmp_is_unlocked_queuing_lock(lck)) 2804 return; 2805 _xabort(0xff); 2806 } 2807 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 2808 // Wait until lock becomes free 2809 while (!__kmp_is_unlocked_queuing_lock(lck)) 2810 __kmp_yield(TRUE); 2811 } else if (!(status & _XABORT_RETRY)) 2812 break; 2813 } while (retries--); 2814 2815 // Fall-back non-speculative lock (xchg) 2816 __kmp_acquire_queuing_lock(lck, gtid); 2817 } 2818 2819 static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2820 kmp_int32 gtid) { 2821 __kmp_acquire_rtm_lock(lck, gtid); 2822 } 2823 2824 static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2825 if 
(__kmp_is_unlocked_queuing_lock(lck)) { 2826 // Releasing from speculation 2827 _xend(); 2828 } else { 2829 // Releasing from a real lock 2830 __kmp_release_queuing_lock(lck, gtid); 2831 } 2832 return KMP_LOCK_RELEASED; 2833 } 2834 2835 static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2836 kmp_int32 gtid) { 2837 return __kmp_release_rtm_lock(lck, gtid); 2838 } 2839 2840 static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2841 unsigned retries = 3, status; 2842 do { 2843 status = _xbegin(); 2844 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 2845 return 1; 2846 } 2847 if (!(status & _XABORT_RETRY)) 2848 break; 2849 } while (retries--); 2850 2851 return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0; 2852 } 2853 2854 static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2855 kmp_int32 gtid) { 2856 return __kmp_test_rtm_lock(lck, gtid); 2857 } 2858 2859 #endif // KMP_USE_TSX 2860 2861 // Entry functions for indirect locks (first element of direct lock jump tables) 2862 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, 2863 kmp_dyna_lockseq_t tag); 2864 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); 2865 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2866 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2867 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2868 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2869 kmp_int32); 2870 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2871 kmp_int32); 2872 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2873 kmp_int32); 2874 2875 // Jump tables for the indirect lock functions 2876 // Only fill in the odd entries, that avoids the need to shift out the low bit 2877 2878 // init functions 2879 #define expand(l, op) 0, __kmp_init_direct_lock, 2880 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { 2881 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; 2882 #undef expand 2883 2884 // destroy functions 2885 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 2886 static void (*direct_destroy[])(kmp_dyna_lock_t *) = { 2887 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 2888 #undef expand 2889 #define expand(l, op) \ 2890 0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks, 2891 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = { 2892 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 2893 #undef expand 2894 2895 // set/acquire functions 2896 #define expand(l, op) \ 2897 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2898 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { 2899 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; 2900 #undef expand 2901 #define expand(l, op) \ 2902 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2903 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2904 __kmp_set_indirect_lock_with_checks, 0, 2905 KMP_FOREACH_D_LOCK(expand, acquire)}; 2906 #undef expand 2907 2908 // unset/release and test functions 2909 #define expand(l, op) \ 2910 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2911 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { 2912 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; 2913 static int 
(*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { 2914 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2915 #undef expand 2916 #define expand(l, op) \ 2917 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2918 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2919 __kmp_unset_indirect_lock_with_checks, 0, 2920 KMP_FOREACH_D_LOCK(expand, release)}; 2921 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2922 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2923 #undef expand 2924 2925 // Exposes only one set of jump tables (*lock or *lock_with_checks). 2926 void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *) = 0; 2927 int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; 2928 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; 2929 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; 2930 2931 // Jump tables for the indirect lock functions 2932 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, 2933 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { 2934 KMP_FOREACH_I_LOCK(expand, init)}; 2935 #undef expand 2936 2937 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, 2938 static void (*indirect_destroy[])(kmp_user_lock_p) = { 2939 KMP_FOREACH_I_LOCK(expand, destroy)}; 2940 #undef expand 2941 #define expand(l, op) \ 2942 (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks, 2943 static void (*indirect_destroy_check[])(kmp_user_lock_p) = { 2944 KMP_FOREACH_I_LOCK(expand, destroy)}; 2945 #undef expand 2946 2947 // set/acquire functions 2948 #define expand(l, op) \ 2949 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 2950 static int (*indirect_set[])(kmp_user_lock_p, 2951 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)}; 2952 #undef expand 2953 #define expand(l, op) \ 2954 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 2955 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { 2956 KMP_FOREACH_I_LOCK(expand, acquire)}; 2957 #undef expand 2958 2959 // unset/release and test functions 2960 #define expand(l, op) \ 2961 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 2962 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { 2963 KMP_FOREACH_I_LOCK(expand, release)}; 2964 static int (*indirect_test[])(kmp_user_lock_p, 2965 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)}; 2966 #undef expand 2967 #define expand(l, op) \ 2968 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 2969 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { 2970 KMP_FOREACH_I_LOCK(expand, release)}; 2971 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { 2972 KMP_FOREACH_I_LOCK(expand, test)}; 2973 #undef expand 2974 2975 // Exposes only one jump tables (*lock or *lock_with_checks). 2976 void (*(*__kmp_indirect_destroy))(kmp_user_lock_p) = 0; 2977 int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; 2978 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; 2979 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; 2980 2981 // Lock index table. 2982 kmp_indirect_lock_table_t __kmp_i_lock_table; 2983 2984 // Size of indirect locks. 2985 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0}; 2986 2987 // Jump tables for lock accessor/modifier. 
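// Added illustrative expansion: after fill_table() runs in
// __kmp_init_dynamic_user_locks() below, an entry such as
// __kmp_indirect_set_location[locktag_ticket] holds
// (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location,
// i.e. each slot is simply the type-erased accessor/modifier for that lock
// kind.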
2988 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 2989 const ident_t *) = {0}; 2990 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 2991 kmp_lock_flags_t) = {0}; 2992 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])( 2993 kmp_user_lock_p) = {0}; 2994 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( 2995 kmp_user_lock_p) = {0}; 2996 2997 // Use different lock pools for different lock types. 2998 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0}; 2999 3000 // User lock allocator for dynamically dispatched indirect locks. Every entry of 3001 // the indirect lock table holds the address and type of the allocated indirect 3002 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is 3003 // full. A destroyed indirect lock object is returned to the reusable pool of 3004 // locks, unique to each lock type. 3005 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, 3006 kmp_int32 gtid, 3007 kmp_indirect_locktag_t tag) { 3008 kmp_indirect_lock_t *lck; 3009 kmp_lock_index_t idx; 3010 3011 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3012 3013 if (__kmp_indirect_lock_pool[tag] != NULL) { 3014 // Reuse the allocated and destroyed lock object 3015 lck = __kmp_indirect_lock_pool[tag]; 3016 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3017 idx = lck->lock->pool.index; 3018 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3019 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", 3020 lck)); 3021 } else { 3022 idx = __kmp_i_lock_table.next; 3023 // Check capacity and double the size if it is full 3024 if (idx == __kmp_i_lock_table.size) { 3025 // Double up the space for block pointers 3026 int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; 3027 kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate( 3028 2 * row * sizeof(kmp_indirect_lock_t *)); 3029 KMP_MEMCPY(new_table, __kmp_i_lock_table.table, 3030 row * sizeof(kmp_indirect_lock_t *)); 3031 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; 3032 __kmp_i_lock_table.table = new_table; 3033 __kmp_free(old_table); 3034 // Allocate new objects in the new blocks 3035 for (int i = row; i < 2 * row; ++i) 3036 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate( 3037 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3038 __kmp_i_lock_table.size = 2 * idx; 3039 } 3040 __kmp_i_lock_table.next++; 3041 lck = KMP_GET_I_LOCK(idx); 3042 // Allocate a new base lock object 3043 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3044 KA_TRACE(20, 3045 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); 3046 } 3047 3048 __kmp_release_lock(&__kmp_global_lock, gtid); 3049 3050 lck->type = tag; 3051 3052 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3053 *((kmp_lock_index_t *)user_lock) = idx 3054 << 1; // indirect lock word must be even 3055 } else { 3056 *((kmp_indirect_lock_t **)user_lock) = lck; 3057 } 3058 3059 return lck; 3060 } 3061 3062 // User lock lookup for dynamically dispatched locks.
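// Added worked example (illustrative index): when OMP_LOCK_T_SIZE is smaller
// than a pointer, the user's lock word stores the table index shifted left by
// one, so the lock at index 5 is encoded as 10. The stored value is always
// even, which keeps it distinguishable from a direct lock word whose tag keeps
// the low bit set (which is why the direct jump tables above fill only the odd
// entries), and the lookup below recovers the index with KMP_EXTRACT_I_INDEX.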
3063 static __forceinline kmp_indirect_lock_t * 3064 __kmp_lookup_indirect_lock(void **user_lock, const char *func) { 3065 if (__kmp_env_consistency_check) { 3066 kmp_indirect_lock_t *lck = NULL; 3067 if (user_lock == NULL) { 3068 KMP_FATAL(LockIsUninitialized, func); 3069 } 3070 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3071 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3072 if (idx >= __kmp_i_lock_table.size) { 3073 KMP_FATAL(LockIsUninitialized, func); 3074 } 3075 lck = KMP_GET_I_LOCK(idx); 3076 } else { 3077 lck = *((kmp_indirect_lock_t **)user_lock); 3078 } 3079 if (lck == NULL) { 3080 KMP_FATAL(LockIsUninitialized, func); 3081 } 3082 return lck; 3083 } else { 3084 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3085 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3086 } else { 3087 return *((kmp_indirect_lock_t **)user_lock); 3088 } 3089 } 3090 } 3091 3092 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, 3093 kmp_dyna_lockseq_t seq) { 3094 #if KMP_USE_ADAPTIVE_LOCKS 3095 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3096 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3097 seq = lockseq_queuing; 3098 } 3099 #endif 3100 #if KMP_USE_TSX 3101 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3102 seq = lockseq_queuing; 3103 } 3104 #endif 3105 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3106 kmp_indirect_lock_t *l = 3107 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3108 KMP_I_LOCK_FUNC(l, init)(l->lock); 3109 KA_TRACE( 3110 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", 3111 seq)); 3112 } 3113 3114 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { 3115 kmp_uint32 gtid = __kmp_entry_gtid(); 3116 kmp_indirect_lock_t *l = 3117 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3118 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3119 kmp_indirect_locktag_t tag = l->type; 3120 3121 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3122 3123 // Use the base lock's space to keep the pool chain. 
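  // Added note: the destroyed lock's own storage is reused as the free-list
  // node (pool.next, and pool.index when the index encoding is in use), so no
  // extra memory is needed; __kmp_allocate_indirect_lock() above pops from
  // this per-tag pool before it ever grows the lock table.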
3124 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3125 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3126 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3127 } 3128 __kmp_indirect_lock_pool[tag] = l; 3129 3130 __kmp_release_lock(&__kmp_global_lock, gtid); 3131 } 3132 3133 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3134 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3135 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3136 } 3137 3138 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3139 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3140 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3141 } 3142 3143 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3144 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3145 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3146 } 3147 3148 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3149 kmp_int32 gtid) { 3150 kmp_indirect_lock_t *l = 3151 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3152 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3153 } 3154 3155 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3156 kmp_int32 gtid) { 3157 kmp_indirect_lock_t *l = 3158 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3159 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3160 } 3161 3162 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3163 kmp_int32 gtid) { 3164 kmp_indirect_lock_t *l = 3165 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3166 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3167 } 3168 3169 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3170 3171 // This is used only in kmp_error.cpp when consistency checking is on. 3172 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { 3173 switch (seq) { 3174 case lockseq_tas: 3175 case lockseq_nested_tas: 3176 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3177 #if KMP_USE_FUTEX 3178 case lockseq_futex: 3179 case lockseq_nested_futex: 3180 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3181 #endif 3182 case lockseq_ticket: 3183 case lockseq_nested_ticket: 3184 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3185 case lockseq_queuing: 3186 case lockseq_nested_queuing: 3187 #if KMP_USE_ADAPTIVE_LOCKS 3188 case lockseq_adaptive: 3189 #endif 3190 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3191 case lockseq_drdpa: 3192 case lockseq_nested_drdpa: 3193 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3194 default: 3195 return 0; 3196 } 3197 } 3198 3199 // Initializes data for dynamic user locks. 
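// Added call-path sketch (for orientation only): for an indirect lock, a set
// request entering through the exposed __kmp_direct_set table reaches
// __kmp_set_indirect_lock() or __kmp_set_indirect_lock_with_checks() above,
// the first element of the direct jump tables, which looks up the
// kmp_indirect_lock_t with KMP_LOOKUP_I_LOCK (or __kmp_lookup_indirect_lock)
// and then dispatches through the per-kind table via
// KMP_I_LOCK_FUNC(l, set)(l->lock, gtid). The function below chooses between
// the checked and unchecked tables and sets up the lock index table itself.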
3200 void __kmp_init_dynamic_user_locks() { 3201 // Initialize jump table for the lock functions 3202 if (__kmp_env_consistency_check) { 3203 __kmp_direct_set = direct_set_check; 3204 __kmp_direct_unset = direct_unset_check; 3205 __kmp_direct_test = direct_test_check; 3206 __kmp_direct_destroy = direct_destroy_check; 3207 __kmp_indirect_set = indirect_set_check; 3208 __kmp_indirect_unset = indirect_unset_check; 3209 __kmp_indirect_test = indirect_test_check; 3210 __kmp_indirect_destroy = indirect_destroy_check; 3211 } else { 3212 __kmp_direct_set = direct_set; 3213 __kmp_direct_unset = direct_unset; 3214 __kmp_direct_test = direct_test; 3215 __kmp_direct_destroy = direct_destroy; 3216 __kmp_indirect_set = indirect_set; 3217 __kmp_indirect_unset = indirect_unset; 3218 __kmp_indirect_test = indirect_test; 3219 __kmp_indirect_destroy = indirect_destroy; 3220 } 3221 // If the user locks have already been initialized, then return. Allow the 3222 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate 3223 // new lock tables if they have already been allocated. 3224 if (__kmp_init_user_locks) 3225 return; 3226 3227 // Initialize lock index table 3228 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3229 __kmp_i_lock_table.table = 3230 (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3231 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( 3232 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3233 __kmp_i_lock_table.next = 0; 3234 3235 // Indirect lock size 3236 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3237 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3238 #if KMP_USE_ADAPTIVE_LOCKS 3239 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3240 #endif 3241 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3242 #if KMP_USE_TSX 3243 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3244 #endif 3245 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3246 #if KMP_USE_FUTEX 3247 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3248 #endif 3249 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3250 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3251 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3252 3253 // Initialize lock accessor/modifier 3254 #define fill_jumps(table, expand, sep) \ 3255 { \ 3256 table[locktag##sep##ticket] = expand(ticket); \ 3257 table[locktag##sep##queuing] = expand(queuing); \ 3258 table[locktag##sep##drdpa] = expand(drdpa); \ 3259 } 3260 3261 #if KMP_USE_ADAPTIVE_LOCKS 3262 #define fill_table(table, expand) \ 3263 { \ 3264 fill_jumps(table, expand, _); \ 3265 table[locktag_adaptive] = expand(queuing); \ 3266 fill_jumps(table, expand, _nested_); \ 3267 } 3268 #else 3269 #define fill_table(table, expand) \ 3270 { \ 3271 fill_jumps(table, expand, _); \ 3272 fill_jumps(table, expand, _nested_); \ 3273 } 3274 #endif // KMP_USE_ADAPTIVE_LOCKS 3275 3276 #define expand(l) \ 3277 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location 3278 fill_table(__kmp_indirect_set_location, expand); 3279 #undef expand 3280 #define expand(l) \ 3281 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags 3282 fill_table(__kmp_indirect_set_flags, expand); 3283 #undef expand 3284 #define expand(l) \ 3285 (const ident_t *(*)(kmp_user_lock_p)) 
__kmp_get_##l##_lock_location 3286 fill_table(__kmp_indirect_get_location, expand); 3287 #undef expand 3288 #define expand(l) \ 3289 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags 3290 fill_table(__kmp_indirect_get_flags, expand); 3291 #undef expand 3292 3293 __kmp_init_user_locks = TRUE; 3294 } 3295 3296 // Clean up the lock table. 3297 void __kmp_cleanup_indirect_user_locks() { 3298 kmp_lock_index_t i; 3299 int k; 3300 3301 // Clean up locks in the pools first (they were already destroyed before going 3302 // into the pools). 3303 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { 3304 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; 3305 while (l != NULL) { 3306 kmp_indirect_lock_t *ll = l; 3307 l = (kmp_indirect_lock_t *)l->lock->pool.next; 3308 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", 3309 ll)); 3310 __kmp_free(ll->lock); 3311 ll->lock = NULL; 3312 } 3313 __kmp_indirect_lock_pool[k] = NULL; 3314 } 3315 // Clean up the remaining undestroyed locks. 3316 for (i = 0; i < __kmp_i_lock_table.next; i++) { 3317 kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); 3318 if (l->lock != NULL) { 3319 // Locks not destroyed explicitly need to be destroyed here. 3320 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3321 KA_TRACE( 3322 20, 3323 ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", 3324 l)); 3325 __kmp_free(l->lock); 3326 } 3327 } 3328 // Free the table 3329 for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) 3330 __kmp_free(__kmp_i_lock_table.table[i]); 3331 __kmp_free(__kmp_i_lock_table.table); 3332 3333 __kmp_init_user_locks = FALSE; 3334 } 3335 3336 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3337 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3338 3339 #else // KMP_USE_DYNAMIC_LOCK 3340 3341 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) { 3342 __kmp_init_tas_lock(lck); 3343 } 3344 3345 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { 3346 __kmp_init_nested_tas_lock(lck); 3347 } 3348 3349 #if KMP_USE_FUTEX 3350 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) { 3351 __kmp_init_futex_lock(lck); 3352 } 3353 3354 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { 3355 __kmp_init_nested_futex_lock(lck); 3356 } 3357 #endif 3358 3359 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) { 3360 return lck == lck->lk.initialized; 3361 } 3362 3363 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 3364 __kmp_init_ticket_lock(lck); 3365 } 3366 3367 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 3368 __kmp_init_nested_ticket_lock(lck); 3369 } 3370 3371 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) { 3372 return lck == lck->lk.initialized; 3373 } 3374 3375 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 3376 __kmp_init_queuing_lock(lck); 3377 } 3378 3379 static void 3380 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 3381 __kmp_init_nested_queuing_lock(lck); 3382 } 3383 3384 #if KMP_USE_ADAPTIVE_LOCKS 3385 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 3386 __kmp_init_adaptive_lock(lck); 3387 } 3388 #endif 3389 3390 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { 3391 return lck == lck->lk.initialized; 3392 } 3393 3394 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 3395 __kmp_init_drdpa_lock(lck); 3396 } 3397 
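// Added note (an assumption about the bind macros, which are defined
// elsewhere): these *_with_checks init wrappers add no validation of their
// own; they appear to exist so that KMP_BIND_USER_LOCK_WITH_CHECKS and
// KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS, used in __kmp_set_user_lock_vptrs()
// below, can install a uniformly named entry point for every lock kind.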

#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
  fill_table(__kmp_indirect_set_location, expand);
#undef expand
#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
  fill_table(__kmp_indirect_set_flags, expand);
#undef expand
#define expand(l)                                                              \
  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
  fill_table(__kmp_indirect_get_location, expand);
#undef expand
#define expand(l)                                                              \
  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
  fill_table(__kmp_indirect_get_flags, expand);
#undef expand

  __kmp_init_user_locks = TRUE;
}

// Clean up the lock table.
void __kmp_cleanup_indirect_user_locks() {
  kmp_lock_index_t i;
  int k;

  // Clean up locks in the pools first (they were already destroyed before
  // going into the pools).
  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
    while (l != NULL) {
      kmp_indirect_lock_t *ll = l;
      l = (kmp_indirect_lock_t *)l->lock->pool.next;
      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
                    ll));
      __kmp_free(ll->lock);
      ll->lock = NULL;
    }
    __kmp_indirect_lock_pool[k] = NULL;
  }
  // Clean up the remaining undestroyed locks.
  for (i = 0; i < __kmp_i_lock_table.next; i++) {
    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
    if (l->lock != NULL) {
      // Locks not destroyed explicitly need to be destroyed here.
      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
      KA_TRACE(
          20,
          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
           l));
      __kmp_free(l->lock);
    }
  }
  // Free the table
  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
    __kmp_free(__kmp_i_lock_table.table[i]);
  __kmp_free(__kmp_i_lock_table.table);

  __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}

static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
#endif

static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}

static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}

static void
__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_nested_queuing_lock(lck);
}

#if KMP_USE_ADAPTIVE_LOCKS
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
#endif

static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
}

static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_nested_drdpa_lock(lck);
}

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;
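
// Illustrative sketch (an added assumption, not code from the original
// source): once __kmp_set_user_lock_vptrs() below has been called for the
// selected lock kind, generic code can operate on a kmp_user_lock_p through
// these pointers without knowing the concrete lock type, e.g.
//
//   if (__kmp_acquire_user_lock_with_checks_ != NULL)
//     (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
//
// Pointers that are left NULL for a given kind (for example the location and
// flags accessors for "tas" and "futex" locks) mean the operation is not
// supported for that kind, so callers must check for NULL first.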

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first element
    // of the new table. All the tables are thus organized into a list, and
    // they can be freed when the library shuts down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}
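
// Illustrative note (added, not from the original source): because element [0]
// of every newly allocated table stores a pointer to the table it replaced,
// the successive table generations form a singly linked list:
//
//   current table --[0]--> previous table --[0]--> ... --[0]--> NULL
//
// (the very first reallocation stores the initial NULL table pointer there).
// __kmp_cleanup_user_locks() below walks this chain through table_ptr[0] to
// free every generation at shutdown.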

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}
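
// Illustrative note (added, not from the original source): each block is a
// single allocation laid out as
//
//   [lock 0][lock 1]...[lock __kmp_num_locks_in_block - 1][kmp_block_of_locks]
//
// new_block->locks points at the start of the buffer, while the block header
// itself lives at buffer + space_for_locks, which is why the cleanup code
// frees block_ptr->locks but never block_ptr itself.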

// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    // ANNOTATION: Found no good way to express the synchronisation
    // between allocation and usage, so ignore the allocation.
    ANNOTATE_IGNORE_WRITES_BEGIN();
    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }
    ANNOTATE_IGNORE_WRITES_END();

    // Insert lock in the table so that it can be freed in __kmp_cleanup,
    // and the debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up

  return lck;
}
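
// Illustrative note (added, not from the original source): what is stored in
// the user's lock variable depends on its size. When OMP_LOCK_T_SIZE is
// smaller than a pointer (e.g. a 4-byte omp_lock_t on a 64-bit target), only
// the 1-based index into __kmp_user_lock_table fits, and
// __kmp_lookup_user_lock() below maps that index back to a kmp_user_lock_p;
// otherwise the kmp_user_lock_p itself is stored and read back directly.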

// Put the lock's memory into the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about locks in the pool--we will free them
  // when iterating through the lock table (it includes all the locks, dead or
  // alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through the lock table and free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type
  // is currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will
  // fail for "pool" objects on the free list. This must happen as the
  // "location" field of real user locks overlaps the "index" field of
  // "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
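  //
  // Summary of the consequence (added note, not from the original source):
  // for lock kinds that do install __kmp_is_user_lock_initialized_ (ticket,
  // queuing, adaptive, drdpa), a live lock satisfies
  // lck->lk.initialized == lck, while a pooled entry holds pool.next in that
  // word and fails the check, so it is only freed. For "tas" and "futex" the
  // pointer stays NULL, so the initialized test below (and the
  // not-destroyed warning) is skipped entirely and the memory is simply freed.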
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock, so that the
    // state of the locks stays consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Clean up internal lock dynamic resources (for drdpa locks in
      // particular).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK