/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems on
// different Linux* OS distributions that either require that you include (or
// break when you try to include) <pci/types.h>. Since all we need is the two
// macros below (which are part of the kernel ABI, so can't change) we just
// define the constants here and don't include <futex.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic wraps properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);

  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
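  // If there are more active threads than available processors, spinning only
  // steals cycles from the lock holder, so yield the CPU; otherwise spin for a
  // while before falling into the backoff loop below.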
  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
    KMP_YIELD(TRUE);
  } else {
    KMP_YIELD_SPIN(spins);
  }

  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
         !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    __kmp_spin_backoff(&backoff);
    if (TCR_4(__kmp_nth) >
        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
      KMP_YIELD(TRUE);
    } else {
      KMP_YIELD_SPIN(spins);
    }
  }
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
  ANNOTATE_TAS_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_tas_lock(lck, gtid);
}

int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_tas_lock(lck, gtid);
}

int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_TAS_RELEASED(lck);
  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_YIELD(TCR_4(__kmp_nth) >
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}
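/* Typical lifecycle of a simple TAS lock, as driven by the OpenMP user API
   (illustrative sketch only; the actual dispatch from omp_set_lock and
   friends goes through the lock dispatch machinery in kmp_csupport, and
   gtid is the caller's global thread id):

     kmp_tas_lock_t l;
     __kmp_init_tas_lock(&l);           // omp_init_lock
     __kmp_acquire_tas_lock(&l, gtid);  // omp_set_lock
     __kmp_release_tas_lock(&l, gtid);  // omp_unset_lock
     __kmp_destroy_tas_lock(&l);        // omp_destroy_lock

   The *_with_checks variants are substituted when consistency checking is
   enabled and report misuse via KMP_FATAL. */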
static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
      (__kmp_get_tas_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_tas_lock(lck, gtid);
}

void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
  lck->lk.poll = KMP_LOCK_FREE(tas);
}

void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_tas_lock(lck);
}

// nested test and set locks

int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_tas_lock_timed_template(lck, gtid);
    ANNOTATE_TAS_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_tas_lock(lck, gtid);
}

int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_tas_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_tas_lock(lck, gtid);
}
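// Releasing a nested lock only drops one level of the recursion count; the
// underlying TAS lock is not released (and other threads cannot acquire it)
// until the depth reaches zero, at which point KMP_LOCK_RELEASED is returned
// instead of KMP_LOCK_STILL_HELD.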
int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_tas_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_tas_lock(lck, gtid);
}

void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_destroy_tas_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention. They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
}

static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck,
                                        kmp_int32 gtid) {
  kmp_int32 gtid_code = (gtid + 1) << 1;

  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid_code))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  KMP_FSYNC_PREPARE(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));
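  // The poll word encodes both the owner and a "waiters present" flag:
  // 0 means free, (gtid + 1) << 1 means held by gtid with no known waiters,
  // and a set low-order bit means at least one thread has gone (or is going)
  // to sleep in futex_wait, so the releasing thread must issue a futex_wake.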
  kmp_int32 poll_val;

  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
              &(lck->lk.poll), KMP_LOCK_FREE(futex),
              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {

    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
         lck, gtid, poll_val, cond));

    // NOTE: if you try to use the following condition for this branch
    //
    // if ( poll_val & 1 == 0 )
    //
    // Then the 12.0 compiler has a bug where the following block will
    // always be skipped, regardless of the value of the LSB of poll_val.
    if (!cond) {
      // Try to set the lsb in the poll to indicate to the owner
      // thread that they need to wake this thread up.
      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
        KA_TRACE(
            1000,
            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
             lck, lck->lk.poll, gtid));
        continue;
      }
      poll_val |= KMP_LOCK_BUSY(1, futex);

      KA_TRACE(1000,
               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
                lck->lk.poll, gtid));
    }

    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
         lck, gtid, poll_val));

    kmp_int32 rc;
    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
                      NULL, 0)) != 0) {
      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
                      "failed (rc=%d errno=%d)\n",
                      lck, gtid, poll_val, rc, errno));
      continue;
    }

    KA_TRACE(1000,
             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and was entered on
    // the OS futex queue. We must now perform a futex wake call when releasing
    // the lock, as we have no idea how many other threads are in the queue.
    gtid_code |= 1;
  }

  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
  ANNOTATE_FUTEX_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_futex_lock(lck, gtid);
}

int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_futex_lock(lck, gtid);
}

int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_FUTEX_RELEASED(lck);

  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));

  KA_TRACE(1000,
           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
            lck, gtid, poll_val));
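  // The poll word was exchanged with FREE above; if its previous value had
  // the wait bit set, at least one thread is (or soon will be) blocked in
  // futex_wait on this lock, so issue a futex_wake so a waiter can re-attempt
  // the acquire.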
  if (KMP_LOCK_STRIP(poll_val) & 1) {
    KA_TRACE(1000,
             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
              lck, gtid));
    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
            NULL, NULL, 0);
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));

  KMP_YIELD(TCR_4(__kmp_nth) >
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
      (__kmp_get_futex_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_futex_lock(lck, gtid);
}

void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
}

void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_futex_lock(lck);
}

// nested futex locks

int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_futex_lock_timed_template(lck, gtid);
    ANNOTATE_FUTEX_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_futex_lock(lck, gtid);
}

int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_futex_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_futex_lock(lck, gtid);
}

int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_futex_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_futex_lock(lck, gtid);
}

void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_destroy_futex_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_futex_lock(lck);
}

#endif // KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.owner_id,
                                   std::memory_order_relaxed) -
         1;
}

static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.depth_locked,
                                   std::memory_order_relaxed) != -1;
}

static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
                                   std::memory_order_acquire) == my_ticket;
}
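// Ticket (bakery) acquisition: each thread atomically takes the next ticket
// with a relaxed fetch_add on next_ticket, then waits until now_serving
// reaches that ticket (the acquire load provides the ordering), which gives
// FIFO fairness among contending threads.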
__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);

#ifdef USE_LOCK_PROFILE
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) != my_ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_acquire) == my_ticket) {
    return KMP_LOCK_ACQUIRED_FIRST;
  }
  KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
  ANNOTATE_TICKET_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_set_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_ticket_lock(lck, gtid);

  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                             std::memory_order_relaxed);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                   std::memory_order_relaxed);

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) == my_ticket) {
    kmp_uint32 next_ticket = my_ticket + 1;
    if (std::atomic_compare_exchange_strong_explicit(
            &lck->lk.next_ticket, &my_ticket, next_ticket,
            std::memory_order_acquire, std::memory_order_acquire)) {
      return TRUE;
    }
  }
  return FALSE;
}

static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_test_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_ticket_lock(lck, gtid);

  if (retval) {
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
  }
  return retval;
}
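// Release simply bumps now_serving with release ordering so the next ticket
// holder can proceed. The distance between next_ticket and now_serving is a
// rough count of waiters and is used to decide whether to yield the CPU.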
int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                  std::memory_order_relaxed) -
                        std::atomic_load_explicit(&lck->lk.now_serving,
                                                  std::memory_order_relaxed);

  ANNOTATE_TICKET_RELEASED(lck);
  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
                                 std::memory_order_release);

  KMP_YIELD(distance >
            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
      (__kmp_get_ticket_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  return __kmp_release_ticket_lock(lck, gtid);
}

void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.self = lck;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(
      &lck->lk.owner_id, 0,
      std::memory_order_relaxed); // no thread owns the lock.
  std::atomic_store_explicit(
      &lck->lk.depth_locked, -1,
      std::memory_order_relaxed); // -1 => not a nested lock.
  std::atomic_store_explicit(&lck->lk.initialized, true,
                             std::memory_order_release);
}

void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
  std::atomic_store_explicit(&lck->lk.initialized, false,
                             std::memory_order_release);
  lck->lk.self = NULL;
  lck->lk.location = NULL;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
                             std::memory_order_relaxed);
}

static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_ticket_lock(lck);
}

// nested ticket locks

int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                   std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_ticket_lock_timed_template(lck, gtid);
    ANNOTATE_TICKET_ACQUIRED(lck);
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_ticket_lock(lck, gtid);
}

int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                            std::memory_order_relaxed) +
             1;
  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
    retval = 0;
  } else {
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    retval = 1;
  }
  return retval;
}

static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_ticket_lock(lck, gtid);
}

int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
                                      std::memory_order_relaxed) -
       1) == 0) {
    std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
    __kmp_release_ticket_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_ticket_lock(lck, gtid);
}

void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
  // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
}

static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_ticket_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
*/

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = {0};
static int tc = 0;
#define TRACE_LOCK(X, Y)                                                       \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
#define TRACE_LOCK_T(X, Y, Z)                                                  \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
               Z, Q);

static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");

  i = tc % TRACE_BUF_ELE;
  __kmp_printf_no_lock("%s\n", traces[i]);
  i = (i + 1) % TRACE_BUF_ELE;
  while (i != (tc % TRACE_BUF_ELE)) {
    __kmp_printf_no_lock("%s", traces[i]);
    i = (i + 1) % TRACE_BUF_ELE;
  }
  __kmp_printf_no_lock("\n");

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
                       "next_wait:%d, head_id:%d, tail_id:%d\n",
                       gtid + 1, this_thr->th.th_spin_here,
                       this_thr->th.th_next_waiting, head_id, tail_id);

  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);

  if (lck->lk.head_id >= 1) {
    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
    while (t > 0) {
      __kmp_printf_no_lock("-> %d ", t);
      t = __kmp_threads[t - 1]->th.th_next_waiting;
    }
  }
  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
  __kmp_printf_no_lock("\n\n");
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
  return TCR_4(lck->lk.owner_id) - 1;
}

static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {
  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
  volatile kmp_uint32 *spin_here_p;
  kmp_int32 need_mf = 1;

#if OMPT_SUPPORT
  ompt_state_t prev_state = ompt_state_undefined;
#endif

  KA_TRACE(1000,
           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));

  KMP_FSYNC_PREPARE(lck);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  spin_here_p = &this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "acq ent");
  if (*spin_here_p)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!*spin_here_p);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
     head_id_p that may follow, not just in execution order, but also in
     visibility order. This way, when a releasing thread observes the changes
     to the queue by this thread, it can rightly assume that spin_here_p has
     already been set to TRUE, so that when it sets spin_here_p to FALSE, it
     is not premature. If the releasing thread sets spin_here_p to FALSE
     before this thread sets it to TRUE, this thread will hang. */
  *spin_here_p = TRUE; /* before enqueuing to prevent race */

  while (1) {
    kmp_int32 enqueued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

    switch (head) {

    case -1: {
#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      tail = 0; /* to make sure next link asynchronously read is not set
                   accidentally; this assignment prevents us from entering the
                   if ( t > 0 ) condition in the enqueued case below, which is
                   not necessary for this state transition */

      need_mf = 0;
      /* try (-1,0)->(tid,tid) */
      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
                                             KMP_PACK_64(-1, 0),
                                             KMP_PACK_64(gtid + 1, gtid + 1));
#ifdef DEBUG_QUEUING_LOCKS
      if (enqueued)
        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
#endif
    } break;

    default: {
      tail = *tail_id_p;
      KMP_DEBUG_ASSERT(tail != gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif

      if (tail == 0) {
        enqueued = FALSE;
      } else {
        need_mf = 0;
        /* try (h,t) or (h,h)->(h,tid) */
        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
        if (enqueued)
          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
#endif
      }
    } break;

    case 0: /* empty queue */
    {
      kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      /* try (0,0)->(-1,0) */

      /* only legal transition out of head = 0 is head = -1 with no change to
       * tail */
      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);

      if (grabbed_lock) {

        *spin_here_p = FALSE;

        KA_TRACE(
            1000,
            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
#endif

#if OMPT_SUPPORT
        if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
          /* change the state before clearing wait_id */
          this_thr->th.ompt_thread_info.state = prev_state;
          this_thr->th.ompt_thread_info.wait_id = 0;
        }
#endif

        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
      }
      enqueued = FALSE;
    } break;
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
      /* this thread will spin; set wait_id before entering wait state */
      prev_state = this_thr->th.ompt_thread_info.state;
      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
    }
#endif

    if (enqueued) {
      if (tail > 0) {
        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
        KMP_ASSERT(tail_thr != NULL);
        tail_thr->th.th_next_waiting = gtid + 1;
        /* corresponding wait for this write in release code */
      }
      KA_TRACE(1000,
               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
                lck, gtid));

      /* ToDo: May want to consider using __kmp_wait_sleep or something that
         sleeps for throughput only here. */
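      // Once enqueued, this thread parks on its own th_spin_here flag; the
      // releasing thread finds it through the queue links and clears the flag
      // when ownership is handed over, which is what ends this wait.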
      KMP_MB();
      KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq spin");

      if (this_thr->th.th_next_waiting != 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
                      "waiting on queue\n",
                      lck, gtid));

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq exit 2");
#endif

#if OMPT_SUPPORT
      /* change the state before clearing wait_id */
      this_thr->th.ompt_thread_info.state = prev_state;
      this_thr->th.ompt_thread_info.wait_id = 0;
#endif

      /* got lock, we were dequeued by the thread that released lock */
      return KMP_LOCK_ACQUIRED_FIRST;
    }

    /* Yield if number of threads > number of logical processors */
    /* ToDo: Not sure why this should only be in oversubscription case,
       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
    KMP_YIELD(TCR_4(__kmp_nth) >
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "acq retry");
#endif
  }
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
  ANNOTATE_QUEUING_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_queuing_lock(lck, gtid);

  lck->lk.owner_id = gtid + 1;
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  kmp_int32 head;
#ifdef KMP_DEBUG
  kmp_info_t *this_thr;
#endif

  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
#ifdef KMP_DEBUG
  this_thr = __kmp_thread_from_gtid(gtid);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
#endif

  head = *head_id_p;

  if (head == 0) { /* nobody on queue, nobody holding */
    /* try (0,0)->(-1,0) */
    if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
      KA_TRACE(1000,
               ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
      KMP_FSYNC_ACQUIRED(lck);
      ANNOTATE_QUEUING_ACQUIRED(lck);
      return TRUE;
    }
  }

  KA_TRACE(1000,
           ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
  return FALSE;
}

static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                               kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_queuing_lock(lck, gtid);

  if (retval) {
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}
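// Release implements the transitions in the state diagram above: with no
// waiters it does (-1,0)->(0,0); with waiters it dequeues the current head
// (that thread becomes the new owner once its th_spin_here flag is cleared),
// using (h,h)->(-1,0) for a single waiter or (h,t)->(h',t) otherwise.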
int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  kmp_info_t *this_thr;
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;

  KA_TRACE(1000,
           ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
  this_thr = __kmp_thread_from_gtid(gtid);
  KMP_DEBUG_ASSERT(this_thr != NULL);
#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "rel ent");

  if (this_thr->th.th_spin_here)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_QUEUING_RELEASED(lck);

  while (1) {
    kmp_int32 dequeued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
    tail = *tail_id_p;
    TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
    if (head == 0)
      __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
    KMP_DEBUG_ASSERT(head !=
                     0); /* holding the lock, head must be -1 or queue head */

    if (head == -1) { /* nobody on queue */
      /* try (-1,0)->(0,0) */
      if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
        KA_TRACE(
            1000,
            ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
#endif

#if OMPT_SUPPORT
        /* nothing to do - no other thread is trying to shift blame */
#endif
        return KMP_LOCK_RELEASED;
      }
      dequeued = FALSE;
    } else {
      KMP_MB();
      tail = *tail_id_p;
      if (head == tail) { /* only one thread on the queue */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0);

        /* try (h,h)->(-1,0) */
        dequeued = KMP_COMPARE_AND_STORE_REL64(
            RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
            KMP_PACK_64(-1, 0));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
#endif

      } else {
        volatile kmp_int32 *waiting_id_p;
        kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
        KMP_DEBUG_ASSERT(head_thr != NULL);
        waiting_id_p = &head_thr->th.th_next_waiting;

        /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0 || tail <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0 && tail > 0);

        /* try (h,t)->(h',t) or (t,t) */
        KMP_MB();
        /* make sure enqueuing thread has time to update next waiting thread
         * field */
        *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
                                    KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
#endif
        dequeued = TRUE;
      }
    }

    if (dequeued) {
      kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
      KMP_DEBUG_ASSERT(head_thr != NULL);

      /* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
      if (head <= 0 || tail <= 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
      KMP_DEBUG_ASSERT(head > 0 && tail > 0);

      /* For clean code only. Thread not released until next statement prevents
         race with acquire code. */
      head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
#endif

      KMP_MB();
      /* reset spin value */
      head_thr->th.th_spin_here = FALSE;

      KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
                      "dequeuing\n",
                      lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "rel exit 2");
#endif
      return KMP_LOCK_RELEASED;
    }
    /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
       threads */

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "rel retry");
#endif

  } /* while */
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  lck->lk.owner_id = 0;
  return __kmp_release_queuing_lock(lck, gtid);
}
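// A queuing lock advertises that it has been initialized by storing its own
// address in lk.initialized; the with_checks paths compare that field against
// the lock pointer to detect use of an uninitialized or destroyed lock.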
void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.head_id = 0;
  lck->lk.tail_id = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0; // no thread owns the lock.
  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
  lck->lk.initialized = lck;

  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}

void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
  lck->lk.initialized = NULL;
  lck->lk.location = NULL;
  lck->lk.head_id = 0;
  lck->lk.tail_id = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0;
  lck->lk.depth_locked = -1;
}

static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_queuing_lock(lck);
}

// nested queuing locks

int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
    ANNOTATE_QUEUING_ACQUIRED(lck);
    KMP_MB();
    lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int
__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_queuing_lock(lck, gtid);
}

int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}

static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                      kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_queuing_lock(lck, gtid);
}

int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    KMP_MB();
    lck->lk.owner_id = 0;
    __kmp_release_queuing_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_queuing_lock(lck, gtid);
}

void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_destroy_queuing_lock(lck);
  lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_queuing_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
                                            const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
                                         kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

#if KMP_USE_ADAPTIVE_LOCKS

/* RTM Adaptive locks */

#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) ||                          \
    (KMP_COMPILER_MSVC && _MSC_VER >= 1700) ||                                 \
    (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT)

#include <immintrin.h>
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#else

// Values from the status register after failed speculation.
#define _XBEGIN_STARTED (~0u)
#define _XABORT_EXPLICIT (1 << 0)
#define _XABORT_RETRY (1 << 1)
#define _XABORT_CONFLICT (1 << 2)
#define _XABORT_CAPACITY (1 << 3)
#define _XABORT_DEBUG (1 << 4)
#define _XABORT_NESTED (1 << 5)
#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))

// Aborts for which it's worth trying again immediately
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#define STRINGIZE_INTERNAL(arg) #arg
#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)

// Access to RTM instructions
/* A version of XBegin which returns -1 on speculation, and the value of EAX on
   an abort. This is the same definition as the compiler intrinsic that will be
   supported at some point. */
static __inline int _xbegin() {
  int res = -1;

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86_64
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#else /* IA32 */
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#endif // KMP_ARCH_X86_64
#else
  /* Note that %eax must be noted as killed (clobbered), because the XSR is
     returned in %eax(%rax) on abort.
Other register values are restored, so 1779 don't need to be killed. 1780 1781 We must also mark 'res' as an input and an output, since otherwise 1782 'res=-1' may be dropped as being dead, whereas we do need the assignment on 1783 the successful (i.e., non-abort) path. */ 1784 __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" 1785 " .long 1f-1b-6\n" 1786 " jmp 2f\n" 1787 "1: movl %%eax,%0\n" 1788 "2:" 1789 : "+r"(res)::"memory", "%eax"); 1790 #endif // KMP_OS_WINDOWS 1791 return res; 1792 } 1793 1794 /* Transaction end */ 1795 static __inline void _xend() { 1796 #if KMP_OS_WINDOWS 1797 __asm { 1798 _emit 0x0f 1799 _emit 0x01 1800 _emit 0xd5 1801 } 1802 #else 1803 __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); 1804 #endif 1805 } 1806 1807 /* This is a macro, the argument must be a single byte constant which can be 1808 evaluated by the inline assembler, since it is emitted as a byte into the 1809 assembly code. */ 1810 // clang-format off 1811 #if KMP_OS_WINDOWS 1812 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG 1813 #else 1814 #define _xabort(ARG) \ 1815 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); 1816 #endif 1817 // clang-format on 1818 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1819 1820 // Statistics is collected for testing purpose 1821 #if KMP_DEBUG_ADAPTIVE_LOCKS 1822 1823 // We accumulate speculative lock statistics when the lock is destroyed. We 1824 // keep locks that haven't been destroyed in the liveLocks list so that we can 1825 // grab their statistics too. 1826 static kmp_adaptive_lock_statistics_t destroyedStats; 1827 1828 // To hold the list of live locks. 1829 static kmp_adaptive_lock_info_t liveLocks; 1830 1831 // A lock so we can safely update the list of locks. 1832 static kmp_bootstrap_lock_t chain_lock = 1833 KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock); 1834 1835 // Initialize the list of stats. 
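// liveLocks is the sentinel of a circular doubly-linked list: an empty list
// is the sentinel pointing at itself (stats.next == stats.prev == &liveLocks),
// which is exactly the state this routine establishes.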
1836 void __kmp_init_speculative_stats() { 1837 kmp_adaptive_lock_info_t *lck = &liveLocks; 1838 1839 memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0, 1840 sizeof(lck->stats)); 1841 lck->stats.next = lck; 1842 lck->stats.prev = lck; 1843 1844 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1845 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1846 1847 __kmp_init_bootstrap_lock(&chain_lock); 1848 } 1849 1850 // Insert the lock into the circular list 1851 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { 1852 __kmp_acquire_bootstrap_lock(&chain_lock); 1853 1854 lck->stats.next = liveLocks.stats.next; 1855 lck->stats.prev = &liveLocks; 1856 1857 liveLocks.stats.next = lck; 1858 lck->stats.next->stats.prev = lck; 1859 1860 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1861 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1862 1863 __kmp_release_bootstrap_lock(&chain_lock); 1864 } 1865 1866 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { 1867 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1868 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1869 1870 kmp_adaptive_lock_info_t *n = lck->stats.next; 1871 kmp_adaptive_lock_info_t *p = lck->stats.prev; 1872 1873 n->stats.prev = p; 1874 p->stats.next = n; 1875 } 1876 1877 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1878 memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0, 1879 sizeof(lck->stats)); 1880 __kmp_remember_lock(lck); 1881 } 1882 1883 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, 1884 kmp_adaptive_lock_info_t *lck) { 1885 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 1886 1887 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 1888 t->successfulSpeculations += s->successfulSpeculations; 1889 t->hardFailedSpeculations += s->hardFailedSpeculations; 1890 t->softFailedSpeculations += s->softFailedSpeculations; 1891 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 1892 t->lemmingYields += s->lemmingYields; 1893 } 1894 1895 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1896 __kmp_acquire_bootstrap_lock(&chain_lock); 1897 1898 __kmp_add_stats(&destroyedStats, lck); 1899 __kmp_forget_lock(lck); 1900 1901 __kmp_release_bootstrap_lock(&chain_lock); 1902 } 1903 1904 static float percent(kmp_uint32 count, kmp_uint32 total) { 1905 return (total == 0) ? 0.0 : (100.0 * count) / total; 1906 } 1907 1908 static FILE *__kmp_open_stats_file() { 1909 if (strcmp(__kmp_speculative_statsfile, "-") == 0) 1910 return stdout; 1911 1912 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; 1913 char buffer[buffLen]; 1914 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, 1915 (kmp_int32)getpid()); 1916 FILE *result = fopen(&buffer[0], "w"); 1917 1918 // Maybe we should issue a warning here... 1919 return result ? 
result : stdout; 1920 } 1921 1922 void __kmp_print_speculative_stats() { 1923 kmp_adaptive_lock_statistics_t total = destroyedStats; 1924 kmp_adaptive_lock_info_t *lck; 1925 1926 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 1927 __kmp_add_stats(&total, lck); 1928 } 1929 kmp_adaptive_lock_statistics_t *t = &total; 1930 kmp_uint32 totalSections = 1931 t->nonSpeculativeAcquires + t->successfulSpeculations; 1932 kmp_uint32 totalSpeculations = t->successfulSpeculations + 1933 t->hardFailedSpeculations + 1934 t->softFailedSpeculations; 1935 if (totalSections <= 0) 1936 return; 1937 1938 FILE *statsFile = __kmp_open_stats_file(); 1939 1940 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); 1941 fprintf(statsFile, " Lock parameters: \n" 1942 " max_soft_retries : %10d\n" 1943 " max_badness : %10d\n", 1944 __kmp_adaptive_backoff_params.max_soft_retries, 1945 __kmp_adaptive_backoff_params.max_badness); 1946 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", 1947 t->nonSpeculativeAcquireAttempts); 1948 fprintf(statsFile, " Total critical sections : %10d\n", 1949 totalSections); 1950 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", 1951 t->successfulSpeculations, 1952 percent(t->successfulSpeculations, totalSections)); 1953 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 1954 t->nonSpeculativeAcquires, 1955 percent(t->nonSpeculativeAcquires, totalSections)); 1956 fprintf(statsFile, " Lemming yields : %10d\n\n", 1957 t->lemmingYields); 1958 1959 fprintf(statsFile, " Speculative acquire attempts : %10d\n", 1960 totalSpeculations); 1961 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", 1962 t->successfulSpeculations, 1963 percent(t->successfulSpeculations, totalSpeculations)); 1964 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", 1965 t->softFailedSpeculations, 1966 percent(t->softFailedSpeculations, totalSpeculations)); 1967 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", 1968 t->hardFailedSpeculations, 1969 percent(t->hardFailedSpeculations, totalSpeculations)); 1970 1971 if (statsFile != stdout) 1972 fclose(statsFile); 1973 } 1974 1975 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) 1976 #else 1977 #define KMP_INC_STAT(lck, stat) 1978 1979 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 1980 1981 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { 1982 // It is enough to check that the head_id is zero. 1983 // We don't also need to check the tail. 1984 bool res = lck->lk.head_id == 0; 1985 1986 // We need a fence here, since we must ensure that no memory operations 1987 // from later in this thread float above that read. 1988 #if KMP_COMPILER_ICC 1989 _mm_mfence(); 1990 #else 1991 __sync_synchronize(); 1992 #endif 1993 1994 return res; 1995 } 1996 1997 // Functions for manipulating the badness 1998 static __inline void 1999 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { 2000 // Reset the badness to zero so we eagerly try to speculate again 2001 lck->lk.adaptive.badness = 0; 2002 KMP_INC_STAT(lck, successfulSpeculations); 2003 } 2004 2005 // Create a bit mask with one more set bit. 2006 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { 2007 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; 2008 if (newBadness > lck->lk.adaptive.max_badness) { 2009 return; 2010 } else { 2011 lck->lk.adaptive.badness = newBadness; 2012 } 2013 } 2014 2015 // Check whether speculation should be attempted. 
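// badness is a mask of low-order ones (0, 1, 3, 7, ...), so speculation is
// attempted only when the low bits of acquire_attempts are all zero; e.g. with
// badness == 7, only about one acquire in eight is allowed to try speculation.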
2016 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, 2017 kmp_int32 gtid) { 2018 kmp_uint32 badness = lck->lk.adaptive.badness; 2019 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; 2020 int res = (attempts & badness) == 0; 2021 return res; 2022 } 2023 2024 // Attempt to acquire only the speculative lock. 2025 // Does not back off to the non-speculative lock. 2026 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, 2027 kmp_int32 gtid) { 2028 int retries = lck->lk.adaptive.max_soft_retries; 2029 2030 // We don't explicitly count the start of speculation, rather we record the 2031 // results (success, hard fail, soft fail). The sum of all of those is the 2032 // total number of times we started speculation since all speculations must 2033 // end one of those ways. 2034 do { 2035 kmp_uint32 status = _xbegin(); 2036 // Switch this in to disable actual speculation but exercise at least some 2037 // of the rest of the code. Useful for debugging... 2038 // kmp_uint32 status = _XABORT_NESTED; 2039 2040 if (status == _XBEGIN_STARTED) { 2041 /* We have successfully started speculation. Check that no-one acquired 2042 the lock for real between when we last looked and now. This also gets 2043 the lock cache line into our read-set, which we need so that we'll 2044 abort if anyone later claims it for real. */ 2045 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2046 // Lock is now visibly acquired, so someone beat us to it. Abort the 2047 // transaction so we'll restart from _xbegin with the failure status. 2048 _xabort(0x01); 2049 KMP_ASSERT2(0, "should not get here"); 2050 } 2051 return 1; // Lock has been acquired (speculatively) 2052 } else { 2053 // We have aborted, update the statistics 2054 if (status & SOFT_ABORT_MASK) { 2055 KMP_INC_STAT(lck, softFailedSpeculations); 2056 // and loop round to retry. 2057 } else { 2058 KMP_INC_STAT(lck, hardFailedSpeculations); 2059 // Give up if we had a hard failure. 2060 break; 2061 } 2062 } 2063 } while (retries--); // Loop while we have retries, and didn't fail hard. 2064 2065 // Either we had a hard failure or we didn't succeed softly after 2066 // the full set of attempts, so back off the badness. 2067 __kmp_step_badness(lck); 2068 return 0; 2069 } 2070 2071 // Attempt to acquire the speculative lock, or back off to the non-speculative 2072 // one if the speculative lock cannot be acquired. 2073 // We can succeed speculatively, non-speculatively, or fail. 2074 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { 2075 // First try to acquire the lock speculatively 2076 if (__kmp_should_speculate(lck, gtid) && 2077 __kmp_test_adaptive_lock_only(lck, gtid)) 2078 return 1; 2079 2080 // Speculative acquisition failed, so try to acquire it non-speculatively. 2081 // Count the non-speculative acquire attempt 2082 lck->lk.adaptive.acquire_attempts++; 2083 2084 // Use base, non-speculative lock. 2085 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { 2086 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2087 return 1; // Lock is acquired (non-speculatively) 2088 } else { 2089 return 0; // Failed to acquire the lock, it's already visibly locked. 
2090 }
2091 }
2092
2093 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2094 kmp_int32 gtid) {
2095 char const *const func = "omp_test_lock";
2096 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2097 KMP_FATAL(LockIsUninitialized, func);
2098 }
2099
2100 int retval = __kmp_test_adaptive_lock(lck, gtid);
2101
2102 if (retval) {
2103 lck->lk.qlk.owner_id = gtid + 1;
2104 }
2105 return retval;
2106 }
2107
2108 // Block until we can acquire a speculative, adaptive lock. We check whether we
2109 // should be trying to speculate. If we should be, we check the real lock to see
2110 // if it is free, and, if not, pause without attempting to acquire it until it
2111 // is. Then we try the speculative acquire. This means that although we suffer
2112 // from lemmings a little (because we can't acquire the lock speculatively
2113 // until the queue of waiting threads has cleared), we don't get into a state
2114 // where we can never acquire the lock speculatively (because we force the queue
2115 // to clear by preventing new arrivals from entering the queue). This does mean
2116 // that when we're trying to break lemmings, the lock is no longer fair. However,
2117 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
2118 // problem.
2119 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2120 kmp_int32 gtid) {
2121 if (__kmp_should_speculate(lck, gtid)) {
2122 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2123 if (__kmp_test_adaptive_lock_only(lck, gtid))
2124 return;
2125 // We tried speculation and failed, so give up.
2126 } else {
2127 // We can't try speculation until the lock is free, so we pause here
2128 // (without suspending on the queuing lock) to allow it to drain, then
2129 // try again. All other threads will see the same result from
2130 // __kmp_should_speculate, so they will be doing the same if they try to
2131 // claim the lock from now on.
2132 while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2133 KMP_INC_STAT(lck, lemmingYields);
2134 __kmp_yield(TRUE);
2135 }
2136
2137 if (__kmp_test_adaptive_lock_only(lck, gtid))
2138 return;
2139 }
2140 }
2141
2142 // Speculative acquisition failed, so acquire it non-speculatively.
2143 // Count the non-speculative acquire attempt.
2144 lck->lk.adaptive.acquire_attempts++;
2145
2146 __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2147 // We have acquired the base lock, so count that.
2148 KMP_INC_STAT(lck, nonSpeculativeAcquires);
2149 ANNOTATE_QUEUING_ACQUIRED(lck);
2150 }
2151
2152 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2153 kmp_int32 gtid) {
2154 char const *const func = "omp_set_lock";
2155 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2156 KMP_FATAL(LockIsUninitialized, func);
2157 }
2158 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2159 KMP_FATAL(LockIsAlreadyOwned, func);
2160 }
2161
2162 __kmp_acquire_adaptive_lock(lck, gtid);
2163
2164 lck->lk.qlk.owner_id = gtid + 1;
2165 }
2166
2167 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2168 kmp_int32 gtid) {
2169 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2170 lck))) { // If the lock doesn't look claimed we must be speculating.
2171 // (Or the user's code is buggy and they're releasing without locking;
2172 // if we had XTEST we'd be able to check that case...)
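// Note that executing _xend() outside a transaction raises a #GP fault, so
// the "looks unlocked, therefore we are speculating" heuristic above is what
// keeps this release path safe.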
2173 _xend(); // Exit speculation 2174 __kmp_update_badness_after_success(lck); 2175 } else { // Since the lock *is* visibly locked we're not speculating, 2176 // so should use the underlying lock's release scheme. 2177 __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); 2178 } 2179 return KMP_LOCK_RELEASED; 2180 } 2181 2182 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2183 kmp_int32 gtid) { 2184 char const *const func = "omp_unset_lock"; 2185 KMP_MB(); /* in case another processor initialized lock */ 2186 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2187 KMP_FATAL(LockIsUninitialized, func); 2188 } 2189 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { 2190 KMP_FATAL(LockUnsettingFree, func); 2191 } 2192 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { 2193 KMP_FATAL(LockUnsettingSetByAnother, func); 2194 } 2195 lck->lk.qlk.owner_id = 0; 2196 __kmp_release_adaptive_lock(lck, gtid); 2197 return KMP_LOCK_RELEASED; 2198 } 2199 2200 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { 2201 __kmp_init_queuing_lock(GET_QLK_PTR(lck)); 2202 lck->lk.adaptive.badness = 0; 2203 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; 2204 lck->lk.adaptive.max_soft_retries = 2205 __kmp_adaptive_backoff_params.max_soft_retries; 2206 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2207 #if KMP_DEBUG_ADAPTIVE_LOCKS 2208 __kmp_zero_speculative_stats(&lck->lk.adaptive); 2209 #endif 2210 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2211 } 2212 2213 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { 2214 #if KMP_DEBUG_ADAPTIVE_LOCKS 2215 __kmp_accumulate_speculative_stats(&lck->lk.adaptive); 2216 #endif 2217 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); 2218 // Nothing needed for the speculative part. 2219 } 2220 2221 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2222 char const *const func = "omp_destroy_lock"; 2223 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2224 KMP_FATAL(LockIsUninitialized, func); 2225 } 2226 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) { 2227 KMP_FATAL(LockStillOwned, func); 2228 } 2229 __kmp_destroy_adaptive_lock(lck); 2230 } 2231 2232 #endif // KMP_USE_ADAPTIVE_LOCKS 2233 2234 /* ------------------------------------------------------------------------ */ 2235 /* DRDPA ticket locks */ 2236 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2237 2238 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) { 2239 return lck->lk.owner_id - 1; 2240 } 2241 2242 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) { 2243 return lck->lk.depth_locked != -1; 2244 } 2245 2246 __forceinline static int 2247 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2248 kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket); 2249 kmp_uint64 mask = lck->lk.mask; // atomic load 2250 std::atomic<kmp_uint64> *polls = lck->lk.polls; 2251 2252 #ifdef USE_LOCK_PROFILE 2253 if (polls[ticket & mask] != ticket) 2254 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2255 /* else __kmp_printf( "." );*/ 2256 #endif /* USE_LOCK_PROFILE */ 2257 2258 // Now spin-wait, but reload the polls pointer and mask, in case the 2259 // polling area has been reconfigured. Unless it is reconfigured, the 2260 // reloads stay in L1 cache and are cheap. 2261 // 2262 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!! 
2263 // 2264 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2265 // and poll to be re-read every spin iteration. 2266 kmp_uint32 spins; 2267 2268 KMP_FSYNC_PREPARE(lck); 2269 KMP_INIT_YIELD(spins); 2270 while (polls[ticket & mask] < ticket) { // atomic load 2271 // If we are oversubscribed, 2272 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2273 // CPU Pause is in the macros for yield. 2274 // 2275 KMP_YIELD(TCR_4(__kmp_nth) > 2276 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2277 KMP_YIELD_SPIN(spins); 2278 2279 // Re-read the mask and the poll pointer from the lock structure. 2280 // 2281 // Make certain that "mask" is read before "polls" !!! 2282 // 2283 // If another thread picks reconfigures the polling area and updates their 2284 // values, and we get the new value of mask and the old polls pointer, we 2285 // could access memory beyond the end of the old polling area. 2286 mask = lck->lk.mask; // atomic load 2287 polls = lck->lk.polls; // atomic load 2288 } 2289 2290 // Critical section starts here 2291 KMP_FSYNC_ACQUIRED(lck); 2292 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2293 ticket, lck)); 2294 lck->lk.now_serving = ticket; // non-volatile store 2295 2296 // Deallocate a garbage polling area if we know that we are the last 2297 // thread that could possibly access it. 2298 // 2299 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2300 // ticket. 2301 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2302 __kmp_free(lck->lk.old_polls); 2303 lck->lk.old_polls = NULL; 2304 lck->lk.cleanup_ticket = 0; 2305 } 2306 2307 // Check to see if we should reconfigure the polling area. 2308 // If there is still a garbage polling area to be deallocated from a 2309 // previous reconfiguration, let a later thread reconfigure it. 2310 if (lck->lk.old_polls == NULL) { 2311 bool reconfigure = false; 2312 std::atomic<kmp_uint64> *old_polls = polls; 2313 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2314 2315 if (TCR_4(__kmp_nth) > 2316 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2317 // We are in oversubscription mode. Contract the polling area 2318 // down to a single location, if that hasn't been done already. 2319 if (num_polls > 1) { 2320 reconfigure = true; 2321 num_polls = TCR_4(lck->lk.num_polls); 2322 mask = 0; 2323 num_polls = 1; 2324 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls * 2325 sizeof(*polls)); 2326 polls[0] = ticket; 2327 } 2328 } else { 2329 // We are in under/fully subscribed mode. Check the number of 2330 // threads waiting on the lock. The size of the polling area 2331 // should be at least the number of threads waiting. 2332 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2333 if (num_waiting > num_polls) { 2334 kmp_uint32 old_num_polls = num_polls; 2335 reconfigure = true; 2336 do { 2337 mask = (mask << 1) | 1; 2338 num_polls *= 2; 2339 } while (num_polls <= num_waiting); 2340 2341 // Allocate the new polling area, and copy the relevant portion 2342 // of the old polling area to the new area. __kmp_allocate() 2343 // zeroes the memory it allocates, and most of the old area is 2344 // just zero padding, so we only copy the release counters. 
2345 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls * 2346 sizeof(*polls)); 2347 kmp_uint32 i; 2348 for (i = 0; i < old_num_polls; i++) { 2349 polls[i].store(old_polls[i]); 2350 } 2351 } 2352 } 2353 2354 if (reconfigure) { 2355 // Now write the updated fields back to the lock structure. 2356 // 2357 // Make certain that "polls" is written before "mask" !!! 2358 // 2359 // If another thread picks up the new value of mask and the old polls 2360 // pointer , it could access memory beyond the end of the old polling 2361 // area. 2362 // 2363 // On x86, we need memory fences. 2364 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " 2365 "lock %p to %d polls\n", 2366 ticket, lck, num_polls)); 2367 2368 lck->lk.old_polls = old_polls; 2369 lck->lk.polls = polls; // atomic store 2370 2371 KMP_MB(); 2372 2373 lck->lk.num_polls = num_polls; 2374 lck->lk.mask = mask; // atomic store 2375 2376 KMP_MB(); 2377 2378 // Only after the new polling area and mask have been flushed 2379 // to main memory can we update the cleanup ticket field. 2380 // 2381 // volatile load / non-volatile store 2382 lck->lk.cleanup_ticket = lck->lk.next_ticket; 2383 } 2384 } 2385 return KMP_LOCK_ACQUIRED_FIRST; 2386 } 2387 2388 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2389 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2390 ANNOTATE_DRDPA_ACQUIRED(lck); 2391 return retval; 2392 } 2393 2394 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2395 kmp_int32 gtid) { 2396 char const *const func = "omp_set_lock"; 2397 if (lck->lk.initialized != lck) { 2398 KMP_FATAL(LockIsUninitialized, func); 2399 } 2400 if (__kmp_is_drdpa_lock_nestable(lck)) { 2401 KMP_FATAL(LockNestableUsedAsSimple, func); 2402 } 2403 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { 2404 KMP_FATAL(LockIsAlreadyOwned, func); 2405 } 2406 2407 __kmp_acquire_drdpa_lock(lck, gtid); 2408 2409 lck->lk.owner_id = gtid + 1; 2410 return KMP_LOCK_ACQUIRED_FIRST; 2411 } 2412 2413 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2414 // First get a ticket, then read the polls pointer and the mask. 2415 // The polls pointer must be read before the mask!!! (See above) 2416 kmp_uint64 ticket = lck->lk.next_ticket; // atomic load 2417 std::atomic<kmp_uint64> *polls = lck->lk.polls; 2418 kmp_uint64 mask = lck->lk.mask; // atomic load 2419 if (polls[ticket & mask] == ticket) { 2420 kmp_uint64 next_ticket = ticket + 1; 2421 if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket, 2422 next_ticket)) { 2423 KMP_FSYNC_ACQUIRED(lck); 2424 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2425 ticket, lck)); 2426 lck->lk.now_serving = ticket; // non-volatile store 2427 2428 // Since no threads are waiting, there is no possibility that we would 2429 // want to reconfigure the polling area. We might have the cleanup ticket 2430 // value (which says that it is now safe to deallocate old_polls), but 2431 // we'll let a later thread which calls __kmp_acquire_lock do that - this 2432 // routine isn't supposed to block, and we would risk blocks if we called 2433 // __kmp_free() to do the deallocation. 
2434 return TRUE; 2435 } 2436 } 2437 return FALSE; 2438 } 2439 2440 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2441 kmp_int32 gtid) { 2442 char const *const func = "omp_test_lock"; 2443 if (lck->lk.initialized != lck) { 2444 KMP_FATAL(LockIsUninitialized, func); 2445 } 2446 if (__kmp_is_drdpa_lock_nestable(lck)) { 2447 KMP_FATAL(LockNestableUsedAsSimple, func); 2448 } 2449 2450 int retval = __kmp_test_drdpa_lock(lck, gtid); 2451 2452 if (retval) { 2453 lck->lk.owner_id = gtid + 1; 2454 } 2455 return retval; 2456 } 2457 2458 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2459 // Read the ticket value from the lock data struct, then the polls pointer and 2460 // the mask. The polls pointer must be read before the mask!!! (See above) 2461 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load 2462 std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load 2463 kmp_uint64 mask = lck->lk.mask; // atomic load 2464 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2465 ticket - 1, lck)); 2466 KMP_FSYNC_RELEASING(lck); 2467 ANNOTATE_DRDPA_RELEASED(lck); 2468 polls[ticket & mask] = ticket; // atomic store 2469 return KMP_LOCK_RELEASED; 2470 } 2471 2472 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2473 kmp_int32 gtid) { 2474 char const *const func = "omp_unset_lock"; 2475 KMP_MB(); /* in case another processor initialized lock */ 2476 if (lck->lk.initialized != lck) { 2477 KMP_FATAL(LockIsUninitialized, func); 2478 } 2479 if (__kmp_is_drdpa_lock_nestable(lck)) { 2480 KMP_FATAL(LockNestableUsedAsSimple, func); 2481 } 2482 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2483 KMP_FATAL(LockUnsettingFree, func); 2484 } 2485 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && 2486 (__kmp_get_drdpa_lock_owner(lck) != gtid)) { 2487 KMP_FATAL(LockUnsettingSetByAnother, func); 2488 } 2489 lck->lk.owner_id = 0; 2490 return __kmp_release_drdpa_lock(lck, gtid); 2491 } 2492 2493 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { 2494 lck->lk.location = NULL; 2495 lck->lk.mask = 0; 2496 lck->lk.num_polls = 1; 2497 lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate( 2498 lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2499 lck->lk.cleanup_ticket = 0; 2500 lck->lk.old_polls = NULL; 2501 lck->lk.next_ticket = 0; 2502 lck->lk.now_serving = 0; 2503 lck->lk.owner_id = 0; // no thread owns the lock. 2504 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
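// Storing the lock's own address in lk.initialized is what marks the lock as
// valid for the *_with_checks consistency tests.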
2505 lck->lk.initialized = lck; 2506 2507 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2508 } 2509 2510 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { 2511 lck->lk.initialized = NULL; 2512 lck->lk.location = NULL; 2513 if (lck->lk.polls.load() != NULL) { 2514 __kmp_free(lck->lk.polls.load()); 2515 lck->lk.polls = NULL; 2516 } 2517 if (lck->lk.old_polls != NULL) { 2518 __kmp_free(lck->lk.old_polls); 2519 lck->lk.old_polls = NULL; 2520 } 2521 lck->lk.mask = 0; 2522 lck->lk.num_polls = 0; 2523 lck->lk.cleanup_ticket = 0; 2524 lck->lk.next_ticket = 0; 2525 lck->lk.now_serving = 0; 2526 lck->lk.owner_id = 0; 2527 lck->lk.depth_locked = -1; 2528 } 2529 2530 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2531 char const *const func = "omp_destroy_lock"; 2532 if (lck->lk.initialized != lck) { 2533 KMP_FATAL(LockIsUninitialized, func); 2534 } 2535 if (__kmp_is_drdpa_lock_nestable(lck)) { 2536 KMP_FATAL(LockNestableUsedAsSimple, func); 2537 } 2538 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2539 KMP_FATAL(LockStillOwned, func); 2540 } 2541 __kmp_destroy_drdpa_lock(lck); 2542 } 2543 2544 // nested drdpa ticket locks 2545 2546 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2547 KMP_DEBUG_ASSERT(gtid >= 0); 2548 2549 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2550 lck->lk.depth_locked += 1; 2551 return KMP_LOCK_ACQUIRED_NEXT; 2552 } else { 2553 __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2554 ANNOTATE_DRDPA_ACQUIRED(lck); 2555 KMP_MB(); 2556 lck->lk.depth_locked = 1; 2557 KMP_MB(); 2558 lck->lk.owner_id = gtid + 1; 2559 return KMP_LOCK_ACQUIRED_FIRST; 2560 } 2561 } 2562 2563 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2564 kmp_int32 gtid) { 2565 char const *const func = "omp_set_nest_lock"; 2566 if (lck->lk.initialized != lck) { 2567 KMP_FATAL(LockIsUninitialized, func); 2568 } 2569 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2570 KMP_FATAL(LockSimpleUsedAsNestable, func); 2571 } 2572 __kmp_acquire_nested_drdpa_lock(lck, gtid); 2573 } 2574 2575 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2576 int retval; 2577 2578 KMP_DEBUG_ASSERT(gtid >= 0); 2579 2580 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2581 retval = ++lck->lk.depth_locked; 2582 } else if (!__kmp_test_drdpa_lock(lck, gtid)) { 2583 retval = 0; 2584 } else { 2585 KMP_MB(); 2586 retval = lck->lk.depth_locked = 1; 2587 KMP_MB(); 2588 lck->lk.owner_id = gtid + 1; 2589 } 2590 return retval; 2591 } 2592 2593 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2594 kmp_int32 gtid) { 2595 char const *const func = "omp_test_nest_lock"; 2596 if (lck->lk.initialized != lck) { 2597 KMP_FATAL(LockIsUninitialized, func); 2598 } 2599 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2600 KMP_FATAL(LockSimpleUsedAsNestable, func); 2601 } 2602 return __kmp_test_nested_drdpa_lock(lck, gtid); 2603 } 2604 2605 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2606 KMP_DEBUG_ASSERT(gtid >= 0); 2607 2608 KMP_MB(); 2609 if (--(lck->lk.depth_locked) == 0) { 2610 KMP_MB(); 2611 lck->lk.owner_id = 0; 2612 __kmp_release_drdpa_lock(lck, gtid); 2613 return KMP_LOCK_RELEASED; 2614 } 2615 return KMP_LOCK_STILL_HELD; 2616 } 2617 2618 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2619 kmp_int32 gtid) { 2620 char const *const func = "omp_unset_nest_lock"; 2621 KMP_MB(); /* in case another processor initialized lock */ 2622 if 
(lck->lk.initialized != lck) { 2623 KMP_FATAL(LockIsUninitialized, func); 2624 } 2625 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2626 KMP_FATAL(LockSimpleUsedAsNestable, func); 2627 } 2628 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2629 KMP_FATAL(LockUnsettingFree, func); 2630 } 2631 if (__kmp_get_drdpa_lock_owner(lck) != gtid) { 2632 KMP_FATAL(LockUnsettingSetByAnother, func); 2633 } 2634 return __kmp_release_nested_drdpa_lock(lck, gtid); 2635 } 2636 2637 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2638 __kmp_init_drdpa_lock(lck); 2639 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2640 } 2641 2642 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2643 __kmp_destroy_drdpa_lock(lck); 2644 lck->lk.depth_locked = 0; 2645 } 2646 2647 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2648 char const *const func = "omp_destroy_nest_lock"; 2649 if (lck->lk.initialized != lck) { 2650 KMP_FATAL(LockIsUninitialized, func); 2651 } 2652 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2653 KMP_FATAL(LockSimpleUsedAsNestable, func); 2654 } 2655 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2656 KMP_FATAL(LockStillOwned, func); 2657 } 2658 __kmp_destroy_nested_drdpa_lock(lck); 2659 } 2660 2661 // access functions to fields which don't exist for all lock kinds. 2662 2663 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { 2664 return lck->lk.location; 2665 } 2666 2667 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, 2668 const ident_t *loc) { 2669 lck->lk.location = loc; 2670 } 2671 2672 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { 2673 return lck->lk.flags; 2674 } 2675 2676 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, 2677 kmp_lock_flags_t flags) { 2678 lck->lk.flags = flags; 2679 } 2680 2681 // Time stamp counter 2682 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2683 #define __kmp_tsc() __kmp_hardware_timestamp() 2684 // Runtime's default backoff parameters 2685 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; 2686 #else 2687 // Use nanoseconds for other platforms 2688 extern kmp_uint64 __kmp_now_nsec(); 2689 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; 2690 #define __kmp_tsc() __kmp_now_nsec() 2691 #endif 2692 2693 // A useful predicate for dealing with timestamps that may wrap. 2694 // Is a before b? Since the timestamps may wrap, this is asking whether it's 2695 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 2696 // Times where going clockwise is less distance than going anti-clockwise 2697 // are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0), 2698 // then a > b (true) does not mean a reached b; whereas signed(a) = -2, 2699 // signed(b) = 0 captures the actual difference 2700 static inline bool before(kmp_uint64 a, kmp_uint64 b) { 2701 return ((kmp_int64)b - (kmp_int64)a) > 0; 2702 } 2703 2704 // Truncated binary exponential backoff function 2705 void __kmp_spin_backoff(kmp_backoff_t *boff) { 2706 // We could flatten this loop, but making it a nested loop gives better result 2707 kmp_uint32 i; 2708 for (i = boff->step; i > 0; i--) { 2709 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 2710 do { 2711 KMP_CPU_PAUSE(); 2712 } while (before(__kmp_tsc(), goal)); 2713 } 2714 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); 2715 } 2716 2717 #if KMP_USE_DYNAMIC_LOCK 2718 2719 // Direct lock initializers. 
It simply writes a tag to the low 8 bits of the 2720 // lock word. 2721 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, 2722 kmp_dyna_lockseq_t seq) { 2723 TCW_4(*lck, KMP_GET_D_TAG(seq)); 2724 KA_TRACE( 2725 20, 2726 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 2727 } 2728 2729 #if KMP_USE_TSX 2730 2731 // HLE lock functions - imported from the testbed runtime. 2732 #define HLE_ACQUIRE ".byte 0xf2;" 2733 #define HLE_RELEASE ".byte 0xf3;" 2734 2735 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { 2736 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); 2737 return v; 2738 } 2739 2740 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } 2741 2742 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) { 2743 TCW_4(*lck, 0); 2744 } 2745 2746 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2747 // Use gtid for KMP_LOCK_BUSY if necessary 2748 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 2749 int delay = 1; 2750 do { 2751 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 2752 for (int i = delay; i != 0; --i) 2753 KMP_CPU_PAUSE(); 2754 delay = ((delay << 1) | 1) & 7; 2755 } 2756 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 2757 } 2758 } 2759 2760 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2761 kmp_int32 gtid) { 2762 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 2763 } 2764 2765 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2766 __asm__ volatile(HLE_RELEASE "movl %1,%0" 2767 : "=m"(*lck) 2768 : "r"(KMP_LOCK_FREE(hle)) 2769 : "memory"); 2770 return KMP_LOCK_RELEASED; 2771 } 2772 2773 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2774 kmp_int32 gtid) { 2775 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 2776 } 2777 2778 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2779 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 2780 } 2781 2782 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2783 kmp_int32 gtid) { 2784 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 2785 } 2786 2787 static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { 2788 __kmp_init_queuing_lock(lck); 2789 } 2790 2791 static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { 2792 __kmp_destroy_queuing_lock(lck); 2793 } 2794 2795 static void __kmp_destroy_rtm_lock_with_checks(kmp_queuing_lock_t *lck) { 2796 __kmp_destroy_queuing_lock_with_checks(lck); 2797 } 2798 2799 static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2800 unsigned retries = 3, status; 2801 do { 2802 status = _xbegin(); 2803 if (status == _XBEGIN_STARTED) { 2804 if (__kmp_is_unlocked_queuing_lock(lck)) 2805 return; 2806 _xabort(0xff); 2807 } 2808 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 2809 // Wait until lock becomes free 2810 while (!__kmp_is_unlocked_queuing_lock(lck)) 2811 __kmp_yield(TRUE); 2812 } else if (!(status & _XABORT_RETRY)) 2813 break; 2814 } while (retries--); 2815 2816 // Fall-back non-speculative lock (xchg) 2817 __kmp_acquire_queuing_lock(lck, gtid); 2818 } 2819 2820 static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2821 kmp_int32 gtid) { 2822 __kmp_acquire_rtm_lock(lck, gtid); 2823 } 2824 2825 static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2826 if 
(__kmp_is_unlocked_queuing_lock(lck)) { 2827 // Releasing from speculation 2828 _xend(); 2829 } else { 2830 // Releasing from a real lock 2831 __kmp_release_queuing_lock(lck, gtid); 2832 } 2833 return KMP_LOCK_RELEASED; 2834 } 2835 2836 static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2837 kmp_int32 gtid) { 2838 return __kmp_release_rtm_lock(lck, gtid); 2839 } 2840 2841 static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2842 unsigned retries = 3, status; 2843 do { 2844 status = _xbegin(); 2845 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 2846 return 1; 2847 } 2848 if (!(status & _XABORT_RETRY)) 2849 break; 2850 } while (retries--); 2851 2852 return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0; 2853 } 2854 2855 static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2856 kmp_int32 gtid) { 2857 return __kmp_test_rtm_lock(lck, gtid); 2858 } 2859 2860 #endif // KMP_USE_TSX 2861 2862 // Entry functions for indirect locks (first element of direct lock jump tables) 2863 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, 2864 kmp_dyna_lockseq_t tag); 2865 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); 2866 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2867 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2868 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2869 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2870 kmp_int32); 2871 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2872 kmp_int32); 2873 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2874 kmp_int32); 2875 2876 // Lock function definitions for the union parameter type 2877 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) 2878 2879 #define expand1(lk, op) \ 2880 static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \ 2881 __kmp_##op##_##lk##_##lock(&lock->lk); \ 2882 } 2883 #define expand2(lk, op) \ 2884 static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \ 2885 kmp_int32 gtid) { \ 2886 return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \ 2887 } 2888 #define expand3(lk, op) \ 2889 static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \ 2890 kmp_lock_flags_t flags) { \ 2891 __kmp_set_##lk##_lock_flags(&lock->lk, flags); \ 2892 } 2893 #define expand4(lk, op) \ 2894 static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \ 2895 const ident_t *loc) { \ 2896 __kmp_set_##lk##_lock_location(&lock->lk, loc); \ 2897 } 2898 2899 KMP_FOREACH_LOCK_KIND(expand1, init) 2900 KMP_FOREACH_LOCK_KIND(expand1, init_nested) 2901 KMP_FOREACH_LOCK_KIND(expand1, destroy) 2902 KMP_FOREACH_LOCK_KIND(expand1, destroy_nested) 2903 KMP_FOREACH_LOCK_KIND(expand2, acquire) 2904 KMP_FOREACH_LOCK_KIND(expand2, acquire_nested) 2905 KMP_FOREACH_LOCK_KIND(expand2, release) 2906 KMP_FOREACH_LOCK_KIND(expand2, release_nested) 2907 KMP_FOREACH_LOCK_KIND(expand2, test) 2908 KMP_FOREACH_LOCK_KIND(expand2, test_nested) 2909 KMP_FOREACH_LOCK_KIND(expand3, ) 2910 KMP_FOREACH_LOCK_KIND(expand4, ) 2911 2912 #undef expand1 2913 #undef expand2 2914 #undef expand3 2915 #undef expand4 2916 2917 // Jump tables for the indirect lock functions 2918 // Only fill in the odd entries, that avoids the need to shift out the low bit 2919 2920 // init functions 2921 #define expand(l, op) 0, __kmp_init_direct_lock, 2922 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { 
2923 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; 2924 #undef expand 2925 2926 // destroy functions 2927 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 2928 static void (*direct_destroy[])(kmp_dyna_lock_t *) = { 2929 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 2930 #undef expand 2931 #define expand(l, op) \ 2932 0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks, 2933 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = { 2934 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 2935 #undef expand 2936 2937 // set/acquire functions 2938 #define expand(l, op) \ 2939 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2940 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { 2941 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; 2942 #undef expand 2943 #define expand(l, op) \ 2944 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2945 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2946 __kmp_set_indirect_lock_with_checks, 0, 2947 KMP_FOREACH_D_LOCK(expand, acquire)}; 2948 #undef expand 2949 2950 // unset/release and test functions 2951 #define expand(l, op) \ 2952 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2953 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { 2954 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; 2955 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { 2956 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2957 #undef expand 2958 #define expand(l, op) \ 2959 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2960 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2961 __kmp_unset_indirect_lock_with_checks, 0, 2962 KMP_FOREACH_D_LOCK(expand, release)}; 2963 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2964 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2965 #undef expand 2966 2967 // Exposes only one set of jump tables (*lock or *lock_with_checks). 
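// __kmp_init_dynamic_user_locks() points these at either the plain tables
// above or their *_check counterparts, depending on
// __kmp_env_consistency_check.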
2968 void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *) = 0; 2969 int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; 2970 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; 2971 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; 2972 2973 // Jump tables for the indirect lock functions 2974 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, 2975 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { 2976 KMP_FOREACH_I_LOCK(expand, init)}; 2977 #undef expand 2978 2979 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, 2980 static void (*indirect_destroy[])(kmp_user_lock_p) = { 2981 KMP_FOREACH_I_LOCK(expand, destroy)}; 2982 #undef expand 2983 #define expand(l, op) \ 2984 (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks, 2985 static void (*indirect_destroy_check[])(kmp_user_lock_p) = { 2986 KMP_FOREACH_I_LOCK(expand, destroy)}; 2987 #undef expand 2988 2989 // set/acquire functions 2990 #define expand(l, op) \ 2991 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 2992 static int (*indirect_set[])(kmp_user_lock_p, 2993 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)}; 2994 #undef expand 2995 #define expand(l, op) \ 2996 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 2997 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { 2998 KMP_FOREACH_I_LOCK(expand, acquire)}; 2999 #undef expand 3000 3001 // unset/release and test functions 3002 #define expand(l, op) \ 3003 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 3004 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { 3005 KMP_FOREACH_I_LOCK(expand, release)}; 3006 static int (*indirect_test[])(kmp_user_lock_p, 3007 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)}; 3008 #undef expand 3009 #define expand(l, op) \ 3010 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 3011 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { 3012 KMP_FOREACH_I_LOCK(expand, release)}; 3013 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { 3014 KMP_FOREACH_I_LOCK(expand, test)}; 3015 #undef expand 3016 3017 // Exposes only one jump tables (*lock or *lock_with_checks). 3018 void (*(*__kmp_indirect_destroy))(kmp_user_lock_p) = 0; 3019 int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; 3020 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; 3021 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; 3022 3023 // Lock index table. 3024 kmp_indirect_lock_table_t __kmp_i_lock_table; 3025 3026 // Size of indirect locks. 3027 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0}; 3028 3029 // Jump tables for lock accessor/modifier. 3030 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3031 const ident_t *) = {0}; 3032 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3033 kmp_lock_flags_t) = {0}; 3034 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])( 3035 kmp_user_lock_p) = {0}; 3036 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( 3037 kmp_user_lock_p) = {0}; 3038 3039 // Use different lock pools for different lock types. 3040 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0}; 3041 3042 // User lock allocator for dynamically dispatched indirect locks. 
Every entry of
3043 // the indirect lock table holds the address and type of the allocated indirect
3044 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3045 // full. A destroyed indirect lock object is returned to the reusable pool of
3046 // locks, unique to each lock type.
3047 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3048 kmp_int32 gtid,
3049 kmp_indirect_locktag_t tag) {
3050 kmp_indirect_lock_t *lck;
3051 kmp_lock_index_t idx;
3052
3053 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3054
3055 if (__kmp_indirect_lock_pool[tag] != NULL) {
3056 // Reuse the allocated and destroyed lock object
3057 lck = __kmp_indirect_lock_pool[tag];
3058 if (OMP_LOCK_T_SIZE < sizeof(void *))
3059 idx = lck->lock->pool.index;
3060 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3061 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3062 lck));
3063 } else {
3064 idx = __kmp_i_lock_table.next;
3065 // Check capacity and double the size if it is full
3066 if (idx == __kmp_i_lock_table.size) {
3067 // Double up the space for block pointers
3068 int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
3069 kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
3070 2 * row * sizeof(kmp_indirect_lock_t *));
3071 KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
3072 row * sizeof(kmp_indirect_lock_t *));
3073 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3074 __kmp_i_lock_table.table = new_table;
3075 __kmp_free(old_table);
3076 // Allocate new objects in the new blocks
3077 for (int i = row; i < 2 * row; ++i)
3078 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
3079 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3080 __kmp_i_lock_table.size = 2 * idx;
3081 }
3082 __kmp_i_lock_table.next++;
3083 lck = KMP_GET_I_LOCK(idx);
3084 // Allocate a new base lock object
3085 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3086 KA_TRACE(20,
3087 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3088 }
3089
3090 __kmp_release_lock(&__kmp_global_lock, gtid);
3091
3092 lck->type = tag;
3093
3094 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3095 *((kmp_lock_index_t *)user_lock) = idx
3096 << 1; // indirect lock word must be even
3097 } else {
3098 *((kmp_indirect_lock_t **)user_lock) = lck;
3099 }
3100
3101 return lck;
3102 }
3103
3104 // User lock lookup for dynamically dispatched locks.
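// When OMP_LOCK_T_SIZE < sizeof(void *), the user's lock word holds the table
// index shifted left by one (hence always even); otherwise it holds the
// kmp_indirect_lock_t pointer directly and lookup is a simple dereference.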
3105 static __forceinline kmp_indirect_lock_t * 3106 __kmp_lookup_indirect_lock(void **user_lock, const char *func) { 3107 if (__kmp_env_consistency_check) { 3108 kmp_indirect_lock_t *lck = NULL; 3109 if (user_lock == NULL) { 3110 KMP_FATAL(LockIsUninitialized, func); 3111 } 3112 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3113 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3114 if (idx >= __kmp_i_lock_table.size) { 3115 KMP_FATAL(LockIsUninitialized, func); 3116 } 3117 lck = KMP_GET_I_LOCK(idx); 3118 } else { 3119 lck = *((kmp_indirect_lock_t **)user_lock); 3120 } 3121 if (lck == NULL) { 3122 KMP_FATAL(LockIsUninitialized, func); 3123 } 3124 return lck; 3125 } else { 3126 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3127 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3128 } else { 3129 return *((kmp_indirect_lock_t **)user_lock); 3130 } 3131 } 3132 } 3133 3134 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, 3135 kmp_dyna_lockseq_t seq) { 3136 #if KMP_USE_ADAPTIVE_LOCKS 3137 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3138 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3139 seq = lockseq_queuing; 3140 } 3141 #endif 3142 #if KMP_USE_TSX 3143 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3144 seq = lockseq_queuing; 3145 } 3146 #endif 3147 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3148 kmp_indirect_lock_t *l = 3149 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3150 KMP_I_LOCK_FUNC(l, init)(l->lock); 3151 KA_TRACE( 3152 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", 3153 seq)); 3154 } 3155 3156 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { 3157 kmp_uint32 gtid = __kmp_entry_gtid(); 3158 kmp_indirect_lock_t *l = 3159 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3160 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3161 kmp_indirect_locktag_t tag = l->type; 3162 3163 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3164 3165 // Use the base lock's space to keep the pool chain. 
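// (The lock object was destroyed above, so its storage can be reused as a
// free-list node; pool.next and pool.index live in that space.)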
3166 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3167 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3168 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3169 } 3170 __kmp_indirect_lock_pool[tag] = l; 3171 3172 __kmp_release_lock(&__kmp_global_lock, gtid); 3173 } 3174 3175 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3176 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3177 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3178 } 3179 3180 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3181 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3182 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3183 } 3184 3185 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3186 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3187 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3188 } 3189 3190 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3191 kmp_int32 gtid) { 3192 kmp_indirect_lock_t *l = 3193 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3194 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3195 } 3196 3197 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3198 kmp_int32 gtid) { 3199 kmp_indirect_lock_t *l = 3200 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3201 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3202 } 3203 3204 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3205 kmp_int32 gtid) { 3206 kmp_indirect_lock_t *l = 3207 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3208 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3209 } 3210 3211 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3212 3213 // This is used only in kmp_error.cpp when consistency checking is on. 3214 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { 3215 switch (seq) { 3216 case lockseq_tas: 3217 case lockseq_nested_tas: 3218 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3219 #if KMP_USE_FUTEX 3220 case lockseq_futex: 3221 case lockseq_nested_futex: 3222 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3223 #endif 3224 case lockseq_ticket: 3225 case lockseq_nested_ticket: 3226 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3227 case lockseq_queuing: 3228 case lockseq_nested_queuing: 3229 #if KMP_USE_ADAPTIVE_LOCKS 3230 case lockseq_adaptive: 3231 #endif 3232 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3233 case lockseq_drdpa: 3234 case lockseq_nested_drdpa: 3235 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3236 default: 3237 return 0; 3238 } 3239 } 3240 3241 // Initializes data for dynamic user locks. 
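// Selects the check/no-check jump tables and, on the first call only,
// allocates the initial KMP_I_LOCK_CHUNK-sized block of the indirect lock
// table.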
3242 void __kmp_init_dynamic_user_locks() { 3243 // Initialize jump table for the lock functions 3244 if (__kmp_env_consistency_check) { 3245 __kmp_direct_set = direct_set_check; 3246 __kmp_direct_unset = direct_unset_check; 3247 __kmp_direct_test = direct_test_check; 3248 __kmp_direct_destroy = direct_destroy_check; 3249 __kmp_indirect_set = indirect_set_check; 3250 __kmp_indirect_unset = indirect_unset_check; 3251 __kmp_indirect_test = indirect_test_check; 3252 __kmp_indirect_destroy = indirect_destroy_check; 3253 } else { 3254 __kmp_direct_set = direct_set; 3255 __kmp_direct_unset = direct_unset; 3256 __kmp_direct_test = direct_test; 3257 __kmp_direct_destroy = direct_destroy; 3258 __kmp_indirect_set = indirect_set; 3259 __kmp_indirect_unset = indirect_unset; 3260 __kmp_indirect_test = indirect_test; 3261 __kmp_indirect_destroy = indirect_destroy; 3262 } 3263 // If the user locks have already been initialized, then return. Allow the 3264 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate 3265 // new lock tables if they have already been allocated. 3266 if (__kmp_init_user_locks) 3267 return; 3268 3269 // Initialize lock index table 3270 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3271 __kmp_i_lock_table.table = 3272 (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3273 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( 3274 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3275 __kmp_i_lock_table.next = 0; 3276 3277 // Indirect lock size 3278 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3279 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3280 #if KMP_USE_ADAPTIVE_LOCKS 3281 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3282 #endif 3283 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3284 #if KMP_USE_TSX 3285 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3286 #endif 3287 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3288 #if KMP_USE_FUTEX 3289 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3290 #endif 3291 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3292 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3293 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3294 3295 // Initialize lock accessor/modifier 3296 #define fill_jumps(table, expand, sep) \ 3297 { \ 3298 table[locktag##sep##ticket] = expand(ticket); \ 3299 table[locktag##sep##queuing] = expand(queuing); \ 3300 table[locktag##sep##drdpa] = expand(drdpa); \ 3301 } 3302 3303 #if KMP_USE_ADAPTIVE_LOCKS 3304 #define fill_table(table, expand) \ 3305 { \ 3306 fill_jumps(table, expand, _); \ 3307 table[locktag_adaptive] = expand(queuing); \ 3308 fill_jumps(table, expand, _nested_); \ 3309 } 3310 #else 3311 #define fill_table(table, expand) \ 3312 { \ 3313 fill_jumps(table, expand, _); \ 3314 fill_jumps(table, expand, _nested_); \ 3315 } 3316 #endif // KMP_USE_ADAPTIVE_LOCKS 3317 3318 #define expand(l) \ 3319 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location 3320 fill_table(__kmp_indirect_set_location, expand); 3321 #undef expand 3322 #define expand(l) \ 3323 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags 3324 fill_table(__kmp_indirect_set_flags, expand); 3325 #undef expand 3326 #define expand(l) \ 3327 (const ident_t *(*)(kmp_user_lock_p)) 
__kmp_get_##l##_lock_location 3328 fill_table(__kmp_indirect_get_location, expand); 3329 #undef expand 3330 #define expand(l) \ 3331 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags 3332 fill_table(__kmp_indirect_get_flags, expand); 3333 #undef expand 3334 3335 __kmp_init_user_locks = TRUE; 3336 } 3337 3338 // Clean up the lock table. 3339 void __kmp_cleanup_indirect_user_locks() { 3340 kmp_lock_index_t i; 3341 int k; 3342 3343 // Clean up locks in the pools first (they were already destroyed before going 3344 // into the pools). 3345 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { 3346 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; 3347 while (l != NULL) { 3348 kmp_indirect_lock_t *ll = l; 3349 l = (kmp_indirect_lock_t *)l->lock->pool.next; 3350 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", 3351 ll)); 3352 __kmp_free(ll->lock); 3353 ll->lock = NULL; 3354 } 3355 __kmp_indirect_lock_pool[k] = NULL; 3356 } 3357 // Clean up the remaining undestroyed locks. 3358 for (i = 0; i < __kmp_i_lock_table.next; i++) { 3359 kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); 3360 if (l->lock != NULL) { 3361 // Locks not destroyed explicitly need to be destroyed here. 3362 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3363 KA_TRACE( 3364 20, 3365 ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", 3366 l)); 3367 __kmp_free(l->lock); 3368 } 3369 } 3370 // Free the table 3371 for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) 3372 __kmp_free(__kmp_i_lock_table.table[i]); 3373 __kmp_free(__kmp_i_lock_table.table); 3374 3375 __kmp_init_user_locks = FALSE; 3376 } 3377 3378 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3379 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3380 3381 #else // KMP_USE_DYNAMIC_LOCK 3382 3383 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) { 3384 __kmp_init_tas_lock(lck); 3385 } 3386 3387 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { 3388 __kmp_init_nested_tas_lock(lck); 3389 } 3390 3391 #if KMP_USE_FUTEX 3392 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) { 3393 __kmp_init_futex_lock(lck); 3394 } 3395 3396 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { 3397 __kmp_init_nested_futex_lock(lck); 3398 } 3399 #endif 3400 3401 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) { 3402 return lck == lck->lk.self; 3403 } 3404 3405 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 3406 __kmp_init_ticket_lock(lck); 3407 } 3408 3409 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 3410 __kmp_init_nested_ticket_lock(lck); 3411 } 3412 3413 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) { 3414 return lck == lck->lk.initialized; 3415 } 3416 3417 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 3418 __kmp_init_queuing_lock(lck); 3419 } 3420 3421 static void 3422 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 3423 __kmp_init_nested_queuing_lock(lck); 3424 } 3425 3426 #if KMP_USE_ADAPTIVE_LOCKS 3427 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 3428 __kmp_init_adaptive_lock(lck); 3429 } 3430 #endif 3431 3432 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { 3433 return lck == lck->lk.initialized; 3434 } 3435 3436 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 3437 __kmp_init_drdpa_lock(lck); 3438 } 3439 3440 
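// The *_with_checks wrappers in this section all funnel into the plain
// initializers; they exist so that the checked and unchecked variants expose an
// identical signature and can be bound interchangeably through the function
// pointers that __kmp_set_user_lock_vptrs() fills in below. A minimal sketch of
// that binding pattern (illustrative only -- the toy_* names are hypothetical
// and the block is deliberately compiled out):
#if 0
struct toy_lock { int owner; };

static void toy_init(toy_lock *l) { l->owner = -1; }
static void toy_init_with_checks(toy_lock *l) {
  // A real checked variant would validate the lock state before initializing.
  toy_init(l);
}

// One function pointer, bound once at startup depending on whether consistency
// checking was requested -- callers never branch on the mode again.
static void (*toy_init_)(toy_lock *) = nullptr;

static void toy_bind(bool consistency_check) {
  toy_init_ = consistency_check ? toy_init_with_checks : toy_init;
}
#endif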
static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 3441 __kmp_init_nested_drdpa_lock(lck); 3442 } 3443 3444 /* user locks 3445 * They are implemented as a table of function pointers which are set to the 3446 * lock functions of the appropriate kind, once that has been determined. */ 3447 3448 enum kmp_lock_kind __kmp_user_lock_kind = lk_default; 3449 3450 size_t __kmp_base_user_lock_size = 0; 3451 size_t __kmp_user_lock_size = 0; 3452 3453 kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL; 3454 int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, 3455 kmp_int32 gtid) = NULL; 3456 3457 int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck, 3458 kmp_int32 gtid) = NULL; 3459 int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck, 3460 kmp_int32 gtid) = NULL; 3461 void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; 3462 void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL; 3463 void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; 3464 int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, 3465 kmp_int32 gtid) = NULL; 3466 3467 int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, 3468 kmp_int32 gtid) = NULL; 3469 int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck, 3470 kmp_int32 gtid) = NULL; 3471 void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; 3472 void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; 3473 3474 int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL; 3475 const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL; 3476 void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck, 3477 const ident_t *loc) = NULL; 3478 kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL; 3479 void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck, 3480 kmp_lock_flags_t flags) = NULL; 3481 3482 void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) { 3483 switch (user_lock_kind) { 3484 case lk_default: 3485 default: 3486 KMP_ASSERT(0); 3487 3488 case lk_tas: { 3489 __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t); 3490 __kmp_user_lock_size = sizeof(kmp_tas_lock_t); 3491 3492 __kmp_get_user_lock_owner_ = 3493 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner); 3494 3495 if (__kmp_env_consistency_check) { 3496 KMP_BIND_USER_LOCK_WITH_CHECKS(tas); 3497 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); 3498 } else { 3499 KMP_BIND_USER_LOCK(tas); 3500 KMP_BIND_NESTED_USER_LOCK(tas); 3501 } 3502 3503 __kmp_destroy_user_lock_ = 3504 (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock); 3505 3506 __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; 3507 3508 __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; 3509 3510 __kmp_set_user_lock_location_ = 3511 (void (*)(kmp_user_lock_p, const ident_t *))NULL; 3512 3513 __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; 3514 3515 __kmp_set_user_lock_flags_ = 3516 (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL; 3517 } break; 3518 3519 #if KMP_USE_FUTEX 3520 3521 case lk_futex: { 3522 __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t); 3523 __kmp_user_lock_size = sizeof(kmp_futex_lock_t); 3524 3525 __kmp_get_user_lock_owner_ = 3526 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner); 3527 3528 if (__kmp_env_consistency_check) { 3529 KMP_BIND_USER_LOCK_WITH_CHECKS(futex); 3530 
KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); 3531 } else { 3532 KMP_BIND_USER_LOCK(futex); 3533 KMP_BIND_NESTED_USER_LOCK(futex); 3534 } 3535 3536 __kmp_destroy_user_lock_ = 3537 (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock); 3538 3539 __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; 3540 3541 __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; 3542 3543 __kmp_set_user_lock_location_ = 3544 (void (*)(kmp_user_lock_p, const ident_t *))NULL; 3545 3546 __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; 3547 3548 __kmp_set_user_lock_flags_ = 3549 (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL; 3550 } break; 3551 3552 #endif // KMP_USE_FUTEX 3553 3554 case lk_ticket: { 3555 __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t); 3556 __kmp_user_lock_size = sizeof(kmp_ticket_lock_t); 3557 3558 __kmp_get_user_lock_owner_ = 3559 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner); 3560 3561 if (__kmp_env_consistency_check) { 3562 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); 3563 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); 3564 } else { 3565 KMP_BIND_USER_LOCK(ticket); 3566 KMP_BIND_NESTED_USER_LOCK(ticket); 3567 } 3568 3569 __kmp_destroy_user_lock_ = 3570 (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock); 3571 3572 __kmp_is_user_lock_initialized_ = 3573 (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized); 3574 3575 __kmp_get_user_lock_location_ = 3576 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location); 3577 3578 __kmp_set_user_lock_location_ = (void (*)( 3579 kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location); 3580 3581 __kmp_get_user_lock_flags_ = 3582 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags); 3583 3584 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( 3585 &__kmp_set_ticket_lock_flags); 3586 } break; 3587 3588 case lk_queuing: { 3589 __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t); 3590 __kmp_user_lock_size = sizeof(kmp_queuing_lock_t); 3591 3592 __kmp_get_user_lock_owner_ = 3593 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); 3594 3595 if (__kmp_env_consistency_check) { 3596 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); 3597 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); 3598 } else { 3599 KMP_BIND_USER_LOCK(queuing); 3600 KMP_BIND_NESTED_USER_LOCK(queuing); 3601 } 3602 3603 __kmp_destroy_user_lock_ = 3604 (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock); 3605 3606 __kmp_is_user_lock_initialized_ = 3607 (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); 3608 3609 __kmp_get_user_lock_location_ = 3610 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); 3611 3612 __kmp_set_user_lock_location_ = (void (*)( 3613 kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); 3614 3615 __kmp_get_user_lock_flags_ = 3616 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); 3617 3618 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( 3619 &__kmp_set_queuing_lock_flags); 3620 } break; 3621 3622 #if KMP_USE_ADAPTIVE_LOCKS 3623 case lk_adaptive: { 3624 __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t); 3625 __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t); 3626 3627 __kmp_get_user_lock_owner_ = 3628 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); 3629 3630 if (__kmp_env_consistency_check) { 3631 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); 3632 } else { 3633 
KMP_BIND_USER_LOCK(adaptive); 3634 } 3635 3636 __kmp_destroy_user_lock_ = 3637 (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock); 3638 3639 __kmp_is_user_lock_initialized_ = 3640 (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); 3641 3642 __kmp_get_user_lock_location_ = 3643 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); 3644 3645 __kmp_set_user_lock_location_ = (void (*)( 3646 kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); 3647 3648 __kmp_get_user_lock_flags_ = 3649 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); 3650 3651 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( 3652 &__kmp_set_queuing_lock_flags); 3653 3654 } break; 3655 #endif // KMP_USE_ADAPTIVE_LOCKS 3656 3657 case lk_drdpa: { 3658 __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t); 3659 __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t); 3660 3661 __kmp_get_user_lock_owner_ = 3662 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner); 3663 3664 if (__kmp_env_consistency_check) { 3665 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa); 3666 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa); 3667 } else { 3668 KMP_BIND_USER_LOCK(drdpa); 3669 KMP_BIND_NESTED_USER_LOCK(drdpa); 3670 } 3671 3672 __kmp_destroy_user_lock_ = 3673 (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock); 3674 3675 __kmp_is_user_lock_initialized_ = 3676 (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized); 3677 3678 __kmp_get_user_lock_location_ = 3679 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location); 3680 3681 __kmp_set_user_lock_location_ = (void (*)( 3682 kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location); 3683 3684 __kmp_get_user_lock_flags_ = 3685 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags); 3686 3687 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( 3688 &__kmp_set_drdpa_lock_flags); 3689 } break; 3690 } 3691 } 3692 3693 // ---------------------------------------------------------------------------- 3694 // User lock table & lock allocation 3695 3696 kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL}; 3697 kmp_user_lock_p __kmp_lock_pool = NULL; 3698 3699 // Lock block-allocation support. 3700 kmp_block_of_locks *__kmp_lock_blocks = NULL; 3701 int __kmp_num_locks_in_block = 1; // FIXME - tune this value 3702 3703 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) { 3704 // Assume that kmp_global_lock is held upon entry/exit. 3705 kmp_lock_index_t index; 3706 if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) { 3707 kmp_lock_index_t size; 3708 kmp_user_lock_p *table; 3709 // Reallocate lock table. 3710 if (__kmp_user_lock_table.allocated == 0) { 3711 size = 1024; 3712 } else { 3713 size = __kmp_user_lock_table.allocated * 2; 3714 } 3715 table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size); 3716 KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1, 3717 sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1)); 3718 table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table; 3719 // We cannot free the previous table now, since it may be in use by other 3720 // threads. So save the pointer to the previous table in the first 3721 // element of the new table. All the tables will be organized into a list, 3722 // and can be freed when the library is shutting down.
3723 __kmp_user_lock_table.table = table; 3724 __kmp_user_lock_table.allocated = size; 3725 } 3726 KMP_DEBUG_ASSERT(__kmp_user_lock_table.used < 3727 __kmp_user_lock_table.allocated); 3728 index = __kmp_user_lock_table.used; 3729 __kmp_user_lock_table.table[index] = lck; 3730 ++__kmp_user_lock_table.used; 3731 return index; 3732 } 3733 3734 static kmp_user_lock_p __kmp_lock_block_allocate() { 3735 // Assume that kmp_global_lock is held upon entry/exit. 3736 static int last_index = 0; 3737 if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) { 3738 // Restart the index. 3739 last_index = 0; 3740 // Need to allocate a new block. 3741 KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0); 3742 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; 3743 char *buffer = 3744 (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks)); 3745 // Set up the new block. 3746 kmp_block_of_locks *new_block = 3747 (kmp_block_of_locks *)(&buffer[space_for_locks]); 3748 new_block->next_block = __kmp_lock_blocks; 3749 new_block->locks = (void *)buffer; 3750 // Publish the new block. 3751 KMP_MB(); 3752 __kmp_lock_blocks = new_block; 3753 } 3754 kmp_user_lock_p ret = (kmp_user_lock_p)(&( 3755 ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size])); 3756 last_index++; 3757 return ret; 3758 } 3759 3760 // Get memory for a lock. It may be freshly allocated memory or reused memory 3761 // from the lock pool. 3762 kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid, 3763 kmp_lock_flags_t flags) { 3764 kmp_user_lock_p lck; 3765 kmp_lock_index_t index; 3766 KMP_DEBUG_ASSERT(user_lock); 3767 3768 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3769 3770 if (__kmp_lock_pool == NULL) { 3771 // Lock pool is empty. Allocate new memory. 3772 3773 // ANNOTATION: Found no good way to express the synchronisation 3774 // between allocation and usage, so ignore the allocation. 3775 ANNOTATE_IGNORE_WRITES_BEGIN(); 3776 if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point. 3777 lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size); 3778 } else { 3779 lck = __kmp_lock_block_allocate(); 3780 } 3781 ANNOTATE_IGNORE_WRITES_END(); 3782 3783 // Insert the lock in the table so that it can be freed in __kmp_cleanup, 3784 // and the debugger has info on all allocated locks. 3785 index = __kmp_lock_table_insert(lck); 3786 } else { 3787 // Pick up a lock from the pool. 3788 lck = __kmp_lock_pool; 3789 index = __kmp_lock_pool->pool.index; 3790 __kmp_lock_pool = __kmp_lock_pool->pool.next; 3791 } 3792 3793 // We could potentially differentiate between nested and regular locks 3794 // here, and do the lock table lookup for regular locks only. 3795 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3796 *((kmp_lock_index_t *)user_lock) = index; 3797 } else { 3798 *((kmp_user_lock_p *)user_lock) = lck; 3799 } 3800 3801 // Mark the lock if it is a critical section lock. 3802 __kmp_set_user_lock_flags(lck, flags); 3803 3804 __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up 3805 3806 return lck; 3807 } 3808
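// __kmp_user_lock_allocate() above and __kmp_user_lock_free() below implement a
// simple intrusive free list: a released lock is pushed onto __kmp_lock_pool by
// reusing its own storage as the "pool" union member, and the next allocation
// pops it instead of calling the allocator again. A minimal sketch of the
// pattern with a toy type (illustrative only; the toy_* names are hypothetical
// and the block is compiled out):
#if 0
#include <cstdlib>

union toy_user_lock {
  struct { toy_user_lock *next; } pool; // link used only while on the free list
  struct { int owner; } lk;             // live lock state
};

static toy_user_lock *toy_pool = nullptr; // guarded by a global lock in the RTL

static toy_user_lock *toy_lock_allocate() {
  if (toy_pool != nullptr) {            // reuse memory from the pool
    toy_user_lock *l = toy_pool;
    toy_pool = toy_pool->pool.next;
    return l;
  }
  return (toy_user_lock *)std::malloc(sizeof(toy_user_lock));
}

static void toy_lock_free(toy_user_lock *l) {
  l->pool.next = toy_pool;              // push back for reuse; memory stays allocated
  toy_pool = l;
}
#endif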
3809 // Put the lock's memory into the pool for reuse. 3810 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid, 3811 kmp_user_lock_p lck) { 3812 KMP_DEBUG_ASSERT(user_lock != NULL); 3813 KMP_DEBUG_ASSERT(lck != NULL); 3814 3815 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3816 3817 lck->pool.next = __kmp_lock_pool; 3818 __kmp_lock_pool = lck; 3819 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3820 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock); 3821 KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used); 3822 lck->pool.index = index; 3823 } 3824 3825 __kmp_release_lock(&__kmp_global_lock, gtid); 3826 } 3827 3828 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) { 3829 kmp_user_lock_p lck = NULL; 3830 3831 if (__kmp_env_consistency_check) { 3832 if (user_lock == NULL) { 3833 KMP_FATAL(LockIsUninitialized, func); 3834 } 3835 } 3836 3837 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3838 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock); 3839 if (__kmp_env_consistency_check) { 3840 if (!(0 < index && index < __kmp_user_lock_table.used)) { 3841 KMP_FATAL(LockIsUninitialized, func); 3842 } 3843 } 3844 KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used); 3845 KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0); 3846 lck = __kmp_user_lock_table.table[index]; 3847 } else { 3848 lck = *((kmp_user_lock_p *)user_lock); 3849 } 3850 3851 if (__kmp_env_consistency_check) { 3852 if (lck == NULL) { 3853 KMP_FATAL(LockIsUninitialized, func); 3854 } 3855 } 3856 3857 return lck; 3858 } 3859 3860 void __kmp_cleanup_user_locks(void) { 3861 // Reset the lock pool. Don't worry about the locks in the pool--we will free them when 3862 // iterating through the lock table (it includes all the locks, dead or alive). 3863 __kmp_lock_pool = NULL; 3864 3865 #define IS_CRITICAL(lck) \ 3866 ((__kmp_get_user_lock_flags_ != NULL) && \ 3867 ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section)) 3868 3869 // Loop through the lock table, freeing all locks. 3870 // Do not free item [0]; it is reserved for the lock tables list. 3871 // 3872 // FIXME - we are iterating through a list of (pointers to) objects of type 3873 // union kmp_user_lock, but we have no way of knowing whether the base type is 3874 // currently "pool" or whatever the global user lock type is. 3875 // 3876 // We are relying on the fact that for all of the user lock types 3877 // (except "tas"), the first field in the lock struct is the "initialized" 3878 // field, which is set to the address of the lock object itself when 3879 // the lock is initialized. When the union is of type "pool", the 3880 // first field is a pointer to the next object in the free list, which 3881 // will not be the same address as the object itself. 3882 // 3883 // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail 3884 // for "pool" objects on the free list. This must happen, as the "location" 3885 // field of real user locks overlaps the "index" field of "pool" objects. 3886 // 3887 // It would be better to run through the free list, and remove all "pool" 3888 // objects from the lock table before executing this loop. However, 3889 // "pool" objects do not always have their index field set (only on 3890 // lin_32e), and I don't want to search the lock table for the address 3891 // of every "pool" object on the free list.
3892 while (__kmp_user_lock_table.used > 1) { 3893 const ident *loc; 3894 3895 // Reduce __kmp_user_lock_table.used before freeing the lock, 3896 // so that the state of the locks stays consistent. 3897 kmp_user_lock_p lck = 3898 __kmp_user_lock_table.table[--__kmp_user_lock_table.used]; 3899 3900 if ((__kmp_is_user_lock_initialized_ != NULL) && 3901 (*__kmp_is_user_lock_initialized_)(lck)) { 3902 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND 3903 // it is NOT a critical section (user is not responsible for destroying 3904 // criticals) AND we know the source location to report. 3905 if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) && 3906 ((loc = __kmp_get_user_lock_location(lck)) != NULL) && 3907 (loc->psource != NULL)) { 3908 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0); 3909 KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line); 3910 __kmp_str_loc_free(&str_loc); 3911 } 3912 3913 #ifdef KMP_DEBUG 3914 if (IS_CRITICAL(lck)) { 3915 KA_TRACE( 3916 20, 3917 ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", 3918 lck, *(void **)lck)); 3919 } else { 3920 KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, 3921 *(void **)lck)); 3922 } 3923 #endif // KMP_DEBUG 3924 3925 // Clean up the lock's internal dynamic resources (for drdpa locks in particular). 3926 __kmp_destroy_user_lock(lck); 3927 } 3928 3929 // Free the lock if block allocation of locks is not used. 3930 if (__kmp_lock_blocks == NULL) { 3931 __kmp_free(lck); 3932 } 3933 } 3934 3935 #undef IS_CRITICAL 3936 3937 // Delete the lock table(s). 3938 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; 3939 __kmp_user_lock_table.table = NULL; 3940 __kmp_user_lock_table.allocated = 0; 3941 3942 while (table_ptr != NULL) { 3943 // In the first element we saved the pointer to the previous 3944 // (smaller) lock table. 3945 kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]); 3946 __kmp_free(table_ptr); 3947 table_ptr = next; 3948 } 3949 3950 // Free the buffers allocated for blocks of locks. 3951 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; 3952 __kmp_lock_blocks = NULL; 3953 3954 while (block_ptr != NULL) { 3955 kmp_block_of_locks_t *next = block_ptr->next_block; 3956 __kmp_free(block_ptr->locks); 3957 // *block_ptr itself was allocated at the end of the locks vector. 3958 block_ptr = next; 3959 } 3960 3961 TCW_4(__kmp_init_user_locks, FALSE); 3962 } 3963 3964 #endif // KMP_USE_DYNAMIC_LOCK 3965
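// __kmp_lock_table_insert() grows the user lock table by doubling and, because
// other threads may still be reading the old array, it never frees it
// immediately: the old array's address is parked in element [0] of the new
// array, forming a chain that __kmp_cleanup_user_locks() walks and frees at
// shutdown. A minimal sketch of that growth/retirement scheme (illustrative
// only; the toy_* names are hypothetical and the block is compiled out):
#if 0
#include <cstdlib>
#include <cstring>

static void **toy_table = nullptr;   // slot [0] chains to the previous table
static unsigned toy_used = 1;        // slot [0] is reserved, so start at 1
static unsigned toy_allocated = 0;

static unsigned toy_table_insert(void *item) {
  if (toy_used >= toy_allocated) {
    unsigned size = toy_allocated ? toy_allocated * 2 : 1024;
    void **bigger = (void **)std::malloc(sizeof(void *) * size);
    if (toy_allocated)
      std::memcpy(bigger + 1, toy_table + 1, sizeof(void *) * (toy_used - 1));
    bigger[0] = toy_table;           // retire the old table onto the chain
    toy_table = bigger;
    toy_allocated = size;
  }
  toy_table[toy_used] = item;
  return toy_used++;
}

static void toy_table_cleanup() {
  void **t = toy_table;
  toy_table = nullptr;
  toy_allocated = 0;
  while (t != nullptr) {             // walk the chain of retired tables
    void **prev = (void **)t[0];
    std::free(t);
    t = prev;
  }
}
#endif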