/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems on
// different Linux* OS distributions that either require that you include (or
// break when you try to include) <pci/types.h>. Since all we need is the two
// macros below (which are part of the kernel ABI, so can't change) we just
// define the constants here and don't include <futex.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic wraps properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(TCR_4(lck->lk.poll)) - 1;
}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if ((lck->lk.poll == KMP_LOCK_FREE(tas)) &&
      KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
                                  KMP_LOCK_BUSY(gtid + 1, tas))) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
    KMP_YIELD(TRUE);
  } else {
    KMP_YIELD_SPIN(spins);
  }

  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  while ((lck->lk.poll != KMP_LOCK_FREE(tas)) ||
         (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
                                       KMP_LOCK_BUSY(gtid + 1, tas)))) {

    __kmp_spin_backoff(&backoff);
    if (TCR_4(__kmp_nth) >
        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
      KMP_YIELD(TRUE);
    } else {
      KMP_YIELD_SPIN(spins);
    }
  }
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
  ANNOTATE_TAS_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_tas_lock(lck, gtid);
}

int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  if ((lck->lk.poll == KMP_LOCK_FREE(tas)) &&
      KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
                                  KMP_LOCK_BUSY(gtid + 1, tas))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_tas_lock(lck, gtid);
}

int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_TAS_RELEASED(lck);
  KMP_ST_REL32(&(lck->lk.poll), KMP_LOCK_FREE(tas));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_YIELD(TCR_4(__kmp_nth) >
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}
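/* The test-and-set scheme above amounts to spinning on a compare-and-swap of
   the poll word, with backoff and yielding when the machine is oversubscribed.
   A minimal standalone sketch of the same idea in plain C++11 atomics; the
   names below (tas_spinlock, acquire, release) are illustrative only and are
   not kmp APIs:

     #include <atomic>
     #include <thread>

     struct tas_spinlock {
       std::atomic<int> poll{0}; // 0 == free, gtid + 1 == owner

       void acquire(int gtid) {
         int free_val = 0;
         // Fast path: swing the word from "free" to "owned by gtid".
         if (poll.compare_exchange_strong(free_val, gtid + 1,
                                          std::memory_order_acquire))
           return;
         // Slow path: re-test and retry, yielding between attempts.
         for (;;) {
           free_val = 0;
           if (poll.load(std::memory_order_relaxed) == 0 &&
               poll.compare_exchange_weak(free_val, gtid + 1,
                                          std::memory_order_acquire))
             return;
           std::this_thread::yield();
         }
       }

       void release() { poll.store(0, std::memory_order_release); }
     };
*/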
static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
      (__kmp_get_tas_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_tas_lock(lck, gtid);
}

void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(tas));
}

static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}

void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_tas_lock(lck);
}

// nested test and set locks

int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_tas_lock_timed_template(lck, gtid);
    ANNOTATE_TAS_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_tas_lock(lck, gtid);
}

int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_tas_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_tas_lock(lck, gtid);
}

int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_tas_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}
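/* The nested variants layer reentrancy on top of the plain lock: the owning
   thread re-acquires by bumping depth_locked, and only the release that drops
   the depth back to zero frees the underlying lock. A minimal sketch of that
   pattern over std::mutex (illustrative names, not kmp code; the real code
   tracks owners by gtid rather than by std::thread::id):

     #include <atomic>
     #include <mutex>
     #include <thread>

     struct recursive_wrapper {
       std::mutex inner;
       std::atomic<std::thread::id> owner{};
       int depth = 0;

       void acquire() {
         if (owner.load() == std::this_thread::get_id()) {
           ++depth; // already held by this thread: just count
           return;
         }
         inner.lock();
         owner.store(std::this_thread::get_id());
         depth = 1;
       }

       void release() {
         if (--depth == 0) {
           owner.store(std::thread::id()); // clear ownership before unlocking
           inner.unlock();
         }
       }
     };
*/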
static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_tas_lock(lck, gtid);
}

void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}

void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_destroy_tas_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention. They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).
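/* The protocol below mirrors the classic futex mutex: uncontended acquire and
   release never enter the kernel, a contender marks the lock word as contended
   and sleeps in futex_wait(), and a releaser that sees the contended mark does
   one futex_wake(). A minimal Linux-only sketch using the simpler 0/1/2
   encoding (0 = free, 1 = locked, 2 = locked with waiters) instead of the
   (gtid + 1) << 1 | waiter-bit encoding used here; all names are illustrative,
   not kmp code:

     #include <atomic>
     #include <linux/futex.h>
     #include <sys/syscall.h>
     #include <unistd.h>

     static long futex(std::atomic<int> *addr, int op, int val) {
       return syscall(SYS_futex, reinterpret_cast<int *>(addr), op, val,
                      nullptr, nullptr, 0);
     }

     struct futex_mutex {
       std::atomic<int> state{0};

       void lock() {
         int c = 0;
         if (state.compare_exchange_strong(c, 1, std::memory_order_acquire))
           return; // uncontended fast path: 0 -> 1
         if (c != 2)
           c = state.exchange(2, std::memory_order_acquire);
         while (c != 0) {
           // Sleep only while the word still reads "locked with waiters".
           futex(&state, FUTEX_WAIT, 2);
           c = state.exchange(2, std::memory_order_acquire);
         }
       }

       void unlock() {
         if (state.exchange(0, std::memory_order_release) == 2)
           futex(&state, FUTEX_WAKE, 1); // wake one sleeper, if any
       }
     };
*/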
static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
}

static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 gtid_code = (gtid + 1) << 1;

  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid_code))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  KMP_FSYNC_PREPARE(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  kmp_int32 poll_val;

  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
              &(lck->lk.poll), KMP_LOCK_FREE(futex),
              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {

    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
         lck, gtid, poll_val, cond));

    // NOTE: if you try to use the following condition for this branch
    //
    // if ( poll_val & 1 == 0 )
    //
    // Then the 12.0 compiler has a bug where the following block will
    // always be skipped, regardless of the value of the LSB of poll_val.
    if (!cond) {
      // Try to set the lsb in the poll to indicate to the owner
      // thread that they need to wake this thread up.
      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
        KA_TRACE(
            1000,
            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
             lck, lck->lk.poll, gtid));
        continue;
      }
      poll_val |= KMP_LOCK_BUSY(1, futex);

      KA_TRACE(1000,
               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
                lck->lk.poll, gtid));
    }

    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
         lck, gtid, poll_val));

    kmp_int32 rc;
    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
                      NULL, 0)) != 0) {
      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
                      "failed (rc=%d errno=%d)\n",
                      lck, gtid, poll_val, rc, errno));
      continue;
    }

    KA_TRACE(1000,
             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and was entered on
    // the OS futex queue. We must now perform a futex wake call when releasing
    // the lock, as we have no idea how many other threads are in the queue.
    gtid_code |= 1;
  }

  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
  ANNOTATE_FUTEX_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_futex_lock(lck, gtid);
}

int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_futex_lock(lck, gtid);
}

int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_FUTEX_RELEASED(lck);

  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));

  KA_TRACE(1000,
           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
            lck, gtid, poll_val));

  if (KMP_LOCK_STRIP(poll_val) & 1) {
    KA_TRACE(1000,
             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
              lck, gtid));
    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
            NULL, NULL, 0);
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));

  KMP_YIELD(TCR_4(__kmp_nth) >
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
      (__kmp_get_futex_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_futex_lock(lck, gtid);
}

void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
}

static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_futex_lock(lck);
}

// nested futex locks

int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_futex_lock_timed_template(lck, gtid);
    ANNOTATE_FUTEX_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_futex_lock(lck, gtid);
}

int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_futex_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_futex_lock(lck, gtid);
}

int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_futex_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_futex_lock(lck, gtid);
}

void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}

void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_destroy_futex_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_futex_lock(lck);
}

#endif // KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.owner_id,
                                   std::memory_order_relaxed) -
         1;
}

static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.depth_locked,
                                   std::memory_order_relaxed) != -1;
}

static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
                                   std::memory_order_acquire) == my_ticket;
}

__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);

#ifdef USE_LOCK_PROFILE
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) != my_ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_acquire) == my_ticket) {
    return KMP_LOCK_ACQUIRED_FIRST;
  }
  KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
  ANNOTATE_TICKET_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_set_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_ticket_lock(lck, gtid);

  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                             std::memory_order_relaxed);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                   std::memory_order_relaxed);

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) == my_ticket) {
    kmp_uint32 next_ticket = my_ticket + 1;
    if (std::atomic_compare_exchange_strong_explicit(
            &lck->lk.next_ticket, &my_ticket, next_ticket,
            std::memory_order_acquire, std::memory_order_acquire)) {
      return TRUE;
    }
  }
  return FALSE;
}

static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_test_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_ticket_lock(lck, gtid);

  if (retval) {
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
  }
  return retval;
}

int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                  std::memory_order_relaxed) -
                        std::atomic_load_explicit(&lck->lk.now_serving,
                                                  std::memory_order_relaxed);

  ANNOTATE_TICKET_RELEASED(lck);
  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
                                 std::memory_order_release);

  KMP_YIELD(distance >
            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}
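/* The ticket (bakery) lock above serializes threads through two counters: an
   acquirer atomically takes the next ticket, then waits until now_serving
   reaches that ticket; release simply advances now_serving, which makes the
   hand-off FIFO-fair. A minimal standalone sketch with C++11 atomics
   (illustrative names only, not kmp code):

     #include <atomic>
     #include <thread>

     struct ticket_lock {
       std::atomic<unsigned> next_ticket{0};
       std::atomic<unsigned> now_serving{0};

       void acquire() {
         // Draw a ticket; arrival order decides service order.
         unsigned my_ticket =
             next_ticket.fetch_add(1, std::memory_order_relaxed);
         // Wait until it is our turn.
         while (now_serving.load(std::memory_order_acquire) != my_ticket)
           std::this_thread::yield();
       }

       void release() {
         // Hand the lock to the holder of the next ticket.
         now_serving.fetch_add(1, std::memory_order_release);
       }
     };
*/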
static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
      (__kmp_get_ticket_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  return __kmp_release_ticket_lock(lck, gtid);
}

void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.self = lck;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(
      &lck->lk.owner_id, 0,
      std::memory_order_relaxed); // no thread owns the lock.
  std::atomic_store_explicit(
      &lck->lk.depth_locked, -1,
      std::memory_order_relaxed); // -1 => not a nested lock.
  std::atomic_store_explicit(&lck->lk.initialized, true,
                             std::memory_order_release);
}

static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
  std::atomic_store_explicit(&lck->lk.initialized, false,
                             std::memory_order_release);
  lck->lk.self = NULL;
  lck->lk.location = NULL;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
                             std::memory_order_relaxed);
}

static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_ticket_lock(lck);
}

// nested ticket locks

int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                   std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_ticket_lock_timed_template(lck, gtid);
    ANNOTATE_TICKET_ACQUIRED(lck);
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int
__kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_ticket_lock(lck, gtid);
}

int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                            std::memory_order_relaxed) +
             1;
  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
    retval = 0;
  } else {
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    retval = 1;
  }
  return retval;
}

static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_ticket_lock(lck, gtid);
}

int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
                                      std::memory_order_relaxed) -
       1) == 0) {
    std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
    __kmp_release_ticket_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_ticket_lock(lck, gtid);
}

void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
  // >= 0 for nestable locks, -1 for simple locks
}

static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}

void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
}

static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_ticket_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.initialized,
                                   std::memory_order_relaxed) &&
         (lck->lk.self == lck);
}

static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
*/
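/* Like the textbook MCS lock, the queuing lock below keeps contention local:
   each waiter appends itself to the per-lock queue and then spins on its own
   flag (th_spin_here), and the releaser hands the lock to exactly the thread
   at the head. A compact MCS-style sketch with C++11 atomics, for comparison
   only (the kmp lock stores gtid indices in head_id/tail_id rather than node
   pointers):

     #include <atomic>

     struct mcs_node {
       std::atomic<mcs_node *> next{nullptr};
       std::atomic<bool> locked{false};
     };

     struct mcs_lock {
       std::atomic<mcs_node *> tail{nullptr};

       void acquire(mcs_node *me) {
         me->next.store(nullptr, std::memory_order_relaxed);
         me->locked.store(true, std::memory_order_relaxed);
         // Swing the tail to ourselves; the old tail is our predecessor.
         mcs_node *prev = tail.exchange(me, std::memory_order_acq_rel);
         if (prev == nullptr)
           return; // queue was empty: we own the lock
         prev->next.store(me, std::memory_order_release);
         // Spin on our own flag until the predecessor hands the lock over.
         while (me->locked.load(std::memory_order_acquire))
           ;
       }

       void release(mcs_node *me) {
         mcs_node *succ = me->next.load(std::memory_order_acquire);
         if (succ == nullptr) {
           // No visible successor: try to reset the queue to empty.
           mcs_node *expected = me;
           if (tail.compare_exchange_strong(expected, nullptr,
                                            std::memory_order_acq_rel))
             return;
           // Someone is mid-enqueue; wait for the link to appear.
           while ((succ = me->next.load(std::memory_order_acquire)) == nullptr)
             ;
         }
         succ->locked.store(false, std::memory_order_release); // hand off
       }
     };
*/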
#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = {0};
static int tc = 0;
#define TRACE_LOCK(X, Y)                                                       \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
#define TRACE_LOCK_T(X, Y, Z)                                                  \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
               Z, Q);

static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");

  i = tc % TRACE_BUF_ELE;
  __kmp_printf_no_lock("%s\n", traces[i]);
  i = (i + 1) % TRACE_BUF_ELE;
  while (i != (tc % TRACE_BUF_ELE)) {
    __kmp_printf_no_lock("%s", traces[i]);
    i = (i + 1) % TRACE_BUF_ELE;
  }
  __kmp_printf_no_lock("\n");

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
                       "next_wait:%d, head_id:%d, tail_id:%d\n",
                       gtid + 1, this_thr->th.th_spin_here,
                       this_thr->th.th_next_waiting, head_id, tail_id);

  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);

  if (lck->lk.head_id >= 1) {
    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
    while (t > 0) {
      __kmp_printf_no_lock("-> %d ", t);
      t = __kmp_threads[t - 1]->th.th_next_waiting;
    }
  }
  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
  __kmp_printf_no_lock("\n\n");
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
  return TCR_4(lck->lk.owner_id) - 1;
}

static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {
  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
  volatile kmp_uint32 *spin_here_p;
  kmp_int32 need_mf = 1;

#if OMPT_SUPPORT
  omp_state_t prev_state = omp_state_undefined;
#endif

  KA_TRACE(1000,
           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));

  KMP_FSYNC_PREPARE(lck);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  spin_here_p = &this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "acq ent");
  if (*spin_here_p)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!*spin_here_p);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
     head_id_p that may follow, not just in execution order, but also in
     visibility order. This way, when a releasing thread observes the changes to
     the queue by this thread, it can rightly assume that spin_here_p has
     already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
     not premature.  If the releasing thread sets spin_here_p to FALSE before
     this thread sets it to TRUE, this thread will hang. */
  *spin_here_p = TRUE; /* before enqueuing to prevent race */

  while (1) {
    kmp_int32 enqueued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

    switch (head) {

    case -1: {
#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      tail = 0; /* to make sure next link asynchronously read is not set
                accidentally; this assignment prevents us from entering the
                if ( t > 0 ) condition in the enqueued case below, which is not
                necessary for this state transition */

      need_mf = 0;
      /* try (-1,0)->(tid,tid) */
      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
                                             KMP_PACK_64(-1, 0),
                                             KMP_PACK_64(gtid + 1, gtid + 1));
#ifdef DEBUG_QUEUING_LOCKS
      if (enqueued)
        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
#endif
    } break;

    default: {
      tail = *tail_id_p;
      KMP_DEBUG_ASSERT(tail != gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif

      if (tail == 0) {
        enqueued = FALSE;
      } else {
        need_mf = 0;
        /* try (h,t) or (h,h)->(h,tid) */
        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
        if (enqueued)
          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
#endif
      }
    } break;

    case 0: /* empty queue */
    {
      kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      /* try (0,0)->(-1,0) */

      /* only legal transition out of head = 0 is head = -1 with no change to
       * tail */
      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);

      if (grabbed_lock) {

        *spin_here_p = FALSE;

        KA_TRACE(
            1000,
            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
#endif

#if OMPT_SUPPORT
        if (ompt_enabled.enabled && prev_state != omp_state_undefined) {
          /* change the state before clearing wait_id */
          this_thr->th.ompt_thread_info.state = prev_state;
          this_thr->th.ompt_thread_info.wait_id = 0;
        }
#endif

        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
      }
      enqueued = FALSE;
    } break;
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled && prev_state == omp_state_undefined) {
      /* this thread will spin; set wait_id before entering wait state */
      prev_state = this_thr->th.ompt_thread_info.state;
      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
      this_thr->th.ompt_thread_info.state = omp_state_wait_lock;
    }
#endif

    if (enqueued) {
      if (tail > 0) {
        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
        KMP_ASSERT(tail_thr != NULL);
        tail_thr->th.th_next_waiting = gtid + 1;
        /* corresponding wait for this write in release code */
      }
      KA_TRACE(1000,
               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
                lck, gtid));

      /* ToDo: May want to consider using __kmp_wait_sleep or something that
         sleeps for throughput only here. */
      KMP_MB();
      KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq spin");

      if (this_thr->th.th_next_waiting != 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
                      "waiting on queue\n",
                      lck, gtid));

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq exit 2");
#endif

#if OMPT_SUPPORT
      /* change the state before clearing wait_id */
      this_thr->th.ompt_thread_info.state = prev_state;
      this_thr->th.ompt_thread_info.wait_id = 0;
#endif

      /* got lock, we were dequeued by the thread that released lock */
      return KMP_LOCK_ACQUIRED_FIRST;
    }

    /* Yield if number of threads > number of logical processors */
    /* ToDo: Not sure why this should only be in oversubscription case,
       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
    KMP_YIELD(TCR_4(__kmp_nth) >
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "acq retry");
#endif
  }
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
  ANNOTATE_QUEUING_ACQUIRED(lck);
  return retval;
}
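/* The (-1,0)->(tid,tid) and (h,h)->(-1,0) transitions above update head_id and
   tail_id together by viewing the two adjacent 32-bit fields as one 64-bit
   word (KMP_PACK_64) and issuing a single 64-bit compare-and-swap, so the pair
   can never be observed half-updated. A standalone sketch of that packing
   trick (illustrative only; which field lands in the low half depends on the
   actual struct layout and endianness):

     #include <atomic>
     #include <cstdint>

     // Pack two 32-bit ids into one 64-bit word (low half = head here).
     static inline uint64_t pack64(uint32_t head, uint32_t tail) {
       return (uint64_t)tail << 32 | head;
     }

     struct head_tail {
       std::atomic<uint64_t> word{0};

       // Replace (head,tail) only if both still hold the expected values.
       bool transition(uint32_t old_head, uint32_t old_tail, uint32_t new_head,
                       uint32_t new_tail) {
         uint64_t expected = pack64(old_head, old_tail);
         return word.compare_exchange_strong(expected,
                                             pack64(new_head, new_tail),
                                             std::memory_order_acq_rel);
       }
     };
*/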
retval = __kmp_test_queuing_lock(lck, gtid); 1394 1395 if (retval) { 1396 lck->lk.owner_id = gtid + 1; 1397 } 1398 return retval; 1399 } 1400 1401 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1402 kmp_info_t *this_thr; 1403 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1404 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; 1405 1406 KA_TRACE(1000, 1407 ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); 1408 KMP_DEBUG_ASSERT(gtid >= 0); 1409 this_thr = __kmp_thread_from_gtid(gtid); 1410 KMP_DEBUG_ASSERT(this_thr != NULL); 1411 #ifdef DEBUG_QUEUING_LOCKS 1412 TRACE_LOCK(gtid + 1, "rel ent"); 1413 1414 if (this_thr->th.th_spin_here) 1415 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1416 if (this_thr->th.th_next_waiting != 0) 1417 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1418 #endif 1419 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 1420 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1421 1422 KMP_FSYNC_RELEASING(lck); 1423 ANNOTATE_QUEUING_RELEASED(lck); 1424 1425 while (1) { 1426 kmp_int32 dequeued; 1427 kmp_int32 head; 1428 kmp_int32 tail; 1429 1430 head = *head_id_p; 1431 1432 #ifdef DEBUG_QUEUING_LOCKS 1433 tail = *tail_id_p; 1434 TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail); 1435 if (head == 0) 1436 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1437 #endif 1438 KMP_DEBUG_ASSERT(head != 1439 0); /* holding the lock, head must be -1 or queue head */ 1440 1441 if (head == -1) { /* nobody on queue */ 1442 /* try (-1,0)->(0,0) */ 1443 if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) { 1444 KA_TRACE( 1445 1000, 1446 ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1447 lck, gtid)); 1448 #ifdef DEBUG_QUEUING_LOCKS 1449 TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0); 1450 #endif 1451 1452 #if OMPT_SUPPORT 1453 /* nothing to do - no other thread is trying to shift blame */ 1454 #endif 1455 return KMP_LOCK_RELEASED; 1456 } 1457 dequeued = FALSE; 1458 } else { 1459 tail = *tail_id_p; 1460 if (head == tail) { /* only one thread on the queue */ 1461 #ifdef DEBUG_QUEUING_LOCKS 1462 if (head <= 0) 1463 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1464 #endif 1465 KMP_DEBUG_ASSERT(head > 0); 1466 1467 /* try (h,h)->(-1,0) */ 1468 dequeued = KMP_COMPARE_AND_STORE_REL64( 1469 RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head), 1470 KMP_PACK_64(-1, 0)); 1471 #ifdef DEBUG_QUEUING_LOCKS 1472 TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); 1473 #endif 1474 1475 } else { 1476 volatile kmp_int32 *waiting_id_p; 1477 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1478 KMP_DEBUG_ASSERT(head_thr != NULL); 1479 waiting_id_p = &head_thr->th.th_next_waiting; 1480 1481 /* Does this require synchronous reads? */ 1482 #ifdef DEBUG_QUEUING_LOCKS 1483 if (head <= 0 || tail <= 0) 1484 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1485 #endif 1486 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1487 1488 /* try (h,t)->(h',t) or (t,t) */ 1489 KMP_MB(); 1490 /* make sure enqueuing thread has time to update next waiting thread 1491 * field */ 1492 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0, 1493 KMP_NEQ, NULL); 1494 #ifdef DEBUG_QUEUING_LOCKS 1495 TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)"); 1496 #endif 1497 dequeued = TRUE; 1498 } 1499 } 1500 1501 if (dequeued) { 1502 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1503 KMP_DEBUG_ASSERT(head_thr != NULL); 1504 1505 /* Does this require synchronous reads? 
*/ 1506 #ifdef DEBUG_QUEUING_LOCKS 1507 if (head <= 0 || tail <= 0) 1508 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1509 #endif 1510 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1511 1512 /* For clean code only. Thread not released until next statement prevents 1513 race with acquire code. */ 1514 head_thr->th.th_next_waiting = 0; 1515 #ifdef DEBUG_QUEUING_LOCKS 1516 TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head); 1517 #endif 1518 1519 KMP_MB(); 1520 /* reset spin value */ 1521 head_thr->th.th_spin_here = FALSE; 1522 1523 KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after " 1524 "dequeuing\n", 1525 lck, gtid)); 1526 #ifdef DEBUG_QUEUING_LOCKS 1527 TRACE_LOCK(gtid + 1, "rel exit 2"); 1528 #endif 1529 return KMP_LOCK_RELEASED; 1530 } 1531 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring 1532 threads */ 1533 1534 #ifdef DEBUG_QUEUING_LOCKS 1535 TRACE_LOCK(gtid + 1, "rel retry"); 1536 #endif 1537 1538 } /* while */ 1539 KMP_ASSERT2(0, "should not get here"); 1540 return KMP_LOCK_RELEASED; 1541 } 1542 1543 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1544 kmp_int32 gtid) { 1545 char const *const func = "omp_unset_lock"; 1546 KMP_MB(); /* in case another processor initialized lock */ 1547 if (lck->lk.initialized != lck) { 1548 KMP_FATAL(LockIsUninitialized, func); 1549 } 1550 if (__kmp_is_queuing_lock_nestable(lck)) { 1551 KMP_FATAL(LockNestableUsedAsSimple, func); 1552 } 1553 if (__kmp_get_queuing_lock_owner(lck) == -1) { 1554 KMP_FATAL(LockUnsettingFree, func); 1555 } 1556 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1557 KMP_FATAL(LockUnsettingSetByAnother, func); 1558 } 1559 lck->lk.owner_id = 0; 1560 return __kmp_release_queuing_lock(lck, gtid); 1561 } 1562 1563 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) { 1564 lck->lk.location = NULL; 1565 lck->lk.head_id = 0; 1566 lck->lk.tail_id = 0; 1567 lck->lk.next_ticket = 0; 1568 lck->lk.now_serving = 0; 1569 lck->lk.owner_id = 0; // no thread owns the lock. 1570 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
1571 lck->lk.initialized = lck; 1572 1573 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1574 } 1575 1576 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1577 __kmp_init_queuing_lock(lck); 1578 } 1579 1580 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) { 1581 lck->lk.initialized = NULL; 1582 lck->lk.location = NULL; 1583 lck->lk.head_id = 0; 1584 lck->lk.tail_id = 0; 1585 lck->lk.next_ticket = 0; 1586 lck->lk.now_serving = 0; 1587 lck->lk.owner_id = 0; 1588 lck->lk.depth_locked = -1; 1589 } 1590 1591 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1592 char const *const func = "omp_destroy_lock"; 1593 if (lck->lk.initialized != lck) { 1594 KMP_FATAL(LockIsUninitialized, func); 1595 } 1596 if (__kmp_is_queuing_lock_nestable(lck)) { 1597 KMP_FATAL(LockNestableUsedAsSimple, func); 1598 } 1599 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1600 KMP_FATAL(LockStillOwned, func); 1601 } 1602 __kmp_destroy_queuing_lock(lck); 1603 } 1604 1605 // nested queuing locks 1606 1607 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1608 KMP_DEBUG_ASSERT(gtid >= 0); 1609 1610 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1611 lck->lk.depth_locked += 1; 1612 return KMP_LOCK_ACQUIRED_NEXT; 1613 } else { 1614 __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1615 ANNOTATE_QUEUING_ACQUIRED(lck); 1616 KMP_MB(); 1617 lck->lk.depth_locked = 1; 1618 KMP_MB(); 1619 lck->lk.owner_id = gtid + 1; 1620 return KMP_LOCK_ACQUIRED_FIRST; 1621 } 1622 } 1623 1624 static int 1625 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1626 kmp_int32 gtid) { 1627 char const *const func = "omp_set_nest_lock"; 1628 if (lck->lk.initialized != lck) { 1629 KMP_FATAL(LockIsUninitialized, func); 1630 } 1631 if (!__kmp_is_queuing_lock_nestable(lck)) { 1632 KMP_FATAL(LockSimpleUsedAsNestable, func); 1633 } 1634 return __kmp_acquire_nested_queuing_lock(lck, gtid); 1635 } 1636 1637 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1638 int retval; 1639 1640 KMP_DEBUG_ASSERT(gtid >= 0); 1641 1642 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1643 retval = ++lck->lk.depth_locked; 1644 } else if (!__kmp_test_queuing_lock(lck, gtid)) { 1645 retval = 0; 1646 } else { 1647 KMP_MB(); 1648 retval = lck->lk.depth_locked = 1; 1649 KMP_MB(); 1650 lck->lk.owner_id = gtid + 1; 1651 } 1652 return retval; 1653 } 1654 1655 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1656 kmp_int32 gtid) { 1657 char const *const func = "omp_test_nest_lock"; 1658 if (lck->lk.initialized != lck) { 1659 KMP_FATAL(LockIsUninitialized, func); 1660 } 1661 if (!__kmp_is_queuing_lock_nestable(lck)) { 1662 KMP_FATAL(LockSimpleUsedAsNestable, func); 1663 } 1664 return __kmp_test_nested_queuing_lock(lck, gtid); 1665 } 1666 1667 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1668 KMP_DEBUG_ASSERT(gtid >= 0); 1669 1670 KMP_MB(); 1671 if (--(lck->lk.depth_locked) == 0) { 1672 KMP_MB(); 1673 lck->lk.owner_id = 0; 1674 __kmp_release_queuing_lock(lck, gtid); 1675 return KMP_LOCK_RELEASED; 1676 } 1677 return KMP_LOCK_STILL_HELD; 1678 } 1679 1680 static int 1681 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1682 kmp_int32 gtid) { 1683 char const *const func = "omp_unset_nest_lock"; 1684 KMP_MB(); /* in case another processor initialized lock */ 1685 if (lck->lk.initialized != lck) { 1686 
KMP_FATAL(LockIsUninitialized, func); 1687 } 1688 if (!__kmp_is_queuing_lock_nestable(lck)) { 1689 KMP_FATAL(LockSimpleUsedAsNestable, func); 1690 } 1691 if (__kmp_get_queuing_lock_owner(lck) == -1) { 1692 KMP_FATAL(LockUnsettingFree, func); 1693 } 1694 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1695 KMP_FATAL(LockUnsettingSetByAnother, func); 1696 } 1697 return __kmp_release_nested_queuing_lock(lck, gtid); 1698 } 1699 1700 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1701 __kmp_init_queuing_lock(lck); 1702 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1703 } 1704 1705 static void 1706 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1707 __kmp_init_nested_queuing_lock(lck); 1708 } 1709 1710 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1711 __kmp_destroy_queuing_lock(lck); 1712 lck->lk.depth_locked = 0; 1713 } 1714 1715 static void 1716 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1717 char const *const func = "omp_destroy_nest_lock"; 1718 if (lck->lk.initialized != lck) { 1719 KMP_FATAL(LockIsUninitialized, func); 1720 } 1721 if (!__kmp_is_queuing_lock_nestable(lck)) { 1722 KMP_FATAL(LockSimpleUsedAsNestable, func); 1723 } 1724 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1725 KMP_FATAL(LockStillOwned, func); 1726 } 1727 __kmp_destroy_nested_queuing_lock(lck); 1728 } 1729 1730 // access functions to fields which don't exist for all lock kinds. 1731 1732 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) { 1733 return lck == lck->lk.initialized; 1734 } 1735 1736 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) { 1737 return lck->lk.location; 1738 } 1739 1740 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck, 1741 const ident_t *loc) { 1742 lck->lk.location = loc; 1743 } 1744 1745 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) { 1746 return lck->lk.flags; 1747 } 1748 1749 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, 1750 kmp_lock_flags_t flags) { 1751 lck->lk.flags = flags; 1752 } 1753 1754 #if KMP_USE_ADAPTIVE_LOCKS 1755 1756 /* RTM Adaptive locks */ 1757 1758 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1759 1760 #include <immintrin.h> 1761 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1762 1763 #else 1764 1765 // Values from the status register after failed speculation. 1766 #define _XBEGIN_STARTED (~0u) 1767 #define _XABORT_EXPLICIT (1 << 0) 1768 #define _XABORT_RETRY (1 << 1) 1769 #define _XABORT_CONFLICT (1 << 2) 1770 #define _XABORT_CAPACITY (1 << 3) 1771 #define _XABORT_DEBUG (1 << 4) 1772 #define _XABORT_NESTED (1 << 5) 1773 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1774 1775 // Aborts for which it's worth trying again immediately 1776 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1777 1778 #define STRINGIZE_INTERNAL(arg) #arg 1779 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1780 1781 // Access to RTM instructions 1782 /*A version of XBegin which returns -1 on speculation, and the value of EAX on 1783 an abort. This is the same definition as the compiler intrinsic that will be 1784 supported at some point. 
*/ 1785 static __inline int _xbegin() { 1786 int res = -1; 1787 1788 #if KMP_OS_WINDOWS 1789 #if KMP_ARCH_X86_64 1790 _asm { 1791 _emit 0xC7 1792 _emit 0xF8 1793 _emit 2 1794 _emit 0 1795 _emit 0 1796 _emit 0 1797 jmp L2 1798 mov res, eax 1799 L2: 1800 } 1801 #else /* IA32 */ 1802 _asm { 1803 _emit 0xC7 1804 _emit 0xF8 1805 _emit 2 1806 _emit 0 1807 _emit 0 1808 _emit 0 1809 jmp L2 1810 mov res, eax 1811 L2: 1812 } 1813 #endif // KMP_ARCH_X86_64 1814 #else 1815 /* Note that %eax must be noted as killed (clobbered), because the XSR is 1816 returned in %eax(%rax) on abort. Other register values are restored, so 1817 don't need to be killed. 1818 1819 We must also mark 'res' as an input and an output, since otherwise 1820 'res=-1' may be dropped as being dead, whereas we do need the assignment on 1821 the successful (i.e., non-abort) path. */ 1822 __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" 1823 " .long 1f-1b-6\n" 1824 " jmp 2f\n" 1825 "1: movl %%eax,%0\n" 1826 "2:" 1827 : "+r"(res)::"memory", "%eax"); 1828 #endif // KMP_OS_WINDOWS 1829 return res; 1830 } 1831 1832 /* Transaction end */ 1833 static __inline void _xend() { 1834 #if KMP_OS_WINDOWS 1835 __asm { 1836 _emit 0x0f 1837 _emit 0x01 1838 _emit 0xd5 1839 } 1840 #else 1841 __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); 1842 #endif 1843 } 1844 1845 /* This is a macro, the argument must be a single byte constant which can be 1846 evaluated by the inline assembler, since it is emitted as a byte into the 1847 assembly code. */ 1848 // clang-format off 1849 #if KMP_OS_WINDOWS 1850 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG 1851 #else 1852 #define _xabort(ARG) \ 1853 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); 1854 #endif 1855 // clang-format on 1856 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1857 1858 // Statistics is collected for testing purpose 1859 #if KMP_DEBUG_ADAPTIVE_LOCKS 1860 1861 // We accumulate speculative lock statistics when the lock is destroyed. We 1862 // keep locks that haven't been destroyed in the liveLocks list so that we can 1863 // grab their statistics too. 1864 static kmp_adaptive_lock_statistics_t destroyedStats; 1865 1866 // To hold the list of live locks. 1867 static kmp_adaptive_lock_info_t liveLocks; 1868 1869 // A lock so we can safely update the list of locks. 1870 static kmp_bootstrap_lock_t chain_lock; 1871 1872 // Initialize the list of stats. 
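// liveLocks is the sentinel of a circular, doubly-linked list: it points to
// itself while the list is empty, and __kmp_remember_lock/__kmp_forget_lock
// below splice real locks in and out of the chain under chain_lock.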
1873 void __kmp_init_speculative_stats() { 1874 kmp_adaptive_lock_info_t *lck = &liveLocks; 1875 1876 memset((void *)&(lck->stats), 0, sizeof(lck->stats)); 1877 lck->stats.next = lck; 1878 lck->stats.prev = lck; 1879 1880 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1881 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1882 1883 __kmp_init_bootstrap_lock(&chain_lock); 1884 } 1885 1886 // Insert the lock into the circular list 1887 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { 1888 __kmp_acquire_bootstrap_lock(&chain_lock); 1889 1890 lck->stats.next = liveLocks.stats.next; 1891 lck->stats.prev = &liveLocks; 1892 1893 liveLocks.stats.next = lck; 1894 lck->stats.next->stats.prev = lck; 1895 1896 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1897 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1898 1899 __kmp_release_bootstrap_lock(&chain_lock); 1900 } 1901 1902 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { 1903 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1904 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1905 1906 kmp_adaptive_lock_info_t *n = lck->stats.next; 1907 kmp_adaptive_lock_info_t *p = lck->stats.prev; 1908 1909 n->stats.prev = p; 1910 p->stats.next = n; 1911 } 1912 1913 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1914 memset((void *)&lck->stats, 0, sizeof(lck->stats)); 1915 __kmp_remember_lock(lck); 1916 } 1917 1918 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, 1919 kmp_adaptive_lock_info_t *lck) { 1920 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 1921 1922 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 1923 t->successfulSpeculations += s->successfulSpeculations; 1924 t->hardFailedSpeculations += s->hardFailedSpeculations; 1925 t->softFailedSpeculations += s->softFailedSpeculations; 1926 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 1927 t->lemmingYields += s->lemmingYields; 1928 } 1929 1930 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1931 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 1932 1933 __kmp_acquire_bootstrap_lock(&chain_lock); 1934 1935 __kmp_add_stats(&destroyedStats, lck); 1936 __kmp_forget_lock(lck); 1937 1938 __kmp_release_bootstrap_lock(&chain_lock); 1939 } 1940 1941 static float percent(kmp_uint32 count, kmp_uint32 total) { 1942 return (total == 0) ? 0.0 : (100.0 * count) / total; 1943 } 1944 1945 static FILE *__kmp_open_stats_file() { 1946 if (strcmp(__kmp_speculative_statsfile, "-") == 0) 1947 return stdout; 1948 1949 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; 1950 char buffer[buffLen]; 1951 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, 1952 (kmp_int32)getpid()); 1953 FILE *result = fopen(&buffer[0], "w"); 1954 1955 // Maybe we should issue a warning here... 1956 return result ? 
result : stdout; 1957 } 1958 1959 void __kmp_print_speculative_stats() { 1960 if (__kmp_user_lock_kind != lk_adaptive) 1961 return; 1962 1963 FILE *statsFile = __kmp_open_stats_file(); 1964 1965 kmp_adaptive_lock_statistics_t total = destroyedStats; 1966 kmp_adaptive_lock_info_t *lck; 1967 1968 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 1969 __kmp_add_stats(&total, lck); 1970 } 1971 kmp_adaptive_lock_statistics_t *t = &total; 1972 kmp_uint32 totalSections = 1973 t->nonSpeculativeAcquires + t->successfulSpeculations; 1974 kmp_uint32 totalSpeculations = t->successfulSpeculations + 1975 t->hardFailedSpeculations + 1976 t->softFailedSpeculations; 1977 1978 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); 1979 fprintf(statsFile, " Lock parameters: \n" 1980 " max_soft_retries : %10d\n" 1981 " max_badness : %10d\n", 1982 __kmp_adaptive_backoff_params.max_soft_retries, 1983 __kmp_adaptive_backoff_params.max_badness); 1984 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", 1985 t->nonSpeculativeAcquireAttempts); 1986 fprintf(statsFile, " Total critical sections : %10d\n", 1987 totalSections); 1988 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", 1989 t->successfulSpeculations, 1990 percent(t->successfulSpeculations, totalSections)); 1991 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 1992 t->nonSpeculativeAcquires, 1993 percent(t->nonSpeculativeAcquires, totalSections)); 1994 fprintf(statsFile, " Lemming yields : %10d\n\n", 1995 t->lemmingYields); 1996 1997 fprintf(statsFile, " Speculative acquire attempts : %10d\n", 1998 totalSpeculations); 1999 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", 2000 t->successfulSpeculations, 2001 percent(t->successfulSpeculations, totalSpeculations)); 2002 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", 2003 t->softFailedSpeculations, 2004 percent(t->softFailedSpeculations, totalSpeculations)); 2005 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", 2006 t->hardFailedSpeculations, 2007 percent(t->hardFailedSpeculations, totalSpeculations)); 2008 2009 if (statsFile != stdout) 2010 fclose(statsFile); 2011 } 2012 2013 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) 2014 #else 2015 #define KMP_INC_STAT(lck, stat) 2016 2017 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2018 2019 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { 2020 // It is enough to check that the head_id is zero. 2021 // We don't also need to check the tail. 2022 bool res = lck->lk.head_id == 0; 2023 2024 // We need a fence here, since we must ensure that no memory operations 2025 // from later in this thread float above that read. 2026 #if KMP_COMPILER_ICC 2027 _mm_mfence(); 2028 #else 2029 __sync_synchronize(); 2030 #endif 2031 2032 return res; 2033 } 2034 2035 // Functions for manipulating the badness 2036 static __inline void 2037 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { 2038 // Reset the badness to zero so we eagerly try to speculate again 2039 lck->lk.adaptive.badness = 0; 2040 KMP_INC_STAT(lck, successfulSpeculations); 2041 } 2042 2043 // Create a bit mask with one more set bit. 2044 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { 2045 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; 2046 if (newBadness > lck->lk.adaptive.max_badness) { 2047 return; 2048 } else { 2049 lck->lk.adaptive.badness = newBadness; 2050 } 2051 } 2052 2053 // Check whether speculation should be attempted. 
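// Since badness always has the form 2^k - 1 (see __kmp_step_badness above) and
// is reset to zero after a successful speculation, the test below,
// (acquire_attempts & badness) == 0, allows speculation only when the low k
// bits of the non-speculative attempt count are zero, i.e. roughly once every
// 2^k acquisitions. For example, with badness == 7 only attempts 0, 8, 16, ...
// pass the gate; with badness == 0 every attempt speculates.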
2054 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, 2055 kmp_int32 gtid) { 2056 kmp_uint32 badness = lck->lk.adaptive.badness; 2057 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; 2058 int res = (attempts & badness) == 0; 2059 return res; 2060 } 2061 2062 // Attempt to acquire only the speculative lock. 2063 // Does not back off to the non-speculative lock. 2064 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, 2065 kmp_int32 gtid) { 2066 int retries = lck->lk.adaptive.max_soft_retries; 2067 2068 // We don't explicitly count the start of speculation, rather we record the 2069 // results (success, hard fail, soft fail). The sum of all of those is the 2070 // total number of times we started speculation since all speculations must 2071 // end one of those ways. 2072 do { 2073 kmp_uint32 status = _xbegin(); 2074 // Switch this in to disable actual speculation but exercise at least some 2075 // of the rest of the code. Useful for debugging... 2076 // kmp_uint32 status = _XABORT_NESTED; 2077 2078 if (status == _XBEGIN_STARTED) { 2079 /* We have successfully started speculation. Check that no-one acquired 2080 the lock for real between when we last looked and now. This also gets 2081 the lock cache line into our read-set, which we need so that we'll 2082 abort if anyone later claims it for real. */ 2083 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2084 // Lock is now visibly acquired, so someone beat us to it. Abort the 2085 // transaction so we'll restart from _xbegin with the failure status. 2086 _xabort(0x01); 2087 KMP_ASSERT2(0, "should not get here"); 2088 } 2089 return 1; // Lock has been acquired (speculatively) 2090 } else { 2091 // We have aborted, update the statistics 2092 if (status & SOFT_ABORT_MASK) { 2093 KMP_INC_STAT(lck, softFailedSpeculations); 2094 // and loop round to retry. 2095 } else { 2096 KMP_INC_STAT(lck, hardFailedSpeculations); 2097 // Give up if we had a hard failure. 2098 break; 2099 } 2100 } 2101 } while (retries--); // Loop while we have retries, and didn't fail hard. 2102 2103 // Either we had a hard failure or we didn't succeed softly after 2104 // the full set of attempts, so back off the badness. 2105 __kmp_step_badness(lck); 2106 return 0; 2107 } 2108 2109 // Attempt to acquire the speculative lock, or back off to the non-speculative 2110 // one if the speculative lock cannot be acquired. 2111 // We can succeed speculatively, non-speculatively, or fail. 2112 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { 2113 // First try to acquire the lock speculatively 2114 if (__kmp_should_speculate(lck, gtid) && 2115 __kmp_test_adaptive_lock_only(lck, gtid)) 2116 return 1; 2117 2118 // Speculative acquisition failed, so try to acquire it non-speculatively. 2119 // Count the non-speculative acquire attempt 2120 lck->lk.adaptive.acquire_attempts++; 2121 2122 // Use base, non-speculative lock. 2123 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { 2124 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2125 return 1; // Lock is acquired (non-speculatively) 2126 } else { 2127 return 0; // Failed to acquire the lock, it's already visibly locked. 
2128 } 2129 } 2130 2131 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2132 kmp_int32 gtid) { 2133 char const *const func = "omp_test_lock"; 2134 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2135 KMP_FATAL(LockIsUninitialized, func); 2136 } 2137 2138 int retval = __kmp_test_adaptive_lock(lck, gtid); 2139 2140 if (retval) { 2141 lck->lk.qlk.owner_id = gtid + 1; 2142 } 2143 return retval; 2144 } 2145 2146 // Block until we can acquire a speculative, adaptive lock. We check whether we 2147 // should be trying to speculate. If we should be, we check the real lock to see 2148 // if it is free, and, if not, pause without attempting to acquire it until it 2149 // is. Then we try the speculative acquire. This means that although we suffer 2150 // from lemmings a little (because all we can't acquire the lock speculatively 2151 // until the queue of threads waiting has cleared), we don't get into a state 2152 // where we can never acquire the lock speculatively (because we force the queue 2153 // to clear by preventing new arrivals from entering the queue). This does mean 2154 // that when we're trying to break lemmings, the lock is no longer fair. However 2155 // OpenMP makes no guarantee that its locks are fair, so this isn't a real 2156 // problem. 2157 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck, 2158 kmp_int32 gtid) { 2159 if (__kmp_should_speculate(lck, gtid)) { 2160 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2161 if (__kmp_test_adaptive_lock_only(lck, gtid)) 2162 return; 2163 // We tried speculation and failed, so give up. 2164 } else { 2165 // We can't try speculation until the lock is free, so we pause here 2166 // (without suspending on the queueing lock, to allow it to drain, then 2167 // try again. All other threads will also see the same result for 2168 // shouldSpeculate, so will be doing the same if they try to claim the 2169 // lock from now on. 2170 while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2171 KMP_INC_STAT(lck, lemmingYields); 2172 __kmp_yield(TRUE); 2173 } 2174 2175 if (__kmp_test_adaptive_lock_only(lck, gtid)) 2176 return; 2177 } 2178 } 2179 2180 // Speculative acquisition failed, so acquire it non-speculatively. 2181 // Count the non-speculative acquire attempt 2182 lck->lk.adaptive.acquire_attempts++; 2183 2184 __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid); 2185 // We have acquired the base lock, so count that. 2186 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2187 ANNOTATE_QUEUING_ACQUIRED(lck); 2188 } 2189 2190 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2191 kmp_int32 gtid) { 2192 char const *const func = "omp_set_lock"; 2193 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2194 KMP_FATAL(LockIsUninitialized, func); 2195 } 2196 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) { 2197 KMP_FATAL(LockIsAlreadyOwned, func); 2198 } 2199 2200 __kmp_acquire_adaptive_lock(lck, gtid); 2201 2202 lck->lk.qlk.owner_id = gtid + 1; 2203 } 2204 2205 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck, 2206 kmp_int32 gtid) { 2207 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR( 2208 lck))) { // If the lock doesn't look claimed we must be speculating. 2209 // (Or the user's code is buggy and they're releasing without locking; 2210 // if we had XTEST we'd be able to check that case...) 
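    // If we really are speculating, the lock word is in the transaction's read
    // set (a competing real acquisition would already have aborted us back to
    // the _xbegin in __kmp_test_adaptive_lock_only), so _xend() below simply
    // commits the speculative critical section.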
2211 _xend(); // Exit speculation 2212 __kmp_update_badness_after_success(lck); 2213 } else { // Since the lock *is* visibly locked we're not speculating, 2214 // so should use the underlying lock's release scheme. 2215 __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); 2216 } 2217 return KMP_LOCK_RELEASED; 2218 } 2219 2220 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2221 kmp_int32 gtid) { 2222 char const *const func = "omp_unset_lock"; 2223 KMP_MB(); /* in case another processor initialized lock */ 2224 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2225 KMP_FATAL(LockIsUninitialized, func); 2226 } 2227 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { 2228 KMP_FATAL(LockUnsettingFree, func); 2229 } 2230 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { 2231 KMP_FATAL(LockUnsettingSetByAnother, func); 2232 } 2233 lck->lk.qlk.owner_id = 0; 2234 __kmp_release_adaptive_lock(lck, gtid); 2235 return KMP_LOCK_RELEASED; 2236 } 2237 2238 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { 2239 __kmp_init_queuing_lock(GET_QLK_PTR(lck)); 2240 lck->lk.adaptive.badness = 0; 2241 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; 2242 lck->lk.adaptive.max_soft_retries = 2243 __kmp_adaptive_backoff_params.max_soft_retries; 2244 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2245 #if KMP_DEBUG_ADAPTIVE_LOCKS 2246 __kmp_zero_speculative_stats(&lck->lk.adaptive); 2247 #endif 2248 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2249 } 2250 2251 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2252 __kmp_init_adaptive_lock(lck); 2253 } 2254 2255 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { 2256 #if KMP_DEBUG_ADAPTIVE_LOCKS 2257 __kmp_accumulate_speculative_stats(&lck->lk.adaptive); 2258 #endif 2259 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); 2260 // Nothing needed for the speculative part. 2261 } 2262 2263 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2264 char const *const func = "omp_destroy_lock"; 2265 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2266 KMP_FATAL(LockIsUninitialized, func); 2267 } 2268 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) { 2269 KMP_FATAL(LockStillOwned, func); 2270 } 2271 __kmp_destroy_adaptive_lock(lck); 2272 } 2273 2274 #endif // KMP_USE_ADAPTIVE_LOCKS 2275 2276 /* ------------------------------------------------------------------------ */ 2277 /* DRDPA ticket locks */ 2278 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2279 2280 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) { 2281 return TCR_4(lck->lk.owner_id) - 1; 2282 } 2283 2284 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) { 2285 return lck->lk.depth_locked != -1; 2286 } 2287 2288 __forceinline static int 2289 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2290 kmp_uint64 ticket = 2291 KMP_TEST_THEN_INC64(RCAST(volatile kmp_int64 *, &lck->lk.next_ticket)); 2292 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2293 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; 2294 2295 #ifdef USE_LOCK_PROFILE 2296 if (TCR_8(polls[ticket & mask].poll) != ticket) 2297 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2298 /* else __kmp_printf( "." 
);*/ 2299 #endif /* USE_LOCK_PROFILE */ 2300 2301 // Now spin-wait, but reload the polls pointer and mask, in case the 2302 // polling area has been reconfigured. Unless it is reconfigured, the 2303 // reloads stay in L1 cache and are cheap. 2304 // 2305 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!! 2306 // 2307 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2308 // and poll to be re-read every spin iteration. 2309 kmp_uint32 spins; 2310 2311 KMP_FSYNC_PREPARE(lck); 2312 KMP_INIT_YIELD(spins); 2313 while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load 2314 // If we are oversubscribed, 2315 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2316 // CPU Pause is in the macros for yield. 2317 // 2318 KMP_YIELD(TCR_4(__kmp_nth) > 2319 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2320 KMP_YIELD_SPIN(spins); 2321 2322 // Re-read the mask and the poll pointer from the lock structure. 2323 // 2324 // Make certain that "mask" is read before "polls" !!! 2325 // 2326 // If another thread picks reconfigures the polling area and updates their 2327 // values, and we get the new value of mask and the old polls pointer, we 2328 // could access memory beyond the end of the old polling area. 2329 mask = TCR_8(lck->lk.mask); // volatile load 2330 polls = lck->lk.polls; // volatile load 2331 } 2332 2333 // Critical section starts here 2334 KMP_FSYNC_ACQUIRED(lck); 2335 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2336 ticket, lck)); 2337 lck->lk.now_serving = ticket; // non-volatile store 2338 2339 // Deallocate a garbage polling area if we know that we are the last 2340 // thread that could possibly access it. 2341 // 2342 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2343 // ticket. 2344 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2345 __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.old_polls)); 2346 lck->lk.old_polls = NULL; 2347 lck->lk.cleanup_ticket = 0; 2348 } 2349 2350 // Check to see if we should reconfigure the polling area. 2351 // If there is still a garbage polling area to be deallocated from a 2352 // previous reconfiguration, let a later thread reconfigure it. 2353 if (lck->lk.old_polls == NULL) { 2354 bool reconfigure = false; 2355 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2356 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2357 2358 if (TCR_4(__kmp_nth) > 2359 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2360 // We are in oversubscription mode. Contract the polling area 2361 // down to a single location, if that hasn't been done already. 2362 if (num_polls > 1) { 2363 reconfigure = true; 2364 num_polls = TCR_4(lck->lk.num_polls); 2365 mask = 0; 2366 num_polls = 1; 2367 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2368 __kmp_allocate(num_polls * sizeof(*polls)); 2369 polls[0].poll = ticket; 2370 } 2371 } else { 2372 // We are in under/fully subscribed mode. Check the number of 2373 // threads waiting on the lock. The size of the polling area 2374 // should be at least the number of threads waiting. 
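      // The doubling loop below keeps mask == num_polls - 1. As an
      // illustrative example: with num_polls == 4, mask == 3 and nine waiters,
      // two iterations leave num_polls == 16 and mask == 0xf.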
2375 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2376 if (num_waiting > num_polls) { 2377 kmp_uint32 old_num_polls = num_polls; 2378 reconfigure = true; 2379 do { 2380 mask = (mask << 1) | 1; 2381 num_polls *= 2; 2382 } while (num_polls <= num_waiting); 2383 2384 // Allocate the new polling area, and copy the relevant portion 2385 // of the old polling area to the new area. __kmp_allocate() 2386 // zeroes the memory it allocates, and most of the old area is 2387 // just zero padding, so we only copy the release counters. 2388 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2389 __kmp_allocate(num_polls * sizeof(*polls)); 2390 kmp_uint32 i; 2391 for (i = 0; i < old_num_polls; i++) { 2392 polls[i].poll = old_polls[i].poll; 2393 } 2394 } 2395 } 2396 2397 if (reconfigure) { 2398 // Now write the updated fields back to the lock structure. 2399 // 2400 // Make certain that "polls" is written before "mask" !!! 2401 // 2402 // If another thread picks up the new value of mask and the old polls 2403 // pointer , it could access memory beyond the end of the old polling 2404 // area. 2405 // 2406 // On x86, we need memory fences. 2407 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " 2408 "lock %p to %d polls\n", 2409 ticket, lck, num_polls)); 2410 2411 lck->lk.old_polls = old_polls; // non-volatile store 2412 lck->lk.polls = polls; // volatile store 2413 2414 KMP_MB(); 2415 2416 lck->lk.num_polls = num_polls; // non-volatile store 2417 lck->lk.mask = mask; // volatile store 2418 2419 KMP_MB(); 2420 2421 // Only after the new polling area and mask have been flushed 2422 // to main memory can we update the cleanup ticket field. 2423 // 2424 // volatile load / non-volatile store 2425 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2426 } 2427 } 2428 return KMP_LOCK_ACQUIRED_FIRST; 2429 } 2430 2431 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2432 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2433 ANNOTATE_DRDPA_ACQUIRED(lck); 2434 return retval; 2435 } 2436 2437 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2438 kmp_int32 gtid) { 2439 char const *const func = "omp_set_lock"; 2440 if (lck->lk.initialized != lck) { 2441 KMP_FATAL(LockIsUninitialized, func); 2442 } 2443 if (__kmp_is_drdpa_lock_nestable(lck)) { 2444 KMP_FATAL(LockNestableUsedAsSimple, func); 2445 } 2446 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { 2447 KMP_FATAL(LockIsAlreadyOwned, func); 2448 } 2449 2450 __kmp_acquire_drdpa_lock(lck, gtid); 2451 2452 lck->lk.owner_id = gtid + 1; 2453 return KMP_LOCK_ACQUIRED_FIRST; 2454 } 2455 2456 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2457 // First get a ticket, then read the polls pointer and the mask. 2458 // The polls pointer must be read before the mask!!! 
(See above) 2459 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2460 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; 2461 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2462 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2463 kmp_uint64 next_ticket = ticket + 1; 2464 if (KMP_COMPARE_AND_STORE_ACQ64(&lck->lk.next_ticket, ticket, 2465 next_ticket)) { 2466 KMP_FSYNC_ACQUIRED(lck); 2467 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2468 ticket, lck)); 2469 lck->lk.now_serving = ticket; // non-volatile store 2470 2471 // Since no threads are waiting, there is no possibility that we would 2472 // want to reconfigure the polling area. We might have the cleanup ticket 2473 // value (which says that it is now safe to deallocate old_polls), but 2474 // we'll let a later thread which calls __kmp_acquire_lock do that - this 2475 // routine isn't supposed to block, and we would risk blocks if we called 2476 // __kmp_free() to do the deallocation. 2477 return TRUE; 2478 } 2479 } 2480 return FALSE; 2481 } 2482 2483 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2484 kmp_int32 gtid) { 2485 char const *const func = "omp_test_lock"; 2486 if (lck->lk.initialized != lck) { 2487 KMP_FATAL(LockIsUninitialized, func); 2488 } 2489 if (__kmp_is_drdpa_lock_nestable(lck)) { 2490 KMP_FATAL(LockNestableUsedAsSimple, func); 2491 } 2492 2493 int retval = __kmp_test_drdpa_lock(lck, gtid); 2494 2495 if (retval) { 2496 lck->lk.owner_id = gtid + 1; 2497 } 2498 return retval; 2499 } 2500 2501 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2502 // Read the ticket value from the lock data struct, then the polls pointer and 2503 // the mask. The polls pointer must be read before the mask!!! (See above) 2504 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2505 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; 2506 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2507 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2508 ticket - 1, lck)); 2509 KMP_FSYNC_RELEASING(lck); 2510 ANNOTATE_DRDPA_RELEASED(lck); 2511 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2512 return KMP_LOCK_RELEASED; 2513 } 2514 2515 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2516 kmp_int32 gtid) { 2517 char const *const func = "omp_unset_lock"; 2518 KMP_MB(); /* in case another processor initialized lock */ 2519 if (lck->lk.initialized != lck) { 2520 KMP_FATAL(LockIsUninitialized, func); 2521 } 2522 if (__kmp_is_drdpa_lock_nestable(lck)) { 2523 KMP_FATAL(LockNestableUsedAsSimple, func); 2524 } 2525 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2526 KMP_FATAL(LockUnsettingFree, func); 2527 } 2528 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && 2529 (__kmp_get_drdpa_lock_owner(lck) != gtid)) { 2530 KMP_FATAL(LockUnsettingSetByAnother, func); 2531 } 2532 lck->lk.owner_id = 0; 2533 return __kmp_release_drdpa_lock(lck, gtid); 2534 } 2535 2536 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { 2537 lck->lk.location = NULL; 2538 lck->lk.mask = 0; 2539 lck->lk.num_polls = 1; 2540 lck->lk.polls = 2541 (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)__kmp_allocate( 2542 lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2543 lck->lk.cleanup_ticket = 0; 2544 lck->lk.old_polls = NULL; 2545 lck->lk.next_ticket = 0; 2546 lck->lk.now_serving = 0; 2547 lck->lk.owner_id = 0; // no thread owns the lock. 
2548 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 2549 lck->lk.initialized = lck; 2550 2551 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2552 } 2553 2554 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2555 __kmp_init_drdpa_lock(lck); 2556 } 2557 2558 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { 2559 lck->lk.initialized = NULL; 2560 lck->lk.location = NULL; 2561 if (lck->lk.polls != NULL) { 2562 __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.polls)); 2563 lck->lk.polls = NULL; 2564 } 2565 if (lck->lk.old_polls != NULL) { 2566 __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.old_polls)); 2567 lck->lk.old_polls = NULL; 2568 } 2569 lck->lk.mask = 0; 2570 lck->lk.num_polls = 0; 2571 lck->lk.cleanup_ticket = 0; 2572 lck->lk.next_ticket = 0; 2573 lck->lk.now_serving = 0; 2574 lck->lk.owner_id = 0; 2575 lck->lk.depth_locked = -1; 2576 } 2577 2578 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2579 char const *const func = "omp_destroy_lock"; 2580 if (lck->lk.initialized != lck) { 2581 KMP_FATAL(LockIsUninitialized, func); 2582 } 2583 if (__kmp_is_drdpa_lock_nestable(lck)) { 2584 KMP_FATAL(LockNestableUsedAsSimple, func); 2585 } 2586 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2587 KMP_FATAL(LockStillOwned, func); 2588 } 2589 __kmp_destroy_drdpa_lock(lck); 2590 } 2591 2592 // nested drdpa ticket locks 2593 2594 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2595 KMP_DEBUG_ASSERT(gtid >= 0); 2596 2597 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2598 lck->lk.depth_locked += 1; 2599 return KMP_LOCK_ACQUIRED_NEXT; 2600 } else { 2601 __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2602 ANNOTATE_DRDPA_ACQUIRED(lck); 2603 KMP_MB(); 2604 lck->lk.depth_locked = 1; 2605 KMP_MB(); 2606 lck->lk.owner_id = gtid + 1; 2607 return KMP_LOCK_ACQUIRED_FIRST; 2608 } 2609 } 2610 2611 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2612 kmp_int32 gtid) { 2613 char const *const func = "omp_set_nest_lock"; 2614 if (lck->lk.initialized != lck) { 2615 KMP_FATAL(LockIsUninitialized, func); 2616 } 2617 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2618 KMP_FATAL(LockSimpleUsedAsNestable, func); 2619 } 2620 __kmp_acquire_nested_drdpa_lock(lck, gtid); 2621 } 2622 2623 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2624 int retval; 2625 2626 KMP_DEBUG_ASSERT(gtid >= 0); 2627 2628 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2629 retval = ++lck->lk.depth_locked; 2630 } else if (!__kmp_test_drdpa_lock(lck, gtid)) { 2631 retval = 0; 2632 } else { 2633 KMP_MB(); 2634 retval = lck->lk.depth_locked = 1; 2635 KMP_MB(); 2636 lck->lk.owner_id = gtid + 1; 2637 } 2638 return retval; 2639 } 2640 2641 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2642 kmp_int32 gtid) { 2643 char const *const func = "omp_test_nest_lock"; 2644 if (lck->lk.initialized != lck) { 2645 KMP_FATAL(LockIsUninitialized, func); 2646 } 2647 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2648 KMP_FATAL(LockSimpleUsedAsNestable, func); 2649 } 2650 return __kmp_test_nested_drdpa_lock(lck, gtid); 2651 } 2652 2653 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2654 KMP_DEBUG_ASSERT(gtid >= 0); 2655 2656 KMP_MB(); 2657 if (--(lck->lk.depth_locked) == 0) { 2658 KMP_MB(); 2659 lck->lk.owner_id = 0; 2660 __kmp_release_drdpa_lock(lck, gtid); 2661 return KMP_LOCK_RELEASED; 2662 } 2663 
return KMP_LOCK_STILL_HELD; 2664 } 2665 2666 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2667 kmp_int32 gtid) { 2668 char const *const func = "omp_unset_nest_lock"; 2669 KMP_MB(); /* in case another processor initialized lock */ 2670 if (lck->lk.initialized != lck) { 2671 KMP_FATAL(LockIsUninitialized, func); 2672 } 2673 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2674 KMP_FATAL(LockSimpleUsedAsNestable, func); 2675 } 2676 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2677 KMP_FATAL(LockUnsettingFree, func); 2678 } 2679 if (__kmp_get_drdpa_lock_owner(lck) != gtid) { 2680 KMP_FATAL(LockUnsettingSetByAnother, func); 2681 } 2682 return __kmp_release_nested_drdpa_lock(lck, gtid); 2683 } 2684 2685 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2686 __kmp_init_drdpa_lock(lck); 2687 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2688 } 2689 2690 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2691 __kmp_init_nested_drdpa_lock(lck); 2692 } 2693 2694 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2695 __kmp_destroy_drdpa_lock(lck); 2696 lck->lk.depth_locked = 0; 2697 } 2698 2699 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2700 char const *const func = "omp_destroy_nest_lock"; 2701 if (lck->lk.initialized != lck) { 2702 KMP_FATAL(LockIsUninitialized, func); 2703 } 2704 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2705 KMP_FATAL(LockSimpleUsedAsNestable, func); 2706 } 2707 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2708 KMP_FATAL(LockStillOwned, func); 2709 } 2710 __kmp_destroy_nested_drdpa_lock(lck); 2711 } 2712 2713 // access functions to fields which don't exist for all lock kinds. 2714 2715 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { 2716 return lck == lck->lk.initialized; 2717 } 2718 2719 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { 2720 return lck->lk.location; 2721 } 2722 2723 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, 2724 const ident_t *loc) { 2725 lck->lk.location = loc; 2726 } 2727 2728 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { 2729 return lck->lk.flags; 2730 } 2731 2732 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, 2733 kmp_lock_flags_t flags) { 2734 lck->lk.flags = flags; 2735 } 2736 2737 // Time stamp counter 2738 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2739 #define __kmp_tsc() __kmp_hardware_timestamp() 2740 // Runtime's default backoff parameters 2741 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; 2742 #else 2743 // Use nanoseconds for other platforms 2744 extern kmp_uint64 __kmp_now_nsec(); 2745 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; 2746 #define __kmp_tsc() __kmp_now_nsec() 2747 #endif 2748 2749 // A useful predicate for dealing with timestamps that may wrap. 2750 // Is a before b? Since the timestamps may wrap, this is asking whether it's 2751 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 2752 // Times where going clockwise is less distance than going anti-clockwise 2753 // are in the future, others are in the past. e.g. 
a = MAX-1, b = MAX+1 (=0), 2754 // then a > b (true) does not mean a reached b; whereas signed(a) = -2, 2755 // signed(b) = 0 captures the actual difference 2756 static inline bool before(kmp_uint64 a, kmp_uint64 b) { 2757 return ((kmp_int64)b - (kmp_int64)a) > 0; 2758 } 2759 2760 // Truncated binary exponential backoff function 2761 void __kmp_spin_backoff(kmp_backoff_t *boff) { 2762 // We could flatten this loop, but making it a nested loop gives better result 2763 kmp_uint32 i; 2764 for (i = boff->step; i > 0; i--) { 2765 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 2766 do { 2767 KMP_CPU_PAUSE(); 2768 } while (before(__kmp_tsc(), goal)); 2769 } 2770 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); 2771 } 2772 2773 #if KMP_USE_DYNAMIC_LOCK 2774 2775 // Direct lock initializers. It simply writes a tag to the low 8 bits of the 2776 // lock word. 2777 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, 2778 kmp_dyna_lockseq_t seq) { 2779 TCW_4(*lck, KMP_GET_D_TAG(seq)); 2780 KA_TRACE( 2781 20, 2782 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 2783 } 2784 2785 #if KMP_USE_TSX 2786 2787 // HLE lock functions - imported from the testbed runtime. 2788 #define HLE_ACQUIRE ".byte 0xf2;" 2789 #define HLE_RELEASE ".byte 0xf3;" 2790 2791 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { 2792 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); 2793 return v; 2794 } 2795 2796 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } 2797 2798 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2799 // Use gtid for KMP_LOCK_BUSY if necessary 2800 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 2801 int delay = 1; 2802 do { 2803 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 2804 for (int i = delay; i != 0; --i) 2805 KMP_CPU_PAUSE(); 2806 delay = ((delay << 1) | 1) & 7; 2807 } 2808 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 2809 } 2810 } 2811 2812 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2813 kmp_int32 gtid) { 2814 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 2815 } 2816 2817 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2818 __asm__ volatile(HLE_RELEASE "movl %1,%0" 2819 : "=m"(*lck) 2820 : "r"(KMP_LOCK_FREE(hle)) 2821 : "memory"); 2822 return KMP_LOCK_RELEASED; 2823 } 2824 2825 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2826 kmp_int32 gtid) { 2827 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 2828 } 2829 2830 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2831 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 2832 } 2833 2834 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2835 kmp_int32 gtid) { 2836 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 2837 } 2838 2839 static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { 2840 __kmp_init_queuing_lock(lck); 2841 } 2842 2843 static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { 2844 __kmp_destroy_queuing_lock(lck); 2845 } 2846 2847 static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2848 unsigned retries = 3, status; 2849 do { 2850 status = _xbegin(); 2851 if (status == _XBEGIN_STARTED) { 2852 if (__kmp_is_unlocked_queuing_lock(lck)) 2853 return; 2854 _xabort(0xff); 2855 } 2856 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 2857 // Wait until 
lock becomes free 2858 while (!__kmp_is_unlocked_queuing_lock(lck)) 2859 __kmp_yield(TRUE); 2860 } else if (!(status & _XABORT_RETRY)) 2861 break; 2862 } while (retries--); 2863 2864 // Fall-back non-speculative lock (xchg) 2865 __kmp_acquire_queuing_lock(lck, gtid); 2866 } 2867 2868 static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2869 kmp_int32 gtid) { 2870 __kmp_acquire_rtm_lock(lck, gtid); 2871 } 2872 2873 static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2874 if (__kmp_is_unlocked_queuing_lock(lck)) { 2875 // Releasing from speculation 2876 _xend(); 2877 } else { 2878 // Releasing from a real lock 2879 __kmp_release_queuing_lock(lck, gtid); 2880 } 2881 return KMP_LOCK_RELEASED; 2882 } 2883 2884 static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2885 kmp_int32 gtid) { 2886 return __kmp_release_rtm_lock(lck, gtid); 2887 } 2888 2889 static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2890 unsigned retries = 3, status; 2891 do { 2892 status = _xbegin(); 2893 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 2894 return 1; 2895 } 2896 if (!(status & _XABORT_RETRY)) 2897 break; 2898 } while (retries--); 2899 2900 return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0; 2901 } 2902 2903 static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2904 kmp_int32 gtid) { 2905 return __kmp_test_rtm_lock(lck, gtid); 2906 } 2907 2908 #endif // KMP_USE_TSX 2909 2910 // Entry functions for indirect locks (first element of direct lock jump tables) 2911 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, 2912 kmp_dyna_lockseq_t tag); 2913 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); 2914 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2915 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2916 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2917 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2918 kmp_int32); 2919 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2920 kmp_int32); 2921 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2922 kmp_int32); 2923 2924 // Jump tables for the indirect lock functions 2925 // Only fill in the odd entries, that avoids the need to shift out the low bit 2926 2927 // init functions 2928 #define expand(l, op) 0, __kmp_init_direct_lock, 2929 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { 2930 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; 2931 #undef expand 2932 2933 // destroy functions 2934 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 2935 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) = { 2936 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 2937 #undef expand 2938 2939 // set/acquire functions 2940 #define expand(l, op) \ 2941 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2942 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { 2943 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; 2944 #undef expand 2945 #define expand(l, op) \ 2946 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2947 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2948 __kmp_set_indirect_lock_with_checks, 0, 2949 KMP_FOREACH_D_LOCK(expand, acquire)}; 2950 #undef expand 2951 2952 // unset/release and test functions 2953 #define 
expand(l, op) \ 2954 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2955 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { 2956 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; 2957 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { 2958 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2959 #undef expand 2960 #define expand(l, op) \ 2961 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2962 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2963 __kmp_unset_indirect_lock_with_checks, 0, 2964 KMP_FOREACH_D_LOCK(expand, release)}; 2965 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2966 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2967 #undef expand 2968 2969 // Exposes only one set of jump tables (*lock or *lock_with_checks). 2970 int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; 2971 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; 2972 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; 2973 2974 // Jump tables for the indirect lock functions 2975 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, 2976 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { 2977 KMP_FOREACH_I_LOCK(expand, init)}; 2978 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { 2979 KMP_FOREACH_I_LOCK(expand, destroy)}; 2980 #undef expand 2981 2982 // set/acquire functions 2983 #define expand(l, op) \ 2984 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 2985 static int (*indirect_set[])(kmp_user_lock_p, 2986 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)}; 2987 #undef expand 2988 #define expand(l, op) \ 2989 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 2990 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { 2991 KMP_FOREACH_I_LOCK(expand, acquire)}; 2992 #undef expand 2993 2994 // unset/release and test functions 2995 #define expand(l, op) \ 2996 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 2997 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { 2998 KMP_FOREACH_I_LOCK(expand, release)}; 2999 static int (*indirect_test[])(kmp_user_lock_p, 3000 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)}; 3001 #undef expand 3002 #define expand(l, op) \ 3003 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 3004 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { 3005 KMP_FOREACH_I_LOCK(expand, release)}; 3006 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { 3007 KMP_FOREACH_I_LOCK(expand, test)}; 3008 #undef expand 3009 3010 // Exposes only one jump tables (*lock or *lock_with_checks). 3011 int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; 3012 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; 3013 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; 3014 3015 // Lock index table. 3016 kmp_indirect_lock_table_t __kmp_i_lock_table; 3017 3018 // Size of indirect locks. 3019 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0}; 3020 3021 // Jump tables for lock accessor/modifier. 
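// These tables are indexed by kmp_indirect_locktag_t; entries are left NULL
// for lock kinds (e.g. nested tas/futex) that do not record a source location
// or flags (see the fill_table expansion in __kmp_init_dynamic_user_locks
// below).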
3022 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3023 const ident_t *) = {0}; 3024 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3025 kmp_lock_flags_t) = {0}; 3026 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])( 3027 kmp_user_lock_p) = {0}; 3028 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( 3029 kmp_user_lock_p) = {0}; 3030 3031 // Use different lock pools for different lock types. 3032 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0}; 3033 3034 // User lock allocator for dynamically dispatched indirect locks. Every entry of 3035 // the indirect lock table holds the address and type of the allocated indrect 3036 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is 3037 // full. A destroyed indirect lock object is returned to the reusable pool of 3038 // locks, unique to each lock type. 3039 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, 3040 kmp_int32 gtid, 3041 kmp_indirect_locktag_t tag) { 3042 kmp_indirect_lock_t *lck; 3043 kmp_lock_index_t idx; 3044 3045 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3046 3047 if (__kmp_indirect_lock_pool[tag] != NULL) { 3048 // Reuse the allocated and destroyed lock object 3049 lck = __kmp_indirect_lock_pool[tag]; 3050 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3051 idx = lck->lock->pool.index; 3052 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3053 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", 3054 lck)); 3055 } else { 3056 idx = __kmp_i_lock_table.next; 3057 // Check capacity and double the size if it is full 3058 if (idx == __kmp_i_lock_table.size) { 3059 // Double up the space for block pointers 3060 int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; 3061 kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate( 3062 2 * row * sizeof(kmp_indirect_lock_t *)); 3063 KMP_MEMCPY(new_table, __kmp_i_lock_table.table, 3064 row * sizeof(kmp_indirect_lock_t *)); 3065 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; 3066 __kmp_i_lock_table.table = new_table; 3067 __kmp_free(old_table); 3068 // Allocate new objects in the new blocks 3069 for (int i = row; i < 2 * row; ++i) 3070 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate( 3071 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3072 __kmp_i_lock_table.size = 2 * idx; 3073 } 3074 __kmp_i_lock_table.next++; 3075 lck = KMP_GET_I_LOCK(idx); 3076 // Allocate a new base lock object 3077 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3078 KA_TRACE(20, 3079 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); 3080 } 3081 3082 __kmp_release_lock(&__kmp_global_lock, gtid); 3083 3084 lck->type = tag; 3085 3086 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3087 *((kmp_lock_index_t *)user_lock) = idx 3088 << 1; // indirect lock word must be even 3089 } else { 3090 *((kmp_indirect_lock_t **)user_lock) = lck; 3091 } 3092 3093 return lck; 3094 } 3095 3096 // User lock lookup for dynamically dispatched locks. 
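// When OMP_LOCK_T_SIZE < sizeof(void *), the user's lock variable holds the
// table index shifted left by one (kept even so it cannot be confused with a
// direct lock word, whose low tag bit is set); otherwise it holds the
// kmp_indirect_lock_t pointer itself. The decode step, sketched under that
// assumption:
//
//   kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); // stored word >> 1
//   kmp_indirect_lock_t *l = KMP_GET_I_LOCK(idx);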
3097 static __forceinline kmp_indirect_lock_t * 3098 __kmp_lookup_indirect_lock(void **user_lock, const char *func) { 3099 if (__kmp_env_consistency_check) { 3100 kmp_indirect_lock_t *lck = NULL; 3101 if (user_lock == NULL) { 3102 KMP_FATAL(LockIsUninitialized, func); 3103 } 3104 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3105 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3106 if (idx >= __kmp_i_lock_table.size) { 3107 KMP_FATAL(LockIsUninitialized, func); 3108 } 3109 lck = KMP_GET_I_LOCK(idx); 3110 } else { 3111 lck = *((kmp_indirect_lock_t **)user_lock); 3112 } 3113 if (lck == NULL) { 3114 KMP_FATAL(LockIsUninitialized, func); 3115 } 3116 return lck; 3117 } else { 3118 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3119 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3120 } else { 3121 return *((kmp_indirect_lock_t **)user_lock); 3122 } 3123 } 3124 } 3125 3126 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, 3127 kmp_dyna_lockseq_t seq) { 3128 #if KMP_USE_ADAPTIVE_LOCKS 3129 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3130 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3131 seq = lockseq_queuing; 3132 } 3133 #endif 3134 #if KMP_USE_TSX 3135 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3136 seq = lockseq_queuing; 3137 } 3138 #endif 3139 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3140 kmp_indirect_lock_t *l = 3141 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3142 KMP_I_LOCK_FUNC(l, init)(l->lock); 3143 KA_TRACE( 3144 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", 3145 seq)); 3146 } 3147 3148 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { 3149 kmp_uint32 gtid = __kmp_entry_gtid(); 3150 kmp_indirect_lock_t *l = 3151 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3152 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3153 kmp_indirect_locktag_t tag = l->type; 3154 3155 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3156 3157 // Use the base lock's space to keep the pool chain. 
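  // (pool.next and pool.index overlay the already-destroyed lock's own storage
  // via the kmp_user_lock union, so the per-type free list needs no extra
  // allocation.)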
3158 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3159 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3160 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3161 } 3162 __kmp_indirect_lock_pool[tag] = l; 3163 3164 __kmp_release_lock(&__kmp_global_lock, gtid); 3165 } 3166 3167 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3168 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3169 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3170 } 3171 3172 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3173 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3174 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3175 } 3176 3177 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3178 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3179 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3180 } 3181 3182 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3183 kmp_int32 gtid) { 3184 kmp_indirect_lock_t *l = 3185 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3186 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3187 } 3188 3189 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3190 kmp_int32 gtid) { 3191 kmp_indirect_lock_t *l = 3192 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3193 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3194 } 3195 3196 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3197 kmp_int32 gtid) { 3198 kmp_indirect_lock_t *l = 3199 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3200 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3201 } 3202 3203 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3204 3205 // This is used only in kmp_error.cpp when consistency checking is on. 3206 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { 3207 switch (seq) { 3208 case lockseq_tas: 3209 case lockseq_nested_tas: 3210 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3211 #if KMP_USE_FUTEX 3212 case lockseq_futex: 3213 case lockseq_nested_futex: 3214 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3215 #endif 3216 case lockseq_ticket: 3217 case lockseq_nested_ticket: 3218 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3219 case lockseq_queuing: 3220 case lockseq_nested_queuing: 3221 #if KMP_USE_ADAPTIVE_LOCKS 3222 case lockseq_adaptive: 3223 #endif 3224 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3225 case lockseq_drdpa: 3226 case lockseq_nested_drdpa: 3227 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3228 default: 3229 return 0; 3230 } 3231 } 3232 3233 // Initializes data for dynamic user locks. 3234 void __kmp_init_dynamic_user_locks() { 3235 // Initialize jump table for the lock functions 3236 if (__kmp_env_consistency_check) { 3237 __kmp_direct_set = direct_set_check; 3238 __kmp_direct_unset = direct_unset_check; 3239 __kmp_direct_test = direct_test_check; 3240 __kmp_indirect_set = indirect_set_check; 3241 __kmp_indirect_unset = indirect_unset_check; 3242 __kmp_indirect_test = indirect_test_check; 3243 } else { 3244 __kmp_direct_set = direct_set; 3245 __kmp_direct_unset = direct_unset; 3246 __kmp_direct_test = direct_test; 3247 __kmp_indirect_set = indirect_set; 3248 __kmp_indirect_unset = indirect_unset; 3249 __kmp_indirect_test = indirect_test; 3250 } 3251 // If the user locks have already been initialized, then return. 
Allow the 3252 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate 3253 // new lock tables if they have already been allocated. 3254 if (__kmp_init_user_locks) 3255 return; 3256 3257 // Initialize lock index table 3258 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3259 __kmp_i_lock_table.table = 3260 (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3261 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( 3262 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3263 __kmp_i_lock_table.next = 0; 3264 3265 // Indirect lock size 3266 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3267 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3268 #if KMP_USE_ADAPTIVE_LOCKS 3269 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3270 #endif 3271 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3272 #if KMP_USE_TSX 3273 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3274 #endif 3275 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3276 #if KMP_USE_FUTEX 3277 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3278 #endif 3279 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3280 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3281 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3282 3283 // Initialize lock accessor/modifier 3284 #define fill_jumps(table, expand, sep) \ 3285 { \ 3286 table[locktag##sep##ticket] = expand(ticket); \ 3287 table[locktag##sep##queuing] = expand(queuing); \ 3288 table[locktag##sep##drdpa] = expand(drdpa); \ 3289 } 3290 3291 #if KMP_USE_ADAPTIVE_LOCKS 3292 #define fill_table(table, expand) \ 3293 { \ 3294 fill_jumps(table, expand, _); \ 3295 table[locktag_adaptive] = expand(queuing); \ 3296 fill_jumps(table, expand, _nested_); \ 3297 } 3298 #else 3299 #define fill_table(table, expand) \ 3300 { \ 3301 fill_jumps(table, expand, _); \ 3302 fill_jumps(table, expand, _nested_); \ 3303 } 3304 #endif // KMP_USE_ADAPTIVE_LOCKS 3305 3306 #define expand(l) \ 3307 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location 3308 fill_table(__kmp_indirect_set_location, expand); 3309 #undef expand 3310 #define expand(l) \ 3311 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags 3312 fill_table(__kmp_indirect_set_flags, expand); 3313 #undef expand 3314 #define expand(l) \ 3315 (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location 3316 fill_table(__kmp_indirect_get_location, expand); 3317 #undef expand 3318 #define expand(l) \ 3319 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags 3320 fill_table(__kmp_indirect_get_flags, expand); 3321 #undef expand 3322 3323 __kmp_init_user_locks = TRUE; 3324 } 3325 3326 // Clean up the lock table. 3327 void __kmp_cleanup_indirect_user_locks() { 3328 kmp_lock_index_t i; 3329 int k; 3330 3331 // Clean up locks in the pools first (they were already destroyed before going 3332 // into the pools). 

// Clean up the lock table.
void __kmp_cleanup_indirect_user_locks() {
  kmp_lock_index_t i;
  int k;

  // Clean up locks in the pools first (they were already destroyed before going
  // into the pools).
  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
    while (l != NULL) {
      kmp_indirect_lock_t *ll = l;
      l = (kmp_indirect_lock_t *)l->lock->pool.next;
      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
                    ll));
      __kmp_free(ll->lock);
      ll->lock = NULL;
    }
    __kmp_indirect_lock_pool[k] = NULL;
  }
  // Clean up the remaining undestroyed locks.
  for (i = 0; i < __kmp_i_lock_table.next; i++) {
    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
    if (l->lock != NULL) {
      // Locks not destroyed explicitly need to be destroyed here.
      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
      KA_TRACE(
          20,
          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
           l));
      __kmp_free(l->lock);
    }
  }
  // Free the table
  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
    __kmp_free(__kmp_i_lock_table.table[i]);
  __kmp_free(__kmp_i_lock_table.table);

  __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;
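
/* Illustrative sketch (an assumption): once __kmp_set_user_lock_vptrs() below
 * has bound these pointers to one lock kind, the generic entry points used
 * elsewhere in the runtime (for the non-dynamic-lock build) simply forward
 * through them, roughly:
 *
 *   static inline int __kmp_acquire_user_lock_with_checks(kmp_user_lock_p lck,
 *                                                         kmp_int32 gtid) {
 *     KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL);
 *     return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
 *   }
 *
 * The real forwarding wrappers live in kmp_lock.h; the body shown here is a
 * simplified sketch, not the actual implementation.
 */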

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;
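
    /* Illustrative sketch (an assumption): KMP_BIND_USER_LOCK(kind), presumably
     * defined in kmp_lock.h, points the generic function pointers at the
     * kind-specific entry points, along the lines of
     *
     *   __kmp_acquire_user_lock_with_checks_ =
     *       (int (*)(kmp_user_lock_p, kmp_int32))(&__kmp_acquire_ticket_lock);
     *
     * while the _WITH_CHECKS variant binds the checked entry points instead.
     * The casts are needed because each lock kind takes its own lock type
     * (kmp_ticket_lock_t *, kmp_queuing_lock_t *, ...), not kmp_user_lock_p,
     * matching the explicit casts visible in the cases of this switch.
     */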

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}
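
/* Illustrative usage (hypothetical call site, for clarity only): the runtime
 * selects one lock kind during initialization, e.g. while parsing the
 * KMP_LOCK_KIND setting, and then binds the whole function-pointer table once:
 *
 *   __kmp_user_lock_kind = lk_queuing;              // chosen kind
 *   __kmp_set_user_lock_vptrs(__kmp_user_lock_kind);
 *
 * After this point every user lock created through __kmp_user_lock_allocate()
 * below is of that single kind, so each lock operation is one indirect call
 * rather than a per-call switch.
 */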

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first element
    // of the new table. All the tables are linked into a list and can be freed
    // when the library shuts down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}
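
/* Memory layout produced by __kmp_lock_block_allocate() above (derived from
 * the code, shown here only as an aid):
 *
 *   buffer:  [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
 *              ^                                  ^
 *              new_block->locks                   new_block (header placed
 *                                                 after the locks it tracks)
 *
 * where N == __kmp_num_locks_in_block and each slot is __kmp_user_lock_size
 * bytes. Blocks are chained through next_block, which is why the cleanup code
 * frees block_ptr->locks (the whole buffer) rather than block_ptr itself.
 */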

// Get memory for a lock. It may be freshly allocated memory or memory reused
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    // ANNOTATION: Found no good way to express the synchronization
    // between allocation and usage, so ignore the allocation.
    ANNOTATE_IGNORE_WRITES_BEGIN();
    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }
    ANNOTATE_IGNORE_WRITES_END();

    // Insert the lock in the table so that it can be freed in __kmp_cleanup,
    // and so the debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper

  return lck;
}

// Return the lock's memory to the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}
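
/* Summary of the handle encoding used by __kmp_user_lock_allocate() and
 * __kmp_lookup_user_lock() above (derived from the code; the concrete sizes
 * are just an example):
 *
 *   if (OMP_LOCK_T_SIZE < sizeof(void *))  // e.g. 4-byte omp_lock_t, 64-bit OS
 *     *user_lock holds a kmp_lock_index_t  //   index into
 *                                          //   __kmp_user_lock_table.table
 *   else
 *     *user_lock holds the kmp_user_lock_p //   direct pointer to the lock
 *
 * Entry [0] of the table is never handed out; when the table grows it stores
 * the pointer to the previous (smaller) table, which is why valid indices
 * start at 1.
 */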

void __kmp_cleanup_user_locks(void) {
  // Reset the lock pool. Don't worry about the locks in the pool -- we will
  // free them when iterating through the lock table (it includes all locks,
  // dead or alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through lock table, free all locks.
  // Do not free item [0], it is reserved for lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type is
  // currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
  // for "pool" objects on the free list. This must happen as the "location"
  // field of real user locks overlaps the "index" field of "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of the lock table stays consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Clean up the lock's internal dynamic resources (drdpa locks in
      // particular).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK