/*
 * kmp_lock.cpp -- lock-related functions
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems
// on different Linux* OS distributions that either require that you include
// (or break when you try to include) <pci/types.h>. Since all we need is the
// two macros below (which are part of the kernel ABI, so can't change), we
// just define the constants here and don't include <futex.h>.
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic wraps properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(TCR_4(lck->lk.poll)) - 1;
}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}
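// The acquire path below follows the usual test-and-set pattern: a cheap read
// of the poll word first, then a compare-and-swap, with backoff and yielding
// between retries. The following is a minimal, illustrative sketch of that
// pattern written against plain std::atomic (the simple_tas_lock type and its
// members are hypothetical, not part of this library); it is compiled out and
// kept purely for reference.
#if 0
struct simple_tas_lock {
  std::atomic<int> poll{0}; // 0 == free, otherwise owner gtid + 1

  void acquire(int gtid) {
    int free_val = 0;
    // Fast path: the lock looks free and the CAS claims it.
    if (poll.load(std::memory_order_relaxed) == 0 &&
        poll.compare_exchange_strong(free_val, gtid + 1,
                                     std::memory_order_acquire))
      return;
    // Slow path: retry until the CAS succeeds; a real implementation backs
    // off between attempts and yields when the machine is oversubscribed.
    for (;;) {
      free_val = 0;
      if (poll.load(std::memory_order_relaxed) == 0 &&
          poll.compare_exchange_strong(free_val, gtid + 1,
                                       std::memory_order_acquire))
        return;
    }
  }

  void release() { poll.store(0, std::memory_order_release); }
};
#endif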
__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if ((lck->lk.poll == KMP_LOCK_FREE(tas)) &&
      KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
                                  KMP_LOCK_BUSY(gtid + 1, tas))) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
    KMP_YIELD(TRUE);
  } else {
    KMP_YIELD_SPIN(spins);
  }

  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  while ((lck->lk.poll != KMP_LOCK_FREE(tas)) ||
         (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
                                       KMP_LOCK_BUSY(gtid + 1, tas)))) {

    __kmp_spin_backoff(&backoff);
    if (TCR_4(__kmp_nth) >
        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
      KMP_YIELD(TRUE);
    } else {
      KMP_YIELD_SPIN(spins);
    }
  }
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
  ANNOTATE_TAS_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_tas_lock(lck, gtid);
}

int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  if ((lck->lk.poll == KMP_LOCK_FREE(tas)) &&
      KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
                                  KMP_LOCK_BUSY(gtid + 1, tas))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_tas_lock(lck, gtid);
}

int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_TAS_RELEASED(lck);
  KMP_ST_REL32(&(lck->lk.poll), KMP_LOCK_FREE(tas));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_YIELD(TCR_4(__kmp_nth) >
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
      (__kmp_get_tas_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_tas_lock(lck, gtid);
}

void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(tas));
}

static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}

void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_tas_lock(lck);
}
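// The nested ("nest_lock") variants below layer a recursion count on top of
// the plain lock: if the calling thread already owns the lock, only the depth
// is bumped; otherwise the underlying lock is acquired and the depth is set
// to 1. A hedged sketch of that idea (the owner/depth fields here are
// illustrative and do not mirror the kmp_tas_lock_t layout):
#if 0
struct simple_nested_lock {
  std::atomic<int> poll{0}; // owner gtid + 1, or 0 when free
  int depth = 0;            // only touched while the lock is held

  void acquire(int gtid) {
    if (poll.load(std::memory_order_relaxed) == gtid + 1) {
      ++depth; // re-entrant acquire by the current owner
      return;
    }
    int expected = 0;
    while (!poll.compare_exchange_weak(expected, gtid + 1,
                                       std::memory_order_acquire))
      expected = 0;
    depth = 1;
  }

  void release() {
    if (--depth == 0)
      poll.store(0, std::memory_order_release);
  }
};
#endif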
// nested test and set locks

int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_tas_lock_timed_template(lck, gtid);
    ANNOTATE_TAS_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_tas_lock(lck, gtid);
}

int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_tas_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_tas_lock(lck, gtid);
}

int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_tas_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_tas_lock(lck, gtid);
}

void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}

void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_destroy_tas_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention. They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).

static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
}

static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}
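// The futex lock stores ((gtid + 1) << 1) in lk.poll; bit 0 is set once some
// waiter has gone (or is about to go) to sleep in the kernel, so the release
// path only issues a FUTEX_WAKE when a thread may actually be blocked. The
// sketch below illustrates that protocol with bare std::atomic and the raw
// futex syscall (function names are hypothetical, and it assumes the atomic
// int is address-compatible with a futex word); it is compiled out and does
// not reflect the exact code paths used by this library.
#if 0
static void futex_sketch_acquire(std::atomic<int> *poll, int gtid) {
  int val = (gtid + 1) << 1;
  int expected = 0;
  while (!poll->compare_exchange_strong(expected, val,
                                        std::memory_order_acquire)) {
    int observed = expected;
    // Publish the "contended" bit so the owner knows to wake us, then sleep
    // until the word changes from the value we published.
    if ((observed & 1) ||
        poll->compare_exchange_strong(observed, observed | 1,
                                      std::memory_order_relaxed)) {
      syscall(__NR_futex, poll, FUTEX_WAIT, observed | 1, NULL, NULL, 0);
      val |= 1; // we may have slept, so wake a successor on release
    }
    expected = 0;
  }
}

static void futex_sketch_release(std::atomic<int> *poll) {
  // Hand the word back to "free"; wake one waiter only if the contended bit
  // was set while we held the lock.
  if (poll->exchange(0, std::memory_order_release) & 1)
    syscall(__NR_futex, poll, FUTEX_WAKE, 1, NULL, NULL, 0);
}
#endif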
__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 gtid_code = (gtid + 1) << 1;

  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid_code))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  KMP_FSYNC_PREPARE(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  kmp_int32 poll_val;

  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
              &(lck->lk.poll), KMP_LOCK_FREE(futex),
              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {

    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
         lck, gtid, poll_val, cond));

    // NOTE: if you try to use the following condition for this branch
    //
    // if ( poll_val & 1 == 0 )
    //
    // Then the 12.0 compiler has a bug where the following block will
    // always be skipped, regardless of the value of the LSB of poll_val.
    if (!cond) {
      // Try to set the lsb in the poll to indicate to the owner
      // thread that they need to wake this thread up.
      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
        KA_TRACE(
            1000,
            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
             lck, lck->lk.poll, gtid));
        continue;
      }
      poll_val |= KMP_LOCK_BUSY(1, futex);

      KA_TRACE(1000,
               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
                lck->lk.poll, gtid));
    }

    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
         lck, gtid, poll_val));

    kmp_int32 rc;
    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
                      NULL, 0)) != 0) {
      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
                      "failed (rc=%d errno=%d)\n",
                      lck, gtid, poll_val, rc, errno));
      continue;
    }

    KA_TRACE(1000,
             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and was entered on
    // the OS futex queue. We must now perform a futex wake call when releasing
    // the lock, as we have no idea how many other threads are in the queue.
    gtid_code |= 1;
  }

  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
  ANNOTATE_FUTEX_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_futex_lock(lck, gtid);
}

int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_futex_lock(lck, gtid);
}

int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_FUTEX_RELEASED(lck);

  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));

  KA_TRACE(1000,
           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
            lck, gtid, poll_val));

  if (KMP_LOCK_STRIP(poll_val) & 1) {
    KA_TRACE(1000,
             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
              lck, gtid));
    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
            NULL, NULL, 0);
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));

  KMP_YIELD(TCR_4(__kmp_nth) >
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
      (__kmp_get_futex_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_futex_lock(lck, gtid);
}

void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
}

static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_futex_lock(lck);
}

// nested futex locks

int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_futex_lock_timed_template(lck, gtid);
    ANNOTATE_FUTEX_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_futex_lock(lck, gtid);
}

int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_futex_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_futex_lock(lck, gtid);
}

int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_futex_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_futex_lock(lck, gtid);
}

void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}

void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_destroy_futex_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_futex_lock(lck);
}

#endif // KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.owner_id,
                                   std::memory_order_relaxed) -
         1;
}

static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.depth_locked,
                                   std::memory_order_relaxed) != -1;
}

static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
                                   std::memory_order_acquire) == my_ticket;
}
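// A ticket (bakery) lock hands out monotonically increasing tickets with a
// relaxed fetch_add and admits each thread when now_serving reaches its
// ticket; release simply advances now_serving. An illustrative sketch of the
// scheme with bare std::atomic counters (not the kmp_ticket_lock_t layout):
#if 0
struct simple_ticket_lock {
  std::atomic<unsigned> next_ticket{0};
  std::atomic<unsigned> now_serving{0};

  void acquire() {
    // Take a ticket; ticket order is the FIFO admission order.
    unsigned my_ticket = next_ticket.fetch_add(1, std::memory_order_relaxed);
    // Wait for our turn; the acquire load pairs with the release increment
    // in release().
    while (now_serving.load(std::memory_order_acquire) != my_ticket)
      ; // a real implementation spins with yield/backoff here
  }

  void release() { now_serving.fetch_add(1, std::memory_order_release); }
};
#endif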
__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);

#ifdef USE_LOCK_PROFILE
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) != my_ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_acquire) == my_ticket) {
    return KMP_LOCK_ACQUIRED_FIRST;
  }
  KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
  ANNOTATE_TICKET_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_set_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_ticket_lock(lck, gtid);

  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                             std::memory_order_relaxed);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                   std::memory_order_relaxed);

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) == my_ticket) {
    kmp_uint32 next_ticket = my_ticket + 1;
    if (std::atomic_compare_exchange_strong_explicit(
            &lck->lk.next_ticket, &my_ticket, next_ticket,
            std::memory_order_acquire, std::memory_order_acquire)) {
      return TRUE;
    }
  }
  return FALSE;
}

static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_test_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_ticket_lock(lck, gtid);

  if (retval) {
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
  }
  return retval;
}
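// __kmp_test_ticket_lock above avoids blocking by taking a ticket only when
// that ticket would be served immediately: it reads next_ticket, compares it
// with now_serving, and then claims the ticket with a CAS on next_ticket, so
// a failed attempt leaves the queue untouched. A sketch of the same trick for
// the simplified counters used in the illustration earlier in this file:
#if 0
static bool simple_ticket_try_acquire(std::atomic<unsigned> &next_ticket,
                                      std::atomic<unsigned> &now_serving) {
  unsigned t = next_ticket.load(std::memory_order_relaxed);
  // Only attempt the claim if the ticket we would take is already being
  // served; otherwise we would have to wait, which a try-lock must not do.
  return now_serving.load(std::memory_order_relaxed) == t &&
         next_ticket.compare_exchange_strong(t, t + 1,
                                             std::memory_order_acquire);
}
#endif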
int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                  std::memory_order_relaxed) -
                        std::atomic_load_explicit(&lck->lk.now_serving,
                                                  std::memory_order_relaxed);

  ANNOTATE_TICKET_RELEASED(lck);
  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
                                 std::memory_order_release);

  KMP_YIELD(distance >
            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
      (__kmp_get_ticket_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  return __kmp_release_ticket_lock(lck, gtid);
}

void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.self = lck;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(
      &lck->lk.owner_id, 0,
      std::memory_order_relaxed); // no thread owns the lock.
  std::atomic_store_explicit(
      &lck->lk.depth_locked, -1,
      std::memory_order_relaxed); // -1 => not a nested lock.
  std::atomic_store_explicit(&lck->lk.initialized, true,
                             std::memory_order_release);
}

static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
  std::atomic_store_explicit(&lck->lk.initialized, false,
                             std::memory_order_release);
  lck->lk.self = NULL;
  lck->lk.location = NULL;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
                             std::memory_order_relaxed);
}

static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_ticket_lock(lck);
}

// nested ticket locks

int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                   std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_ticket_lock_timed_template(lck, gtid);
    ANNOTATE_TICKET_ACQUIRED(lck);
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}
static int
__kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_ticket_lock(lck, gtid);
}

int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                            std::memory_order_relaxed) +
             1;
  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
    retval = 0;
  } else {
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    retval = 1;
  }
  return retval;
}

static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_ticket_lock(lck, gtid);
}

int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
                                      std::memory_order_relaxed) -
       1) == 0) {
    std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
    __kmp_release_ticket_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_ticket_lock(lck, gtid);
}

void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
  // >= 0 for nestable locks, -1 for simple locks
}

static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}

void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
}

static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_ticket_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.initialized,
                                   std::memory_order_relaxed) &&
         (lck->lk.self == lck);
}

static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
*/
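/* Several of the transitions above must update head_id and tail_id together:
   (0,0)->(-1,0) only touches the head, but (-1,0)->(tid,tid) and
   (h,h)->(-1,0) change both fields at once. The implementation therefore
   relies on the two 32-bit ids being adjacent and suitably aligned so the
   pair can be treated as one 64-bit word and swung with a single wide CAS
   (KMP_PACK_64 / KMP_COMPARE_AND_STORE_ACQ64 below); the offsetof assertion
   in __kmp_validate_locks guards that layout. A hedged sketch of the packing
   idea with standard atomics (field and function names are illustrative
   only): */
#if 0
struct queue_word_sketch {
  static uint64_t pack(int32_t head, int32_t tail) {
    return (uint64_t)(uint32_t)head | ((uint64_t)(uint32_t)tail << 32);
  }
  std::atomic<uint64_t> head_tail{pack(0, 0)};

  // (-1,0) -> (tid,tid): the first waiter enqueues behind a held, empty lock.
  bool enqueue_first(int32_t tid) {
    uint64_t expected = pack(-1, 0);
    return head_tail.compare_exchange_strong(expected, pack(tid, tid),
                                             std::memory_order_acquire);
  }
};
#endif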
#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = {0};
static int tc = 0;
#define TRACE_LOCK(X, Y)                                                       \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
#define TRACE_LOCK_T(X, Y, Z)                                                  \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
               Z, Q);

static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");

  i = tc % TRACE_BUF_ELE;
  __kmp_printf_no_lock("%s\n", traces[i]);
  i = (i + 1) % TRACE_BUF_ELE;
  while (i != (tc % TRACE_BUF_ELE)) {
    __kmp_printf_no_lock("%s", traces[i]);
    i = (i + 1) % TRACE_BUF_ELE;
  }
  __kmp_printf_no_lock("\n");

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
                       "next_wait:%d, head_id:%d, tail_id:%d\n",
                       gtid + 1, this_thr->th.th_spin_here,
                       this_thr->th.th_next_waiting, head_id, tail_id);

  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);

  if (lck->lk.head_id >= 1) {
    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
    while (t > 0) {
      __kmp_printf_no_lock("-> %d ", t);
      t = __kmp_threads[t - 1]->th.th_next_waiting;
    }
  }
  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
  __kmp_printf_no_lock("\n\n");
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
  return TCR_4(lck->lk.owner_id) - 1;
}

static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {
  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
  volatile kmp_uint32 *spin_here_p;
  kmp_int32 need_mf = 1;

#if OMPT_SUPPORT
  ompt_state_t prev_state = ompt_state_undefined;
#endif

  KA_TRACE(1000,
           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));

  KMP_FSYNC_PREPARE(lck);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  spin_here_p = &this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "acq ent");
  if (*spin_here_p)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!*spin_here_p);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
     head_id_p that may follow, not just in execution order, but also in
     visibility order. This way, when a releasing thread observes the changes
     to the queue by this thread, it can rightly assume that spin_here_p has
     already been set to TRUE, so that when it sets spin_here_p to FALSE, it
     is not premature. If the releasing thread sets spin_here_p to FALSE
     before this thread sets it to TRUE, this thread will hang. */
  *spin_here_p = TRUE; /* before enqueuing to prevent race */

  while (1) {
    kmp_int32 enqueued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

    switch (head) {

    case -1: {
#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      tail = 0; /* to make sure next link asynchronously read is not set
                   accidentally; this assignment prevents us from entering the
                   if ( t > 0 ) condition in the enqueued case below, which is
                   not necessary for this state transition */

      need_mf = 0;
      /* try (-1,0)->(tid,tid) */
      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
                                             KMP_PACK_64(-1, 0),
                                             KMP_PACK_64(gtid + 1, gtid + 1));
#ifdef DEBUG_QUEUING_LOCKS
      if (enqueued)
        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
#endif
    } break;

    default: {
      tail = *tail_id_p;
      KMP_DEBUG_ASSERT(tail != gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif

      if (tail == 0) {
        enqueued = FALSE;
      } else {
        need_mf = 0;
        /* try (h,t) or (h,h)->(h,tid) */
        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
        if (enqueued)
          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
#endif
      }
    } break;

    case 0: /* empty queue */
    {
      kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      /* try (0,0)->(-1,0) */

      /* only legal transition out of head = 0 is head = -1 with no change to
       * tail */
      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);

      if (grabbed_lock) {

        *spin_here_p = FALSE;

        KA_TRACE(
            1000,
            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
#endif

#if OMPT_SUPPORT
        if (ompt_enabled && prev_state != ompt_state_undefined) {
          /* change the state before clearing wait_id */
          this_thr->th.ompt_thread_info.state = prev_state;
          this_thr->th.ompt_thread_info.wait_id = 0;
        }
#endif

        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
      }
      enqueued = FALSE;
    } break;
    }

#if OMPT_SUPPORT
    if (ompt_enabled && prev_state == ompt_state_undefined) {
      /* this thread will spin; set wait_id before entering wait state */
      prev_state = this_thr->th.ompt_thread_info.state;
      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
    }
#endif

    if (enqueued) {
      if (tail > 0) {
        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
        KMP_ASSERT(tail_thr != NULL);
        tail_thr->th.th_next_waiting = gtid + 1;
        /* corresponding wait for this write in release code */
      }
      KA_TRACE(1000,
               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
                lck, gtid));

      /* ToDo: May want to consider using __kmp_wait_sleep or something that
         sleeps for throughput only here. */
      KMP_MB();
      KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq spin");

      if (this_thr->th.th_next_waiting != 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
                      "waiting on queue\n",
                      lck, gtid));

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq exit 2");
#endif

#if OMPT_SUPPORT
      /* change the state before clearing wait_id */
      this_thr->th.ompt_thread_info.state = prev_state;
      this_thr->th.ompt_thread_info.wait_id = 0;
#endif

      /* got lock, we were dequeued by the thread that released lock */
      return KMP_LOCK_ACQUIRED_FIRST;
    }

    /* Yield if number of threads > number of logical processors */
    /* ToDo: Not sure why this should only be in oversubscription case,
       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
    KMP_YIELD(TCR_4(__kmp_nth) >
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "acq retry");
#endif
  }
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
  ANNOTATE_QUEUING_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_queuing_lock(lck, gtid);

  lck->lk.owner_id = gtid + 1;
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  kmp_int32 head;
#ifdef KMP_DEBUG
  kmp_info_t *this_thr;
#endif

  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
#ifdef KMP_DEBUG
  this_thr = __kmp_thread_from_gtid(gtid);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
#endif

  head = *head_id_p;

  if (head == 0) { /* nobody on queue, nobody holding */
    /* try (0,0)->(-1,0) */
    if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
      KA_TRACE(1000,
               ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
      KMP_FSYNC_ACQUIRED(lck);
      ANNOTATE_QUEUING_ACQUIRED(lck);
      return TRUE;
    }
  }

  KA_TRACE(1000,
           ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
  return FALSE;
}
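/* __kmp_release_queuing_lock below undoes the transitions drawn in the state
   diagram above: with no waiters (head == -1) it CASes the head back to 0;
   with exactly one waiter (head == tail) it swings the packed (head,tail)
   word to (-1,0); otherwise it waits for the dequeued waiter's
   th_next_waiting link to become visible, installs that successor as the new
   queue head, and then clears the dequeued waiter's th_spin_here flag so its
   KMP_WAIT_YIELD spin exits and it becomes the new owner. */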
static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                               kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_queuing_lock(lck, gtid);

  if (retval) {
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}

int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  kmp_info_t *this_thr;
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;

  KA_TRACE(1000,
           ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
  this_thr = __kmp_thread_from_gtid(gtid);
  KMP_DEBUG_ASSERT(this_thr != NULL);
#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "rel ent");

  if (this_thr->th.th_spin_here)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_QUEUING_RELEASED(lck);

  while (1) {
    kmp_int32 dequeued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
    tail = *tail_id_p;
    TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
    if (head == 0)
      __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
    KMP_DEBUG_ASSERT(head !=
                     0); /* holding the lock, head must be -1 or queue head */

    if (head == -1) { /* nobody on queue */
      /* try (-1,0)->(0,0) */
      if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
        KA_TRACE(
            1000,
            ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
#endif

#if OMPT_SUPPORT
        /* nothing to do - no other thread is trying to shift blame */
#endif
        return KMP_LOCK_RELEASED;
      }
      dequeued = FALSE;
    } else {
      tail = *tail_id_p;
      if (head == tail) { /* only one thread on the queue */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0);

        /* try (h,h)->(-1,0) */
        dequeued = KMP_COMPARE_AND_STORE_REL64(
            RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
            KMP_PACK_64(-1, 0));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
#endif

      } else {
        volatile kmp_int32 *waiting_id_p;
        kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
        KMP_DEBUG_ASSERT(head_thr != NULL);
        waiting_id_p = &head_thr->th.th_next_waiting;

/* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0 || tail <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0 && tail > 0);

        /* try (h,t)->(h',t) or (t,t) */
        KMP_MB();
        /* make sure enqueuing thread has time to update next waiting thread
         * field */
        *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
                                    KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
#endif
        dequeued = TRUE;
      }
    }

    if (dequeued) {
      kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
      KMP_DEBUG_ASSERT(head_thr != NULL);

/* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
      if (head <= 0 || tail <= 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
      KMP_DEBUG_ASSERT(head > 0 && tail > 0);

      /* For clean code only. Thread not released until next statement prevents
         race with acquire code. */
      head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
#endif

      KMP_MB();
      /* reset spin value */
      head_thr->th.th_spin_here = FALSE;

      KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
                      "dequeuing\n",
                      lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "rel exit 2");
#endif
      return KMP_LOCK_RELEASED;
    }
    /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
       threads */

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "rel retry");
#endif

  } /* while */
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  lck->lk.owner_id = 0;
  return __kmp_release_queuing_lock(lck, gtid);
}
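// Note that the consistency-checking variants treat a queuing lock as
// initialized only when lk.initialized points back at the lock itself; the
// init/destroy routines below set and clear that self-pointer, which makes
// stale or zero-filled memory fail the LockIsUninitialized checks cheaply.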
void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.head_id = 0;
  lck->lk.tail_id = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0; // no thread owns the lock.
  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
  lck->lk.initialized = lck;

  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}

static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}

void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
  lck->lk.initialized = NULL;
  lck->lk.location = NULL;
  lck->lk.head_id = 0;
  lck->lk.tail_id = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0;
  lck->lk.depth_locked = -1;
}

static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_queuing_lock(lck);
}

// nested queuing locks

int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
    ANNOTATE_QUEUING_ACQUIRED(lck);
    KMP_MB();
    lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int
__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_queuing_lock(lck, gtid);
}

int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}

static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                      kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_queuing_lock(lck, gtid);
}

int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    KMP_MB();
    lck->lk.owner_id = 0;
    __kmp_release_queuing_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int
__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_queuing_lock(lck, gtid);
}

void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

static void
__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_nested_queuing_lock(lck);
}

void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_destroy_queuing_lock(lck);
  lck->lk.depth_locked = 0;
}

static void
__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_queuing_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
                                            const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
                                         kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

#if KMP_USE_ADAPTIVE_LOCKS

/* RTM Adaptive locks */

#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300

#include <immintrin.h>
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#else

// Values from the status register after failed speculation.
#define _XBEGIN_STARTED (~0u)
#define _XABORT_EXPLICIT (1 << 0)
#define _XABORT_RETRY (1 << 1)
#define _XABORT_CONFLICT (1 << 2)
#define _XABORT_CAPACITY (1 << 3)
#define _XABORT_DEBUG (1 << 4)
#define _XABORT_NESTED (1 << 5)
#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))

// Aborts for which it's worth trying again immediately
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

#define STRINGIZE_INTERNAL(arg) #arg
#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)

// Access to RTM instructions
/* A version of XBegin which returns -1 on speculation, and the value of EAX on
   an abort. This is the same definition as the compiler intrinsic that will be
   supported at some point. */
*/ 1787 static __inline int _xbegin() { 1788 int res = -1; 1789 1790 #if KMP_OS_WINDOWS 1791 #if KMP_ARCH_X86_64 1792 _asm { 1793 _emit 0xC7 1794 _emit 0xF8 1795 _emit 2 1796 _emit 0 1797 _emit 0 1798 _emit 0 1799 jmp L2 1800 mov res, eax 1801 L2: 1802 } 1803 #else /* IA32 */ 1804 _asm { 1805 _emit 0xC7 1806 _emit 0xF8 1807 _emit 2 1808 _emit 0 1809 _emit 0 1810 _emit 0 1811 jmp L2 1812 mov res, eax 1813 L2: 1814 } 1815 #endif // KMP_ARCH_X86_64 1816 #else 1817 /* Note that %eax must be noted as killed (clobbered), because the XSR is 1818 returned in %eax(%rax) on abort. Other register values are restored, so 1819 don't need to be killed. 1820 1821 We must also mark 'res' as an input and an output, since otherwise 1822 'res=-1' may be dropped as being dead, whereas we do need the assignment on 1823 the successful (i.e., non-abort) path. */ 1824 __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" 1825 " .long 1f-1b-6\n" 1826 " jmp 2f\n" 1827 "1: movl %%eax,%0\n" 1828 "2:" 1829 : "+r"(res)::"memory", "%eax"); 1830 #endif // KMP_OS_WINDOWS 1831 return res; 1832 } 1833 1834 /* Transaction end */ 1835 static __inline void _xend() { 1836 #if KMP_OS_WINDOWS 1837 __asm { 1838 _emit 0x0f 1839 _emit 0x01 1840 _emit 0xd5 1841 } 1842 #else 1843 __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); 1844 #endif 1845 } 1846 1847 /* This is a macro, the argument must be a single byte constant which can be 1848 evaluated by the inline assembler, since it is emitted as a byte into the 1849 assembly code. */ 1850 // clang-format off 1851 #if KMP_OS_WINDOWS 1852 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG 1853 #else 1854 #define _xabort(ARG) \ 1855 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); 1856 #endif 1857 // clang-format on 1858 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1859 1860 // Statistics is collected for testing purpose 1861 #if KMP_DEBUG_ADAPTIVE_LOCKS 1862 1863 // We accumulate speculative lock statistics when the lock is destroyed. We 1864 // keep locks that haven't been destroyed in the liveLocks list so that we can 1865 // grab their statistics too. 1866 static kmp_adaptive_lock_statistics_t destroyedStats; 1867 1868 // To hold the list of live locks. 1869 static kmp_adaptive_lock_info_t liveLocks; 1870 1871 // A lock so we can safely update the list of locks. 1872 static kmp_bootstrap_lock_t chain_lock; 1873 1874 // Initialize the list of stats. 
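// liveLocks is the sentinel of a circular doubly-linked list threaded through
// each lock's stats.next / stats.prev fields, so an empty list is just the
// sentinel pointing at itself.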
1875 void __kmp_init_speculative_stats() { 1876 kmp_adaptive_lock_info_t *lck = &liveLocks; 1877 1878 memset((void *)&(lck->stats), 0, sizeof(lck->stats)); 1879 lck->stats.next = lck; 1880 lck->stats.prev = lck; 1881 1882 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1883 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1884 1885 __kmp_init_bootstrap_lock(&chain_lock); 1886 } 1887 1888 // Insert the lock into the circular list 1889 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { 1890 __kmp_acquire_bootstrap_lock(&chain_lock); 1891 1892 lck->stats.next = liveLocks.stats.next; 1893 lck->stats.prev = &liveLocks; 1894 1895 liveLocks.stats.next = lck; 1896 lck->stats.next->stats.prev = lck; 1897 1898 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1899 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1900 1901 __kmp_release_bootstrap_lock(&chain_lock); 1902 } 1903 1904 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { 1905 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1906 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1907 1908 kmp_adaptive_lock_info_t *n = lck->stats.next; 1909 kmp_adaptive_lock_info_t *p = lck->stats.prev; 1910 1911 n->stats.prev = p; 1912 p->stats.next = n; 1913 } 1914 1915 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1916 memset((void *)&lck->stats, 0, sizeof(lck->stats)); 1917 __kmp_remember_lock(lck); 1918 } 1919 1920 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, 1921 kmp_adaptive_lock_info_t *lck) { 1922 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 1923 1924 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 1925 t->successfulSpeculations += s->successfulSpeculations; 1926 t->hardFailedSpeculations += s->hardFailedSpeculations; 1927 t->softFailedSpeculations += s->softFailedSpeculations; 1928 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 1929 t->lemmingYields += s->lemmingYields; 1930 } 1931 1932 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1933 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 1934 1935 __kmp_acquire_bootstrap_lock(&chain_lock); 1936 1937 __kmp_add_stats(&destroyedStats, lck); 1938 __kmp_forget_lock(lck); 1939 1940 __kmp_release_bootstrap_lock(&chain_lock); 1941 } 1942 1943 static float percent(kmp_uint32 count, kmp_uint32 total) { 1944 return (total == 0) ? 0.0 : (100.0 * count) / total; 1945 } 1946 1947 static FILE *__kmp_open_stats_file() { 1948 if (strcmp(__kmp_speculative_statsfile, "-") == 0) 1949 return stdout; 1950 1951 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; 1952 char buffer[buffLen]; 1953 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, 1954 (kmp_int32)getpid()); 1955 FILE *result = fopen(&buffer[0], "w"); 1956 1957 // Maybe we should issue a warning here... 1958 return result ? 
result : stdout; 1959 } 1960 1961 void __kmp_print_speculative_stats() { 1962 if (__kmp_user_lock_kind != lk_adaptive) 1963 return; 1964 1965 FILE *statsFile = __kmp_open_stats_file(); 1966 1967 kmp_adaptive_lock_statistics_t total = destroyedStats; 1968 kmp_adaptive_lock_info_t *lck; 1969 1970 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 1971 __kmp_add_stats(&total, lck); 1972 } 1973 kmp_adaptive_lock_statistics_t *t = &total; 1974 kmp_uint32 totalSections = 1975 t->nonSpeculativeAcquires + t->successfulSpeculations; 1976 kmp_uint32 totalSpeculations = t->successfulSpeculations + 1977 t->hardFailedSpeculations + 1978 t->softFailedSpeculations; 1979 1980 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); 1981 fprintf(statsFile, " Lock parameters: \n" 1982 " max_soft_retries : %10d\n" 1983 " max_badness : %10d\n", 1984 __kmp_adaptive_backoff_params.max_soft_retries, 1985 __kmp_adaptive_backoff_params.max_badness); 1986 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", 1987 t->nonSpeculativeAcquireAttempts); 1988 fprintf(statsFile, " Total critical sections : %10d\n", 1989 totalSections); 1990 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", 1991 t->successfulSpeculations, 1992 percent(t->successfulSpeculations, totalSections)); 1993 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 1994 t->nonSpeculativeAcquires, 1995 percent(t->nonSpeculativeAcquires, totalSections)); 1996 fprintf(statsFile, " Lemming yields : %10d\n\n", 1997 t->lemmingYields); 1998 1999 fprintf(statsFile, " Speculative acquire attempts : %10d\n", 2000 totalSpeculations); 2001 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", 2002 t->successfulSpeculations, 2003 percent(t->successfulSpeculations, totalSpeculations)); 2004 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", 2005 t->softFailedSpeculations, 2006 percent(t->softFailedSpeculations, totalSpeculations)); 2007 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", 2008 t->hardFailedSpeculations, 2009 percent(t->hardFailedSpeculations, totalSpeculations)); 2010 2011 if (statsFile != stdout) 2012 fclose(statsFile); 2013 } 2014 2015 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) 2016 #else 2017 #define KMP_INC_STAT(lck, stat) 2018 2019 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2020 2021 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { 2022 // It is enough to check that the head_id is zero. 2023 // We don't also need to check the tail. 2024 bool res = lck->lk.head_id == 0; 2025 2026 // We need a fence here, since we must ensure that no memory operations 2027 // from later in this thread float above that read. 2028 #if KMP_COMPILER_ICC 2029 _mm_mfence(); 2030 #else 2031 __sync_synchronize(); 2032 #endif 2033 2034 return res; 2035 } 2036 2037 // Functions for manipulating the badness 2038 static __inline void 2039 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { 2040 // Reset the badness to zero so we eagerly try to speculate again 2041 lck->lk.adaptive.badness = 0; 2042 KMP_INC_STAT(lck, successfulSpeculations); 2043 } 2044 2045 // Create a bit mask with one more set bit. 2046 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { 2047 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; 2048 if (newBadness > lck->lk.adaptive.max_badness) { 2049 return; 2050 } else { 2051 lck->lk.adaptive.badness = newBadness; 2052 } 2053 } 2054 2055 // Check whether speculation should be attempted. 
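// Speculation is attempted only when (acquire_attempts & badness) == 0.
// badness is a mask of low-order ones that gains one bit per failed round
// (see __kmp_step_badness) and is cleared on success, so, for example
// (illustrative values): badness == 0 speculates on every acquisition, while
// badness == 0x3 speculates on roughly one acquisition in four.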
2056 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, 2057 kmp_int32 gtid) { 2058 kmp_uint32 badness = lck->lk.adaptive.badness; 2059 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; 2060 int res = (attempts & badness) == 0; 2061 return res; 2062 } 2063 2064 // Attempt to acquire only the speculative lock. 2065 // Does not back off to the non-speculative lock. 2066 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, 2067 kmp_int32 gtid) { 2068 int retries = lck->lk.adaptive.max_soft_retries; 2069 2070 // We don't explicitly count the start of speculation, rather we record the 2071 // results (success, hard fail, soft fail). The sum of all of those is the 2072 // total number of times we started speculation since all speculations must 2073 // end one of those ways. 2074 do { 2075 kmp_uint32 status = _xbegin(); 2076 // Switch this in to disable actual speculation but exercise at least some 2077 // of the rest of the code. Useful for debugging... 2078 // kmp_uint32 status = _XABORT_NESTED; 2079 2080 if (status == _XBEGIN_STARTED) { 2081 /* We have successfully started speculation. Check that no-one acquired 2082 the lock for real between when we last looked and now. This also gets 2083 the lock cache line into our read-set, which we need so that we'll 2084 abort if anyone later claims it for real. */ 2085 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2086 // Lock is now visibly acquired, so someone beat us to it. Abort the 2087 // transaction so we'll restart from _xbegin with the failure status. 2088 _xabort(0x01); 2089 KMP_ASSERT2(0, "should not get here"); 2090 } 2091 return 1; // Lock has been acquired (speculatively) 2092 } else { 2093 // We have aborted, update the statistics 2094 if (status & SOFT_ABORT_MASK) { 2095 KMP_INC_STAT(lck, softFailedSpeculations); 2096 // and loop round to retry. 2097 } else { 2098 KMP_INC_STAT(lck, hardFailedSpeculations); 2099 // Give up if we had a hard failure. 2100 break; 2101 } 2102 } 2103 } while (retries--); // Loop while we have retries, and didn't fail hard. 2104 2105 // Either we had a hard failure or we didn't succeed softly after 2106 // the full set of attempts, so back off the badness. 2107 __kmp_step_badness(lck); 2108 return 0; 2109 } 2110 2111 // Attempt to acquire the speculative lock, or back off to the non-speculative 2112 // one if the speculative lock cannot be acquired. 2113 // We can succeed speculatively, non-speculatively, or fail. 2114 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { 2115 // First try to acquire the lock speculatively 2116 if (__kmp_should_speculate(lck, gtid) && 2117 __kmp_test_adaptive_lock_only(lck, gtid)) 2118 return 1; 2119 2120 // Speculative acquisition failed, so try to acquire it non-speculatively. 2121 // Count the non-speculative acquire attempt 2122 lck->lk.adaptive.acquire_attempts++; 2123 2124 // Use base, non-speculative lock. 2125 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { 2126 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2127 return 1; // Lock is acquired (non-speculatively) 2128 } else { 2129 return 0; // Failed to acquire the lock, it's already visibly locked. 
  }
}

static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }

  int retval = __kmp_test_adaptive_lock(lck, gtid);

  if (retval) {
    lck->lk.qlk.owner_id = gtid + 1;
  }
  return retval;
}

// Block until we can acquire a speculative, adaptive lock. We check whether we
// should be trying to speculate. If we should be, we check the real lock to
// see if it is free, and, if not, pause without attempting to acquire it until
// it is. Then we try the speculative acquire. This means that although we
// suffer from lemmings a little (because we can't acquire the lock
// speculatively until the queue of waiting threads has cleared), we don't get
// into a state where we can never acquire the lock speculatively (because we
// force the queue to clear by preventing new arrivals from entering the
// queue). This does mean that when we're trying to break lemmings, the lock is
// no longer fair. However OpenMP makes no guarantee that its locks are fair,
// so this isn't a real problem.
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
                                        kmp_int32 gtid) {
  if (__kmp_should_speculate(lck, gtid)) {
    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
      // We tried speculation and failed, so give up.
    } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queuing lock) to allow it to drain, then
      // try again. All other threads will also see the same result from
      // __kmp_should_speculate, so will be doing the same if they try to
      // claim the lock from now on.
      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        KMP_INC_STAT(lck, lemmingYields);
        __kmp_yield(TRUE);
      }

      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
    }
  }

  // Speculative acquisition failed, so acquire it non-speculatively.
  // Count the non-speculative acquire attempt.
  lck->lk.adaptive.acquire_attempts++;

  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
  // We have acquired the base lock, so count that.
  KMP_INC_STAT(lck, nonSpeculativeAcquires);
  ANNOTATE_QUEUING_ACQUIRED(lck);
}

static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_adaptive_lock(lck, gtid);

  lck->lk.qlk.owner_id = gtid + 1;
}

static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
                                       kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
          lck))) { // If the lock doesn't look claimed we must be speculating.
    // (Or the user's code is buggy and they're releasing without locking;
    // if we had XTEST we'd be able to check that case...)
2213 _xend(); // Exit speculation 2214 __kmp_update_badness_after_success(lck); 2215 } else { // Since the lock *is* visibly locked we're not speculating, 2216 // so should use the underlying lock's release scheme. 2217 __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); 2218 } 2219 return KMP_LOCK_RELEASED; 2220 } 2221 2222 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2223 kmp_int32 gtid) { 2224 char const *const func = "omp_unset_lock"; 2225 KMP_MB(); /* in case another processor initialized lock */ 2226 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2227 KMP_FATAL(LockIsUninitialized, func); 2228 } 2229 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { 2230 KMP_FATAL(LockUnsettingFree, func); 2231 } 2232 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { 2233 KMP_FATAL(LockUnsettingSetByAnother, func); 2234 } 2235 lck->lk.qlk.owner_id = 0; 2236 __kmp_release_adaptive_lock(lck, gtid); 2237 return KMP_LOCK_RELEASED; 2238 } 2239 2240 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { 2241 __kmp_init_queuing_lock(GET_QLK_PTR(lck)); 2242 lck->lk.adaptive.badness = 0; 2243 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; 2244 lck->lk.adaptive.max_soft_retries = 2245 __kmp_adaptive_backoff_params.max_soft_retries; 2246 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2247 #if KMP_DEBUG_ADAPTIVE_LOCKS 2248 __kmp_zero_speculative_stats(&lck->lk.adaptive); 2249 #endif 2250 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2251 } 2252 2253 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2254 __kmp_init_adaptive_lock(lck); 2255 } 2256 2257 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { 2258 #if KMP_DEBUG_ADAPTIVE_LOCKS 2259 __kmp_accumulate_speculative_stats(&lck->lk.adaptive); 2260 #endif 2261 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); 2262 // Nothing needed for the speculative part. 2263 } 2264 2265 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2266 char const *const func = "omp_destroy_lock"; 2267 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2268 KMP_FATAL(LockIsUninitialized, func); 2269 } 2270 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) { 2271 KMP_FATAL(LockStillOwned, func); 2272 } 2273 __kmp_destroy_adaptive_lock(lck); 2274 } 2275 2276 #endif // KMP_USE_ADAPTIVE_LOCKS 2277 2278 /* ------------------------------------------------------------------------ */ 2279 /* DRDPA ticket locks */ 2280 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2281 2282 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) { 2283 return TCR_4(lck->lk.owner_id) - 1; 2284 } 2285 2286 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) { 2287 return lck->lk.depth_locked != -1; 2288 } 2289 2290 __forceinline static int 2291 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2292 kmp_uint64 ticket = 2293 KMP_TEST_THEN_INC64(RCAST(volatile kmp_int64 *, &lck->lk.next_ticket)); 2294 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2295 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; 2296 2297 #ifdef USE_LOCK_PROFILE 2298 if (TCR_8(polls[ticket & mask].poll) != ticket) 2299 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2300 /* else __kmp_printf( "." 
);*/
#endif /* USE_LOCK_PROFILE */

  // Now spin-wait, but reload the polls pointer and mask, in case the
  // polling area has been reconfigured. Unless it is reconfigured, the
  // reloads stay in L1 cache and are cheap.
  //
  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
  //
  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
  // and poll to be re-read every spin iteration.
  kmp_uint32 spins;

  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
    // If we are oversubscribed,
    // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
    // CPU Pause is in the macros for yield.
    //
    KMP_YIELD(TCR_4(__kmp_nth) >
              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
    KMP_YIELD_SPIN(spins);

    // Re-read the mask and the poll pointer from the lock structure.
    //
    // Make certain that "mask" is read before "polls" !!!
    //
    // If another thread reconfigures the polling area and updates these
    // values, and we get the new value of mask and the old polls pointer, we
    // could access memory beyond the end of the old polling area.
    mask = TCR_8(lck->lk.mask); // volatile load
    polls = lck->lk.polls; // volatile load
  }

  // Critical section starts here
  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
                  ticket, lck));
  lck->lk.now_serving = ticket; // non-volatile store

  // Deallocate a garbage polling area if we know that we are the last
  // thread that could possibly access it.
  //
  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
  // ticket.
  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
    __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.old_polls));
    lck->lk.old_polls = NULL;
    lck->lk.cleanup_ticket = 0;
  }

  // Check to see if we should reconfigure the polling area.
  // If there is still a garbage polling area to be deallocated from a
  // previous reconfiguration, let a later thread reconfigure it.
  if (lck->lk.old_polls == NULL) {
    bool reconfigure = false;
    volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
    kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);

    if (TCR_4(__kmp_nth) >
        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
      // We are in oversubscription mode. Contract the polling area
      // down to a single location, if that hasn't been done already.
      if (num_polls > 1) {
        reconfigure = true;
        num_polls = TCR_4(lck->lk.num_polls);
        mask = 0;
        num_polls = 1;
        polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
            __kmp_allocate(num_polls * sizeof(*polls));
        polls[0].poll = ticket;
      }
    } else {
      // We are in under/fully subscribed mode. Check the number of
      // threads waiting on the lock. The size of the polling area
      // should be at least the number of threads waiting.
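      // For example (illustrative numbers): with num_polls == 2 (mask == 0x1)
      // and five waiters, the loop below doubles twice, leaving num_polls == 8
      // and mask == 0x7.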
2377 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2378 if (num_waiting > num_polls) { 2379 kmp_uint32 old_num_polls = num_polls; 2380 reconfigure = true; 2381 do { 2382 mask = (mask << 1) | 1; 2383 num_polls *= 2; 2384 } while (num_polls <= num_waiting); 2385 2386 // Allocate the new polling area, and copy the relevant portion 2387 // of the old polling area to the new area. __kmp_allocate() 2388 // zeroes the memory it allocates, and most of the old area is 2389 // just zero padding, so we only copy the release counters. 2390 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2391 __kmp_allocate(num_polls * sizeof(*polls)); 2392 kmp_uint32 i; 2393 for (i = 0; i < old_num_polls; i++) { 2394 polls[i].poll = old_polls[i].poll; 2395 } 2396 } 2397 } 2398 2399 if (reconfigure) { 2400 // Now write the updated fields back to the lock structure. 2401 // 2402 // Make certain that "polls" is written before "mask" !!! 2403 // 2404 // If another thread picks up the new value of mask and the old polls 2405 // pointer , it could access memory beyond the end of the old polling 2406 // area. 2407 // 2408 // On x86, we need memory fences. 2409 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " 2410 "lock %p to %d polls\n", 2411 ticket, lck, num_polls)); 2412 2413 lck->lk.old_polls = old_polls; // non-volatile store 2414 lck->lk.polls = polls; // volatile store 2415 2416 KMP_MB(); 2417 2418 lck->lk.num_polls = num_polls; // non-volatile store 2419 lck->lk.mask = mask; // volatile store 2420 2421 KMP_MB(); 2422 2423 // Only after the new polling area and mask have been flushed 2424 // to main memory can we update the cleanup ticket field. 2425 // 2426 // volatile load / non-volatile store 2427 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2428 } 2429 } 2430 return KMP_LOCK_ACQUIRED_FIRST; 2431 } 2432 2433 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2434 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2435 ANNOTATE_DRDPA_ACQUIRED(lck); 2436 return retval; 2437 } 2438 2439 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2440 kmp_int32 gtid) { 2441 char const *const func = "omp_set_lock"; 2442 if (lck->lk.initialized != lck) { 2443 KMP_FATAL(LockIsUninitialized, func); 2444 } 2445 if (__kmp_is_drdpa_lock_nestable(lck)) { 2446 KMP_FATAL(LockNestableUsedAsSimple, func); 2447 } 2448 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { 2449 KMP_FATAL(LockIsAlreadyOwned, func); 2450 } 2451 2452 __kmp_acquire_drdpa_lock(lck, gtid); 2453 2454 lck->lk.owner_id = gtid + 1; 2455 return KMP_LOCK_ACQUIRED_FIRST; 2456 } 2457 2458 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2459 // First get a ticket, then read the polls pointer and the mask. 2460 // The polls pointer must be read before the mask!!! 
(See above) 2461 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2462 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; 2463 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2464 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2465 kmp_uint64 next_ticket = ticket + 1; 2466 if (KMP_COMPARE_AND_STORE_ACQ64(&lck->lk.next_ticket, ticket, 2467 next_ticket)) { 2468 KMP_FSYNC_ACQUIRED(lck); 2469 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2470 ticket, lck)); 2471 lck->lk.now_serving = ticket; // non-volatile store 2472 2473 // Since no threads are waiting, there is no possibility that we would 2474 // want to reconfigure the polling area. We might have the cleanup ticket 2475 // value (which says that it is now safe to deallocate old_polls), but 2476 // we'll let a later thread which calls __kmp_acquire_lock do that - this 2477 // routine isn't supposed to block, and we would risk blocks if we called 2478 // __kmp_free() to do the deallocation. 2479 return TRUE; 2480 } 2481 } 2482 return FALSE; 2483 } 2484 2485 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2486 kmp_int32 gtid) { 2487 char const *const func = "omp_test_lock"; 2488 if (lck->lk.initialized != lck) { 2489 KMP_FATAL(LockIsUninitialized, func); 2490 } 2491 if (__kmp_is_drdpa_lock_nestable(lck)) { 2492 KMP_FATAL(LockNestableUsedAsSimple, func); 2493 } 2494 2495 int retval = __kmp_test_drdpa_lock(lck, gtid); 2496 2497 if (retval) { 2498 lck->lk.owner_id = gtid + 1; 2499 } 2500 return retval; 2501 } 2502 2503 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2504 // Read the ticket value from the lock data struct, then the polls pointer and 2505 // the mask. The polls pointer must be read before the mask!!! (See above) 2506 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2507 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; 2508 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2509 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2510 ticket - 1, lck)); 2511 KMP_FSYNC_RELEASING(lck); 2512 ANNOTATE_DRDPA_RELEASED(lck); 2513 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2514 return KMP_LOCK_RELEASED; 2515 } 2516 2517 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2518 kmp_int32 gtid) { 2519 char const *const func = "omp_unset_lock"; 2520 KMP_MB(); /* in case another processor initialized lock */ 2521 if (lck->lk.initialized != lck) { 2522 KMP_FATAL(LockIsUninitialized, func); 2523 } 2524 if (__kmp_is_drdpa_lock_nestable(lck)) { 2525 KMP_FATAL(LockNestableUsedAsSimple, func); 2526 } 2527 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2528 KMP_FATAL(LockUnsettingFree, func); 2529 } 2530 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && 2531 (__kmp_get_drdpa_lock_owner(lck) != gtid)) { 2532 KMP_FATAL(LockUnsettingSetByAnother, func); 2533 } 2534 lck->lk.owner_id = 0; 2535 return __kmp_release_drdpa_lock(lck, gtid); 2536 } 2537 2538 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { 2539 lck->lk.location = NULL; 2540 lck->lk.mask = 0; 2541 lck->lk.num_polls = 1; 2542 lck->lk.polls = 2543 (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)__kmp_allocate( 2544 lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2545 lck->lk.cleanup_ticket = 0; 2546 lck->lk.old_polls = NULL; 2547 lck->lk.next_ticket = 0; 2548 lck->lk.now_serving = 0; 2549 lck->lk.owner_id = 0; // no thread owns the lock. 
2550 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 2551 lck->lk.initialized = lck; 2552 2553 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2554 } 2555 2556 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2557 __kmp_init_drdpa_lock(lck); 2558 } 2559 2560 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { 2561 lck->lk.initialized = NULL; 2562 lck->lk.location = NULL; 2563 if (lck->lk.polls != NULL) { 2564 __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.polls)); 2565 lck->lk.polls = NULL; 2566 } 2567 if (lck->lk.old_polls != NULL) { 2568 __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.old_polls)); 2569 lck->lk.old_polls = NULL; 2570 } 2571 lck->lk.mask = 0; 2572 lck->lk.num_polls = 0; 2573 lck->lk.cleanup_ticket = 0; 2574 lck->lk.next_ticket = 0; 2575 lck->lk.now_serving = 0; 2576 lck->lk.owner_id = 0; 2577 lck->lk.depth_locked = -1; 2578 } 2579 2580 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2581 char const *const func = "omp_destroy_lock"; 2582 if (lck->lk.initialized != lck) { 2583 KMP_FATAL(LockIsUninitialized, func); 2584 } 2585 if (__kmp_is_drdpa_lock_nestable(lck)) { 2586 KMP_FATAL(LockNestableUsedAsSimple, func); 2587 } 2588 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2589 KMP_FATAL(LockStillOwned, func); 2590 } 2591 __kmp_destroy_drdpa_lock(lck); 2592 } 2593 2594 // nested drdpa ticket locks 2595 2596 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2597 KMP_DEBUG_ASSERT(gtid >= 0); 2598 2599 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2600 lck->lk.depth_locked += 1; 2601 return KMP_LOCK_ACQUIRED_NEXT; 2602 } else { 2603 __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2604 ANNOTATE_DRDPA_ACQUIRED(lck); 2605 KMP_MB(); 2606 lck->lk.depth_locked = 1; 2607 KMP_MB(); 2608 lck->lk.owner_id = gtid + 1; 2609 return KMP_LOCK_ACQUIRED_FIRST; 2610 } 2611 } 2612 2613 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2614 kmp_int32 gtid) { 2615 char const *const func = "omp_set_nest_lock"; 2616 if (lck->lk.initialized != lck) { 2617 KMP_FATAL(LockIsUninitialized, func); 2618 } 2619 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2620 KMP_FATAL(LockSimpleUsedAsNestable, func); 2621 } 2622 __kmp_acquire_nested_drdpa_lock(lck, gtid); 2623 } 2624 2625 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2626 int retval; 2627 2628 KMP_DEBUG_ASSERT(gtid >= 0); 2629 2630 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2631 retval = ++lck->lk.depth_locked; 2632 } else if (!__kmp_test_drdpa_lock(lck, gtid)) { 2633 retval = 0; 2634 } else { 2635 KMP_MB(); 2636 retval = lck->lk.depth_locked = 1; 2637 KMP_MB(); 2638 lck->lk.owner_id = gtid + 1; 2639 } 2640 return retval; 2641 } 2642 2643 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2644 kmp_int32 gtid) { 2645 char const *const func = "omp_test_nest_lock"; 2646 if (lck->lk.initialized != lck) { 2647 KMP_FATAL(LockIsUninitialized, func); 2648 } 2649 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2650 KMP_FATAL(LockSimpleUsedAsNestable, func); 2651 } 2652 return __kmp_test_nested_drdpa_lock(lck, gtid); 2653 } 2654 2655 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2656 KMP_DEBUG_ASSERT(gtid >= 0); 2657 2658 KMP_MB(); 2659 if (--(lck->lk.depth_locked) == 0) { 2660 KMP_MB(); 2661 lck->lk.owner_id = 0; 2662 __kmp_release_drdpa_lock(lck, gtid); 2663 return KMP_LOCK_RELEASED; 2664 } 2665 
return KMP_LOCK_STILL_HELD; 2666 } 2667 2668 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2669 kmp_int32 gtid) { 2670 char const *const func = "omp_unset_nest_lock"; 2671 KMP_MB(); /* in case another processor initialized lock */ 2672 if (lck->lk.initialized != lck) { 2673 KMP_FATAL(LockIsUninitialized, func); 2674 } 2675 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2676 KMP_FATAL(LockSimpleUsedAsNestable, func); 2677 } 2678 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2679 KMP_FATAL(LockUnsettingFree, func); 2680 } 2681 if (__kmp_get_drdpa_lock_owner(lck) != gtid) { 2682 KMP_FATAL(LockUnsettingSetByAnother, func); 2683 } 2684 return __kmp_release_nested_drdpa_lock(lck, gtid); 2685 } 2686 2687 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2688 __kmp_init_drdpa_lock(lck); 2689 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2690 } 2691 2692 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2693 __kmp_init_nested_drdpa_lock(lck); 2694 } 2695 2696 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2697 __kmp_destroy_drdpa_lock(lck); 2698 lck->lk.depth_locked = 0; 2699 } 2700 2701 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2702 char const *const func = "omp_destroy_nest_lock"; 2703 if (lck->lk.initialized != lck) { 2704 KMP_FATAL(LockIsUninitialized, func); 2705 } 2706 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2707 KMP_FATAL(LockSimpleUsedAsNestable, func); 2708 } 2709 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2710 KMP_FATAL(LockStillOwned, func); 2711 } 2712 __kmp_destroy_nested_drdpa_lock(lck); 2713 } 2714 2715 // access functions to fields which don't exist for all lock kinds. 2716 2717 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { 2718 return lck == lck->lk.initialized; 2719 } 2720 2721 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { 2722 return lck->lk.location; 2723 } 2724 2725 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, 2726 const ident_t *loc) { 2727 lck->lk.location = loc; 2728 } 2729 2730 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { 2731 return lck->lk.flags; 2732 } 2733 2734 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, 2735 kmp_lock_flags_t flags) { 2736 lck->lk.flags = flags; 2737 } 2738 2739 // Time stamp counter 2740 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2741 #define __kmp_tsc() __kmp_hardware_timestamp() 2742 // Runtime's default backoff parameters 2743 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; 2744 #else 2745 // Use nanoseconds for other platforms 2746 extern kmp_uint64 __kmp_now_nsec(); 2747 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; 2748 #define __kmp_tsc() __kmp_now_nsec() 2749 #endif 2750 2751 // A useful predicate for dealing with timestamps that may wrap. 2752 // Is a before b? Since the timestamps may wrap, this is asking whether it's 2753 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 2754 // Times where going clockwise is less distance than going anti-clockwise 2755 // are in the future, others are in the past. e.g. 
a = MAX-1, b = MAX+1 (=0), 2756 // then a > b (true) does not mean a reached b; whereas signed(a) = -2, 2757 // signed(b) = 0 captures the actual difference 2758 static inline bool before(kmp_uint64 a, kmp_uint64 b) { 2759 return ((kmp_int64)b - (kmp_int64)a) > 0; 2760 } 2761 2762 // Truncated binary exponential backoff function 2763 void __kmp_spin_backoff(kmp_backoff_t *boff) { 2764 // We could flatten this loop, but making it a nested loop gives better result 2765 kmp_uint32 i; 2766 for (i = boff->step; i > 0; i--) { 2767 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 2768 do { 2769 KMP_CPU_PAUSE(); 2770 } while (before(__kmp_tsc(), goal)); 2771 } 2772 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); 2773 } 2774 2775 #if KMP_USE_DYNAMIC_LOCK 2776 2777 // Direct lock initializers. It simply writes a tag to the low 8 bits of the 2778 // lock word. 2779 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, 2780 kmp_dyna_lockseq_t seq) { 2781 TCW_4(*lck, KMP_GET_D_TAG(seq)); 2782 KA_TRACE( 2783 20, 2784 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 2785 } 2786 2787 #if KMP_USE_TSX 2788 2789 // HLE lock functions - imported from the testbed runtime. 2790 #define HLE_ACQUIRE ".byte 0xf2;" 2791 #define HLE_RELEASE ".byte 0xf3;" 2792 2793 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { 2794 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); 2795 return v; 2796 } 2797 2798 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } 2799 2800 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2801 // Use gtid for KMP_LOCK_BUSY if necessary 2802 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 2803 int delay = 1; 2804 do { 2805 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 2806 for (int i = delay; i != 0; --i) 2807 KMP_CPU_PAUSE(); 2808 delay = ((delay << 1) | 1) & 7; 2809 } 2810 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 2811 } 2812 } 2813 2814 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2815 kmp_int32 gtid) { 2816 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 2817 } 2818 2819 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2820 __asm__ volatile(HLE_RELEASE "movl %1,%0" 2821 : "=m"(*lck) 2822 : "r"(KMP_LOCK_FREE(hle)) 2823 : "memory"); 2824 return KMP_LOCK_RELEASED; 2825 } 2826 2827 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2828 kmp_int32 gtid) { 2829 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 2830 } 2831 2832 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2833 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 2834 } 2835 2836 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2837 kmp_int32 gtid) { 2838 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 2839 } 2840 2841 static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { 2842 __kmp_init_queuing_lock(lck); 2843 } 2844 2845 static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { 2846 __kmp_destroy_queuing_lock(lck); 2847 } 2848 2849 static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2850 unsigned retries = 3, status; 2851 do { 2852 status = _xbegin(); 2853 if (status == _XBEGIN_STARTED) { 2854 if (__kmp_is_unlocked_queuing_lock(lck)) 2855 return; 2856 _xabort(0xff); 2857 } 2858 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 2859 // Wait until 
lock becomes free 2860 while (!__kmp_is_unlocked_queuing_lock(lck)) 2861 __kmp_yield(TRUE); 2862 } else if (!(status & _XABORT_RETRY)) 2863 break; 2864 } while (retries--); 2865 2866 // Fall-back non-speculative lock (xchg) 2867 __kmp_acquire_queuing_lock(lck, gtid); 2868 } 2869 2870 static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2871 kmp_int32 gtid) { 2872 __kmp_acquire_rtm_lock(lck, gtid); 2873 } 2874 2875 static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2876 if (__kmp_is_unlocked_queuing_lock(lck)) { 2877 // Releasing from speculation 2878 _xend(); 2879 } else { 2880 // Releasing from a real lock 2881 __kmp_release_queuing_lock(lck, gtid); 2882 } 2883 return KMP_LOCK_RELEASED; 2884 } 2885 2886 static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2887 kmp_int32 gtid) { 2888 return __kmp_release_rtm_lock(lck, gtid); 2889 } 2890 2891 static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2892 unsigned retries = 3, status; 2893 do { 2894 status = _xbegin(); 2895 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 2896 return 1; 2897 } 2898 if (!(status & _XABORT_RETRY)) 2899 break; 2900 } while (retries--); 2901 2902 return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0; 2903 } 2904 2905 static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2906 kmp_int32 gtid) { 2907 return __kmp_test_rtm_lock(lck, gtid); 2908 } 2909 2910 #endif // KMP_USE_TSX 2911 2912 // Entry functions for indirect locks (first element of direct lock jump tables) 2913 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, 2914 kmp_dyna_lockseq_t tag); 2915 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); 2916 static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2917 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2918 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2919 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2920 kmp_int32); 2921 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2922 kmp_int32); 2923 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2924 kmp_int32); 2925 2926 // Jump tables for the indirect lock functions 2927 // Only fill in the odd entries, that avoids the need to shift out the low bit 2928 2929 // init functions 2930 #define expand(l, op) 0, __kmp_init_direct_lock, 2931 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { 2932 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; 2933 #undef expand 2934 2935 // destroy functions 2936 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 2937 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) = { 2938 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 2939 #undef expand 2940 2941 // set/acquire functions 2942 #define expand(l, op) \ 2943 0, (void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2944 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { 2945 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; 2946 #undef expand 2947 #define expand(l, op) \ 2948 0, (void (*)(kmp_dyna_lock_t *, \ 2949 kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2950 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2951 __kmp_set_indirect_lock_with_checks, 0, 2952 KMP_FOREACH_D_LOCK(expand, acquire)}; 2953 #undef expand 2954 2955 // unset/release and test functions 2956 
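// (Laid out like the set/acquire tables above: slot 0 holds the indirect-lock
// routine, the direct-lock routines sit in the odd slots, and the low bit of
// the tag therefore never needs to be shifted out to form an index.)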
#define expand(l, op) \ 2957 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2958 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { 2959 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; 2960 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { 2961 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2962 #undef expand 2963 #define expand(l, op) \ 2964 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2965 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2966 __kmp_unset_indirect_lock_with_checks, 0, 2967 KMP_FOREACH_D_LOCK(expand, release)}; 2968 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2969 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2970 #undef expand 2971 2972 // Exposes only one set of jump tables (*lock or *lock_with_checks). 2973 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; 2974 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; 2975 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; 2976 2977 // Jump tables for the indirect lock functions 2978 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, 2979 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { 2980 KMP_FOREACH_I_LOCK(expand, init)}; 2981 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { 2982 KMP_FOREACH_I_LOCK(expand, destroy)}; 2983 #undef expand 2984 2985 // set/acquire functions 2986 #define expand(l, op) \ 2987 (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 2988 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { 2989 KMP_FOREACH_I_LOCK(expand, acquire)}; 2990 #undef expand 2991 #define expand(l, op) \ 2992 (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 2993 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { 2994 KMP_FOREACH_I_LOCK(expand, acquire)}; 2995 #undef expand 2996 2997 // unset/release and test functions 2998 #define expand(l, op) \ 2999 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 3000 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { 3001 KMP_FOREACH_I_LOCK(expand, release)}; 3002 static int (*indirect_test[])(kmp_user_lock_p, 3003 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)}; 3004 #undef expand 3005 #define expand(l, op) \ 3006 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 3007 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { 3008 KMP_FOREACH_I_LOCK(expand, release)}; 3009 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { 3010 KMP_FOREACH_I_LOCK(expand, test)}; 3011 #undef expand 3012 3013 // Exposes only one jump tables (*lock or *lock_with_checks). 3014 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; 3015 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; 3016 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; 3017 3018 // Lock index table. 3019 kmp_indirect_lock_table_t __kmp_i_lock_table; 3020 3021 // Size of indirect locks. 3022 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0}; 3023 3024 // Jump tables for lock accessor/modifier. 
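// These tables are populated by __kmp_init_dynamic_user_locks() below; entries
// for lock kinds that have no location/flags accessors (e.g. the nested tas
// and futex kinds) are left NULL.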
void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                     const ident_t *) = {0};
void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                  kmp_lock_flags_t) = {0};
const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) = {0};
kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) = {0};

// Use different lock pools for different lock types.
static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};

// User lock allocator for dynamically dispatched indirect locks. Every entry
// of the indirect lock table holds the address and type of the allocated
// indirect lock (kmp_indirect_lock_t), and the size of the table doubles when
// it is full. A destroyed indirect lock object is returned to the reusable
// pool of locks, unique to each lock type.
kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
                                                  kmp_int32 gtid,
                                                  kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t *lck;
  kmp_lock_index_t idx;

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_indirect_lock_pool[tag] != NULL) {
    // Reuse the allocated and destroyed lock object
    lck = __kmp_indirect_lock_pool[tag];
    if (OMP_LOCK_T_SIZE < sizeof(void *))
      idx = lck->lock->pool.index;
    __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
    KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
                  lck));
  } else {
    idx = __kmp_i_lock_table.next;
    // Check capacity and double the size if it is full
    if (idx == __kmp_i_lock_table.size) {
      // Double up the space for block pointers
      int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
      kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
      __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
          2 * row * sizeof(kmp_indirect_lock_t *));
      KMP_MEMCPY(__kmp_i_lock_table.table, old_table,
                 row * sizeof(kmp_indirect_lock_t *));
      __kmp_free(old_table);
      // Allocate new objects in the new blocks
      for (int i = row; i < 2 * row; ++i)
        *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
            KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
      __kmp_i_lock_table.size = 2 * idx;
    }
    __kmp_i_lock_table.next++;
    lck = KMP_GET_I_LOCK(idx);
    // Allocate a new base lock object
    lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
    KA_TRACE(20,
             ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);

  lck->type = tag;

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = idx
                                       << 1; // indirect lock word must be even
  } else {
    *((kmp_indirect_lock_t **)user_lock) = lck;
  }

  return lck;
}

// User lock lookup for dynamically dispatched locks.
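// When the user's lock object is too small to hold a pointer, the lock word
// stores (table index << 1); otherwise it stores the kmp_indirect_lock_t
// pointer itself (see __kmp_allocate_indirect_lock above).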
3099 static __forceinline kmp_indirect_lock_t * 3100 __kmp_lookup_indirect_lock(void **user_lock, const char *func) { 3101 if (__kmp_env_consistency_check) { 3102 kmp_indirect_lock_t *lck = NULL; 3103 if (user_lock == NULL) { 3104 KMP_FATAL(LockIsUninitialized, func); 3105 } 3106 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3107 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3108 if (idx >= __kmp_i_lock_table.size) { 3109 KMP_FATAL(LockIsUninitialized, func); 3110 } 3111 lck = KMP_GET_I_LOCK(idx); 3112 } else { 3113 lck = *((kmp_indirect_lock_t **)user_lock); 3114 } 3115 if (lck == NULL) { 3116 KMP_FATAL(LockIsUninitialized, func); 3117 } 3118 return lck; 3119 } else { 3120 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3121 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3122 } else { 3123 return *((kmp_indirect_lock_t **)user_lock); 3124 } 3125 } 3126 } 3127 3128 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, 3129 kmp_dyna_lockseq_t seq) { 3130 #if KMP_USE_ADAPTIVE_LOCKS 3131 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3132 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3133 seq = lockseq_queuing; 3134 } 3135 #endif 3136 #if KMP_USE_TSX 3137 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3138 seq = lockseq_queuing; 3139 } 3140 #endif 3141 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3142 kmp_indirect_lock_t *l = 3143 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3144 KMP_I_LOCK_FUNC(l, init)(l->lock); 3145 KA_TRACE( 3146 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", 3147 seq)); 3148 } 3149 3150 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { 3151 kmp_uint32 gtid = __kmp_entry_gtid(); 3152 kmp_indirect_lock_t *l = 3153 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3154 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3155 kmp_indirect_locktag_t tag = l->type; 3156 3157 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3158 3159 // Use the base lock's space to keep the pool chain. 
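  // (The underlying lock was destroyed just above, so its storage can double
  // as a free-list node until __kmp_allocate_indirect_lock hands this entry
  // out again.)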
3160 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3161 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3162 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3163 } 3164 __kmp_indirect_lock_pool[tag] = l; 3165 3166 __kmp_release_lock(&__kmp_global_lock, gtid); 3167 } 3168 3169 static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3170 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3171 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3172 } 3173 3174 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3175 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3176 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3177 } 3178 3179 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3180 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3181 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3182 } 3183 3184 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3185 kmp_int32 gtid) { 3186 kmp_indirect_lock_t *l = 3187 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3188 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3189 } 3190 3191 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3192 kmp_int32 gtid) { 3193 kmp_indirect_lock_t *l = 3194 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3195 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3196 } 3197 3198 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3199 kmp_int32 gtid) { 3200 kmp_indirect_lock_t *l = 3201 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3202 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3203 } 3204 3205 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3206 3207 // This is used only in kmp_error.cpp when consistency checking is on. 3208 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { 3209 switch (seq) { 3210 case lockseq_tas: 3211 case lockseq_nested_tas: 3212 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3213 #if KMP_USE_FUTEX 3214 case lockseq_futex: 3215 case lockseq_nested_futex: 3216 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3217 #endif 3218 case lockseq_ticket: 3219 case lockseq_nested_ticket: 3220 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3221 case lockseq_queuing: 3222 case lockseq_nested_queuing: 3223 #if KMP_USE_ADAPTIVE_LOCKS 3224 case lockseq_adaptive: 3225 #endif 3226 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3227 case lockseq_drdpa: 3228 case lockseq_nested_drdpa: 3229 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3230 default: 3231 return 0; 3232 } 3233 } 3234 3235 // Initializes data for dynamic user locks. 3236 void __kmp_init_dynamic_user_locks() { 3237 // Initialize jump table for the lock functions 3238 if (__kmp_env_consistency_check) { 3239 __kmp_direct_set = direct_set_check; 3240 __kmp_direct_unset = direct_unset_check; 3241 __kmp_direct_test = direct_test_check; 3242 __kmp_indirect_set = indirect_set_check; 3243 __kmp_indirect_unset = indirect_unset_check; 3244 __kmp_indirect_test = indirect_test_check; 3245 } else { 3246 __kmp_direct_set = direct_set; 3247 __kmp_direct_unset = direct_unset; 3248 __kmp_direct_test = direct_test; 3249 __kmp_indirect_set = indirect_set; 3250 __kmp_indirect_unset = indirect_unset; 3251 __kmp_indirect_test = indirect_test; 3252 } 3253 // If the user locks have already been initialized, then return. 
Allow the 3254 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate 3255 // new lock tables if they have already been allocated. 3256 if (__kmp_init_user_locks) 3257 return; 3258 3259 // Initialize lock index table 3260 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3261 __kmp_i_lock_table.table = 3262 (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3263 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( 3264 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3265 __kmp_i_lock_table.next = 0; 3266 3267 // Indirect lock size 3268 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3269 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3270 #if KMP_USE_ADAPTIVE_LOCKS 3271 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3272 #endif 3273 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3274 #if KMP_USE_TSX 3275 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3276 #endif 3277 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3278 #if KMP_USE_FUTEX 3279 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3280 #endif 3281 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3282 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3283 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3284 3285 // Initialize lock accessor/modifier 3286 #define fill_jumps(table, expand, sep) \ 3287 { \ 3288 table[locktag##sep##ticket] = expand(ticket); \ 3289 table[locktag##sep##queuing] = expand(queuing); \ 3290 table[locktag##sep##drdpa] = expand(drdpa); \ 3291 } 3292 3293 #if KMP_USE_ADAPTIVE_LOCKS 3294 #define fill_table(table, expand) \ 3295 { \ 3296 fill_jumps(table, expand, _); \ 3297 table[locktag_adaptive] = expand(queuing); \ 3298 fill_jumps(table, expand, _nested_); \ 3299 } 3300 #else 3301 #define fill_table(table, expand) \ 3302 { \ 3303 fill_jumps(table, expand, _); \ 3304 fill_jumps(table, expand, _nested_); \ 3305 } 3306 #endif // KMP_USE_ADAPTIVE_LOCKS 3307 3308 #define expand(l) \ 3309 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location 3310 fill_table(__kmp_indirect_set_location, expand); 3311 #undef expand 3312 #define expand(l) \ 3313 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags 3314 fill_table(__kmp_indirect_set_flags, expand); 3315 #undef expand 3316 #define expand(l) \ 3317 (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location 3318 fill_table(__kmp_indirect_get_location, expand); 3319 #undef expand 3320 #define expand(l) \ 3321 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags 3322 fill_table(__kmp_indirect_get_flags, expand); 3323 #undef expand 3324 3325 __kmp_init_user_locks = TRUE; 3326 } 3327 3328 // Clean up the lock table. 3329 void __kmp_cleanup_indirect_user_locks() { 3330 kmp_lock_index_t i; 3331 int k; 3332 3333 // Clean up locks in the pools first (they were already destroyed before going 3334 // into the pools). 
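  // Only the per-entry base lock storage still needs to be freed here; the
  // kmp_indirect_lock_t objects themselves live in the table blocks that are
  // freed at the end of this function.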
// Clean up the lock table.
void __kmp_cleanup_indirect_user_locks() {
  kmp_lock_index_t i;
  int k;

  // Clean up locks in the pools first (they were already destroyed before
  // going into the pools).
  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
    while (l != NULL) {
      kmp_indirect_lock_t *ll = l;
      l = (kmp_indirect_lock_t *)l->lock->pool.next;
      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
                    ll));
      __kmp_free(ll->lock);
      ll->lock = NULL;
    }
    __kmp_indirect_lock_pool[k] = NULL;
  }
  // Clean up the remaining undestroyed locks.
  for (i = 0; i < __kmp_i_lock_table.next; i++) {
    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
    if (l->lock != NULL) {
      // Locks not destroyed explicitly need to be destroyed here.
      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
      KA_TRACE(
          20,
          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
           l));
      __kmp_free(l->lock);
    }
  }
  // Free the table
  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
    __kmp_free(__kmp_i_lock_table.table[i]);
  __kmp_free(__kmp_i_lock_table.table);

  __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;
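// Illustrative sketch (not part of the library): once __kmp_set_user_lock_vptrs()
// below has bound the pointers above for the chosen lock kind, generic
// user-lock code does not need to know the kind and simply calls through them.
// lck and gtid are placeholders here; note that for the "tas" and "futex"
// kinds some accessors (e.g. __kmp_is_user_lock_initialized_) remain NULL.
#if 0
  if (__kmp_is_user_lock_initialized_ != NULL &&
      (*__kmp_is_user_lock_initialized_)(lck)) {
    (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); // bound to tas/ticket/...
    (*__kmp_release_user_lock_with_checks_)(lck, gtid);
  }
#endif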
void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }
    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;
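// Illustrative sketch (not part of the library): __kmp_user_lock_table is a
// growable array whose element [0] is reserved to chain the previous (smaller)
// table after a reallocation, so valid lock indices run from 1 to used - 1.
// The initializer {1, 0, NULL} therefore means "slot 0 already accounted for,
// nothing allocated yet". Recovering a lock from an index stored in the user's
// lock word (user_lock is a placeholder for the address of the user's lock
// variable) looks roughly like this; see __kmp_lookup_user_lock() below:
#if 0
  kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
  KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
  kmp_user_lock_p lck = __kmp_user_lock_table.table[index];
#endif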
// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first element
    // of the new table. All the tables are organized into a list this way, and
    // can be freed when the library shuts down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}
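// Illustrative sketch (not part of the library): memory layout produced by
// __kmp_lock_block_allocate() above. A single __kmp_allocate() call carves out
// room for __kmp_num_locks_in_block locks followed by the block descriptor
// itself, so freeing block->locks at shutdown releases the descriptor as well:
//
//   buffer:  [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
//             ^ new_block->locks                  ^ new_block
//
// The i-th lock of the current block (i is a placeholder index here, with
// 0 <= i < __kmp_num_locks_in_block) is reached by plain byte arithmetic:
#if 0
  kmp_user_lock_p lck = (kmp_user_lock_p)(
      (char *)__kmp_lock_blocks->locks + i * __kmp_user_lock_size);
#endif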
// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    // ANNOTATION: Found no good way to express the synchronization
    // between allocation and usage, so ignore the allocation.
    ANNOTATE_IGNORE_WRITES_BEGIN();
    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }
    ANNOTATE_IGNORE_WRITES_END();

    // Insert lock in the table so that it can be freed in __kmp_cleanup,
    // and debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper

  return lck;
}

// Put the lock's memory into the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}
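// Illustrative sketch (not part of the library): a typical life cycle of a
// user lock as driven by the OpenMP entry points (the actual callers live
// outside this file; user_lock and gtid are placeholders for the address of
// the user's omp_lock_t and the calling thread's global tid). Allocation
// stores either an index or a pointer into the user's lock word, lookup
// reverses that encoding, and freeing pushes the memory onto __kmp_lock_pool:
#if 0
  kmp_user_lock_p lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  (*__kmp_init_user_lock_with_checks_)(lck);
  // Later, omp_set_lock/omp_unset_lock recover the lock from the user's word:
  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  // Destruction eventually recycles the memory through the pool:
  (*__kmp_destroy_user_lock_with_checks_)(lck);
  __kmp_user_lock_free(user_lock, gtid, lck);
#endif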
void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about locks in the pool--we will free them
  // when iterating through the lock table (it includes all the locks, dead or
  // alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck) & kmp_lf_critical_section))

  // Loop through lock table, free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type
  // is currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will
  // fail for "pool" objects on the free list. This must happen as the
  // "location" field of real user locks overlaps the "index" field of "pool"
  // objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident_t *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of the locks is consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Cleanup internal lock dynamic resources (for drdpa locks particularly).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK