1 /* 2 * kmp_lock.cpp -- lock-related functions 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // The LLVM Compiler Infrastructure 8 // 9 // This file is dual licensed under the MIT and the University of Illinois Open 10 // Source Licenses. See LICENSE.txt for details. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include <stddef.h> 15 #include <atomic> 16 17 #include "kmp.h" 18 #include "kmp_i18n.h" 19 #include "kmp_io.h" 20 #include "kmp_itt.h" 21 #include "kmp_lock.h" 22 23 #include "tsan_annotations.h" 24 25 #if KMP_USE_FUTEX 26 #include <sys/syscall.h> 27 #include <unistd.h> 28 // We should really include <futex.h>, but that causes compatibility problems on 29 // different Linux* OS distributions that either require that you include (or 30 // break when you try to include) <pci/types.h>. Since all we need is the two 31 // macros below (which are part of the kernel ABI, so can't change) we just 32 // define the constants here and don't include <futex.h> 33 #ifndef FUTEX_WAIT 34 #define FUTEX_WAIT 0 35 #endif 36 #ifndef FUTEX_WAKE 37 #define FUTEX_WAKE 1 38 #endif 39 #endif 40 41 /* Implement spin locks for internal library use. */ 42 /* The algorithm implemented is Lamport's bakery lock [1974]. */ 43 44 void __kmp_validate_locks(void) { 45 int i; 46 kmp_uint32 x, y; 47 48 /* Check to make sure unsigned arithmetic does wraps properly */ 49 x = ~((kmp_uint32)0) - 2; 50 y = x - 2; 51 52 for (i = 0; i < 8; ++i, ++x, ++y) { 53 kmp_uint32 z = (x - y); 54 KMP_ASSERT(z == 2); 55 } 56 57 KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0); 58 } 59 60 /* ------------------------------------------------------------------------ */ 61 /* test and set locks */ 62 63 // For the non-nested locks, we can only assume that the first 4 bytes were 64 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel 65 // compiler only allocates a 4 byte pointer on IA-32 architecture. On 66 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated. 67 // 68 // gcc reserves >= 8 bytes for nested locks, so we can assume that the 69 // entire 8 bytes were allocated for nested locks on all 64-bit platforms. 70 71 static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) { 72 return KMP_LOCK_STRIP(TCR_4(lck->lk.poll)) - 1; 73 } 74 75 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) { 76 return lck->lk.depth_locked != -1; 77 } 78 79 __forceinline static int 80 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) { 81 KMP_MB(); 82 83 #ifdef USE_LOCK_PROFILE 84 kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll)); 85 if ((curr != 0) && (curr != gtid + 1)) 86 __kmp_printf("LOCK CONTENTION: %p\n", lck); 87 /* else __kmp_printf( "." );*/ 88 #endif /* USE_LOCK_PROFILE */ 89 90 if ((lck->lk.poll == KMP_LOCK_FREE(tas)) && 91 KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas), 92 KMP_LOCK_BUSY(gtid + 1, tas))) { 93 KMP_FSYNC_ACQUIRED(lck); 94 return KMP_LOCK_ACQUIRED_FIRST; 95 } 96 97 kmp_uint32 spins; 98 KMP_FSYNC_PREPARE(lck); 99 KMP_INIT_YIELD(spins); 100 if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { 101 KMP_YIELD(TRUE); 102 } else { 103 KMP_YIELD_SPIN(spins); 104 } 105 106 kmp_backoff_t backoff = __kmp_spin_backoff_params; 107 while ((lck->lk.poll != KMP_LOCK_FREE(tas)) || 108 (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas), 109 KMP_LOCK_BUSY(gtid + 1, tas)))) { 110 111 __kmp_spin_backoff(&backoff); 112 if (TCR_4(__kmp_nth) > 113 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 114 KMP_YIELD(TRUE); 115 } else { 116 KMP_YIELD_SPIN(spins); 117 } 118 } 119 KMP_FSYNC_ACQUIRED(lck); 120 return KMP_LOCK_ACQUIRED_FIRST; 121 } 122 123 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 124 int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid); 125 ANNOTATE_TAS_ACQUIRED(lck); 126 return retval; 127 } 128 129 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck, 130 kmp_int32 gtid) { 131 char const *const func = "omp_set_lock"; 132 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 133 __kmp_is_tas_lock_nestable(lck)) { 134 KMP_FATAL(LockNestableUsedAsSimple, func); 135 } 136 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) { 137 KMP_FATAL(LockIsAlreadyOwned, func); 138 } 139 return __kmp_acquire_tas_lock(lck, gtid); 140 } 141 142 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 143 if ((lck->lk.poll == KMP_LOCK_FREE(tas)) && 144 KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas), 145 KMP_LOCK_BUSY(gtid + 1, tas))) { 146 KMP_FSYNC_ACQUIRED(lck); 147 return TRUE; 148 } 149 return FALSE; 150 } 151 152 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck, 153 kmp_int32 gtid) { 154 char const *const func = "omp_test_lock"; 155 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 156 __kmp_is_tas_lock_nestable(lck)) { 157 KMP_FATAL(LockNestableUsedAsSimple, func); 158 } 159 return __kmp_test_tas_lock(lck, gtid); 160 } 161 162 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 163 KMP_MB(); /* Flush all pending memory write invalidates. */ 164 165 KMP_FSYNC_RELEASING(lck); 166 ANNOTATE_TAS_RELEASED(lck); 167 KMP_ST_REL32(&(lck->lk.poll), KMP_LOCK_FREE(tas)); 168 KMP_MB(); /* Flush all pending memory write invalidates. */ 169 170 KMP_YIELD(TCR_4(__kmp_nth) > 171 (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); 172 return KMP_LOCK_RELEASED; 173 } 174 175 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck, 176 kmp_int32 gtid) { 177 char const *const func = "omp_unset_lock"; 178 KMP_MB(); /* in case another processor initialized lock */ 179 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 180 __kmp_is_tas_lock_nestable(lck)) { 181 KMP_FATAL(LockNestableUsedAsSimple, func); 182 } 183 if (__kmp_get_tas_lock_owner(lck) == -1) { 184 KMP_FATAL(LockUnsettingFree, func); 185 } 186 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) && 187 (__kmp_get_tas_lock_owner(lck) != gtid)) { 188 KMP_FATAL(LockUnsettingSetByAnother, func); 189 } 190 return __kmp_release_tas_lock(lck, gtid); 191 } 192 193 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) { 194 TCW_4(lck->lk.poll, KMP_LOCK_FREE(tas)); 195 } 196 197 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) { 198 __kmp_init_tas_lock(lck); 199 } 200 201 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; } 202 203 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) { 204 char const *const func = "omp_destroy_lock"; 205 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 206 __kmp_is_tas_lock_nestable(lck)) { 207 KMP_FATAL(LockNestableUsedAsSimple, func); 208 } 209 if (__kmp_get_tas_lock_owner(lck) != -1) { 210 KMP_FATAL(LockStillOwned, func); 211 } 212 __kmp_destroy_tas_lock(lck); 213 } 214 215 // nested test and set locks 216 217 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 218 KMP_DEBUG_ASSERT(gtid >= 0); 219 220 if (__kmp_get_tas_lock_owner(lck) == gtid) { 221 lck->lk.depth_locked += 1; 222 return KMP_LOCK_ACQUIRED_NEXT; 223 } else { 224 __kmp_acquire_tas_lock_timed_template(lck, gtid); 225 ANNOTATE_TAS_ACQUIRED(lck); 226 lck->lk.depth_locked = 1; 227 return KMP_LOCK_ACQUIRED_FIRST; 228 } 229 } 230 231 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 232 kmp_int32 gtid) { 233 char const *const func = "omp_set_nest_lock"; 234 if (!__kmp_is_tas_lock_nestable(lck)) { 235 KMP_FATAL(LockSimpleUsedAsNestable, func); 236 } 237 return __kmp_acquire_nested_tas_lock(lck, gtid); 238 } 239 240 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 241 int retval; 242 243 KMP_DEBUG_ASSERT(gtid >= 0); 244 245 if (__kmp_get_tas_lock_owner(lck) == gtid) { 246 retval = ++lck->lk.depth_locked; 247 } else if (!__kmp_test_tas_lock(lck, gtid)) { 248 retval = 0; 249 } else { 250 KMP_MB(); 251 retval = lck->lk.depth_locked = 1; 252 } 253 return retval; 254 } 255 256 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 257 kmp_int32 gtid) { 258 char const *const func = "omp_test_nest_lock"; 259 if (!__kmp_is_tas_lock_nestable(lck)) { 260 KMP_FATAL(LockSimpleUsedAsNestable, func); 261 } 262 return __kmp_test_nested_tas_lock(lck, gtid); 263 } 264 265 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 266 KMP_DEBUG_ASSERT(gtid >= 0); 267 268 KMP_MB(); 269 if (--(lck->lk.depth_locked) == 0) { 270 __kmp_release_tas_lock(lck, gtid); 271 return KMP_LOCK_RELEASED; 272 } 273 return KMP_LOCK_STILL_HELD; 274 } 275 276 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 277 kmp_int32 gtid) { 278 char const *const func = "omp_unset_nest_lock"; 279 KMP_MB(); /* in case another processor initialized lock */ 280 if (!__kmp_is_tas_lock_nestable(lck)) { 281 KMP_FATAL(LockSimpleUsedAsNestable, func); 282 } 283 if (__kmp_get_tas_lock_owner(lck) == -1) { 284 
KMP_FATAL(LockUnsettingFree, func); 285 } 286 if (__kmp_get_tas_lock_owner(lck) != gtid) { 287 KMP_FATAL(LockUnsettingSetByAnother, func); 288 } 289 return __kmp_release_nested_tas_lock(lck, gtid); 290 } 291 292 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) { 293 __kmp_init_tas_lock(lck); 294 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 295 } 296 297 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { 298 __kmp_init_nested_tas_lock(lck); 299 } 300 301 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) { 302 __kmp_destroy_tas_lock(lck); 303 lck->lk.depth_locked = 0; 304 } 305 306 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { 307 char const *const func = "omp_destroy_nest_lock"; 308 if (!__kmp_is_tas_lock_nestable(lck)) { 309 KMP_FATAL(LockSimpleUsedAsNestable, func); 310 } 311 if (__kmp_get_tas_lock_owner(lck) != -1) { 312 KMP_FATAL(LockStillOwned, func); 313 } 314 __kmp_destroy_nested_tas_lock(lck); 315 } 316 317 #if KMP_USE_FUTEX 318 319 /* ------------------------------------------------------------------------ */ 320 /* futex locks */ 321 322 // futex locks are really just test and set locks, with a different method 323 // of handling contention. They take the same amount of space as test and 324 // set locks, and are allocated the same way (i.e. use the area allocated by 325 // the compiler for non-nested locks / allocate nested locks on the heap). 326 327 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) { 328 return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1; 329 } 330 331 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) { 332 return lck->lk.depth_locked != -1; 333 } 334 335 __forceinline static int 336 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) { 337 kmp_int32 gtid_code = (gtid + 1) << 1; 338 339 KMP_MB(); 340 341 #ifdef USE_LOCK_PROFILE 342 kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll)); 343 if ((curr != 0) && (curr != gtid_code)) 344 __kmp_printf("LOCK CONTENTION: %p\n", lck); 345 /* else __kmp_printf( "." );*/ 346 #endif /* USE_LOCK_PROFILE */ 347 348 KMP_FSYNC_PREPARE(lck); 349 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", 350 lck, lck->lk.poll, gtid)); 351 352 kmp_int32 poll_val; 353 354 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( 355 &(lck->lk.poll), KMP_LOCK_FREE(futex), 356 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { 357 358 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; 359 KA_TRACE( 360 1000, 361 ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", 362 lck, gtid, poll_val, cond)); 363 364 // NOTE: if you try to use the following condition for this branch 365 // 366 // if ( poll_val & 1 == 0 ) 367 // 368 // Then the 12.0 compiler has a bug where the following block will 369 // always be skipped, regardless of the value of the LSB of poll_val. 370 if (!cond) { 371 // Try to set the lsb in the poll to indicate to the owner 372 // thread that they need to wake this thread up. 
373 if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val, 374 poll_val | KMP_LOCK_BUSY(1, futex))) { 375 KA_TRACE( 376 1000, 377 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", 378 lck, lck->lk.poll, gtid)); 379 continue; 380 } 381 poll_val |= KMP_LOCK_BUSY(1, futex); 382 383 KA_TRACE(1000, 384 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck, 385 lck->lk.poll, gtid)); 386 } 387 388 KA_TRACE( 389 1000, 390 ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", 391 lck, gtid, poll_val)); 392 393 kmp_int32 rc; 394 if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL, 395 NULL, 0)) != 0) { 396 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) " 397 "failed (rc=%d errno=%d)\n", 398 lck, gtid, poll_val, rc, errno)); 399 continue; 400 } 401 402 KA_TRACE(1000, 403 ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", 404 lck, gtid, poll_val)); 405 // This thread has now done a successful futex wait call and was entered on 406 // the OS futex queue. We must now perform a futex wake call when releasing 407 // the lock, as we have no idea how many other threads are in the queue. 408 gtid_code |= 1; 409 } 410 411 KMP_FSYNC_ACQUIRED(lck); 412 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, 413 lck->lk.poll, gtid)); 414 return KMP_LOCK_ACQUIRED_FIRST; 415 } 416 417 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 418 int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid); 419 ANNOTATE_FUTEX_ACQUIRED(lck); 420 return retval; 421 } 422 423 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck, 424 kmp_int32 gtid) { 425 char const *const func = "omp_set_lock"; 426 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 427 __kmp_is_futex_lock_nestable(lck)) { 428 KMP_FATAL(LockNestableUsedAsSimple, func); 429 } 430 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) { 431 KMP_FATAL(LockIsAlreadyOwned, func); 432 } 433 return __kmp_acquire_futex_lock(lck, gtid); 434 } 435 436 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 437 if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex), 438 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { 439 KMP_FSYNC_ACQUIRED(lck); 440 return TRUE; 441 } 442 return FALSE; 443 } 444 445 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck, 446 kmp_int32 gtid) { 447 char const *const func = "omp_test_lock"; 448 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 449 __kmp_is_futex_lock_nestable(lck)) { 450 KMP_FATAL(LockNestableUsedAsSimple, func); 451 } 452 return __kmp_test_futex_lock(lck, gtid); 453 } 454 455 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 456 KMP_MB(); /* Flush all pending memory write invalidates. */ 457 458 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", 459 lck, lck->lk.poll, gtid)); 460 461 KMP_FSYNC_RELEASING(lck); 462 ANNOTATE_FUTEX_RELEASED(lck); 463 464 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex)); 465 466 KA_TRACE(1000, 467 ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", 468 lck, gtid, poll_val)); 469 470 if (KMP_LOCK_STRIP(poll_val) & 1) { 471 KA_TRACE(1000, 472 ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", 473 lck, gtid)); 474 syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), 475 NULL, NULL, 0); 476 } 477 478 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 479 480 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, 481 lck->lk.poll, gtid)); 482 483 KMP_YIELD(TCR_4(__kmp_nth) > 484 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 485 return KMP_LOCK_RELEASED; 486 } 487 488 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck, 489 kmp_int32 gtid) { 490 char const *const func = "omp_unset_lock"; 491 KMP_MB(); /* in case another processor initialized lock */ 492 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 493 __kmp_is_futex_lock_nestable(lck)) { 494 KMP_FATAL(LockNestableUsedAsSimple, func); 495 } 496 if (__kmp_get_futex_lock_owner(lck) == -1) { 497 KMP_FATAL(LockUnsettingFree, func); 498 } 499 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) && 500 (__kmp_get_futex_lock_owner(lck) != gtid)) { 501 KMP_FATAL(LockUnsettingSetByAnother, func); 502 } 503 return __kmp_release_futex_lock(lck, gtid); 504 } 505 506 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) { 507 TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex)); 508 } 509 510 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) { 511 __kmp_init_futex_lock(lck); 512 } 513 514 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; } 515 516 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) { 517 char const *const func = "omp_destroy_lock"; 518 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 519 __kmp_is_futex_lock_nestable(lck)) { 520 KMP_FATAL(LockNestableUsedAsSimple, func); 521 } 522 if (__kmp_get_futex_lock_owner(lck) != -1) { 523 KMP_FATAL(LockStillOwned, func); 524 } 525 __kmp_destroy_futex_lock(lck); 526 } 527 528 // nested futex locks 529 530 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 531 KMP_DEBUG_ASSERT(gtid >= 0); 532 533 if (__kmp_get_futex_lock_owner(lck) == gtid) { 534 lck->lk.depth_locked += 1; 535 return KMP_LOCK_ACQUIRED_NEXT; 536 } else { 537 __kmp_acquire_futex_lock_timed_template(lck, gtid); 538 ANNOTATE_FUTEX_ACQUIRED(lck); 539 lck->lk.depth_locked = 1; 540 return KMP_LOCK_ACQUIRED_FIRST; 541 } 542 } 543 544 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 545 kmp_int32 gtid) { 546 char const *const func = "omp_set_nest_lock"; 547 if (!__kmp_is_futex_lock_nestable(lck)) { 548 KMP_FATAL(LockSimpleUsedAsNestable, func); 549 } 550 return __kmp_acquire_nested_futex_lock(lck, gtid); 551 } 552 553 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 554 int retval; 555 556 KMP_DEBUG_ASSERT(gtid >= 0); 557 558 if (__kmp_get_futex_lock_owner(lck) == gtid) { 559 retval = ++lck->lk.depth_locked; 560 } else if (!__kmp_test_futex_lock(lck, gtid)) { 561 retval = 0; 562 } else { 563 KMP_MB(); 564 retval = lck->lk.depth_locked = 1; 565 } 566 return retval; 567 } 568 569 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 570 kmp_int32 gtid) { 571 char const *const func = "omp_test_nest_lock"; 572 if (!__kmp_is_futex_lock_nestable(lck)) { 573 KMP_FATAL(LockSimpleUsedAsNestable, func); 574 } 575 return __kmp_test_nested_futex_lock(lck, gtid); 576 } 577 578 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 579 KMP_DEBUG_ASSERT(gtid >= 0); 580 581 KMP_MB(); 582 if (--(lck->lk.depth_locked) == 0) { 583 __kmp_release_futex_lock(lck, gtid); 584 return KMP_LOCK_RELEASED; 585 } 586 return KMP_LOCK_STILL_HELD; 587 } 588 589 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 590 kmp_int32 gtid) { 591 char const *const func 
= "omp_unset_nest_lock"; 592 KMP_MB(); /* in case another processor initialized lock */ 593 if (!__kmp_is_futex_lock_nestable(lck)) { 594 KMP_FATAL(LockSimpleUsedAsNestable, func); 595 } 596 if (__kmp_get_futex_lock_owner(lck) == -1) { 597 KMP_FATAL(LockUnsettingFree, func); 598 } 599 if (__kmp_get_futex_lock_owner(lck) != gtid) { 600 KMP_FATAL(LockUnsettingSetByAnother, func); 601 } 602 return __kmp_release_nested_futex_lock(lck, gtid); 603 } 604 605 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) { 606 __kmp_init_futex_lock(lck); 607 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 608 } 609 610 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { 611 __kmp_init_nested_futex_lock(lck); 612 } 613 614 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) { 615 __kmp_destroy_futex_lock(lck); 616 lck->lk.depth_locked = 0; 617 } 618 619 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { 620 char const *const func = "omp_destroy_nest_lock"; 621 if (!__kmp_is_futex_lock_nestable(lck)) { 622 KMP_FATAL(LockSimpleUsedAsNestable, func); 623 } 624 if (__kmp_get_futex_lock_owner(lck) != -1) { 625 KMP_FATAL(LockStillOwned, func); 626 } 627 __kmp_destroy_nested_futex_lock(lck); 628 } 629 630 #endif // KMP_USE_FUTEX 631 632 /* ------------------------------------------------------------------------ */ 633 /* ticket (bakery) locks */ 634 635 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) { 636 return std::atomic_load_explicit(&lck->lk.owner_id, 637 std::memory_order_relaxed) - 638 1; 639 } 640 641 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) { 642 return std::atomic_load_explicit(&lck->lk.depth_locked, 643 std::memory_order_relaxed) != -1; 644 } 645 646 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) { 647 return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving, 648 std::memory_order_acquire) == my_ticket; 649 } 650 651 __forceinline static int 652 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck, 653 kmp_int32 gtid) { 654 kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( 655 &lck->lk.next_ticket, 1U, std::memory_order_relaxed); 656 657 #ifdef USE_LOCK_PROFILE 658 if (std::atomic_load_explicit(&lck->lk.now_serving, 659 std::memory_order_relaxed) != my_ticket) 660 __kmp_printf("LOCK CONTENTION: %p\n", lck); 661 /* else __kmp_printf( "." 
);*/ 662 #endif /* USE_LOCK_PROFILE */ 663 664 if (std::atomic_load_explicit(&lck->lk.now_serving, 665 std::memory_order_acquire) == my_ticket) { 666 return KMP_LOCK_ACQUIRED_FIRST; 667 } 668 KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck); 669 return KMP_LOCK_ACQUIRED_FIRST; 670 } 671 672 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 673 int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid); 674 ANNOTATE_TICKET_ACQUIRED(lck); 675 return retval; 676 } 677 678 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 679 kmp_int32 gtid) { 680 char const *const func = "omp_set_lock"; 681 682 if (!std::atomic_load_explicit(&lck->lk.initialized, 683 std::memory_order_relaxed)) { 684 KMP_FATAL(LockIsUninitialized, func); 685 } 686 if (lck->lk.self != lck) { 687 KMP_FATAL(LockIsUninitialized, func); 688 } 689 if (__kmp_is_ticket_lock_nestable(lck)) { 690 KMP_FATAL(LockNestableUsedAsSimple, func); 691 } 692 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) { 693 KMP_FATAL(LockIsAlreadyOwned, func); 694 } 695 696 __kmp_acquire_ticket_lock(lck, gtid); 697 698 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 699 std::memory_order_relaxed); 700 return KMP_LOCK_ACQUIRED_FIRST; 701 } 702 703 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 704 kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket, 705 std::memory_order_relaxed); 706 707 if (std::atomic_load_explicit(&lck->lk.now_serving, 708 std::memory_order_relaxed) == my_ticket) { 709 kmp_uint32 next_ticket = my_ticket + 1; 710 if (std::atomic_compare_exchange_strong_explicit( 711 &lck->lk.next_ticket, &my_ticket, next_ticket, 712 std::memory_order_acquire, std::memory_order_acquire)) { 713 return TRUE; 714 } 715 } 716 return FALSE; 717 } 718 719 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 720 kmp_int32 gtid) { 721 char const *const func = "omp_test_lock"; 722 723 if (!std::atomic_load_explicit(&lck->lk.initialized, 724 std::memory_order_relaxed)) { 725 KMP_FATAL(LockIsUninitialized, func); 726 } 727 if (lck->lk.self != lck) { 728 KMP_FATAL(LockIsUninitialized, func); 729 } 730 if (__kmp_is_ticket_lock_nestable(lck)) { 731 KMP_FATAL(LockNestableUsedAsSimple, func); 732 } 733 734 int retval = __kmp_test_ticket_lock(lck, gtid); 735 736 if (retval) { 737 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 738 std::memory_order_relaxed); 739 } 740 return retval; 741 } 742 743 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 744 kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket, 745 std::memory_order_relaxed) - 746 std::atomic_load_explicit(&lck->lk.now_serving, 747 std::memory_order_relaxed); 748 749 ANNOTATE_TICKET_RELEASED(lck); 750 std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U, 751 std::memory_order_release); 752 753 KMP_YIELD(distance > 754 (kmp_uint32)(__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); 755 return KMP_LOCK_RELEASED; 756 } 757 758 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 759 kmp_int32 gtid) { 760 char const *const func = "omp_unset_lock"; 761 762 if (!std::atomic_load_explicit(&lck->lk.initialized, 763 std::memory_order_relaxed)) { 764 KMP_FATAL(LockIsUninitialized, func); 765 } 766 if (lck->lk.self != lck) { 767 KMP_FATAL(LockIsUninitialized, func); 768 } 769 if (__kmp_is_ticket_lock_nestable(lck)) { 770 KMP_FATAL(LockNestableUsedAsSimple, func); 771 } 772 if (__kmp_get_ticket_lock_owner(lck) == -1) { 773 KMP_FATAL(LockUnsettingFree, func); 774 } 775 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) && 776 (__kmp_get_ticket_lock_owner(lck) != gtid)) { 777 KMP_FATAL(LockUnsettingSetByAnother, func); 778 } 779 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 780 return __kmp_release_ticket_lock(lck, gtid); 781 } 782 783 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) { 784 lck->lk.location = NULL; 785 lck->lk.self = lck; 786 std::atomic_store_explicit(&lck->lk.next_ticket, 0U, 787 std::memory_order_relaxed); 788 std::atomic_store_explicit(&lck->lk.now_serving, 0U, 789 std::memory_order_relaxed); 790 std::atomic_store_explicit( 791 &lck->lk.owner_id, 0, 792 std::memory_order_relaxed); // no thread owns the lock. 793 std::atomic_store_explicit( 794 &lck->lk.depth_locked, -1, 795 std::memory_order_relaxed); // -1 => not a nested lock. 796 std::atomic_store_explicit(&lck->lk.initialized, true, 797 std::memory_order_release); 798 } 799 800 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 801 __kmp_init_ticket_lock(lck); 802 } 803 804 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) { 805 std::atomic_store_explicit(&lck->lk.initialized, false, 806 std::memory_order_release); 807 lck->lk.self = NULL; 808 lck->lk.location = NULL; 809 std::atomic_store_explicit(&lck->lk.next_ticket, 0U, 810 std::memory_order_relaxed); 811 std::atomic_store_explicit(&lck->lk.now_serving, 0U, 812 std::memory_order_relaxed); 813 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 814 std::atomic_store_explicit(&lck->lk.depth_locked, -1, 815 std::memory_order_relaxed); 816 } 817 818 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 819 char const *const func = "omp_destroy_lock"; 820 821 if (!std::atomic_load_explicit(&lck->lk.initialized, 822 std::memory_order_relaxed)) { 823 KMP_FATAL(LockIsUninitialized, func); 824 } 825 if (lck->lk.self != lck) { 826 KMP_FATAL(LockIsUninitialized, func); 827 } 828 if (__kmp_is_ticket_lock_nestable(lck)) { 829 KMP_FATAL(LockNestableUsedAsSimple, func); 830 } 831 if (__kmp_get_ticket_lock_owner(lck) != -1) { 832 KMP_FATAL(LockStillOwned, func); 833 } 834 __kmp_destroy_ticket_lock(lck); 835 } 836 837 // nested ticket locks 838 839 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 840 KMP_DEBUG_ASSERT(gtid >= 0); 841 842 if (__kmp_get_ticket_lock_owner(lck) == gtid) { 843 std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, 844 std::memory_order_relaxed); 845 return KMP_LOCK_ACQUIRED_NEXT; 846 } else { 847 __kmp_acquire_ticket_lock_timed_template(lck, gtid); 848 ANNOTATE_TICKET_ACQUIRED(lck); 849 std::atomic_store_explicit(&lck->lk.depth_locked, 1, 850 std::memory_order_relaxed); 851 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 852 std::memory_order_relaxed); 853 return KMP_LOCK_ACQUIRED_FIRST; 854 } 855 } 856 857 static int 
__kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 858 kmp_int32 gtid) { 859 char const *const func = "omp_set_nest_lock"; 860 861 if (!std::atomic_load_explicit(&lck->lk.initialized, 862 std::memory_order_relaxed)) { 863 KMP_FATAL(LockIsUninitialized, func); 864 } 865 if (lck->lk.self != lck) { 866 KMP_FATAL(LockIsUninitialized, func); 867 } 868 if (!__kmp_is_ticket_lock_nestable(lck)) { 869 KMP_FATAL(LockSimpleUsedAsNestable, func); 870 } 871 return __kmp_acquire_nested_ticket_lock(lck, gtid); 872 } 873 874 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 875 int retval; 876 877 KMP_DEBUG_ASSERT(gtid >= 0); 878 879 if (__kmp_get_ticket_lock_owner(lck) == gtid) { 880 retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, 881 std::memory_order_relaxed) + 882 1; 883 } else if (!__kmp_test_ticket_lock(lck, gtid)) { 884 retval = 0; 885 } else { 886 std::atomic_store_explicit(&lck->lk.depth_locked, 1, 887 std::memory_order_relaxed); 888 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 889 std::memory_order_relaxed); 890 retval = 1; 891 } 892 return retval; 893 } 894 895 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 896 kmp_int32 gtid) { 897 char const *const func = "omp_test_nest_lock"; 898 899 if (!std::atomic_load_explicit(&lck->lk.initialized, 900 std::memory_order_relaxed)) { 901 KMP_FATAL(LockIsUninitialized, func); 902 } 903 if (lck->lk.self != lck) { 904 KMP_FATAL(LockIsUninitialized, func); 905 } 906 if (!__kmp_is_ticket_lock_nestable(lck)) { 907 KMP_FATAL(LockSimpleUsedAsNestable, func); 908 } 909 return __kmp_test_nested_ticket_lock(lck, gtid); 910 } 911 912 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 913 KMP_DEBUG_ASSERT(gtid >= 0); 914 915 if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1, 916 std::memory_order_relaxed) - 917 1) == 0) { 918 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 919 __kmp_release_ticket_lock(lck, gtid); 920 return KMP_LOCK_RELEASED; 921 } 922 return KMP_LOCK_STILL_HELD; 923 } 924 925 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 926 kmp_int32 gtid) { 927 char const *const func = "omp_unset_nest_lock"; 928 929 if (!std::atomic_load_explicit(&lck->lk.initialized, 930 std::memory_order_relaxed)) { 931 KMP_FATAL(LockIsUninitialized, func); 932 } 933 if (lck->lk.self != lck) { 934 KMP_FATAL(LockIsUninitialized, func); 935 } 936 if (!__kmp_is_ticket_lock_nestable(lck)) { 937 KMP_FATAL(LockSimpleUsedAsNestable, func); 938 } 939 if (__kmp_get_ticket_lock_owner(lck) == -1) { 940 KMP_FATAL(LockUnsettingFree, func); 941 } 942 if (__kmp_get_ticket_lock_owner(lck) != gtid) { 943 KMP_FATAL(LockUnsettingSetByAnother, func); 944 } 945 return __kmp_release_nested_ticket_lock(lck, gtid); 946 } 947 948 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) { 949 __kmp_init_ticket_lock(lck); 950 std::atomic_store_explicit(&lck->lk.depth_locked, 0, 951 std::memory_order_relaxed); 952 // >= 0 for nestable locks, -1 for simple locks 953 } 954 955 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 956 __kmp_init_nested_ticket_lock(lck); 957 } 958 959 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) { 960 __kmp_destroy_ticket_lock(lck); 961 std::atomic_store_explicit(&lck->lk.depth_locked, 0, 962 std::memory_order_relaxed); 963 } 964 965 static void 966 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 967 char 
const *const func = "omp_destroy_nest_lock"; 968 969 if (!std::atomic_load_explicit(&lck->lk.initialized, 970 std::memory_order_relaxed)) { 971 KMP_FATAL(LockIsUninitialized, func); 972 } 973 if (lck->lk.self != lck) { 974 KMP_FATAL(LockIsUninitialized, func); 975 } 976 if (!__kmp_is_ticket_lock_nestable(lck)) { 977 KMP_FATAL(LockSimpleUsedAsNestable, func); 978 } 979 if (__kmp_get_ticket_lock_owner(lck) != -1) { 980 KMP_FATAL(LockStillOwned, func); 981 } 982 __kmp_destroy_nested_ticket_lock(lck); 983 } 984 985 // access functions to fields which don't exist for all lock kinds. 986 987 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) { 988 return std::atomic_load_explicit(&lck->lk.initialized, 989 std::memory_order_relaxed) && 990 (lck->lk.self == lck); 991 } 992 993 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) { 994 return lck->lk.location; 995 } 996 997 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck, 998 const ident_t *loc) { 999 lck->lk.location = loc; 1000 } 1001 1002 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) { 1003 return lck->lk.flags; 1004 } 1005 1006 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck, 1007 kmp_lock_flags_t flags) { 1008 lck->lk.flags = flags; 1009 } 1010 1011 /* ------------------------------------------------------------------------ */ 1012 /* queuing locks */ 1013 1014 /* First the states 1015 (head,tail) = 0, 0 means lock is unheld, nobody on queue 1016 UINT_MAX or -1, 0 means lock is held, nobody on queue 1017 h, h means lock held or about to transition, 1018 1 element on queue 1019 h, t h <> t, means lock is held or about to 1020 transition, >1 elements on queue 1021 1022 Now the transitions 1023 Acquire(0,0) = -1 ,0 1024 Release(0,0) = Error 1025 Acquire(-1,0) = h ,h h > 0 1026 Release(-1,0) = 0 ,0 1027 Acquire(h,h) = h ,t h > 0, t > 0, h <> t 1028 Release(h,h) = -1 ,0 h > 0 1029 Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' 1030 Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t 1031 1032 And pictorially 1033 1034 +-----+ 1035 | 0, 0|------- release -------> Error 1036 +-----+ 1037 | ^ 1038 acquire| |release 1039 | | 1040 | | 1041 v | 1042 +-----+ 1043 |-1, 0| 1044 +-----+ 1045 | ^ 1046 acquire| |release 1047 | | 1048 | | 1049 v | 1050 +-----+ 1051 | h, h| 1052 +-----+ 1053 | ^ 1054 acquire| |release 1055 | | 1056 | | 1057 v | 1058 +-----+ 1059 | h, t|----- acquire, release loopback ---+ 1060 +-----+ | 1061 ^ | 1062 | | 1063 +------------------------------------+ 1064 */ 1065 1066 #ifdef DEBUG_QUEUING_LOCKS 1067 1068 /* Stuff for circular trace buffer */ 1069 #define TRACE_BUF_ELE 1024 1070 static char traces[TRACE_BUF_ELE][128] = {0}; 1071 static int tc = 0; 1072 #define TRACE_LOCK(X, Y) \ 1073 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y); 1074 #define TRACE_LOCK_T(X, Y, Z) \ 1075 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z); 1076 #define TRACE_LOCK_HT(X, Y, Z, Q) \ 1077 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \ 1078 Z, Q); 1079 1080 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid, 1081 kmp_queuing_lock_t *lck, kmp_int32 head_id, 1082 kmp_int32 tail_id) { 1083 kmp_int32 t, i; 1084 1085 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! 
\n"); 1086 1087 i = tc % TRACE_BUF_ELE; 1088 __kmp_printf_no_lock("%s\n", traces[i]); 1089 i = (i + 1) % TRACE_BUF_ELE; 1090 while (i != (tc % TRACE_BUF_ELE)) { 1091 __kmp_printf_no_lock("%s", traces[i]); 1092 i = (i + 1) % TRACE_BUF_ELE; 1093 } 1094 __kmp_printf_no_lock("\n"); 1095 1096 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, " 1097 "next_wait:%d, head_id:%d, tail_id:%d\n", 1098 gtid + 1, this_thr->th.th_spin_here, 1099 this_thr->th.th_next_waiting, head_id, tail_id); 1100 1101 __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id); 1102 1103 if (lck->lk.head_id >= 1) { 1104 t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting; 1105 while (t > 0) { 1106 __kmp_printf_no_lock("-> %d ", t); 1107 t = __kmp_threads[t - 1]->th.th_next_waiting; 1108 } 1109 } 1110 __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id); 1111 __kmp_printf_no_lock("\n\n"); 1112 } 1113 1114 #endif /* DEBUG_QUEUING_LOCKS */ 1115 1116 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) { 1117 return TCR_4(lck->lk.owner_id) - 1; 1118 } 1119 1120 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) { 1121 return lck->lk.depth_locked != -1; 1122 } 1123 1124 /* Acquire a lock using a the queuing lock implementation */ 1125 template <bool takeTime> 1126 /* [TLW] The unused template above is left behind because of what BEB believes 1127 is a potential compiler problem with __forceinline. */ 1128 __forceinline static int 1129 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, 1130 kmp_int32 gtid) { 1131 kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid); 1132 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1133 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; 1134 volatile kmp_uint32 *spin_here_p; 1135 kmp_int32 need_mf = 1; 1136 1137 #if OMPT_SUPPORT 1138 omp_state_t prev_state = omp_state_undefined; 1139 #endif 1140 1141 KA_TRACE(1000, 1142 ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); 1143 1144 KMP_FSYNC_PREPARE(lck); 1145 KMP_DEBUG_ASSERT(this_thr != NULL); 1146 spin_here_p = &this_thr->th.th_spin_here; 1147 1148 #ifdef DEBUG_QUEUING_LOCKS 1149 TRACE_LOCK(gtid + 1, "acq ent"); 1150 if (*spin_here_p) 1151 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1152 if (this_thr->th.th_next_waiting != 0) 1153 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1154 #endif 1155 KMP_DEBUG_ASSERT(!*spin_here_p); 1156 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1157 1158 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to 1159 head_id_p that may follow, not just in execution order, but also in 1160 visibility order. This way, when a releasing thread observes the changes to 1161 the queue by this thread, it can rightly assume that spin_here_p has 1162 already been set to TRUE, so that when it sets spin_here_p to FALSE, it is 1163 not premature. If the releasing thread sets spin_here_p to FALSE before 1164 this thread sets it to TRUE, this thread will hang. 
*/ 1165 *spin_here_p = TRUE; /* before enqueuing to prevent race */ 1166 1167 while (1) { 1168 kmp_int32 enqueued; 1169 kmp_int32 head; 1170 kmp_int32 tail; 1171 1172 head = *head_id_p; 1173 1174 switch (head) { 1175 1176 case -1: { 1177 #ifdef DEBUG_QUEUING_LOCKS 1178 tail = *tail_id_p; 1179 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1180 #endif 1181 tail = 0; /* to make sure next link asynchronously read is not set 1182 accidentally; this assignment prevents us from entering the 1183 if ( t > 0 ) condition in the enqueued case below, which is not 1184 necessary for this state transition */ 1185 1186 need_mf = 0; 1187 /* try (-1,0)->(tid,tid) */ 1188 enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p, 1189 KMP_PACK_64(-1, 0), 1190 KMP_PACK_64(gtid + 1, gtid + 1)); 1191 #ifdef DEBUG_QUEUING_LOCKS 1192 if (enqueued) 1193 TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)"); 1194 #endif 1195 } break; 1196 1197 default: { 1198 tail = *tail_id_p; 1199 KMP_DEBUG_ASSERT(tail != gtid + 1); 1200 1201 #ifdef DEBUG_QUEUING_LOCKS 1202 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1203 #endif 1204 1205 if (tail == 0) { 1206 enqueued = FALSE; 1207 } else { 1208 need_mf = 0; 1209 /* try (h,t) or (h,h)->(h,tid) */ 1210 enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1); 1211 1212 #ifdef DEBUG_QUEUING_LOCKS 1213 if (enqueued) 1214 TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)"); 1215 #endif 1216 } 1217 } break; 1218 1219 case 0: /* empty queue */ 1220 { 1221 kmp_int32 grabbed_lock; 1222 1223 #ifdef DEBUG_QUEUING_LOCKS 1224 tail = *tail_id_p; 1225 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1226 #endif 1227 /* try (0,0)->(-1,0) */ 1228 1229 /* only legal transition out of head = 0 is head = -1 with no change to 1230 * tail */ 1231 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1); 1232 1233 if (grabbed_lock) { 1234 1235 *spin_here_p = FALSE; 1236 1237 KA_TRACE( 1238 1000, 1239 ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", 1240 lck, gtid)); 1241 #ifdef DEBUG_QUEUING_LOCKS 1242 TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0); 1243 #endif 1244 1245 #if OMPT_SUPPORT 1246 if (ompt_enabled.enabled && prev_state != omp_state_undefined) { 1247 /* change the state before clearing wait_id */ 1248 this_thr->th.ompt_thread_info.state = prev_state; 1249 this_thr->th.ompt_thread_info.wait_id = 0; 1250 } 1251 #endif 1252 1253 KMP_FSYNC_ACQUIRED(lck); 1254 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ 1255 } 1256 enqueued = FALSE; 1257 } break; 1258 } 1259 1260 #if OMPT_SUPPORT 1261 if (ompt_enabled.enabled && prev_state == omp_state_undefined) { 1262 /* this thread will spin; set wait_id before entering wait state */ 1263 prev_state = this_thr->th.ompt_thread_info.state; 1264 this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; 1265 this_thr->th.ompt_thread_info.state = omp_state_wait_lock; 1266 } 1267 #endif 1268 1269 if (enqueued) { 1270 if (tail > 0) { 1271 kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1); 1272 KMP_ASSERT(tail_thr != NULL); 1273 tail_thr->th.th_next_waiting = gtid + 1; 1274 /* corresponding wait for this write in release code */ 1275 } 1276 KA_TRACE(1000, 1277 ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", 1278 lck, gtid)); 1279 1280 /* ToDo: May want to consider using __kmp_wait_sleep or something that 1281 sleeps for throughput only here. 
*/ 1282 KMP_MB(); 1283 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); 1284 1285 #ifdef DEBUG_QUEUING_LOCKS 1286 TRACE_LOCK(gtid + 1, "acq spin"); 1287 1288 if (this_thr->th.th_next_waiting != 0) 1289 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1290 #endif 1291 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1292 KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after " 1293 "waiting on queue\n", 1294 lck, gtid)); 1295 1296 #ifdef DEBUG_QUEUING_LOCKS 1297 TRACE_LOCK(gtid + 1, "acq exit 2"); 1298 #endif 1299 1300 #if OMPT_SUPPORT 1301 /* change the state before clearing wait_id */ 1302 this_thr->th.ompt_thread_info.state = prev_state; 1303 this_thr->th.ompt_thread_info.wait_id = 0; 1304 #endif 1305 1306 /* got lock, we were dequeued by the thread that released lock */ 1307 return KMP_LOCK_ACQUIRED_FIRST; 1308 } 1309 1310 /* Yield if number of threads > number of logical processors */ 1311 /* ToDo: Not sure why this should only be in oversubscription case, 1312 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ 1313 KMP_YIELD(TCR_4(__kmp_nth) > 1314 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 1315 #ifdef DEBUG_QUEUING_LOCKS 1316 TRACE_LOCK(gtid + 1, "acq retry"); 1317 #endif 1318 } 1319 KMP_ASSERT2(0, "should not get here"); 1320 return KMP_LOCK_ACQUIRED_FIRST; 1321 } 1322 1323 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1324 KMP_DEBUG_ASSERT(gtid >= 0); 1325 1326 int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1327 ANNOTATE_QUEUING_ACQUIRED(lck); 1328 return retval; 1329 } 1330 1331 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1332 kmp_int32 gtid) { 1333 char const *const func = "omp_set_lock"; 1334 if (lck->lk.initialized != lck) { 1335 KMP_FATAL(LockIsUninitialized, func); 1336 } 1337 if (__kmp_is_queuing_lock_nestable(lck)) { 1338 KMP_FATAL(LockNestableUsedAsSimple, func); 1339 } 1340 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1341 KMP_FATAL(LockIsAlreadyOwned, func); 1342 } 1343 1344 __kmp_acquire_queuing_lock(lck, gtid); 1345 1346 lck->lk.owner_id = gtid + 1; 1347 return KMP_LOCK_ACQUIRED_FIRST; 1348 } 1349 1350 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1351 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1352 kmp_int32 head; 1353 #ifdef KMP_DEBUG 1354 kmp_info_t *this_thr; 1355 #endif 1356 1357 KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid)); 1358 KMP_DEBUG_ASSERT(gtid >= 0); 1359 #ifdef KMP_DEBUG 1360 this_thr = __kmp_thread_from_gtid(gtid); 1361 KMP_DEBUG_ASSERT(this_thr != NULL); 1362 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 1363 #endif 1364 1365 head = *head_id_p; 1366 1367 if (head == 0) { /* nobody on queue, nobody holding */ 1368 /* try (0,0)->(-1,0) */ 1369 if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) { 1370 KA_TRACE(1000, 1371 ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid)); 1372 KMP_FSYNC_ACQUIRED(lck); 1373 ANNOTATE_QUEUING_ACQUIRED(lck); 1374 return TRUE; 1375 } 1376 } 1377 1378 KA_TRACE(1000, 1379 ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid)); 1380 return FALSE; 1381 } 1382 1383 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1384 kmp_int32 gtid) { 1385 char const *const func = "omp_test_lock"; 1386 if (lck->lk.initialized != lck) { 1387 KMP_FATAL(LockIsUninitialized, func); 1388 } 1389 if (__kmp_is_queuing_lock_nestable(lck)) { 1390 KMP_FATAL(LockNestableUsedAsSimple, func); 1391 } 1392 1393 int 
retval = __kmp_test_queuing_lock(lck, gtid); 1394 1395 if (retval) { 1396 lck->lk.owner_id = gtid + 1; 1397 } 1398 return retval; 1399 } 1400 1401 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1402 kmp_info_t *this_thr; 1403 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1404 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; 1405 1406 KA_TRACE(1000, 1407 ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); 1408 KMP_DEBUG_ASSERT(gtid >= 0); 1409 this_thr = __kmp_thread_from_gtid(gtid); 1410 KMP_DEBUG_ASSERT(this_thr != NULL); 1411 #ifdef DEBUG_QUEUING_LOCKS 1412 TRACE_LOCK(gtid + 1, "rel ent"); 1413 1414 if (this_thr->th.th_spin_here) 1415 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1416 if (this_thr->th.th_next_waiting != 0) 1417 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1418 #endif 1419 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 1420 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1421 1422 KMP_FSYNC_RELEASING(lck); 1423 ANNOTATE_QUEUING_RELEASED(lck); 1424 1425 while (1) { 1426 kmp_int32 dequeued; 1427 kmp_int32 head; 1428 kmp_int32 tail; 1429 1430 head = *head_id_p; 1431 1432 #ifdef DEBUG_QUEUING_LOCKS 1433 tail = *tail_id_p; 1434 TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail); 1435 if (head == 0) 1436 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1437 #endif 1438 KMP_DEBUG_ASSERT(head != 1439 0); /* holding the lock, head must be -1 or queue head */ 1440 1441 if (head == -1) { /* nobody on queue */ 1442 /* try (-1,0)->(0,0) */ 1443 if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) { 1444 KA_TRACE( 1445 1000, 1446 ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1447 lck, gtid)); 1448 #ifdef DEBUG_QUEUING_LOCKS 1449 TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0); 1450 #endif 1451 1452 #if OMPT_SUPPORT 1453 /* nothing to do - no other thread is trying to shift blame */ 1454 #endif 1455 return KMP_LOCK_RELEASED; 1456 } 1457 dequeued = FALSE; 1458 } else { 1459 KMP_MB(); 1460 tail = *tail_id_p; 1461 if (head == tail) { /* only one thread on the queue */ 1462 #ifdef DEBUG_QUEUING_LOCKS 1463 if (head <= 0) 1464 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1465 #endif 1466 KMP_DEBUG_ASSERT(head > 0); 1467 1468 /* try (h,h)->(-1,0) */ 1469 dequeued = KMP_COMPARE_AND_STORE_REL64( 1470 RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head), 1471 KMP_PACK_64(-1, 0)); 1472 #ifdef DEBUG_QUEUING_LOCKS 1473 TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); 1474 #endif 1475 1476 } else { 1477 volatile kmp_int32 *waiting_id_p; 1478 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1479 KMP_DEBUG_ASSERT(head_thr != NULL); 1480 waiting_id_p = &head_thr->th.th_next_waiting; 1481 1482 /* Does this require synchronous reads? 
*/ 1483 #ifdef DEBUG_QUEUING_LOCKS 1484 if (head <= 0 || tail <= 0) 1485 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1486 #endif 1487 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1488 1489 /* try (h,t)->(h',t) or (t,t) */ 1490 KMP_MB(); 1491 /* make sure enqueuing thread has time to update next waiting thread 1492 * field */ 1493 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0, 1494 KMP_NEQ, NULL); 1495 #ifdef DEBUG_QUEUING_LOCKS 1496 TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)"); 1497 #endif 1498 dequeued = TRUE; 1499 } 1500 } 1501 1502 if (dequeued) { 1503 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1504 KMP_DEBUG_ASSERT(head_thr != NULL); 1505 1506 /* Does this require synchronous reads? */ 1507 #ifdef DEBUG_QUEUING_LOCKS 1508 if (head <= 0 || tail <= 0) 1509 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1510 #endif 1511 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1512 1513 /* For clean code only. Thread not released until next statement prevents 1514 race with acquire code. */ 1515 head_thr->th.th_next_waiting = 0; 1516 #ifdef DEBUG_QUEUING_LOCKS 1517 TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head); 1518 #endif 1519 1520 KMP_MB(); 1521 /* reset spin value */ 1522 head_thr->th.th_spin_here = FALSE; 1523 1524 KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after " 1525 "dequeuing\n", 1526 lck, gtid)); 1527 #ifdef DEBUG_QUEUING_LOCKS 1528 TRACE_LOCK(gtid + 1, "rel exit 2"); 1529 #endif 1530 return KMP_LOCK_RELEASED; 1531 } 1532 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring 1533 threads */ 1534 1535 #ifdef DEBUG_QUEUING_LOCKS 1536 TRACE_LOCK(gtid + 1, "rel retry"); 1537 #endif 1538 1539 } /* while */ 1540 KMP_ASSERT2(0, "should not get here"); 1541 return KMP_LOCK_RELEASED; 1542 } 1543 1544 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1545 kmp_int32 gtid) { 1546 char const *const func = "omp_unset_lock"; 1547 KMP_MB(); /* in case another processor initialized lock */ 1548 if (lck->lk.initialized != lck) { 1549 KMP_FATAL(LockIsUninitialized, func); 1550 } 1551 if (__kmp_is_queuing_lock_nestable(lck)) { 1552 KMP_FATAL(LockNestableUsedAsSimple, func); 1553 } 1554 if (__kmp_get_queuing_lock_owner(lck) == -1) { 1555 KMP_FATAL(LockUnsettingFree, func); 1556 } 1557 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1558 KMP_FATAL(LockUnsettingSetByAnother, func); 1559 } 1560 lck->lk.owner_id = 0; 1561 return __kmp_release_queuing_lock(lck, gtid); 1562 } 1563 1564 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) { 1565 lck->lk.location = NULL; 1566 lck->lk.head_id = 0; 1567 lck->lk.tail_id = 0; 1568 lck->lk.next_ticket = 0; 1569 lck->lk.now_serving = 0; 1570 lck->lk.owner_id = 0; // no thread owns the lock. 1571 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
1572 lck->lk.initialized = lck; 1573 1574 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1575 } 1576 1577 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1578 __kmp_init_queuing_lock(lck); 1579 } 1580 1581 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) { 1582 lck->lk.initialized = NULL; 1583 lck->lk.location = NULL; 1584 lck->lk.head_id = 0; 1585 lck->lk.tail_id = 0; 1586 lck->lk.next_ticket = 0; 1587 lck->lk.now_serving = 0; 1588 lck->lk.owner_id = 0; 1589 lck->lk.depth_locked = -1; 1590 } 1591 1592 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1593 char const *const func = "omp_destroy_lock"; 1594 if (lck->lk.initialized != lck) { 1595 KMP_FATAL(LockIsUninitialized, func); 1596 } 1597 if (__kmp_is_queuing_lock_nestable(lck)) { 1598 KMP_FATAL(LockNestableUsedAsSimple, func); 1599 } 1600 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1601 KMP_FATAL(LockStillOwned, func); 1602 } 1603 __kmp_destroy_queuing_lock(lck); 1604 } 1605 1606 // nested queuing locks 1607 1608 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1609 KMP_DEBUG_ASSERT(gtid >= 0); 1610 1611 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1612 lck->lk.depth_locked += 1; 1613 return KMP_LOCK_ACQUIRED_NEXT; 1614 } else { 1615 __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1616 ANNOTATE_QUEUING_ACQUIRED(lck); 1617 KMP_MB(); 1618 lck->lk.depth_locked = 1; 1619 KMP_MB(); 1620 lck->lk.owner_id = gtid + 1; 1621 return KMP_LOCK_ACQUIRED_FIRST; 1622 } 1623 } 1624 1625 static int 1626 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1627 kmp_int32 gtid) { 1628 char const *const func = "omp_set_nest_lock"; 1629 if (lck->lk.initialized != lck) { 1630 KMP_FATAL(LockIsUninitialized, func); 1631 } 1632 if (!__kmp_is_queuing_lock_nestable(lck)) { 1633 KMP_FATAL(LockSimpleUsedAsNestable, func); 1634 } 1635 return __kmp_acquire_nested_queuing_lock(lck, gtid); 1636 } 1637 1638 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1639 int retval; 1640 1641 KMP_DEBUG_ASSERT(gtid >= 0); 1642 1643 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1644 retval = ++lck->lk.depth_locked; 1645 } else if (!__kmp_test_queuing_lock(lck, gtid)) { 1646 retval = 0; 1647 } else { 1648 KMP_MB(); 1649 retval = lck->lk.depth_locked = 1; 1650 KMP_MB(); 1651 lck->lk.owner_id = gtid + 1; 1652 } 1653 return retval; 1654 } 1655 1656 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1657 kmp_int32 gtid) { 1658 char const *const func = "omp_test_nest_lock"; 1659 if (lck->lk.initialized != lck) { 1660 KMP_FATAL(LockIsUninitialized, func); 1661 } 1662 if (!__kmp_is_queuing_lock_nestable(lck)) { 1663 KMP_FATAL(LockSimpleUsedAsNestable, func); 1664 } 1665 return __kmp_test_nested_queuing_lock(lck, gtid); 1666 } 1667 1668 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1669 KMP_DEBUG_ASSERT(gtid >= 0); 1670 1671 KMP_MB(); 1672 if (--(lck->lk.depth_locked) == 0) { 1673 KMP_MB(); 1674 lck->lk.owner_id = 0; 1675 __kmp_release_queuing_lock(lck, gtid); 1676 return KMP_LOCK_RELEASED; 1677 } 1678 return KMP_LOCK_STILL_HELD; 1679 } 1680 1681 static int 1682 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1683 kmp_int32 gtid) { 1684 char const *const func = "omp_unset_nest_lock"; 1685 KMP_MB(); /* in case another processor initialized lock */ 1686 if (lck->lk.initialized != lck) { 1687 
KMP_FATAL(LockIsUninitialized, func); 1688 } 1689 if (!__kmp_is_queuing_lock_nestable(lck)) { 1690 KMP_FATAL(LockSimpleUsedAsNestable, func); 1691 } 1692 if (__kmp_get_queuing_lock_owner(lck) == -1) { 1693 KMP_FATAL(LockUnsettingFree, func); 1694 } 1695 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1696 KMP_FATAL(LockUnsettingSetByAnother, func); 1697 } 1698 return __kmp_release_nested_queuing_lock(lck, gtid); 1699 } 1700 1701 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1702 __kmp_init_queuing_lock(lck); 1703 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1704 } 1705 1706 static void 1707 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1708 __kmp_init_nested_queuing_lock(lck); 1709 } 1710 1711 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1712 __kmp_destroy_queuing_lock(lck); 1713 lck->lk.depth_locked = 0; 1714 } 1715 1716 static void 1717 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1718 char const *const func = "omp_destroy_nest_lock"; 1719 if (lck->lk.initialized != lck) { 1720 KMP_FATAL(LockIsUninitialized, func); 1721 } 1722 if (!__kmp_is_queuing_lock_nestable(lck)) { 1723 KMP_FATAL(LockSimpleUsedAsNestable, func); 1724 } 1725 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1726 KMP_FATAL(LockStillOwned, func); 1727 } 1728 __kmp_destroy_nested_queuing_lock(lck); 1729 } 1730 1731 // access functions to fields which don't exist for all lock kinds. 1732 1733 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) { 1734 return lck == lck->lk.initialized; 1735 } 1736 1737 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) { 1738 return lck->lk.location; 1739 } 1740 1741 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck, 1742 const ident_t *loc) { 1743 lck->lk.location = loc; 1744 } 1745 1746 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) { 1747 return lck->lk.flags; 1748 } 1749 1750 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, 1751 kmp_lock_flags_t flags) { 1752 lck->lk.flags = flags; 1753 } 1754 1755 #if KMP_USE_ADAPTIVE_LOCKS 1756 1757 /* RTM Adaptive locks */ 1758 1759 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1760 1761 #include <immintrin.h> 1762 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1763 1764 #else 1765 1766 // Values from the status register after failed speculation. 1767 #define _XBEGIN_STARTED (~0u) 1768 #define _XABORT_EXPLICIT (1 << 0) 1769 #define _XABORT_RETRY (1 << 1) 1770 #define _XABORT_CONFLICT (1 << 2) 1771 #define _XABORT_CAPACITY (1 << 3) 1772 #define _XABORT_DEBUG (1 << 4) 1773 #define _XABORT_NESTED (1 << 5) 1774 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1775 1776 // Aborts for which it's worth trying again immediately 1777 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1778 1779 #define STRINGIZE_INTERNAL(arg) #arg 1780 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1781 1782 // Access to RTM instructions 1783 /*A version of XBegin which returns -1 on speculation, and the value of EAX on 1784 an abort. This is the same definition as the compiler intrinsic that will be 1785 supported at some point. 
*/ 1786 static __inline int _xbegin() { 1787 int res = -1; 1788 1789 #if KMP_OS_WINDOWS 1790 #if KMP_ARCH_X86_64 1791 _asm { 1792 _emit 0xC7 1793 _emit 0xF8 1794 _emit 2 1795 _emit 0 1796 _emit 0 1797 _emit 0 1798 jmp L2 1799 mov res, eax 1800 L2: 1801 } 1802 #else /* IA32 */ 1803 _asm { 1804 _emit 0xC7 1805 _emit 0xF8 1806 _emit 2 1807 _emit 0 1808 _emit 0 1809 _emit 0 1810 jmp L2 1811 mov res, eax 1812 L2: 1813 } 1814 #endif // KMP_ARCH_X86_64 1815 #else 1816 /* Note that %eax must be noted as killed (clobbered), because the XSR is 1817 returned in %eax(%rax) on abort. Other register values are restored, so 1818 don't need to be killed. 1819 1820 We must also mark 'res' as an input and an output, since otherwise 1821 'res=-1' may be dropped as being dead, whereas we do need the assignment on 1822 the successful (i.e., non-abort) path. */ 1823 __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" 1824 " .long 1f-1b-6\n" 1825 " jmp 2f\n" 1826 "1: movl %%eax,%0\n" 1827 "2:" 1828 : "+r"(res)::"memory", "%eax"); 1829 #endif // KMP_OS_WINDOWS 1830 return res; 1831 } 1832 1833 /* Transaction end */ 1834 static __inline void _xend() { 1835 #if KMP_OS_WINDOWS 1836 __asm { 1837 _emit 0x0f 1838 _emit 0x01 1839 _emit 0xd5 1840 } 1841 #else 1842 __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); 1843 #endif 1844 } 1845 1846 /* This is a macro, the argument must be a single byte constant which can be 1847 evaluated by the inline assembler, since it is emitted as a byte into the 1848 assembly code. */ 1849 // clang-format off 1850 #if KMP_OS_WINDOWS 1851 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG 1852 #else 1853 #define _xabort(ARG) \ 1854 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); 1855 #endif 1856 // clang-format on 1857 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1858 1859 // Statistics is collected for testing purpose 1860 #if KMP_DEBUG_ADAPTIVE_LOCKS 1861 1862 // We accumulate speculative lock statistics when the lock is destroyed. We 1863 // keep locks that haven't been destroyed in the liveLocks list so that we can 1864 // grab their statistics too. 1865 static kmp_adaptive_lock_statistics_t destroyedStats; 1866 1867 // To hold the list of live locks. 1868 static kmp_adaptive_lock_info_t liveLocks; 1869 1870 // A lock so we can safely update the list of locks. 1871 static kmp_bootstrap_lock_t chain_lock; 1872 1873 // Initialize the list of stats. 
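// (Illustrative note, not part of the runtime: liveLocks below acts as the
// sentinel node of a circular doubly-linked list, so an "empty" list is one
// whose sentinel points at itself and insertion/removal never need NULL
// checks. A minimal sketch of that invariant, assuming a generic node type
// with next/prev pointers:
//
//   struct node { node *next, *prev; };
//   node sentinel;                              // the analogue of liveLocks
//   sentinel.next = sentinel.prev = &sentinel;  // empty == self-linked
//   bool empty = (sentinel.next == &sentinel);
//
// This is exactly the pattern __kmp_init_speculative_stats,
// __kmp_remember_lock and __kmp_forget_lock below rely on.)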
1874 void __kmp_init_speculative_stats() { 1875 kmp_adaptive_lock_info_t *lck = &liveLocks; 1876 1877 memset((void *)&(lck->stats), 0, sizeof(lck->stats)); 1878 lck->stats.next = lck; 1879 lck->stats.prev = lck; 1880 1881 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1882 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1883 1884 __kmp_init_bootstrap_lock(&chain_lock); 1885 } 1886 1887 // Insert the lock into the circular list 1888 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { 1889 __kmp_acquire_bootstrap_lock(&chain_lock); 1890 1891 lck->stats.next = liveLocks.stats.next; 1892 lck->stats.prev = &liveLocks; 1893 1894 liveLocks.stats.next = lck; 1895 lck->stats.next->stats.prev = lck; 1896 1897 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1898 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1899 1900 __kmp_release_bootstrap_lock(&chain_lock); 1901 } 1902 1903 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { 1904 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1905 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1906 1907 kmp_adaptive_lock_info_t *n = lck->stats.next; 1908 kmp_adaptive_lock_info_t *p = lck->stats.prev; 1909 1910 n->stats.prev = p; 1911 p->stats.next = n; 1912 } 1913 1914 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1915 memset((void *)&lck->stats, 0, sizeof(lck->stats)); 1916 __kmp_remember_lock(lck); 1917 } 1918 1919 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, 1920 kmp_adaptive_lock_info_t *lck) { 1921 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 1922 1923 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 1924 t->successfulSpeculations += s->successfulSpeculations; 1925 t->hardFailedSpeculations += s->hardFailedSpeculations; 1926 t->softFailedSpeculations += s->softFailedSpeculations; 1927 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 1928 t->lemmingYields += s->lemmingYields; 1929 } 1930 1931 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1932 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 1933 1934 __kmp_acquire_bootstrap_lock(&chain_lock); 1935 1936 __kmp_add_stats(&destroyedStats, lck); 1937 __kmp_forget_lock(lck); 1938 1939 __kmp_release_bootstrap_lock(&chain_lock); 1940 } 1941 1942 static float percent(kmp_uint32 count, kmp_uint32 total) { 1943 return (total == 0) ? 0.0 : (100.0 * count) / total; 1944 } 1945 1946 static FILE *__kmp_open_stats_file() { 1947 if (strcmp(__kmp_speculative_statsfile, "-") == 0) 1948 return stdout; 1949 1950 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; 1951 char buffer[buffLen]; 1952 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, 1953 (kmp_int32)getpid()); 1954 FILE *result = fopen(&buffer[0], "w"); 1955 1956 // Maybe we should issue a warning here... 1957 return result ? 
result : stdout; 1958 } 1959 1960 void __kmp_print_speculative_stats() { 1961 if (__kmp_user_lock_kind != lk_adaptive) 1962 return; 1963 1964 FILE *statsFile = __kmp_open_stats_file(); 1965 1966 kmp_adaptive_lock_statistics_t total = destroyedStats; 1967 kmp_adaptive_lock_info_t *lck; 1968 1969 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 1970 __kmp_add_stats(&total, lck); 1971 } 1972 kmp_adaptive_lock_statistics_t *t = &total; 1973 kmp_uint32 totalSections = 1974 t->nonSpeculativeAcquires + t->successfulSpeculations; 1975 kmp_uint32 totalSpeculations = t->successfulSpeculations + 1976 t->hardFailedSpeculations + 1977 t->softFailedSpeculations; 1978 1979 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); 1980 fprintf(statsFile, " Lock parameters: \n" 1981 " max_soft_retries : %10d\n" 1982 " max_badness : %10d\n", 1983 __kmp_adaptive_backoff_params.max_soft_retries, 1984 __kmp_adaptive_backoff_params.max_badness); 1985 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", 1986 t->nonSpeculativeAcquireAttempts); 1987 fprintf(statsFile, " Total critical sections : %10d\n", 1988 totalSections); 1989 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", 1990 t->successfulSpeculations, 1991 percent(t->successfulSpeculations, totalSections)); 1992 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 1993 t->nonSpeculativeAcquires, 1994 percent(t->nonSpeculativeAcquires, totalSections)); 1995 fprintf(statsFile, " Lemming yields : %10d\n\n", 1996 t->lemmingYields); 1997 1998 fprintf(statsFile, " Speculative acquire attempts : %10d\n", 1999 totalSpeculations); 2000 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", 2001 t->successfulSpeculations, 2002 percent(t->successfulSpeculations, totalSpeculations)); 2003 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", 2004 t->softFailedSpeculations, 2005 percent(t->softFailedSpeculations, totalSpeculations)); 2006 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", 2007 t->hardFailedSpeculations, 2008 percent(t->hardFailedSpeculations, totalSpeculations)); 2009 2010 if (statsFile != stdout) 2011 fclose(statsFile); 2012 } 2013 2014 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) 2015 #else 2016 #define KMP_INC_STAT(lck, stat) 2017 2018 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2019 2020 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { 2021 // It is enough to check that the head_id is zero. 2022 // We don't also need to check the tail. 2023 bool res = lck->lk.head_id == 0; 2024 2025 // We need a fence here, since we must ensure that no memory operations 2026 // from later in this thread float above that read. 2027 #if KMP_COMPILER_ICC 2028 _mm_mfence(); 2029 #else 2030 __sync_synchronize(); 2031 #endif 2032 2033 return res; 2034 } 2035 2036 // Functions for manipulating the badness 2037 static __inline void 2038 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { 2039 // Reset the badness to zero so we eagerly try to speculate again 2040 lck->lk.adaptive.badness = 0; 2041 KMP_INC_STAT(lck, successfulSpeculations); 2042 } 2043 2044 // Create a bit mask with one more set bit. 2045 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { 2046 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; 2047 if (newBadness > lck->lk.adaptive.max_badness) { 2048 return; 2049 } else { 2050 lck->lk.adaptive.badness = newBadness; 2051 } 2052 } 2053 2054 // Check whether speculation should be attempted. 
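// (Illustrative worked example, not part of the runtime: badness is always a
// mask of the form 0, 1, 3, 7, ... (see __kmp_step_badness above), so the
// test "(attempts & badness) == 0" in the function below passes on roughly
// one in every (badness + 1) acquire attempts. For instance, with
// badness == 3:
//
//   attempts     : 8  9  10  11  12
//   attempts & 3 : 0  1   2   3   0   -> speculate on attempts 8 and 12 only
//
// A successful speculation resets badness to 0 (see
// __kmp_update_badness_after_success above), after which speculation is
// attempted on every acquire again.)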
2055 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, 2056 kmp_int32 gtid) { 2057 kmp_uint32 badness = lck->lk.adaptive.badness; 2058 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; 2059 int res = (attempts & badness) == 0; 2060 return res; 2061 } 2062 2063 // Attempt to acquire only the speculative lock. 2064 // Does not back off to the non-speculative lock. 2065 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, 2066 kmp_int32 gtid) { 2067 int retries = lck->lk.adaptive.max_soft_retries; 2068 2069 // We don't explicitly count the start of speculation, rather we record the 2070 // results (success, hard fail, soft fail). The sum of all of those is the 2071 // total number of times we started speculation since all speculations must 2072 // end one of those ways. 2073 do { 2074 kmp_uint32 status = _xbegin(); 2075 // Switch this in to disable actual speculation but exercise at least some 2076 // of the rest of the code. Useful for debugging... 2077 // kmp_uint32 status = _XABORT_NESTED; 2078 2079 if (status == _XBEGIN_STARTED) { 2080 /* We have successfully started speculation. Check that no-one acquired 2081 the lock for real between when we last looked and now. This also gets 2082 the lock cache line into our read-set, which we need so that we'll 2083 abort if anyone later claims it for real. */ 2084 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2085 // Lock is now visibly acquired, so someone beat us to it. Abort the 2086 // transaction so we'll restart from _xbegin with the failure status. 2087 _xabort(0x01); 2088 KMP_ASSERT2(0, "should not get here"); 2089 } 2090 return 1; // Lock has been acquired (speculatively) 2091 } else { 2092 // We have aborted, update the statistics 2093 if (status & SOFT_ABORT_MASK) { 2094 KMP_INC_STAT(lck, softFailedSpeculations); 2095 // and loop round to retry. 2096 } else { 2097 KMP_INC_STAT(lck, hardFailedSpeculations); 2098 // Give up if we had a hard failure. 2099 break; 2100 } 2101 } 2102 } while (retries--); // Loop while we have retries, and didn't fail hard. 2103 2104 // Either we had a hard failure or we didn't succeed softly after 2105 // the full set of attempts, so back off the badness. 2106 __kmp_step_badness(lck); 2107 return 0; 2108 } 2109 2110 // Attempt to acquire the speculative lock, or back off to the non-speculative 2111 // one if the speculative lock cannot be acquired. 2112 // We can succeed speculatively, non-speculatively, or fail. 2113 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { 2114 // First try to acquire the lock speculatively 2115 if (__kmp_should_speculate(lck, gtid) && 2116 __kmp_test_adaptive_lock_only(lck, gtid)) 2117 return 1; 2118 2119 // Speculative acquisition failed, so try to acquire it non-speculatively. 2120 // Count the non-speculative acquire attempt 2121 lck->lk.adaptive.acquire_attempts++; 2122 2123 // Use base, non-speculative lock. 2124 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { 2125 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2126 return 1; // Lock is acquired (non-speculatively) 2127 } else { 2128 return 0; // Failed to acquire the lock, it's already visibly locked. 
  }
}

static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }

  int retval = __kmp_test_adaptive_lock(lck, gtid);

  if (retval) {
    lck->lk.qlk.owner_id = gtid + 1;
  }
  return retval;
}

// Block until we can acquire a speculative, adaptive lock. We check whether we
// should be trying to speculate. If we should be, we check the real lock to see
// if it is free, and, if not, pause without attempting to acquire it until it
// is. Then we try the speculative acquire. This means that although we suffer
// from lemmings a little (because we can't acquire the lock speculatively
// until the queue of waiting threads has cleared), we don't get into a state
// where we can never acquire the lock speculatively (because we force the queue
// to clear by preventing new arrivals from entering the queue). This does mean
// that when we're trying to break lemmings, the lock is no longer fair.
// However, OpenMP makes no guarantee that its locks are fair, so this isn't a
// real problem.
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
                                        kmp_int32 gtid) {
  if (__kmp_should_speculate(lck, gtid)) {
    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
      // We tried speculation and failed, so give up.
    } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queuing lock) to allow it to drain, then
      // try again. All other threads will also see the same result from
      // __kmp_should_speculate(), so will be doing the same if they try to
      // claim the lock from now on.
      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        KMP_INC_STAT(lck, lemmingYields);
        __kmp_yield(TRUE);
      }

      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
    }
  }

  // Speculative acquisition failed, so acquire it non-speculatively.
  // Count the non-speculative acquire attempt.
  lck->lk.adaptive.acquire_attempts++;

  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
  // We have acquired the base lock, so count that.
  KMP_INC_STAT(lck, nonSpeculativeAcquires);
  ANNOTATE_QUEUING_ACQUIRED(lck);
}

static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_adaptive_lock(lck, gtid);

  lck->lk.qlk.owner_id = gtid + 1;
}

static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
                                       kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
          lck))) { // If the lock doesn't look claimed we must be speculating.
    // (Or the user's code is buggy and they're releasing without locking;
    // if we had XTEST we'd be able to check that case...)
2212 _xend(); // Exit speculation 2213 __kmp_update_badness_after_success(lck); 2214 } else { // Since the lock *is* visibly locked we're not speculating, 2215 // so should use the underlying lock's release scheme. 2216 __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); 2217 } 2218 return KMP_LOCK_RELEASED; 2219 } 2220 2221 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2222 kmp_int32 gtid) { 2223 char const *const func = "omp_unset_lock"; 2224 KMP_MB(); /* in case another processor initialized lock */ 2225 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2226 KMP_FATAL(LockIsUninitialized, func); 2227 } 2228 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { 2229 KMP_FATAL(LockUnsettingFree, func); 2230 } 2231 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { 2232 KMP_FATAL(LockUnsettingSetByAnother, func); 2233 } 2234 lck->lk.qlk.owner_id = 0; 2235 __kmp_release_adaptive_lock(lck, gtid); 2236 return KMP_LOCK_RELEASED; 2237 } 2238 2239 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { 2240 __kmp_init_queuing_lock(GET_QLK_PTR(lck)); 2241 lck->lk.adaptive.badness = 0; 2242 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; 2243 lck->lk.adaptive.max_soft_retries = 2244 __kmp_adaptive_backoff_params.max_soft_retries; 2245 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2246 #if KMP_DEBUG_ADAPTIVE_LOCKS 2247 __kmp_zero_speculative_stats(&lck->lk.adaptive); 2248 #endif 2249 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2250 } 2251 2252 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2253 __kmp_init_adaptive_lock(lck); 2254 } 2255 2256 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { 2257 #if KMP_DEBUG_ADAPTIVE_LOCKS 2258 __kmp_accumulate_speculative_stats(&lck->lk.adaptive); 2259 #endif 2260 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); 2261 // Nothing needed for the speculative part. 2262 } 2263 2264 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2265 char const *const func = "omp_destroy_lock"; 2266 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2267 KMP_FATAL(LockIsUninitialized, func); 2268 } 2269 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) { 2270 KMP_FATAL(LockStillOwned, func); 2271 } 2272 __kmp_destroy_adaptive_lock(lck); 2273 } 2274 2275 #endif // KMP_USE_ADAPTIVE_LOCKS 2276 2277 /* ------------------------------------------------------------------------ */ 2278 /* DRDPA ticket locks */ 2279 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2280 2281 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) { 2282 return TCR_4(lck->lk.owner_id) - 1; 2283 } 2284 2285 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) { 2286 return lck->lk.depth_locked != -1; 2287 } 2288 2289 __forceinline static int 2290 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2291 kmp_uint64 ticket = 2292 KMP_TEST_THEN_INC64(RCAST(volatile kmp_int64 *, &lck->lk.next_ticket)); 2293 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2294 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; 2295 2296 #ifdef USE_LOCK_PROFILE 2297 if (TCR_8(polls[ticket & mask].poll) != ticket) 2298 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2299 /* else __kmp_printf( "." 
);*/ 2300 #endif /* USE_LOCK_PROFILE */ 2301 2302 // Now spin-wait, but reload the polls pointer and mask, in case the 2303 // polling area has been reconfigured. Unless it is reconfigured, the 2304 // reloads stay in L1 cache and are cheap. 2305 // 2306 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!! 2307 // 2308 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2309 // and poll to be re-read every spin iteration. 2310 kmp_uint32 spins; 2311 2312 KMP_FSYNC_PREPARE(lck); 2313 KMP_INIT_YIELD(spins); 2314 while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load 2315 // If we are oversubscribed, 2316 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2317 // CPU Pause is in the macros for yield. 2318 // 2319 KMP_YIELD(TCR_4(__kmp_nth) > 2320 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2321 KMP_YIELD_SPIN(spins); 2322 2323 // Re-read the mask and the poll pointer from the lock structure. 2324 // 2325 // Make certain that "mask" is read before "polls" !!! 2326 // 2327 // If another thread picks reconfigures the polling area and updates their 2328 // values, and we get the new value of mask and the old polls pointer, we 2329 // could access memory beyond the end of the old polling area. 2330 mask = TCR_8(lck->lk.mask); // volatile load 2331 polls = lck->lk.polls; // volatile load 2332 } 2333 2334 // Critical section starts here 2335 KMP_FSYNC_ACQUIRED(lck); 2336 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2337 ticket, lck)); 2338 lck->lk.now_serving = ticket; // non-volatile store 2339 2340 // Deallocate a garbage polling area if we know that we are the last 2341 // thread that could possibly access it. 2342 // 2343 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2344 // ticket. 2345 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2346 __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.old_polls)); 2347 lck->lk.old_polls = NULL; 2348 lck->lk.cleanup_ticket = 0; 2349 } 2350 2351 // Check to see if we should reconfigure the polling area. 2352 // If there is still a garbage polling area to be deallocated from a 2353 // previous reconfiguration, let a later thread reconfigure it. 2354 if (lck->lk.old_polls == NULL) { 2355 bool reconfigure = false; 2356 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2357 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2358 2359 if (TCR_4(__kmp_nth) > 2360 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2361 // We are in oversubscription mode. Contract the polling area 2362 // down to a single location, if that hasn't been done already. 2363 if (num_polls > 1) { 2364 reconfigure = true; 2365 num_polls = TCR_4(lck->lk.num_polls); 2366 mask = 0; 2367 num_polls = 1; 2368 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2369 __kmp_allocate(num_polls * sizeof(*polls)); 2370 polls[0].poll = ticket; 2371 } 2372 } else { 2373 // We are in under/fully subscribed mode. Check the number of 2374 // threads waiting on the lock. The size of the polling area 2375 // should be at least the number of threads waiting. 
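// (Illustrative worked example, not part of the runtime: the doubling loop
// below grows the polling area until it strictly exceeds the number of
// waiters, keeping mask == num_polls - 1. Starting from num_polls == 1,
// mask == 0 with five waiters:
//
//   iteration : 1  2  3
//   num_polls : 2  4  8
//   mask      : 1  3  7   -> stop, since 8 > 5
//
// Each ticket then spins on polls[ticket & mask], so consecutive waiters poll
// distinct slots instead of contending on a single word.)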
2376 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2377 if (num_waiting > num_polls) { 2378 kmp_uint32 old_num_polls = num_polls; 2379 reconfigure = true; 2380 do { 2381 mask = (mask << 1) | 1; 2382 num_polls *= 2; 2383 } while (num_polls <= num_waiting); 2384 2385 // Allocate the new polling area, and copy the relevant portion 2386 // of the old polling area to the new area. __kmp_allocate() 2387 // zeroes the memory it allocates, and most of the old area is 2388 // just zero padding, so we only copy the release counters. 2389 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2390 __kmp_allocate(num_polls * sizeof(*polls)); 2391 kmp_uint32 i; 2392 for (i = 0; i < old_num_polls; i++) { 2393 polls[i].poll = old_polls[i].poll; 2394 } 2395 } 2396 } 2397 2398 if (reconfigure) { 2399 // Now write the updated fields back to the lock structure. 2400 // 2401 // Make certain that "polls" is written before "mask" !!! 2402 // 2403 // If another thread picks up the new value of mask and the old polls 2404 // pointer , it could access memory beyond the end of the old polling 2405 // area. 2406 // 2407 // On x86, we need memory fences. 2408 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " 2409 "lock %p to %d polls\n", 2410 ticket, lck, num_polls)); 2411 2412 lck->lk.old_polls = old_polls; // non-volatile store 2413 lck->lk.polls = polls; // volatile store 2414 2415 KMP_MB(); 2416 2417 lck->lk.num_polls = num_polls; // non-volatile store 2418 lck->lk.mask = mask; // volatile store 2419 2420 KMP_MB(); 2421 2422 // Only after the new polling area and mask have been flushed 2423 // to main memory can we update the cleanup ticket field. 2424 // 2425 // volatile load / non-volatile store 2426 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2427 } 2428 } 2429 return KMP_LOCK_ACQUIRED_FIRST; 2430 } 2431 2432 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2433 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2434 ANNOTATE_DRDPA_ACQUIRED(lck); 2435 return retval; 2436 } 2437 2438 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2439 kmp_int32 gtid) { 2440 char const *const func = "omp_set_lock"; 2441 if (lck->lk.initialized != lck) { 2442 KMP_FATAL(LockIsUninitialized, func); 2443 } 2444 if (__kmp_is_drdpa_lock_nestable(lck)) { 2445 KMP_FATAL(LockNestableUsedAsSimple, func); 2446 } 2447 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { 2448 KMP_FATAL(LockIsAlreadyOwned, func); 2449 } 2450 2451 __kmp_acquire_drdpa_lock(lck, gtid); 2452 2453 lck->lk.owner_id = gtid + 1; 2454 return KMP_LOCK_ACQUIRED_FIRST; 2455 } 2456 2457 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2458 // First get a ticket, then read the polls pointer and the mask. 2459 // The polls pointer must be read before the mask!!! 
(See above) 2460 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2461 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; 2462 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2463 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2464 kmp_uint64 next_ticket = ticket + 1; 2465 if (KMP_COMPARE_AND_STORE_ACQ64(&lck->lk.next_ticket, ticket, 2466 next_ticket)) { 2467 KMP_FSYNC_ACQUIRED(lck); 2468 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2469 ticket, lck)); 2470 lck->lk.now_serving = ticket; // non-volatile store 2471 2472 // Since no threads are waiting, there is no possibility that we would 2473 // want to reconfigure the polling area. We might have the cleanup ticket 2474 // value (which says that it is now safe to deallocate old_polls), but 2475 // we'll let a later thread which calls __kmp_acquire_lock do that - this 2476 // routine isn't supposed to block, and we would risk blocks if we called 2477 // __kmp_free() to do the deallocation. 2478 return TRUE; 2479 } 2480 } 2481 return FALSE; 2482 } 2483 2484 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2485 kmp_int32 gtid) { 2486 char const *const func = "omp_test_lock"; 2487 if (lck->lk.initialized != lck) { 2488 KMP_FATAL(LockIsUninitialized, func); 2489 } 2490 if (__kmp_is_drdpa_lock_nestable(lck)) { 2491 KMP_FATAL(LockNestableUsedAsSimple, func); 2492 } 2493 2494 int retval = __kmp_test_drdpa_lock(lck, gtid); 2495 2496 if (retval) { 2497 lck->lk.owner_id = gtid + 1; 2498 } 2499 return retval; 2500 } 2501 2502 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2503 // Read the ticket value from the lock data struct, then the polls pointer and 2504 // the mask. The polls pointer must be read before the mask!!! (See above) 2505 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2506 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; 2507 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2508 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2509 ticket - 1, lck)); 2510 KMP_FSYNC_RELEASING(lck); 2511 ANNOTATE_DRDPA_RELEASED(lck); 2512 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2513 return KMP_LOCK_RELEASED; 2514 } 2515 2516 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2517 kmp_int32 gtid) { 2518 char const *const func = "omp_unset_lock"; 2519 KMP_MB(); /* in case another processor initialized lock */ 2520 if (lck->lk.initialized != lck) { 2521 KMP_FATAL(LockIsUninitialized, func); 2522 } 2523 if (__kmp_is_drdpa_lock_nestable(lck)) { 2524 KMP_FATAL(LockNestableUsedAsSimple, func); 2525 } 2526 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2527 KMP_FATAL(LockUnsettingFree, func); 2528 } 2529 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && 2530 (__kmp_get_drdpa_lock_owner(lck) != gtid)) { 2531 KMP_FATAL(LockUnsettingSetByAnother, func); 2532 } 2533 lck->lk.owner_id = 0; 2534 return __kmp_release_drdpa_lock(lck, gtid); 2535 } 2536 2537 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { 2538 lck->lk.location = NULL; 2539 lck->lk.mask = 0; 2540 lck->lk.num_polls = 1; 2541 lck->lk.polls = 2542 (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)__kmp_allocate( 2543 lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2544 lck->lk.cleanup_ticket = 0; 2545 lck->lk.old_polls = NULL; 2546 lck->lk.next_ticket = 0; 2547 lck->lk.now_serving = 0; 2548 lck->lk.owner_id = 0; // no thread owns the lock. 
2549 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 2550 lck->lk.initialized = lck; 2551 2552 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2553 } 2554 2555 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2556 __kmp_init_drdpa_lock(lck); 2557 } 2558 2559 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { 2560 lck->lk.initialized = NULL; 2561 lck->lk.location = NULL; 2562 if (lck->lk.polls != NULL) { 2563 __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.polls)); 2564 lck->lk.polls = NULL; 2565 } 2566 if (lck->lk.old_polls != NULL) { 2567 __kmp_free(CCAST(kmp_base_drdpa_lock::kmp_lock_poll *, lck->lk.old_polls)); 2568 lck->lk.old_polls = NULL; 2569 } 2570 lck->lk.mask = 0; 2571 lck->lk.num_polls = 0; 2572 lck->lk.cleanup_ticket = 0; 2573 lck->lk.next_ticket = 0; 2574 lck->lk.now_serving = 0; 2575 lck->lk.owner_id = 0; 2576 lck->lk.depth_locked = -1; 2577 } 2578 2579 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2580 char const *const func = "omp_destroy_lock"; 2581 if (lck->lk.initialized != lck) { 2582 KMP_FATAL(LockIsUninitialized, func); 2583 } 2584 if (__kmp_is_drdpa_lock_nestable(lck)) { 2585 KMP_FATAL(LockNestableUsedAsSimple, func); 2586 } 2587 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2588 KMP_FATAL(LockStillOwned, func); 2589 } 2590 __kmp_destroy_drdpa_lock(lck); 2591 } 2592 2593 // nested drdpa ticket locks 2594 2595 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2596 KMP_DEBUG_ASSERT(gtid >= 0); 2597 2598 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2599 lck->lk.depth_locked += 1; 2600 return KMP_LOCK_ACQUIRED_NEXT; 2601 } else { 2602 __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2603 ANNOTATE_DRDPA_ACQUIRED(lck); 2604 KMP_MB(); 2605 lck->lk.depth_locked = 1; 2606 KMP_MB(); 2607 lck->lk.owner_id = gtid + 1; 2608 return KMP_LOCK_ACQUIRED_FIRST; 2609 } 2610 } 2611 2612 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2613 kmp_int32 gtid) { 2614 char const *const func = "omp_set_nest_lock"; 2615 if (lck->lk.initialized != lck) { 2616 KMP_FATAL(LockIsUninitialized, func); 2617 } 2618 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2619 KMP_FATAL(LockSimpleUsedAsNestable, func); 2620 } 2621 __kmp_acquire_nested_drdpa_lock(lck, gtid); 2622 } 2623 2624 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2625 int retval; 2626 2627 KMP_DEBUG_ASSERT(gtid >= 0); 2628 2629 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2630 retval = ++lck->lk.depth_locked; 2631 } else if (!__kmp_test_drdpa_lock(lck, gtid)) { 2632 retval = 0; 2633 } else { 2634 KMP_MB(); 2635 retval = lck->lk.depth_locked = 1; 2636 KMP_MB(); 2637 lck->lk.owner_id = gtid + 1; 2638 } 2639 return retval; 2640 } 2641 2642 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2643 kmp_int32 gtid) { 2644 char const *const func = "omp_test_nest_lock"; 2645 if (lck->lk.initialized != lck) { 2646 KMP_FATAL(LockIsUninitialized, func); 2647 } 2648 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2649 KMP_FATAL(LockSimpleUsedAsNestable, func); 2650 } 2651 return __kmp_test_nested_drdpa_lock(lck, gtid); 2652 } 2653 2654 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2655 KMP_DEBUG_ASSERT(gtid >= 0); 2656 2657 KMP_MB(); 2658 if (--(lck->lk.depth_locked) == 0) { 2659 KMP_MB(); 2660 lck->lk.owner_id = 0; 2661 __kmp_release_drdpa_lock(lck, gtid); 2662 return KMP_LOCK_RELEASED; 2663 } 2664 
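// (Illustrative note, not part of the runtime: a depth that is still positive
// after the decrement means an outer nesting level continues to hold the
// lock. For a nested OpenMP lock backed by this implementation, e.g.:
//
//   omp_set_nest_lock(&l);    // depth 1 (lock acquired)
//   omp_set_nest_lock(&l);    // depth 2 (same owner, no re-acquire)
//   omp_unset_nest_lock(&l);  // depth 1 -> KMP_LOCK_STILL_HELD
//   omp_unset_nest_lock(&l);  // depth 0 -> KMP_LOCK_RELEASED
//
// only the final unset hands the lock back via __kmp_release_drdpa_lock.)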
return KMP_LOCK_STILL_HELD; 2665 } 2666 2667 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2668 kmp_int32 gtid) { 2669 char const *const func = "omp_unset_nest_lock"; 2670 KMP_MB(); /* in case another processor initialized lock */ 2671 if (lck->lk.initialized != lck) { 2672 KMP_FATAL(LockIsUninitialized, func); 2673 } 2674 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2675 KMP_FATAL(LockSimpleUsedAsNestable, func); 2676 } 2677 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2678 KMP_FATAL(LockUnsettingFree, func); 2679 } 2680 if (__kmp_get_drdpa_lock_owner(lck) != gtid) { 2681 KMP_FATAL(LockUnsettingSetByAnother, func); 2682 } 2683 return __kmp_release_nested_drdpa_lock(lck, gtid); 2684 } 2685 2686 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2687 __kmp_init_drdpa_lock(lck); 2688 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2689 } 2690 2691 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2692 __kmp_init_nested_drdpa_lock(lck); 2693 } 2694 2695 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2696 __kmp_destroy_drdpa_lock(lck); 2697 lck->lk.depth_locked = 0; 2698 } 2699 2700 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2701 char const *const func = "omp_destroy_nest_lock"; 2702 if (lck->lk.initialized != lck) { 2703 KMP_FATAL(LockIsUninitialized, func); 2704 } 2705 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2706 KMP_FATAL(LockSimpleUsedAsNestable, func); 2707 } 2708 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2709 KMP_FATAL(LockStillOwned, func); 2710 } 2711 __kmp_destroy_nested_drdpa_lock(lck); 2712 } 2713 2714 // access functions to fields which don't exist for all lock kinds. 2715 2716 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { 2717 return lck == lck->lk.initialized; 2718 } 2719 2720 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { 2721 return lck->lk.location; 2722 } 2723 2724 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, 2725 const ident_t *loc) { 2726 lck->lk.location = loc; 2727 } 2728 2729 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { 2730 return lck->lk.flags; 2731 } 2732 2733 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, 2734 kmp_lock_flags_t flags) { 2735 lck->lk.flags = flags; 2736 } 2737 2738 // Time stamp counter 2739 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2740 #define __kmp_tsc() __kmp_hardware_timestamp() 2741 // Runtime's default backoff parameters 2742 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; 2743 #else 2744 // Use nanoseconds for other platforms 2745 extern kmp_uint64 __kmp_now_nsec(); 2746 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; 2747 #define __kmp_tsc() __kmp_now_nsec() 2748 #endif 2749 2750 // A useful predicate for dealing with timestamps that may wrap. 2751 // Is a before b? Since the timestamps may wrap, this is asking whether it's 2752 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 2753 // Times where going clockwise is less distance than going anti-clockwise 2754 // are in the future, others are in the past. e.g. 
a = MAX-1, b = MAX+1 (=0), 2755 // then a > b (true) does not mean a reached b; whereas signed(a) = -2, 2756 // signed(b) = 0 captures the actual difference 2757 static inline bool before(kmp_uint64 a, kmp_uint64 b) { 2758 return ((kmp_int64)b - (kmp_int64)a) > 0; 2759 } 2760 2761 // Truncated binary exponential backoff function 2762 void __kmp_spin_backoff(kmp_backoff_t *boff) { 2763 // We could flatten this loop, but making it a nested loop gives better result 2764 kmp_uint32 i; 2765 for (i = boff->step; i > 0; i--) { 2766 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 2767 do { 2768 KMP_CPU_PAUSE(); 2769 } while (before(__kmp_tsc(), goal)); 2770 } 2771 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); 2772 } 2773 2774 #if KMP_USE_DYNAMIC_LOCK 2775 2776 // Direct lock initializers. It simply writes a tag to the low 8 bits of the 2777 // lock word. 2778 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, 2779 kmp_dyna_lockseq_t seq) { 2780 TCW_4(*lck, KMP_GET_D_TAG(seq)); 2781 KA_TRACE( 2782 20, 2783 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 2784 } 2785 2786 #if KMP_USE_TSX 2787 2788 // HLE lock functions - imported from the testbed runtime. 2789 #define HLE_ACQUIRE ".byte 0xf2;" 2790 #define HLE_RELEASE ".byte 0xf3;" 2791 2792 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { 2793 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); 2794 return v; 2795 } 2796 2797 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } 2798 2799 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2800 // Use gtid for KMP_LOCK_BUSY if necessary 2801 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 2802 int delay = 1; 2803 do { 2804 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 2805 for (int i = delay; i != 0; --i) 2806 KMP_CPU_PAUSE(); 2807 delay = ((delay << 1) | 1) & 7; 2808 } 2809 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 2810 } 2811 } 2812 2813 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2814 kmp_int32 gtid) { 2815 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 2816 } 2817 2818 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2819 __asm__ volatile(HLE_RELEASE "movl %1,%0" 2820 : "=m"(*lck) 2821 : "r"(KMP_LOCK_FREE(hle)) 2822 : "memory"); 2823 return KMP_LOCK_RELEASED; 2824 } 2825 2826 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2827 kmp_int32 gtid) { 2828 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 2829 } 2830 2831 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2832 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 2833 } 2834 2835 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2836 kmp_int32 gtid) { 2837 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 2838 } 2839 2840 static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { 2841 __kmp_init_queuing_lock(lck); 2842 } 2843 2844 static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { 2845 __kmp_destroy_queuing_lock(lck); 2846 } 2847 2848 static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2849 unsigned retries = 3, status; 2850 do { 2851 status = _xbegin(); 2852 if (status == _XBEGIN_STARTED) { 2853 if (__kmp_is_unlocked_queuing_lock(lck)) 2854 return; 2855 _xabort(0xff); 2856 } 2857 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 2858 // Wait until 
lock becomes free 2859 while (!__kmp_is_unlocked_queuing_lock(lck)) 2860 __kmp_yield(TRUE); 2861 } else if (!(status & _XABORT_RETRY)) 2862 break; 2863 } while (retries--); 2864 2865 // Fall-back non-speculative lock (xchg) 2866 __kmp_acquire_queuing_lock(lck, gtid); 2867 } 2868 2869 static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2870 kmp_int32 gtid) { 2871 __kmp_acquire_rtm_lock(lck, gtid); 2872 } 2873 2874 static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2875 if (__kmp_is_unlocked_queuing_lock(lck)) { 2876 // Releasing from speculation 2877 _xend(); 2878 } else { 2879 // Releasing from a real lock 2880 __kmp_release_queuing_lock(lck, gtid); 2881 } 2882 return KMP_LOCK_RELEASED; 2883 } 2884 2885 static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2886 kmp_int32 gtid) { 2887 return __kmp_release_rtm_lock(lck, gtid); 2888 } 2889 2890 static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2891 unsigned retries = 3, status; 2892 do { 2893 status = _xbegin(); 2894 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 2895 return 1; 2896 } 2897 if (!(status & _XABORT_RETRY)) 2898 break; 2899 } while (retries--); 2900 2901 return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0; 2902 } 2903 2904 static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2905 kmp_int32 gtid) { 2906 return __kmp_test_rtm_lock(lck, gtid); 2907 } 2908 2909 #endif // KMP_USE_TSX 2910 2911 // Entry functions for indirect locks (first element of direct lock jump tables) 2912 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, 2913 kmp_dyna_lockseq_t tag); 2914 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); 2915 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2916 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2917 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2918 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2919 kmp_int32); 2920 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2921 kmp_int32); 2922 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2923 kmp_int32); 2924 2925 // Jump tables for the indirect lock functions 2926 // Only fill in the odd entries, that avoids the need to shift out the low bit 2927 2928 // init functions 2929 #define expand(l, op) 0, __kmp_init_direct_lock, 2930 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { 2931 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; 2932 #undef expand 2933 2934 // destroy functions 2935 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 2936 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) = { 2937 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 2938 #undef expand 2939 2940 // set/acquire functions 2941 #define expand(l, op) \ 2942 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2943 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { 2944 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; 2945 #undef expand 2946 #define expand(l, op) \ 2947 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2948 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2949 __kmp_set_indirect_lock_with_checks, 0, 2950 KMP_FOREACH_D_LOCK(expand, acquire)}; 2951 #undef expand 2952 2953 // unset/release and test functions 2954 #define 
expand(l, op) \ 2955 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2956 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { 2957 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; 2958 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { 2959 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2960 #undef expand 2961 #define expand(l, op) \ 2962 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2963 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2964 __kmp_unset_indirect_lock_with_checks, 0, 2965 KMP_FOREACH_D_LOCK(expand, release)}; 2966 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2967 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2968 #undef expand 2969 2970 // Exposes only one set of jump tables (*lock or *lock_with_checks). 2971 int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; 2972 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; 2973 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; 2974 2975 // Jump tables for the indirect lock functions 2976 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, 2977 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { 2978 KMP_FOREACH_I_LOCK(expand, init)}; 2979 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { 2980 KMP_FOREACH_I_LOCK(expand, destroy)}; 2981 #undef expand 2982 2983 // set/acquire functions 2984 #define expand(l, op) \ 2985 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 2986 static int (*indirect_set[])(kmp_user_lock_p, 2987 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)}; 2988 #undef expand 2989 #define expand(l, op) \ 2990 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 2991 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { 2992 KMP_FOREACH_I_LOCK(expand, acquire)}; 2993 #undef expand 2994 2995 // unset/release and test functions 2996 #define expand(l, op) \ 2997 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 2998 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { 2999 KMP_FOREACH_I_LOCK(expand, release)}; 3000 static int (*indirect_test[])(kmp_user_lock_p, 3001 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)}; 3002 #undef expand 3003 #define expand(l, op) \ 3004 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 3005 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { 3006 KMP_FOREACH_I_LOCK(expand, release)}; 3007 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { 3008 KMP_FOREACH_I_LOCK(expand, test)}; 3009 #undef expand 3010 3011 // Exposes only one jump tables (*lock or *lock_with_checks). 3012 int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; 3013 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; 3014 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; 3015 3016 // Lock index table. 3017 kmp_indirect_lock_table_t __kmp_i_lock_table; 3018 3019 // Size of indirect locks. 3020 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0}; 3021 3022 // Jump tables for lock accessor/modifier. 
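// (Illustrative note, not part of the runtime: the tables below are indexed
// by the indirect lock tag (kmp_indirect_locktag_t) and entries stay NULL for
// lock kinds that do not support the accessor. A hypothetical call site would
// therefore look roughly like:
//
//   kmp_indirect_lock_t *ilk = ...;   // looked up from the user lock word
//   if (__kmp_indirect_set_location[ilk->type] != NULL)
//     __kmp_indirect_set_location[ilk->type](ilk->lock, loc);
//
// The non-NULL entries are filled in by __kmp_init_dynamic_user_locks()
// further below.)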
3023 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3024 const ident_t *) = {0}; 3025 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3026 kmp_lock_flags_t) = {0}; 3027 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])( 3028 kmp_user_lock_p) = {0}; 3029 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( 3030 kmp_user_lock_p) = {0}; 3031 3032 // Use different lock pools for different lock types. 3033 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0}; 3034 3035 // User lock allocator for dynamically dispatched indirect locks. Every entry of 3036 // the indirect lock table holds the address and type of the allocated indrect 3037 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is 3038 // full. A destroyed indirect lock object is returned to the reusable pool of 3039 // locks, unique to each lock type. 3040 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, 3041 kmp_int32 gtid, 3042 kmp_indirect_locktag_t tag) { 3043 kmp_indirect_lock_t *lck; 3044 kmp_lock_index_t idx; 3045 3046 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3047 3048 if (__kmp_indirect_lock_pool[tag] != NULL) { 3049 // Reuse the allocated and destroyed lock object 3050 lck = __kmp_indirect_lock_pool[tag]; 3051 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3052 idx = lck->lock->pool.index; 3053 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3054 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", 3055 lck)); 3056 } else { 3057 idx = __kmp_i_lock_table.next; 3058 // Check capacity and double the size if it is full 3059 if (idx == __kmp_i_lock_table.size) { 3060 // Double up the space for block pointers 3061 int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; 3062 kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate( 3063 2 * row * sizeof(kmp_indirect_lock_t *)); 3064 KMP_MEMCPY(new_table, __kmp_i_lock_table.table, 3065 row * sizeof(kmp_indirect_lock_t *)); 3066 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; 3067 __kmp_i_lock_table.table = new_table; 3068 __kmp_free(old_table); 3069 // Allocate new objects in the new blocks 3070 for (int i = row; i < 2 * row; ++i) 3071 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate( 3072 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3073 __kmp_i_lock_table.size = 2 * idx; 3074 } 3075 __kmp_i_lock_table.next++; 3076 lck = KMP_GET_I_LOCK(idx); 3077 // Allocate a new base lock object 3078 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3079 KA_TRACE(20, 3080 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); 3081 } 3082 3083 __kmp_release_lock(&__kmp_global_lock, gtid); 3084 3085 lck->type = tag; 3086 3087 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3088 *((kmp_lock_index_t *)user_lock) = idx 3089 << 1; // indirect lock word must be even 3090 } else { 3091 *((kmp_indirect_lock_t **)user_lock) = lck; 3092 } 3093 3094 return lck; 3095 } 3096 3097 // User lock lookup for dynamically dispatched locks. 
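// (Illustrative note, not part of the runtime: __kmp_allocate_indirect_lock
// above stores either the table index shifted left by one (when the OpenMP
// lock type is too small to hold a pointer) or the kmp_indirect_lock_t
// pointer itself. The shifted index is guaranteed even, which keeps it
// distinguishable from direct-lock words, whose tag occupies the low bits.
// Decoding in the lookup below is therefore assumed to be roughly:
//
//   if (OMP_LOCK_T_SIZE < sizeof(void *))
//     idx = *(kmp_lock_index_t *)user_lock >> 1;  // what KMP_EXTRACT_I_INDEX
//                                                 // presumably does
//   else
//     lck = *(kmp_indirect_lock_t **)user_lock;
// )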
3098 static __forceinline kmp_indirect_lock_t * 3099 __kmp_lookup_indirect_lock(void **user_lock, const char *func) { 3100 if (__kmp_env_consistency_check) { 3101 kmp_indirect_lock_t *lck = NULL; 3102 if (user_lock == NULL) { 3103 KMP_FATAL(LockIsUninitialized, func); 3104 } 3105 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3106 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3107 if (idx >= __kmp_i_lock_table.size) { 3108 KMP_FATAL(LockIsUninitialized, func); 3109 } 3110 lck = KMP_GET_I_LOCK(idx); 3111 } else { 3112 lck = *((kmp_indirect_lock_t **)user_lock); 3113 } 3114 if (lck == NULL) { 3115 KMP_FATAL(LockIsUninitialized, func); 3116 } 3117 return lck; 3118 } else { 3119 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3120 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3121 } else { 3122 return *((kmp_indirect_lock_t **)user_lock); 3123 } 3124 } 3125 } 3126 3127 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, 3128 kmp_dyna_lockseq_t seq) { 3129 #if KMP_USE_ADAPTIVE_LOCKS 3130 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3131 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3132 seq = lockseq_queuing; 3133 } 3134 #endif 3135 #if KMP_USE_TSX 3136 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3137 seq = lockseq_queuing; 3138 } 3139 #endif 3140 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3141 kmp_indirect_lock_t *l = 3142 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3143 KMP_I_LOCK_FUNC(l, init)(l->lock); 3144 KA_TRACE( 3145 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", 3146 seq)); 3147 } 3148 3149 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { 3150 kmp_uint32 gtid = __kmp_entry_gtid(); 3151 kmp_indirect_lock_t *l = 3152 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3153 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3154 kmp_indirect_locktag_t tag = l->type; 3155 3156 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3157 3158 // Use the base lock's space to keep the pool chain. 
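// (Illustrative note, not part of the runtime: the per-tag pool is a LIFO
// free list threaded through the destroyed base locks themselves, so no extra
// bookkeeping storage is needed. Conceptually:
//
//   __kmp_indirect_lock_pool[tag]:  l3 -> l2 -> l1 -> NULL
//   destroy  : push at the head (the statements below)
//   allocate : pop from the head (__kmp_allocate_indirect_lock above)
// )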
3159 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3160 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3161 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3162 } 3163 __kmp_indirect_lock_pool[tag] = l; 3164 3165 __kmp_release_lock(&__kmp_global_lock, gtid); 3166 } 3167 3168 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3169 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3170 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3171 } 3172 3173 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3174 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3175 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3176 } 3177 3178 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3179 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3180 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3181 } 3182 3183 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3184 kmp_int32 gtid) { 3185 kmp_indirect_lock_t *l = 3186 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3187 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3188 } 3189 3190 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3191 kmp_int32 gtid) { 3192 kmp_indirect_lock_t *l = 3193 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3194 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3195 } 3196 3197 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3198 kmp_int32 gtid) { 3199 kmp_indirect_lock_t *l = 3200 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3201 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3202 } 3203 3204 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3205 3206 // This is used only in kmp_error.cpp when consistency checking is on. 3207 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { 3208 switch (seq) { 3209 case lockseq_tas: 3210 case lockseq_nested_tas: 3211 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3212 #if KMP_USE_FUTEX 3213 case lockseq_futex: 3214 case lockseq_nested_futex: 3215 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3216 #endif 3217 case lockseq_ticket: 3218 case lockseq_nested_ticket: 3219 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3220 case lockseq_queuing: 3221 case lockseq_nested_queuing: 3222 #if KMP_USE_ADAPTIVE_LOCKS 3223 case lockseq_adaptive: 3224 #endif 3225 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3226 case lockseq_drdpa: 3227 case lockseq_nested_drdpa: 3228 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3229 default: 3230 return 0; 3231 } 3232 } 3233 3234 // Initializes data for dynamic user locks. 3235 void __kmp_init_dynamic_user_locks() { 3236 // Initialize jump table for the lock functions 3237 if (__kmp_env_consistency_check) { 3238 __kmp_direct_set = direct_set_check; 3239 __kmp_direct_unset = direct_unset_check; 3240 __kmp_direct_test = direct_test_check; 3241 __kmp_indirect_set = indirect_set_check; 3242 __kmp_indirect_unset = indirect_unset_check; 3243 __kmp_indirect_test = indirect_test_check; 3244 } else { 3245 __kmp_direct_set = direct_set; 3246 __kmp_direct_unset = direct_unset; 3247 __kmp_direct_test = direct_test; 3248 __kmp_indirect_set = indirect_set; 3249 __kmp_indirect_unset = indirect_unset; 3250 __kmp_indirect_test = indirect_test; 3251 } 3252 // If the user locks have already been initialized, then return. 
Allow the 3253 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate 3254 // new lock tables if they have already been allocated. 3255 if (__kmp_init_user_locks) 3256 return; 3257 3258 // Initialize lock index table 3259 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3260 __kmp_i_lock_table.table = 3261 (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3262 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( 3263 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3264 __kmp_i_lock_table.next = 0; 3265 3266 // Indirect lock size 3267 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3268 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3269 #if KMP_USE_ADAPTIVE_LOCKS 3270 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3271 #endif 3272 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3273 #if KMP_USE_TSX 3274 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3275 #endif 3276 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3277 #if KMP_USE_FUTEX 3278 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3279 #endif 3280 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3281 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3282 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3283 3284 // Initialize lock accessor/modifier 3285 #define fill_jumps(table, expand, sep) \ 3286 { \ 3287 table[locktag##sep##ticket] = expand(ticket); \ 3288 table[locktag##sep##queuing] = expand(queuing); \ 3289 table[locktag##sep##drdpa] = expand(drdpa); \ 3290 } 3291 3292 #if KMP_USE_ADAPTIVE_LOCKS 3293 #define fill_table(table, expand) \ 3294 { \ 3295 fill_jumps(table, expand, _); \ 3296 table[locktag_adaptive] = expand(queuing); \ 3297 fill_jumps(table, expand, _nested_); \ 3298 } 3299 #else 3300 #define fill_table(table, expand) \ 3301 { \ 3302 fill_jumps(table, expand, _); \ 3303 fill_jumps(table, expand, _nested_); \ 3304 } 3305 #endif // KMP_USE_ADAPTIVE_LOCKS 3306 3307 #define expand(l) \ 3308 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location 3309 fill_table(__kmp_indirect_set_location, expand); 3310 #undef expand 3311 #define expand(l) \ 3312 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags 3313 fill_table(__kmp_indirect_set_flags, expand); 3314 #undef expand 3315 #define expand(l) \ 3316 (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location 3317 fill_table(__kmp_indirect_get_location, expand); 3318 #undef expand 3319 #define expand(l) \ 3320 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags 3321 fill_table(__kmp_indirect_get_flags, expand); 3322 #undef expand 3323 3324 __kmp_init_user_locks = TRUE; 3325 } 3326 3327 // Clean up the lock table. 3328 void __kmp_cleanup_indirect_user_locks() { 3329 kmp_lock_index_t i; 3330 int k; 3331 3332 // Clean up locks in the pools first (they were already destroyed before going 3333 // into the pools). 
  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
    while (l != NULL) {
      kmp_indirect_lock_t *ll = l;
      l = (kmp_indirect_lock_t *)l->lock->pool.next;
      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
                    ll));
      __kmp_free(ll->lock);
      ll->lock = NULL;
    }
    __kmp_indirect_lock_pool[k] = NULL;
  }
  // Clean up the remaining undestroyed locks.
  for (i = 0; i < __kmp_i_lock_table.next; i++) {
    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
    if (l->lock != NULL) {
      // Locks not destroyed explicitly need to be destroyed here.
      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
      KA_TRACE(
          20,
          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
           l));
      __kmp_free(l->lock);
    }
  }
  // Free the table
  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
    __kmp_free(__kmp_i_lock_table.table[i]);
  __kmp_free(__kmp_i_lock_table.table);

  __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
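// When __kmp_num_locks_in_block > 1, user locks are carved out of larger
// buffers instead of being allocated one at a time. A rough sketch of the
// layout produced by __kmp_lock_block_allocate() below (sizes are
// illustrative only):
//
//   buffer: [lock 0][lock 1] ... [lock N-1][kmp_block_of_locks header]
//            ^-- new_block->locks                      ^-- new_block
//
// The header sits at the tail of the same allocation, so freeing
// block_ptr->locks in __kmp_cleanup_user_locks() releases the header as well.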
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first element
    // of the new table. All the tables are thus organized into a list, which
    // can be freed when the library is shutting down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}

// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    // ANNOTATION: Found no good way to express the synchronization
    // between allocation and usage, so ignore the allocation.
    ANNOTATE_IGNORE_WRITES_BEGIN();
    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }
    ANNOTATE_IGNORE_WRITES_END();

    // Insert the lock in the table so that it can be freed in __kmp_cleanup,
    // and so the debugger has info on all allocated locks.
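    // (The returned index also doubles as the value handed back to the user
    // when omp_lock_t cannot hold a pointer -- see the OMP_LOCK_T_SIZE check
    // below; __kmp_lookup_user_lock() performs the reverse mapping.)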
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up a lock from the pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up

  return lck;
}

// Put the lock's memory into the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

void __kmp_cleanup_user_locks(void) {
  // Reset the lock pool. Don't worry about locks in the pool -- we will free
  // them when iterating through the lock table (it includes all the locks,
  // dead or alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through the lock table, freeing all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type
  // is currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will
  // fail for "pool" objects on the free list. This must happen, as the
  // "location" field of real user locks overlaps the "index" field of "pool"
  // objects.
  //
  // It would be better to run through the free list and remove all "pool"
  // objects from the lock table before executing this loop. However, "pool"
  // objects do not always have their index field set (only on lin_32e), and
  // I don't want to search the lock table for the address of every "pool"
  // object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of the locks is consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND the lock is initialized
      // AND it is NOT a critical section (the user is not responsible for
      // destroying criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Clean up the lock's internal dynamic resources (for drdpa locks in
      // particular).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete the lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free the buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK