/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems on
// different Linux* OS distributions that either require that you include (or
// break when you try to include) <pci/types.h>. Since all we need is the two
// macros below (which are part of the kernel ABI, so can't change) we just
// define the constants here and don't include <futex.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic does wrap properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(TCR_4(lck->lk.poll)) - 1;
}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if ((lck->lk.poll == KMP_LOCK_FREE(tas)) &&
      KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas),
                                  KMP_LOCK_BUSY(gtid + 1, tas))) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ?
__kmp_avail_proc : __kmp_xproc)) { 103 KMP_YIELD(TRUE); 104 } else { 105 KMP_YIELD_SPIN(spins); 106 } 107 108 kmp_backoff_t backoff = __kmp_spin_backoff_params; 109 while ((lck->lk.poll != KMP_LOCK_FREE(tas)) || 110 (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas), 111 KMP_LOCK_BUSY(gtid + 1, tas)))) { 112 113 __kmp_spin_backoff(&backoff); 114 if (TCR_4(__kmp_nth) > 115 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 116 KMP_YIELD(TRUE); 117 } else { 118 KMP_YIELD_SPIN(spins); 119 } 120 } 121 KMP_FSYNC_ACQUIRED(lck); 122 return KMP_LOCK_ACQUIRED_FIRST; 123 } 124 125 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 126 int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid); 127 ANNOTATE_TAS_ACQUIRED(lck); 128 return retval; 129 } 130 131 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck, 132 kmp_int32 gtid) { 133 char const *const func = "omp_set_lock"; 134 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 135 __kmp_is_tas_lock_nestable(lck)) { 136 KMP_FATAL(LockNestableUsedAsSimple, func); 137 } 138 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) { 139 KMP_FATAL(LockIsAlreadyOwned, func); 140 } 141 return __kmp_acquire_tas_lock(lck, gtid); 142 } 143 144 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 145 if ((lck->lk.poll == KMP_LOCK_FREE(tas)) && 146 KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas), 147 KMP_LOCK_BUSY(gtid + 1, tas))) { 148 KMP_FSYNC_ACQUIRED(lck); 149 return TRUE; 150 } 151 return FALSE; 152 } 153 154 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck, 155 kmp_int32 gtid) { 156 char const *const func = "omp_test_lock"; 157 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 158 __kmp_is_tas_lock_nestable(lck)) { 159 KMP_FATAL(LockNestableUsedAsSimple, func); 160 } 161 return __kmp_test_tas_lock(lck, gtid); 162 } 163 164 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 165 KMP_MB(); /* Flush all pending memory write invalidates. */ 166 167 KMP_FSYNC_RELEASING(lck); 168 ANNOTATE_TAS_RELEASED(lck); 169 KMP_ST_REL32(&(lck->lk.poll), KMP_LOCK_FREE(tas)); 170 KMP_MB(); /* Flush all pending memory write invalidates. */ 171 172 KMP_YIELD(TCR_4(__kmp_nth) > 173 (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); 174 return KMP_LOCK_RELEASED; 175 } 176 177 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck, 178 kmp_int32 gtid) { 179 char const *const func = "omp_unset_lock"; 180 KMP_MB(); /* in case another processor initialized lock */ 181 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 182 __kmp_is_tas_lock_nestable(lck)) { 183 KMP_FATAL(LockNestableUsedAsSimple, func); 184 } 185 if (__kmp_get_tas_lock_owner(lck) == -1) { 186 KMP_FATAL(LockUnsettingFree, func); 187 } 188 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) && 189 (__kmp_get_tas_lock_owner(lck) != gtid)) { 190 KMP_FATAL(LockUnsettingSetByAnother, func); 191 } 192 return __kmp_release_tas_lock(lck, gtid); 193 } 194 195 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) { 196 TCW_4(lck->lk.poll, KMP_LOCK_FREE(tas)); 197 } 198 199 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) { 200 __kmp_init_tas_lock(lck); 201 } 202 203 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; } 204 205 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) { 206 char const *const func = "omp_destroy_lock"; 207 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && 208 __kmp_is_tas_lock_nestable(lck)) { 209 KMP_FATAL(LockNestableUsedAsSimple, func); 210 } 211 if (__kmp_get_tas_lock_owner(lck) != -1) { 212 KMP_FATAL(LockStillOwned, func); 213 } 214 __kmp_destroy_tas_lock(lck); 215 } 216 217 // nested test and set locks 218 219 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 220 KMP_DEBUG_ASSERT(gtid >= 0); 221 222 if (__kmp_get_tas_lock_owner(lck) == gtid) { 223 lck->lk.depth_locked += 1; 224 return KMP_LOCK_ACQUIRED_NEXT; 225 } else { 226 __kmp_acquire_tas_lock_timed_template(lck, gtid); 227 ANNOTATE_TAS_ACQUIRED(lck); 228 lck->lk.depth_locked = 1; 229 return KMP_LOCK_ACQUIRED_FIRST; 230 } 231 } 232 233 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 234 kmp_int32 gtid) { 235 char const *const func = "omp_set_nest_lock"; 236 if (!__kmp_is_tas_lock_nestable(lck)) { 237 KMP_FATAL(LockSimpleUsedAsNestable, func); 238 } 239 return __kmp_acquire_nested_tas_lock(lck, gtid); 240 } 241 242 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 243 int retval; 244 245 KMP_DEBUG_ASSERT(gtid >= 0); 246 247 if (__kmp_get_tas_lock_owner(lck) == gtid) { 248 retval = ++lck->lk.depth_locked; 249 } else if (!__kmp_test_tas_lock(lck, gtid)) { 250 retval = 0; 251 } else { 252 KMP_MB(); 253 retval = lck->lk.depth_locked = 1; 254 } 255 return retval; 256 } 257 258 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 259 kmp_int32 gtid) { 260 char const *const func = "omp_test_nest_lock"; 261 if (!__kmp_is_tas_lock_nestable(lck)) { 262 KMP_FATAL(LockSimpleUsedAsNestable, func); 263 } 264 return __kmp_test_nested_tas_lock(lck, gtid); 265 } 266 267 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { 268 KMP_DEBUG_ASSERT(gtid >= 0); 269 270 KMP_MB(); 271 if (--(lck->lk.depth_locked) == 0) { 272 __kmp_release_tas_lock(lck, gtid); 273 return KMP_LOCK_RELEASED; 274 } 275 return KMP_LOCK_STILL_HELD; 276 } 277 278 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, 279 kmp_int32 gtid) { 280 char const *const func = "omp_unset_nest_lock"; 281 KMP_MB(); /* in case another processor initialized lock */ 282 if (!__kmp_is_tas_lock_nestable(lck)) { 283 KMP_FATAL(LockSimpleUsedAsNestable, func); 284 } 285 if (__kmp_get_tas_lock_owner(lck) == -1) { 286 
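    // An owner of -1 means lk.poll is KMP_LOCK_FREE(tas), i.e. the lock is
    // not currently held, so unsetting it here is a user error.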
KMP_FATAL(LockUnsettingFree, func); 287 } 288 if (__kmp_get_tas_lock_owner(lck) != gtid) { 289 KMP_FATAL(LockUnsettingSetByAnother, func); 290 } 291 return __kmp_release_nested_tas_lock(lck, gtid); 292 } 293 294 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) { 295 __kmp_init_tas_lock(lck); 296 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 297 } 298 299 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { 300 __kmp_init_nested_tas_lock(lck); 301 } 302 303 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) { 304 __kmp_destroy_tas_lock(lck); 305 lck->lk.depth_locked = 0; 306 } 307 308 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { 309 char const *const func = "omp_destroy_nest_lock"; 310 if (!__kmp_is_tas_lock_nestable(lck)) { 311 KMP_FATAL(LockSimpleUsedAsNestable, func); 312 } 313 if (__kmp_get_tas_lock_owner(lck) != -1) { 314 KMP_FATAL(LockStillOwned, func); 315 } 316 __kmp_destroy_nested_tas_lock(lck); 317 } 318 319 #if KMP_USE_FUTEX 320 321 /* ------------------------------------------------------------------------ */ 322 /* futex locks */ 323 324 // futex locks are really just test and set locks, with a different method 325 // of handling contention. They take the same amount of space as test and 326 // set locks, and are allocated the same way (i.e. use the area allocated by 327 // the compiler for non-nested locks / allocate nested locks on the heap). 328 329 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) { 330 return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1; 331 } 332 333 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) { 334 return lck->lk.depth_locked != -1; 335 } 336 337 __forceinline static int 338 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) { 339 kmp_int32 gtid_code = (gtid + 1) << 1; 340 341 KMP_MB(); 342 343 #ifdef USE_LOCK_PROFILE 344 kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll)); 345 if ((curr != 0) && (curr != gtid_code)) 346 __kmp_printf("LOCK CONTENTION: %p\n", lck); 347 /* else __kmp_printf( "." );*/ 348 #endif /* USE_LOCK_PROFILE */ 349 350 KMP_FSYNC_PREPARE(lck); 351 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", 352 lck, lck->lk.poll, gtid)); 353 354 kmp_int32 poll_val; 355 356 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( 357 &(lck->lk.poll), KMP_LOCK_FREE(futex), 358 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { 359 360 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; 361 KA_TRACE( 362 1000, 363 ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", 364 lck, gtid, poll_val, cond)); 365 366 // NOTE: if you try to use the following condition for this branch 367 // 368 // if ( poll_val & 1 == 0 ) 369 // 370 // Then the 12.0 compiler has a bug where the following block will 371 // always be skipped, regardless of the value of the LSB of poll_val. 372 if (!cond) { 373 // Try to set the lsb in the poll to indicate to the owner 374 // thread that they need to wake this thread up. 
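      // Poll word encoding: KMP_LOCK_FREE(futex) when the lock is free,
      // (gtid + 1) << 1 while it is held, and the same value with bit 0 set
      // when at least one waiter may be parked in futex_wait and a
      // futex_wake is required on release.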
375 if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val, 376 poll_val | KMP_LOCK_BUSY(1, futex))) { 377 KA_TRACE( 378 1000, 379 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", 380 lck, lck->lk.poll, gtid)); 381 continue; 382 } 383 poll_val |= KMP_LOCK_BUSY(1, futex); 384 385 KA_TRACE(1000, 386 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck, 387 lck->lk.poll, gtid)); 388 } 389 390 KA_TRACE( 391 1000, 392 ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", 393 lck, gtid, poll_val)); 394 395 kmp_int32 rc; 396 if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL, 397 NULL, 0)) != 0) { 398 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) " 399 "failed (rc=%d errno=%d)\n", 400 lck, gtid, poll_val, rc, errno)); 401 continue; 402 } 403 404 KA_TRACE(1000, 405 ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", 406 lck, gtid, poll_val)); 407 // This thread has now done a successful futex wait call and was entered on 408 // the OS futex queue. We must now perform a futex wake call when releasing 409 // the lock, as we have no idea how many other threads are in the queue. 410 gtid_code |= 1; 411 } 412 413 KMP_FSYNC_ACQUIRED(lck); 414 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, 415 lck->lk.poll, gtid)); 416 return KMP_LOCK_ACQUIRED_FIRST; 417 } 418 419 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 420 int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid); 421 ANNOTATE_FUTEX_ACQUIRED(lck); 422 return retval; 423 } 424 425 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck, 426 kmp_int32 gtid) { 427 char const *const func = "omp_set_lock"; 428 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 429 __kmp_is_futex_lock_nestable(lck)) { 430 KMP_FATAL(LockNestableUsedAsSimple, func); 431 } 432 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) { 433 KMP_FATAL(LockIsAlreadyOwned, func); 434 } 435 return __kmp_acquire_futex_lock(lck, gtid); 436 } 437 438 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 439 if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex), 440 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { 441 KMP_FSYNC_ACQUIRED(lck); 442 return TRUE; 443 } 444 return FALSE; 445 } 446 447 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck, 448 kmp_int32 gtid) { 449 char const *const func = "omp_test_lock"; 450 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 451 __kmp_is_futex_lock_nestable(lck)) { 452 KMP_FATAL(LockNestableUsedAsSimple, func); 453 } 454 return __kmp_test_futex_lock(lck, gtid); 455 } 456 457 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 458 KMP_MB(); /* Flush all pending memory write invalidates. */ 459 460 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", 461 lck, lck->lk.poll, gtid)); 462 463 KMP_FSYNC_RELEASING(lck); 464 ANNOTATE_FUTEX_RELEASED(lck); 465 466 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex)); 467 468 KA_TRACE(1000, 469 ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", 470 lck, gtid, poll_val)); 471 472 if (KMP_LOCK_STRIP(poll_val) & 1) { 473 KA_TRACE(1000, 474 ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", 475 lck, gtid)); 476 syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), 477 NULL, NULL, 0); 478 } 479 480 KMP_MB(); /* Flush all pending memory write invalidates. 
*/ 481 482 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, 483 lck->lk.poll, gtid)); 484 485 KMP_YIELD(TCR_4(__kmp_nth) > 486 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 487 return KMP_LOCK_RELEASED; 488 } 489 490 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck, 491 kmp_int32 gtid) { 492 char const *const func = "omp_unset_lock"; 493 KMP_MB(); /* in case another processor initialized lock */ 494 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 495 __kmp_is_futex_lock_nestable(lck)) { 496 KMP_FATAL(LockNestableUsedAsSimple, func); 497 } 498 if (__kmp_get_futex_lock_owner(lck) == -1) { 499 KMP_FATAL(LockUnsettingFree, func); 500 } 501 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) && 502 (__kmp_get_futex_lock_owner(lck) != gtid)) { 503 KMP_FATAL(LockUnsettingSetByAnother, func); 504 } 505 return __kmp_release_futex_lock(lck, gtid); 506 } 507 508 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) { 509 TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex)); 510 } 511 512 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) { 513 __kmp_init_futex_lock(lck); 514 } 515 516 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; } 517 518 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) { 519 char const *const func = "omp_destroy_lock"; 520 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && 521 __kmp_is_futex_lock_nestable(lck)) { 522 KMP_FATAL(LockNestableUsedAsSimple, func); 523 } 524 if (__kmp_get_futex_lock_owner(lck) != -1) { 525 KMP_FATAL(LockStillOwned, func); 526 } 527 __kmp_destroy_futex_lock(lck); 528 } 529 530 // nested futex locks 531 532 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 533 KMP_DEBUG_ASSERT(gtid >= 0); 534 535 if (__kmp_get_futex_lock_owner(lck) == gtid) { 536 lck->lk.depth_locked += 1; 537 return KMP_LOCK_ACQUIRED_NEXT; 538 } else { 539 __kmp_acquire_futex_lock_timed_template(lck, gtid); 540 ANNOTATE_FUTEX_ACQUIRED(lck); 541 lck->lk.depth_locked = 1; 542 return KMP_LOCK_ACQUIRED_FIRST; 543 } 544 } 545 546 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 547 kmp_int32 gtid) { 548 char const *const func = "omp_set_nest_lock"; 549 if (!__kmp_is_futex_lock_nestable(lck)) { 550 KMP_FATAL(LockSimpleUsedAsNestable, func); 551 } 552 return __kmp_acquire_nested_futex_lock(lck, gtid); 553 } 554 555 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 556 int retval; 557 558 KMP_DEBUG_ASSERT(gtid >= 0); 559 560 if (__kmp_get_futex_lock_owner(lck) == gtid) { 561 retval = ++lck->lk.depth_locked; 562 } else if (!__kmp_test_futex_lock(lck, gtid)) { 563 retval = 0; 564 } else { 565 KMP_MB(); 566 retval = lck->lk.depth_locked = 1; 567 } 568 return retval; 569 } 570 571 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 572 kmp_int32 gtid) { 573 char const *const func = "omp_test_nest_lock"; 574 if (!__kmp_is_futex_lock_nestable(lck)) { 575 KMP_FATAL(LockSimpleUsedAsNestable, func); 576 } 577 return __kmp_test_nested_futex_lock(lck, gtid); 578 } 579 580 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { 581 KMP_DEBUG_ASSERT(gtid >= 0); 582 583 KMP_MB(); 584 if (--(lck->lk.depth_locked) == 0) { 585 __kmp_release_futex_lock(lck, gtid); 586 return KMP_LOCK_RELEASED; 587 } 588 return KMP_LOCK_STILL_HELD; 589 } 590 591 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, 592 kmp_int32 gtid) { 593 char const *const func 
= "omp_unset_nest_lock"; 594 KMP_MB(); /* in case another processor initialized lock */ 595 if (!__kmp_is_futex_lock_nestable(lck)) { 596 KMP_FATAL(LockSimpleUsedAsNestable, func); 597 } 598 if (__kmp_get_futex_lock_owner(lck) == -1) { 599 KMP_FATAL(LockUnsettingFree, func); 600 } 601 if (__kmp_get_futex_lock_owner(lck) != gtid) { 602 KMP_FATAL(LockUnsettingSetByAnother, func); 603 } 604 return __kmp_release_nested_futex_lock(lck, gtid); 605 } 606 607 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) { 608 __kmp_init_futex_lock(lck); 609 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 610 } 611 612 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { 613 __kmp_init_nested_futex_lock(lck); 614 } 615 616 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) { 617 __kmp_destroy_futex_lock(lck); 618 lck->lk.depth_locked = 0; 619 } 620 621 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { 622 char const *const func = "omp_destroy_nest_lock"; 623 if (!__kmp_is_futex_lock_nestable(lck)) { 624 KMP_FATAL(LockSimpleUsedAsNestable, func); 625 } 626 if (__kmp_get_futex_lock_owner(lck) != -1) { 627 KMP_FATAL(LockStillOwned, func); 628 } 629 __kmp_destroy_nested_futex_lock(lck); 630 } 631 632 #endif // KMP_USE_FUTEX 633 634 /* ------------------------------------------------------------------------ */ 635 /* ticket (bakery) locks */ 636 637 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) { 638 return std::atomic_load_explicit(&lck->lk.owner_id, 639 std::memory_order_relaxed) - 640 1; 641 } 642 643 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) { 644 return std::atomic_load_explicit(&lck->lk.depth_locked, 645 std::memory_order_relaxed) != -1; 646 } 647 648 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) { 649 return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving, 650 std::memory_order_acquire) == my_ticket; 651 } 652 653 __forceinline static int 654 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck, 655 kmp_int32 gtid) { 656 kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( 657 &lck->lk.next_ticket, 1U, std::memory_order_relaxed); 658 659 #ifdef USE_LOCK_PROFILE 660 if (std::atomic_load_explicit(&lck->lk.now_serving, 661 std::memory_order_relaxed) != my_ticket) 662 __kmp_printf("LOCK CONTENTION: %p\n", lck); 663 /* else __kmp_printf( "." 
);*/ 664 #endif /* USE_LOCK_PROFILE */ 665 666 if (std::atomic_load_explicit(&lck->lk.now_serving, 667 std::memory_order_acquire) == my_ticket) { 668 return KMP_LOCK_ACQUIRED_FIRST; 669 } 670 KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck); 671 return KMP_LOCK_ACQUIRED_FIRST; 672 } 673 674 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 675 int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid); 676 ANNOTATE_TICKET_ACQUIRED(lck); 677 return retval; 678 } 679 680 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 681 kmp_int32 gtid) { 682 char const *const func = "omp_set_lock"; 683 684 if (!std::atomic_load_explicit(&lck->lk.initialized, 685 std::memory_order_relaxed)) { 686 KMP_FATAL(LockIsUninitialized, func); 687 } 688 if (lck->lk.self != lck) { 689 KMP_FATAL(LockIsUninitialized, func); 690 } 691 if (__kmp_is_ticket_lock_nestable(lck)) { 692 KMP_FATAL(LockNestableUsedAsSimple, func); 693 } 694 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) { 695 KMP_FATAL(LockIsAlreadyOwned, func); 696 } 697 698 __kmp_acquire_ticket_lock(lck, gtid); 699 700 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 701 std::memory_order_relaxed); 702 return KMP_LOCK_ACQUIRED_FIRST; 703 } 704 705 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 706 kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket, 707 std::memory_order_relaxed); 708 709 if (std::atomic_load_explicit(&lck->lk.now_serving, 710 std::memory_order_relaxed) == my_ticket) { 711 kmp_uint32 next_ticket = my_ticket + 1; 712 if (std::atomic_compare_exchange_strong_explicit( 713 &lck->lk.next_ticket, &my_ticket, next_ticket, 714 std::memory_order_acquire, std::memory_order_acquire)) { 715 return TRUE; 716 } 717 } 718 return FALSE; 719 } 720 721 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 722 kmp_int32 gtid) { 723 char const *const func = "omp_test_lock"; 724 725 if (!std::atomic_load_explicit(&lck->lk.initialized, 726 std::memory_order_relaxed)) { 727 KMP_FATAL(LockIsUninitialized, func); 728 } 729 if (lck->lk.self != lck) { 730 KMP_FATAL(LockIsUninitialized, func); 731 } 732 if (__kmp_is_ticket_lock_nestable(lck)) { 733 KMP_FATAL(LockNestableUsedAsSimple, func); 734 } 735 736 int retval = __kmp_test_ticket_lock(lck, gtid); 737 738 if (retval) { 739 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 740 std::memory_order_relaxed); 741 } 742 return retval; 743 } 744 745 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 746 kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket, 747 std::memory_order_relaxed) - 748 std::atomic_load_explicit(&lck->lk.now_serving, 749 std::memory_order_relaxed); 750 751 ANNOTATE_TICKET_RELEASED(lck); 752 std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U, 753 std::memory_order_release); 754 755 KMP_YIELD(distance > 756 (kmp_uint32)(__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); 757 return KMP_LOCK_RELEASED; 758 } 759 760 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 761 kmp_int32 gtid) { 762 char const *const func = "omp_unset_lock"; 763 764 if (!std::atomic_load_explicit(&lck->lk.initialized, 765 std::memory_order_relaxed)) { 766 KMP_FATAL(LockIsUninitialized, func); 767 } 768 if (lck->lk.self != lck) { 769 KMP_FATAL(LockIsUninitialized, func); 770 } 771 if (__kmp_is_ticket_lock_nestable(lck)) { 772 KMP_FATAL(LockNestableUsedAsSimple, func); 773 } 774 if (__kmp_get_ticket_lock_owner(lck) == -1) { 775 KMP_FATAL(LockUnsettingFree, func); 776 } 777 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) && 778 (__kmp_get_ticket_lock_owner(lck) != gtid)) { 779 KMP_FATAL(LockUnsettingSetByAnother, func); 780 } 781 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 782 return __kmp_release_ticket_lock(lck, gtid); 783 } 784 785 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) { 786 lck->lk.location = NULL; 787 lck->lk.self = lck; 788 std::atomic_store_explicit(&lck->lk.next_ticket, 0U, 789 std::memory_order_relaxed); 790 std::atomic_store_explicit(&lck->lk.now_serving, 0U, 791 std::memory_order_relaxed); 792 std::atomic_store_explicit( 793 &lck->lk.owner_id, 0, 794 std::memory_order_relaxed); // no thread owns the lock. 795 std::atomic_store_explicit( 796 &lck->lk.depth_locked, -1, 797 std::memory_order_relaxed); // -1 => not a nested lock. 798 std::atomic_store_explicit(&lck->lk.initialized, true, 799 std::memory_order_release); 800 } 801 802 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 803 __kmp_init_ticket_lock(lck); 804 } 805 806 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) { 807 std::atomic_store_explicit(&lck->lk.initialized, false, 808 std::memory_order_release); 809 lck->lk.self = NULL; 810 lck->lk.location = NULL; 811 std::atomic_store_explicit(&lck->lk.next_ticket, 0U, 812 std::memory_order_relaxed); 813 std::atomic_store_explicit(&lck->lk.now_serving, 0U, 814 std::memory_order_relaxed); 815 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 816 std::atomic_store_explicit(&lck->lk.depth_locked, -1, 817 std::memory_order_relaxed); 818 } 819 820 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 821 char const *const func = "omp_destroy_lock"; 822 823 if (!std::atomic_load_explicit(&lck->lk.initialized, 824 std::memory_order_relaxed)) { 825 KMP_FATAL(LockIsUninitialized, func); 826 } 827 if (lck->lk.self != lck) { 828 KMP_FATAL(LockIsUninitialized, func); 829 } 830 if (__kmp_is_ticket_lock_nestable(lck)) { 831 KMP_FATAL(LockNestableUsedAsSimple, func); 832 } 833 if (__kmp_get_ticket_lock_owner(lck) != -1) { 834 KMP_FATAL(LockStillOwned, func); 835 } 836 __kmp_destroy_ticket_lock(lck); 837 } 838 839 // nested ticket locks 840 841 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 842 KMP_DEBUG_ASSERT(gtid >= 0); 843 844 if (__kmp_get_ticket_lock_owner(lck) == gtid) { 845 std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, 846 std::memory_order_relaxed); 847 return KMP_LOCK_ACQUIRED_NEXT; 848 } else { 849 __kmp_acquire_ticket_lock_timed_template(lck, gtid); 850 ANNOTATE_TICKET_ACQUIRED(lck); 851 std::atomic_store_explicit(&lck->lk.depth_locked, 1, 852 std::memory_order_relaxed); 853 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 854 std::memory_order_relaxed); 855 return KMP_LOCK_ACQUIRED_FIRST; 856 } 857 } 858 859 static int 
__kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 860 kmp_int32 gtid) { 861 char const *const func = "omp_set_nest_lock"; 862 863 if (!std::atomic_load_explicit(&lck->lk.initialized, 864 std::memory_order_relaxed)) { 865 KMP_FATAL(LockIsUninitialized, func); 866 } 867 if (lck->lk.self != lck) { 868 KMP_FATAL(LockIsUninitialized, func); 869 } 870 if (!__kmp_is_ticket_lock_nestable(lck)) { 871 KMP_FATAL(LockSimpleUsedAsNestable, func); 872 } 873 return __kmp_acquire_nested_ticket_lock(lck, gtid); 874 } 875 876 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 877 int retval; 878 879 KMP_DEBUG_ASSERT(gtid >= 0); 880 881 if (__kmp_get_ticket_lock_owner(lck) == gtid) { 882 retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, 883 std::memory_order_relaxed) + 884 1; 885 } else if (!__kmp_test_ticket_lock(lck, gtid)) { 886 retval = 0; 887 } else { 888 std::atomic_store_explicit(&lck->lk.depth_locked, 1, 889 std::memory_order_relaxed); 890 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, 891 std::memory_order_relaxed); 892 retval = 1; 893 } 894 return retval; 895 } 896 897 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 898 kmp_int32 gtid) { 899 char const *const func = "omp_test_nest_lock"; 900 901 if (!std::atomic_load_explicit(&lck->lk.initialized, 902 std::memory_order_relaxed)) { 903 KMP_FATAL(LockIsUninitialized, func); 904 } 905 if (lck->lk.self != lck) { 906 KMP_FATAL(LockIsUninitialized, func); 907 } 908 if (!__kmp_is_ticket_lock_nestable(lck)) { 909 KMP_FATAL(LockSimpleUsedAsNestable, func); 910 } 911 return __kmp_test_nested_ticket_lock(lck, gtid); 912 } 913 914 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { 915 KMP_DEBUG_ASSERT(gtid >= 0); 916 917 if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1, 918 std::memory_order_relaxed) - 919 1) == 0) { 920 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); 921 __kmp_release_ticket_lock(lck, gtid); 922 return KMP_LOCK_RELEASED; 923 } 924 return KMP_LOCK_STILL_HELD; 925 } 926 927 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, 928 kmp_int32 gtid) { 929 char const *const func = "omp_unset_nest_lock"; 930 931 if (!std::atomic_load_explicit(&lck->lk.initialized, 932 std::memory_order_relaxed)) { 933 KMP_FATAL(LockIsUninitialized, func); 934 } 935 if (lck->lk.self != lck) { 936 KMP_FATAL(LockIsUninitialized, func); 937 } 938 if (!__kmp_is_ticket_lock_nestable(lck)) { 939 KMP_FATAL(LockSimpleUsedAsNestable, func); 940 } 941 if (__kmp_get_ticket_lock_owner(lck) == -1) { 942 KMP_FATAL(LockUnsettingFree, func); 943 } 944 if (__kmp_get_ticket_lock_owner(lck) != gtid) { 945 KMP_FATAL(LockUnsettingSetByAnother, func); 946 } 947 return __kmp_release_nested_ticket_lock(lck, gtid); 948 } 949 950 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) { 951 __kmp_init_ticket_lock(lck); 952 std::atomic_store_explicit(&lck->lk.depth_locked, 0, 953 std::memory_order_relaxed); // >= 0 for nestable 954 // locks, -1 for simple 955 // locks 956 } 957 958 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 959 __kmp_init_nested_ticket_lock(lck); 960 } 961 962 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) { 963 __kmp_destroy_ticket_lock(lck); 964 std::atomic_store_explicit(&lck->lk.depth_locked, 0, 965 std::memory_order_relaxed); 966 } 967 968 static void 969 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { 
970 char const *const func = "omp_destroy_nest_lock"; 971 972 if (!std::atomic_load_explicit(&lck->lk.initialized, 973 std::memory_order_relaxed)) { 974 KMP_FATAL(LockIsUninitialized, func); 975 } 976 if (lck->lk.self != lck) { 977 KMP_FATAL(LockIsUninitialized, func); 978 } 979 if (!__kmp_is_ticket_lock_nestable(lck)) { 980 KMP_FATAL(LockSimpleUsedAsNestable, func); 981 } 982 if (__kmp_get_ticket_lock_owner(lck) != -1) { 983 KMP_FATAL(LockStillOwned, func); 984 } 985 __kmp_destroy_nested_ticket_lock(lck); 986 } 987 988 // access functions to fields which don't exist for all lock kinds. 989 990 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) { 991 return std::atomic_load_explicit(&lck->lk.initialized, 992 std::memory_order_relaxed) && 993 (lck->lk.self == lck); 994 } 995 996 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) { 997 return lck->lk.location; 998 } 999 1000 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck, 1001 const ident_t *loc) { 1002 lck->lk.location = loc; 1003 } 1004 1005 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) { 1006 return lck->lk.flags; 1007 } 1008 1009 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck, 1010 kmp_lock_flags_t flags) { 1011 lck->lk.flags = flags; 1012 } 1013 1014 /* ------------------------------------------------------------------------ */ 1015 /* queuing locks */ 1016 1017 /* First the states 1018 (head,tail) = 0, 0 means lock is unheld, nobody on queue 1019 UINT_MAX or -1, 0 means lock is held, nobody on queue 1020 h, h means lock held or about to transition, 1021 1 element on queue 1022 h, t h <> t, means lock is held or about to 1023 transition, >1 elements on queue 1024 1025 Now the transitions 1026 Acquire(0,0) = -1 ,0 1027 Release(0,0) = Error 1028 Acquire(-1,0) = h ,h h > 0 1029 Release(-1,0) = 0 ,0 1030 Acquire(h,h) = h ,t h > 0, t > 0, h <> t 1031 Release(h,h) = -1 ,0 h > 0 1032 Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' 1033 Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t 1034 1035 And pictorially 1036 1037 +-----+ 1038 | 0, 0|------- release -------> Error 1039 +-----+ 1040 | ^ 1041 acquire| |release 1042 | | 1043 | | 1044 v | 1045 +-----+ 1046 |-1, 0| 1047 +-----+ 1048 | ^ 1049 acquire| |release 1050 | | 1051 | | 1052 v | 1053 +-----+ 1054 | h, h| 1055 +-----+ 1056 | ^ 1057 acquire| |release 1058 | | 1059 | | 1060 v | 1061 +-----+ 1062 | h, t|----- acquire, release loopback ---+ 1063 +-----+ | 1064 ^ | 1065 | | 1066 +------------------------------------+ 1067 */ 1068 1069 #ifdef DEBUG_QUEUING_LOCKS 1070 1071 /* Stuff for circular trace buffer */ 1072 #define TRACE_BUF_ELE 1024 1073 static char traces[TRACE_BUF_ELE][128] = {0}; 1074 static int tc = 0; 1075 #define TRACE_LOCK(X, Y) \ 1076 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y); 1077 #define TRACE_LOCK_T(X, Y, Z) \ 1078 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z); 1079 #define TRACE_LOCK_HT(X, Y, Z, Q) \ 1080 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \ 1081 Z, Q); 1082 1083 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid, 1084 kmp_queuing_lock_t *lck, kmp_int32 head_id, 1085 kmp_int32 tail_id) { 1086 kmp_int32 t, i; 1087 1088 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! 
\n"); 1089 1090 i = tc % TRACE_BUF_ELE; 1091 __kmp_printf_no_lock("%s\n", traces[i]); 1092 i = (i + 1) % TRACE_BUF_ELE; 1093 while (i != (tc % TRACE_BUF_ELE)) { 1094 __kmp_printf_no_lock("%s", traces[i]); 1095 i = (i + 1) % TRACE_BUF_ELE; 1096 } 1097 __kmp_printf_no_lock("\n"); 1098 1099 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, " 1100 "next_wait:%d, head_id:%d, tail_id:%d\n", 1101 gtid + 1, this_thr->th.th_spin_here, 1102 this_thr->th.th_next_waiting, head_id, tail_id); 1103 1104 __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id); 1105 1106 if (lck->lk.head_id >= 1) { 1107 t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting; 1108 while (t > 0) { 1109 __kmp_printf_no_lock("-> %d ", t); 1110 t = __kmp_threads[t - 1]->th.th_next_waiting; 1111 } 1112 } 1113 __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id); 1114 __kmp_printf_no_lock("\n\n"); 1115 } 1116 1117 #endif /* DEBUG_QUEUING_LOCKS */ 1118 1119 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) { 1120 return TCR_4(lck->lk.owner_id) - 1; 1121 } 1122 1123 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) { 1124 return lck->lk.depth_locked != -1; 1125 } 1126 1127 /* Acquire a lock using a the queuing lock implementation */ 1128 template <bool takeTime> 1129 /* [TLW] The unused template above is left behind because of what BEB believes 1130 is a potential compiler problem with __forceinline. */ 1131 __forceinline static int 1132 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, 1133 kmp_int32 gtid) { 1134 register kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid); 1135 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1136 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; 1137 volatile kmp_uint32 *spin_here_p; 1138 kmp_int32 need_mf = 1; 1139 1140 #if OMPT_SUPPORT 1141 ompt_state_t prev_state = ompt_state_undefined; 1142 #endif 1143 1144 KA_TRACE(1000, 1145 ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); 1146 1147 KMP_FSYNC_PREPARE(lck); 1148 KMP_DEBUG_ASSERT(this_thr != NULL); 1149 spin_here_p = &this_thr->th.th_spin_here; 1150 1151 #ifdef DEBUG_QUEUING_LOCKS 1152 TRACE_LOCK(gtid + 1, "acq ent"); 1153 if (*spin_here_p) 1154 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1155 if (this_thr->th.th_next_waiting != 0) 1156 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1157 #endif 1158 KMP_DEBUG_ASSERT(!*spin_here_p); 1159 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1160 1161 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to 1162 head_id_p that may follow, not just in execution order, but also in 1163 visibility order. This way, when a releasing thread observes the changes to 1164 the queue by this thread, it can rightly assume that spin_here_p has 1165 already been set to TRUE, so that when it sets spin_here_p to FALSE, it is 1166 not premature. If the releasing thread sets spin_here_p to FALSE before 1167 this thread sets it to TRUE, this thread will hang. 
*/ 1168 *spin_here_p = TRUE; /* before enqueuing to prevent race */ 1169 1170 while (1) { 1171 kmp_int32 enqueued; 1172 kmp_int32 head; 1173 kmp_int32 tail; 1174 1175 head = *head_id_p; 1176 1177 switch (head) { 1178 1179 case -1: { 1180 #ifdef DEBUG_QUEUING_LOCKS 1181 tail = *tail_id_p; 1182 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1183 #endif 1184 tail = 0; /* to make sure next link asynchronously read is not set 1185 accidentally; this assignment prevents us from entering the 1186 if ( t > 0 ) condition in the enqueued case below, which is not 1187 necessary for this state transition */ 1188 1189 need_mf = 0; 1190 /* try (-1,0)->(tid,tid) */ 1191 enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p, 1192 KMP_PACK_64(-1, 0), 1193 KMP_PACK_64(gtid + 1, gtid + 1)); 1194 #ifdef DEBUG_QUEUING_LOCKS 1195 if (enqueued) 1196 TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)"); 1197 #endif 1198 } break; 1199 1200 default: { 1201 tail = *tail_id_p; 1202 KMP_DEBUG_ASSERT(tail != gtid + 1); 1203 1204 #ifdef DEBUG_QUEUING_LOCKS 1205 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1206 #endif 1207 1208 if (tail == 0) { 1209 enqueued = FALSE; 1210 } else { 1211 need_mf = 0; 1212 /* try (h,t) or (h,h)->(h,tid) */ 1213 enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1); 1214 1215 #ifdef DEBUG_QUEUING_LOCKS 1216 if (enqueued) 1217 TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)"); 1218 #endif 1219 } 1220 } break; 1221 1222 case 0: /* empty queue */ 1223 { 1224 kmp_int32 grabbed_lock; 1225 1226 #ifdef DEBUG_QUEUING_LOCKS 1227 tail = *tail_id_p; 1228 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); 1229 #endif 1230 /* try (0,0)->(-1,0) */ 1231 1232 /* only legal transition out of head = 0 is head = -1 with no change to 1233 * tail */ 1234 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1); 1235 1236 if (grabbed_lock) { 1237 1238 *spin_here_p = FALSE; 1239 1240 KA_TRACE( 1241 1000, 1242 ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", 1243 lck, gtid)); 1244 #ifdef DEBUG_QUEUING_LOCKS 1245 TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0); 1246 #endif 1247 1248 #if OMPT_SUPPORT 1249 if (ompt_enabled && prev_state != ompt_state_undefined) { 1250 /* change the state before clearing wait_id */ 1251 this_thr->th.ompt_thread_info.state = prev_state; 1252 this_thr->th.ompt_thread_info.wait_id = 0; 1253 } 1254 #endif 1255 1256 KMP_FSYNC_ACQUIRED(lck); 1257 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ 1258 } 1259 enqueued = FALSE; 1260 } break; 1261 } 1262 1263 #if OMPT_SUPPORT 1264 if (ompt_enabled && prev_state == ompt_state_undefined) { 1265 /* this thread will spin; set wait_id before entering wait state */ 1266 prev_state = this_thr->th.ompt_thread_info.state; 1267 this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; 1268 this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; 1269 } 1270 #endif 1271 1272 if (enqueued) { 1273 if (tail > 0) { 1274 kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1); 1275 KMP_ASSERT(tail_thr != NULL); 1276 tail_thr->th.th_next_waiting = gtid + 1; 1277 /* corresponding wait for this write in release code */ 1278 } 1279 KA_TRACE(1000, 1280 ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", 1281 lck, gtid)); 1282 1283 /* ToDo: May want to consider using __kmp_wait_sleep or something that 1284 sleeps for throughput only here. 
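       (The KMP_WAIT_YIELD call below spins and yields rather than blocking
       in the OS.)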
*/ 1285 KMP_MB(); 1286 KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); 1287 1288 #ifdef DEBUG_QUEUING_LOCKS 1289 TRACE_LOCK(gtid + 1, "acq spin"); 1290 1291 if (this_thr->th.th_next_waiting != 0) 1292 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1293 #endif 1294 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1295 KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after " 1296 "waiting on queue\n", 1297 lck, gtid)); 1298 1299 #ifdef DEBUG_QUEUING_LOCKS 1300 TRACE_LOCK(gtid + 1, "acq exit 2"); 1301 #endif 1302 1303 #if OMPT_SUPPORT 1304 /* change the state before clearing wait_id */ 1305 this_thr->th.ompt_thread_info.state = prev_state; 1306 this_thr->th.ompt_thread_info.wait_id = 0; 1307 #endif 1308 1309 /* got lock, we were dequeued by the thread that released lock */ 1310 return KMP_LOCK_ACQUIRED_FIRST; 1311 } 1312 1313 /* Yield if number of threads > number of logical processors */ 1314 /* ToDo: Not sure why this should only be in oversubscription case, 1315 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ 1316 KMP_YIELD(TCR_4(__kmp_nth) > 1317 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 1318 #ifdef DEBUG_QUEUING_LOCKS 1319 TRACE_LOCK(gtid + 1, "acq retry"); 1320 #endif 1321 } 1322 KMP_ASSERT2(0, "should not get here"); 1323 return KMP_LOCK_ACQUIRED_FIRST; 1324 } 1325 1326 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1327 KMP_DEBUG_ASSERT(gtid >= 0); 1328 1329 int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1330 ANNOTATE_QUEUING_ACQUIRED(lck); 1331 return retval; 1332 } 1333 1334 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1335 kmp_int32 gtid) { 1336 char const *const func = "omp_set_lock"; 1337 if (lck->lk.initialized != lck) { 1338 KMP_FATAL(LockIsUninitialized, func); 1339 } 1340 if (__kmp_is_queuing_lock_nestable(lck)) { 1341 KMP_FATAL(LockNestableUsedAsSimple, func); 1342 } 1343 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1344 KMP_FATAL(LockIsAlreadyOwned, func); 1345 } 1346 1347 __kmp_acquire_queuing_lock(lck, gtid); 1348 1349 lck->lk.owner_id = gtid + 1; 1350 return KMP_LOCK_ACQUIRED_FIRST; 1351 } 1352 1353 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1354 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1355 kmp_int32 head; 1356 #ifdef KMP_DEBUG 1357 kmp_info_t *this_thr; 1358 #endif 1359 1360 KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid)); 1361 KMP_DEBUG_ASSERT(gtid >= 0); 1362 #ifdef KMP_DEBUG 1363 this_thr = __kmp_thread_from_gtid(gtid); 1364 KMP_DEBUG_ASSERT(this_thr != NULL); 1365 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 1366 #endif 1367 1368 head = *head_id_p; 1369 1370 if (head == 0) { /* nobody on queue, nobody holding */ 1371 /* try (0,0)->(-1,0) */ 1372 if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) { 1373 KA_TRACE(1000, 1374 ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid)); 1375 KMP_FSYNC_ACQUIRED(lck); 1376 ANNOTATE_QUEUING_ACQUIRED(lck); 1377 return TRUE; 1378 } 1379 } 1380 1381 KA_TRACE(1000, 1382 ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid)); 1383 return FALSE; 1384 } 1385 1386 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1387 kmp_int32 gtid) { 1388 char const *const func = "omp_test_lock"; 1389 if (lck->lk.initialized != lck) { 1390 KMP_FATAL(LockIsUninitialized, func); 1391 } 1392 if (__kmp_is_queuing_lock_nestable(lck)) { 1393 KMP_FATAL(LockNestableUsedAsSimple, func); 1394 } 1395 1396 int 
retval = __kmp_test_queuing_lock(lck, gtid); 1397 1398 if (retval) { 1399 lck->lk.owner_id = gtid + 1; 1400 } 1401 return retval; 1402 } 1403 1404 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1405 register kmp_info_t *this_thr; 1406 volatile kmp_int32 *head_id_p = &lck->lk.head_id; 1407 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; 1408 1409 KA_TRACE(1000, 1410 ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); 1411 KMP_DEBUG_ASSERT(gtid >= 0); 1412 this_thr = __kmp_thread_from_gtid(gtid); 1413 KMP_DEBUG_ASSERT(this_thr != NULL); 1414 #ifdef DEBUG_QUEUING_LOCKS 1415 TRACE_LOCK(gtid + 1, "rel ent"); 1416 1417 if (this_thr->th.th_spin_here) 1418 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1419 if (this_thr->th.th_next_waiting != 0) 1420 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); 1421 #endif 1422 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); 1423 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); 1424 1425 KMP_FSYNC_RELEASING(lck); 1426 ANNOTATE_QUEUING_RELEASED(lck); 1427 1428 while (1) { 1429 kmp_int32 dequeued; 1430 kmp_int32 head; 1431 kmp_int32 tail; 1432 1433 head = *head_id_p; 1434 1435 #ifdef DEBUG_QUEUING_LOCKS 1436 tail = *tail_id_p; 1437 TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail); 1438 if (head == 0) 1439 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1440 #endif 1441 KMP_DEBUG_ASSERT(head != 1442 0); /* holding the lock, head must be -1 or queue head */ 1443 1444 if (head == -1) { /* nobody on queue */ 1445 /* try (-1,0)->(0,0) */ 1446 if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) { 1447 KA_TRACE( 1448 1000, 1449 ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1450 lck, gtid)); 1451 #ifdef DEBUG_QUEUING_LOCKS 1452 TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0); 1453 #endif 1454 1455 #if OMPT_SUPPORT 1456 /* nothing to do - no other thread is trying to shift blame */ 1457 #endif 1458 return KMP_LOCK_RELEASED; 1459 } 1460 dequeued = FALSE; 1461 } else { 1462 tail = *tail_id_p; 1463 if (head == tail) { /* only one thread on the queue */ 1464 #ifdef DEBUG_QUEUING_LOCKS 1465 if (head <= 0) 1466 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1467 #endif 1468 KMP_DEBUG_ASSERT(head > 0); 1469 1470 /* try (h,h)->(-1,0) */ 1471 dequeued = KMP_COMPARE_AND_STORE_REL64((kmp_int64 *)tail_id_p, 1472 KMP_PACK_64(head, head), 1473 KMP_PACK_64(-1, 0)); 1474 #ifdef DEBUG_QUEUING_LOCKS 1475 TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); 1476 #endif 1477 1478 } else { 1479 volatile kmp_int32 *waiting_id_p; 1480 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1481 KMP_DEBUG_ASSERT(head_thr != NULL); 1482 waiting_id_p = &head_thr->th.th_next_waiting; 1483 1484 /* Does this require synchronous reads? */ 1485 #ifdef DEBUG_QUEUING_LOCKS 1486 if (head <= 0 || tail <= 0) 1487 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1488 #endif 1489 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1490 1491 /* try (h,t)->(h',t) or (t,t) */ 1492 KMP_MB(); 1493 /* make sure enqueuing thread has time to update next waiting thread 1494 * field */ 1495 *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0, 1496 KMP_NEQ, NULL); 1497 #ifdef DEBUG_QUEUING_LOCKS 1498 TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)"); 1499 #endif 1500 dequeued = TRUE; 1501 } 1502 } 1503 1504 if (dequeued) { 1505 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1506 KMP_DEBUG_ASSERT(head_thr != NULL); 1507 1508 /* Does this require synchronous reads? 
*/ 1509 #ifdef DEBUG_QUEUING_LOCKS 1510 if (head <= 0 || tail <= 0) 1511 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1512 #endif 1513 KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1514 1515 /* For clean code only. Thread not released until next statement prevents 1516 race with acquire code. */ 1517 head_thr->th.th_next_waiting = 0; 1518 #ifdef DEBUG_QUEUING_LOCKS 1519 TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head); 1520 #endif 1521 1522 KMP_MB(); 1523 /* reset spin value */ 1524 head_thr->th.th_spin_here = FALSE; 1525 1526 KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after " 1527 "dequeuing\n", 1528 lck, gtid)); 1529 #ifdef DEBUG_QUEUING_LOCKS 1530 TRACE_LOCK(gtid + 1, "rel exit 2"); 1531 #endif 1532 return KMP_LOCK_RELEASED; 1533 } 1534 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring 1535 threads */ 1536 1537 #ifdef DEBUG_QUEUING_LOCKS 1538 TRACE_LOCK(gtid + 1, "rel retry"); 1539 #endif 1540 1541 } /* while */ 1542 KMP_ASSERT2(0, "should not get here"); 1543 return KMP_LOCK_RELEASED; 1544 } 1545 1546 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1547 kmp_int32 gtid) { 1548 char const *const func = "omp_unset_lock"; 1549 KMP_MB(); /* in case another processor initialized lock */ 1550 if (lck->lk.initialized != lck) { 1551 KMP_FATAL(LockIsUninitialized, func); 1552 } 1553 if (__kmp_is_queuing_lock_nestable(lck)) { 1554 KMP_FATAL(LockNestableUsedAsSimple, func); 1555 } 1556 if (__kmp_get_queuing_lock_owner(lck) == -1) { 1557 KMP_FATAL(LockUnsettingFree, func); 1558 } 1559 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1560 KMP_FATAL(LockUnsettingSetByAnother, func); 1561 } 1562 lck->lk.owner_id = 0; 1563 return __kmp_release_queuing_lock(lck, gtid); 1564 } 1565 1566 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) { 1567 lck->lk.location = NULL; 1568 lck->lk.head_id = 0; 1569 lck->lk.tail_id = 0; 1570 lck->lk.next_ticket = 0; 1571 lck->lk.now_serving = 0; 1572 lck->lk.owner_id = 0; // no thread owns the lock. 1573 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
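  // The self-pointer stored in lk.initialized below is what the
  // *_with_checks routines compare against to detect an uninitialized lock.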
1574 lck->lk.initialized = lck; 1575 1576 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1577 } 1578 1579 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1580 __kmp_init_queuing_lock(lck); 1581 } 1582 1583 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) { 1584 lck->lk.initialized = NULL; 1585 lck->lk.location = NULL; 1586 lck->lk.head_id = 0; 1587 lck->lk.tail_id = 0; 1588 lck->lk.next_ticket = 0; 1589 lck->lk.now_serving = 0; 1590 lck->lk.owner_id = 0; 1591 lck->lk.depth_locked = -1; 1592 } 1593 1594 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1595 char const *const func = "omp_destroy_lock"; 1596 if (lck->lk.initialized != lck) { 1597 KMP_FATAL(LockIsUninitialized, func); 1598 } 1599 if (__kmp_is_queuing_lock_nestable(lck)) { 1600 KMP_FATAL(LockNestableUsedAsSimple, func); 1601 } 1602 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1603 KMP_FATAL(LockStillOwned, func); 1604 } 1605 __kmp_destroy_queuing_lock(lck); 1606 } 1607 1608 // nested queuing locks 1609 1610 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1611 KMP_DEBUG_ASSERT(gtid >= 0); 1612 1613 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1614 lck->lk.depth_locked += 1; 1615 return KMP_LOCK_ACQUIRED_NEXT; 1616 } else { 1617 __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1618 ANNOTATE_QUEUING_ACQUIRED(lck); 1619 KMP_MB(); 1620 lck->lk.depth_locked = 1; 1621 KMP_MB(); 1622 lck->lk.owner_id = gtid + 1; 1623 return KMP_LOCK_ACQUIRED_FIRST; 1624 } 1625 } 1626 1627 static int 1628 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1629 kmp_int32 gtid) { 1630 char const *const func = "omp_set_nest_lock"; 1631 if (lck->lk.initialized != lck) { 1632 KMP_FATAL(LockIsUninitialized, func); 1633 } 1634 if (!__kmp_is_queuing_lock_nestable(lck)) { 1635 KMP_FATAL(LockSimpleUsedAsNestable, func); 1636 } 1637 return __kmp_acquire_nested_queuing_lock(lck, gtid); 1638 } 1639 1640 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1641 int retval; 1642 1643 KMP_DEBUG_ASSERT(gtid >= 0); 1644 1645 if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1646 retval = ++lck->lk.depth_locked; 1647 } else if (!__kmp_test_queuing_lock(lck, gtid)) { 1648 retval = 0; 1649 } else { 1650 KMP_MB(); 1651 retval = lck->lk.depth_locked = 1; 1652 KMP_MB(); 1653 lck->lk.owner_id = gtid + 1; 1654 } 1655 return retval; 1656 } 1657 1658 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1659 kmp_int32 gtid) { 1660 char const *const func = "omp_test_nest_lock"; 1661 if (lck->lk.initialized != lck) { 1662 KMP_FATAL(LockIsUninitialized, func); 1663 } 1664 if (!__kmp_is_queuing_lock_nestable(lck)) { 1665 KMP_FATAL(LockSimpleUsedAsNestable, func); 1666 } 1667 return __kmp_test_nested_queuing_lock(lck, gtid); 1668 } 1669 1670 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1671 KMP_DEBUG_ASSERT(gtid >= 0); 1672 1673 KMP_MB(); 1674 if (--(lck->lk.depth_locked) == 0) { 1675 KMP_MB(); 1676 lck->lk.owner_id = 0; 1677 __kmp_release_queuing_lock(lck, gtid); 1678 return KMP_LOCK_RELEASED; 1679 } 1680 return KMP_LOCK_STILL_HELD; 1681 } 1682 1683 static int 1684 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1685 kmp_int32 gtid) { 1686 char const *const func = "omp_unset_nest_lock"; 1687 KMP_MB(); /* in case another processor initialized lock */ 1688 if (lck->lk.initialized != lck) { 1689 
KMP_FATAL(LockIsUninitialized, func); 1690 } 1691 if (!__kmp_is_queuing_lock_nestable(lck)) { 1692 KMP_FATAL(LockSimpleUsedAsNestable, func); 1693 } 1694 if (__kmp_get_queuing_lock_owner(lck) == -1) { 1695 KMP_FATAL(LockUnsettingFree, func); 1696 } 1697 if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1698 KMP_FATAL(LockUnsettingSetByAnother, func); 1699 } 1700 return __kmp_release_nested_queuing_lock(lck, gtid); 1701 } 1702 1703 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1704 __kmp_init_queuing_lock(lck); 1705 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1706 } 1707 1708 static void 1709 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1710 __kmp_init_nested_queuing_lock(lck); 1711 } 1712 1713 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1714 __kmp_destroy_queuing_lock(lck); 1715 lck->lk.depth_locked = 0; 1716 } 1717 1718 static void 1719 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1720 char const *const func = "omp_destroy_nest_lock"; 1721 if (lck->lk.initialized != lck) { 1722 KMP_FATAL(LockIsUninitialized, func); 1723 } 1724 if (!__kmp_is_queuing_lock_nestable(lck)) { 1725 KMP_FATAL(LockSimpleUsedAsNestable, func); 1726 } 1727 if (__kmp_get_queuing_lock_owner(lck) != -1) { 1728 KMP_FATAL(LockStillOwned, func); 1729 } 1730 __kmp_destroy_nested_queuing_lock(lck); 1731 } 1732 1733 // access functions to fields which don't exist for all lock kinds. 1734 1735 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) { 1736 return lck == lck->lk.initialized; 1737 } 1738 1739 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) { 1740 return lck->lk.location; 1741 } 1742 1743 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck, 1744 const ident_t *loc) { 1745 lck->lk.location = loc; 1746 } 1747 1748 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) { 1749 return lck->lk.flags; 1750 } 1751 1752 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, 1753 kmp_lock_flags_t flags) { 1754 lck->lk.flags = flags; 1755 } 1756 1757 #if KMP_USE_ADAPTIVE_LOCKS 1758 1759 /* RTM Adaptive locks */ 1760 1761 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1762 1763 #include <immintrin.h> 1764 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1765 1766 #else 1767 1768 // Values from the status register after failed speculation. 1769 #define _XBEGIN_STARTED (~0u) 1770 #define _XABORT_EXPLICIT (1 << 0) 1771 #define _XABORT_RETRY (1 << 1) 1772 #define _XABORT_CONFLICT (1 << 2) 1773 #define _XABORT_CAPACITY (1 << 3) 1774 #define _XABORT_DEBUG (1 << 4) 1775 #define _XABORT_NESTED (1 << 5) 1776 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1777 1778 // Aborts for which it's worth trying again immediately 1779 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1780 1781 #define STRINGIZE_INTERNAL(arg) #arg 1782 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1783 1784 // Access to RTM instructions 1785 /*A version of XBegin which returns -1 on speculation, and the value of EAX on 1786 an abort. This is the same definition as the compiler intrinsic that will be 1787 supported at some point. 
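   For illustration, the usual pattern for using these primitives (a sketch,
   not code taken from this file):

     int status = _xbegin();
     if (status == _XBEGIN_STARTED) {
       // speculative region: read the lock word, do the protected work
       _xend();                       // commit the transaction
     } else if (status & SOFT_ABORT_MASK) {
       // transient abort (retry/conflict/explicit): speculation may be retried
     } else {
       // hard abort (e.g. capacity): fall back to acquiring the real lock
     }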
*/ 1788 static __inline int _xbegin() { 1789 int res = -1; 1790 1791 #if KMP_OS_WINDOWS 1792 #if KMP_ARCH_X86_64 1793 _asm { 1794 _emit 0xC7 1795 _emit 0xF8 1796 _emit 2 1797 _emit 0 1798 _emit 0 1799 _emit 0 1800 jmp L2 1801 mov res, eax 1802 L2: 1803 } 1804 #else /* IA32 */ 1805 _asm { 1806 _emit 0xC7 1807 _emit 0xF8 1808 _emit 2 1809 _emit 0 1810 _emit 0 1811 _emit 0 1812 jmp L2 1813 mov res, eax 1814 L2: 1815 } 1816 #endif // KMP_ARCH_X86_64 1817 #else 1818 /* Note that %eax must be noted as killed (clobbered), because the XSR is 1819 returned in %eax(%rax) on abort. Other register values are restored, so 1820 don't need to be killed. 1821 1822 We must also mark 'res' as an input and an output, since otherwise 1823 'res=-1' may be dropped as being dead, whereas we do need the assignment on 1824 the successful (i.e., non-abort) path. */ 1825 __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" 1826 " .long 1f-1b-6\n" 1827 " jmp 2f\n" 1828 "1: movl %%eax,%0\n" 1829 "2:" 1830 : "+r"(res)::"memory", "%eax"); 1831 #endif // KMP_OS_WINDOWS 1832 return res; 1833 } 1834 1835 /* Transaction end */ 1836 static __inline void _xend() { 1837 #if KMP_OS_WINDOWS 1838 __asm { 1839 _emit 0x0f 1840 _emit 0x01 1841 _emit 0xd5 1842 } 1843 #else 1844 __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); 1845 #endif 1846 } 1847 1848 /* This is a macro, the argument must be a single byte constant which can be 1849 evaluated by the inline assembler, since it is emitted as a byte into the 1850 assembly code. */ 1851 // clang-format off 1852 #if KMP_OS_WINDOWS 1853 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG 1854 #else 1855 #define _xabort(ARG) \ 1856 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); 1857 #endif 1858 // clang-format on 1859 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 1860 1861 // Statistics is collected for testing purpose 1862 #if KMP_DEBUG_ADAPTIVE_LOCKS 1863 1864 // We accumulate speculative lock statistics when the lock is destroyed. We 1865 // keep locks that haven't been destroyed in the liveLocks list so that we can 1866 // grab their statistics too. 1867 static kmp_adaptive_lock_statistics_t destroyedStats; 1868 1869 // To hold the list of live locks. 1870 static kmp_adaptive_lock_info_t liveLocks; 1871 1872 // A lock so we can safely update the list of locks. 1873 static kmp_bootstrap_lock_t chain_lock; 1874 1875 // Initialize the list of stats. 
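// Note: liveLocks is the sentinel of a circular doubly-linked list threaded
// through each lock's stats, so walking every live lock looks like:
//
//   for (kmp_adaptive_lock_info_t *p = liveLocks.stats.next; p != &liveLocks;
//        p = p->stats.next) { /* visit p */ }
//
// which is the traversal __kmp_print_speculative_stats() uses below.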
1876 void __kmp_init_speculative_stats() { 1877 kmp_adaptive_lock_info_t *lck = &liveLocks; 1878 1879 memset((void *)&(lck->stats), 0, sizeof(lck->stats)); 1880 lck->stats.next = lck; 1881 lck->stats.prev = lck; 1882 1883 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1884 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1885 1886 __kmp_init_bootstrap_lock(&chain_lock); 1887 } 1888 1889 // Insert the lock into the circular list 1890 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { 1891 __kmp_acquire_bootstrap_lock(&chain_lock); 1892 1893 lck->stats.next = liveLocks.stats.next; 1894 lck->stats.prev = &liveLocks; 1895 1896 liveLocks.stats.next = lck; 1897 lck->stats.next->stats.prev = lck; 1898 1899 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1900 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1901 1902 __kmp_release_bootstrap_lock(&chain_lock); 1903 } 1904 1905 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { 1906 KMP_ASSERT(lck->stats.next->stats.prev == lck); 1907 KMP_ASSERT(lck->stats.prev->stats.next == lck); 1908 1909 kmp_adaptive_lock_info_t *n = lck->stats.next; 1910 kmp_adaptive_lock_info_t *p = lck->stats.prev; 1911 1912 n->stats.prev = p; 1913 p->stats.next = n; 1914 } 1915 1916 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1917 memset((void *)&lck->stats, 0, sizeof(lck->stats)); 1918 __kmp_remember_lock(lck); 1919 } 1920 1921 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, 1922 kmp_adaptive_lock_info_t *lck) { 1923 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 1924 1925 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 1926 t->successfulSpeculations += s->successfulSpeculations; 1927 t->hardFailedSpeculations += s->hardFailedSpeculations; 1928 t->softFailedSpeculations += s->softFailedSpeculations; 1929 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 1930 t->lemmingYields += s->lemmingYields; 1931 } 1932 1933 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1934 kmp_adaptive_lock_statistics_t *t = &destroyedStats; 1935 1936 __kmp_acquire_bootstrap_lock(&chain_lock); 1937 1938 __kmp_add_stats(&destroyedStats, lck); 1939 __kmp_forget_lock(lck); 1940 1941 __kmp_release_bootstrap_lock(&chain_lock); 1942 } 1943 1944 static float percent(kmp_uint32 count, kmp_uint32 total) { 1945 return (total == 0) ? 0.0 : (100.0 * count) / total; 1946 } 1947 1948 static FILE *__kmp_open_stats_file() { 1949 if (strcmp(__kmp_speculative_statsfile, "-") == 0) 1950 return stdout; 1951 1952 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; 1953 char buffer[buffLen]; 1954 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, 1955 (kmp_int32)getpid()); 1956 FILE *result = fopen(&buffer[0], "w"); 1957 1958 // Maybe we should issue a warning here... 1959 return result ? 
result : stdout; 1960 } 1961 1962 void __kmp_print_speculative_stats() { 1963 if (__kmp_user_lock_kind != lk_adaptive) 1964 return; 1965 1966 FILE *statsFile = __kmp_open_stats_file(); 1967 1968 kmp_adaptive_lock_statistics_t total = destroyedStats; 1969 kmp_adaptive_lock_info_t *lck; 1970 1971 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 1972 __kmp_add_stats(&total, lck); 1973 } 1974 kmp_adaptive_lock_statistics_t *t = &total; 1975 kmp_uint32 totalSections = 1976 t->nonSpeculativeAcquires + t->successfulSpeculations; 1977 kmp_uint32 totalSpeculations = t->successfulSpeculations + 1978 t->hardFailedSpeculations + 1979 t->softFailedSpeculations; 1980 1981 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); 1982 fprintf(statsFile, " Lock parameters: \n" 1983 " max_soft_retries : %10d\n" 1984 " max_badness : %10d\n", 1985 __kmp_adaptive_backoff_params.max_soft_retries, 1986 __kmp_adaptive_backoff_params.max_badness); 1987 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", 1988 t->nonSpeculativeAcquireAttempts); 1989 fprintf(statsFile, " Total critical sections : %10d\n", 1990 totalSections); 1991 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", 1992 t->successfulSpeculations, 1993 percent(t->successfulSpeculations, totalSections)); 1994 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 1995 t->nonSpeculativeAcquires, 1996 percent(t->nonSpeculativeAcquires, totalSections)); 1997 fprintf(statsFile, " Lemming yields : %10d\n\n", 1998 t->lemmingYields); 1999 2000 fprintf(statsFile, " Speculative acquire attempts : %10d\n", 2001 totalSpeculations); 2002 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", 2003 t->successfulSpeculations, 2004 percent(t->successfulSpeculations, totalSpeculations)); 2005 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", 2006 t->softFailedSpeculations, 2007 percent(t->softFailedSpeculations, totalSpeculations)); 2008 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", 2009 t->hardFailedSpeculations, 2010 percent(t->hardFailedSpeculations, totalSpeculations)); 2011 2012 if (statsFile != stdout) 2013 fclose(statsFile); 2014 } 2015 2016 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) 2017 #else 2018 #define KMP_INC_STAT(lck, stat) 2019 2020 #endif // KMP_DEBUG_ADAPTIVE_LOCKS 2021 2022 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { 2023 // It is enough to check that the head_id is zero. 2024 // We don't also need to check the tail. 2025 bool res = lck->lk.head_id == 0; 2026 2027 // We need a fence here, since we must ensure that no memory operations 2028 // from later in this thread float above that read. 2029 #if KMP_COMPILER_ICC 2030 _mm_mfence(); 2031 #else 2032 __sync_synchronize(); 2033 #endif 2034 2035 return res; 2036 } 2037 2038 // Functions for manipulating the badness 2039 static __inline void 2040 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { 2041 // Reset the badness to zero so we eagerly try to speculate again 2042 lck->lk.adaptive.badness = 0; 2043 KMP_INC_STAT(lck, successfulSpeculations); 2044 } 2045 2046 // Create a bit mask with one more set bit. 2047 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { 2048 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; 2049 if (newBadness > lck->lk.adaptive.max_badness) { 2050 return; 2051 } else { 2052 lck->lk.adaptive.badness = newBadness; 2053 } 2054 } 2055 2056 // Check whether speculation should be attempted. 
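// Illustrative note: badness is a low-bit mask (0, 1, 3, 7, ...) grown by
// __kmp_step_badness() after an unsuccessful speculation episode and reset
// to 0 by __kmp_update_badness_after_success(). The test below,
// (acquire_attempts & badness) == 0, therefore permits speculation on
// roughly one in (badness + 1) acquire attempts; e.g. with badness == 7
// only attempts 0, 8, 16, ... try to speculate.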
2057 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, 2058 kmp_int32 gtid) { 2059 kmp_uint32 badness = lck->lk.adaptive.badness; 2060 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; 2061 int res = (attempts & badness) == 0; 2062 return res; 2063 } 2064 2065 // Attempt to acquire only the speculative lock. 2066 // Does not back off to the non-speculative lock. 2067 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, 2068 kmp_int32 gtid) { 2069 int retries = lck->lk.adaptive.max_soft_retries; 2070 2071 // We don't explicitly count the start of speculation, rather we record the 2072 // results (success, hard fail, soft fail). The sum of all of those is the 2073 // total number of times we started speculation since all speculations must 2074 // end one of those ways. 2075 do { 2076 kmp_uint32 status = _xbegin(); 2077 // Switch this in to disable actual speculation but exercise at least some 2078 // of the rest of the code. Useful for debugging... 2079 // kmp_uint32 status = _XABORT_NESTED; 2080 2081 if (status == _XBEGIN_STARTED) { 2082 /* We have successfully started speculation. Check that no-one acquired 2083 the lock for real between when we last looked and now. This also gets 2084 the lock cache line into our read-set, which we need so that we'll 2085 abort if anyone later claims it for real. */ 2086 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2087 // Lock is now visibly acquired, so someone beat us to it. Abort the 2088 // transaction so we'll restart from _xbegin with the failure status. 2089 _xabort(0x01); 2090 KMP_ASSERT2(0, "should not get here"); 2091 } 2092 return 1; // Lock has been acquired (speculatively) 2093 } else { 2094 // We have aborted, update the statistics 2095 if (status & SOFT_ABORT_MASK) { 2096 KMP_INC_STAT(lck, softFailedSpeculations); 2097 // and loop round to retry. 2098 } else { 2099 KMP_INC_STAT(lck, hardFailedSpeculations); 2100 // Give up if we had a hard failure. 2101 break; 2102 } 2103 } 2104 } while (retries--); // Loop while we have retries, and didn't fail hard. 2105 2106 // Either we had a hard failure or we didn't succeed softly after 2107 // the full set of attempts, so back off the badness. 2108 __kmp_step_badness(lck); 2109 return 0; 2110 } 2111 2112 // Attempt to acquire the speculative lock, or back off to the non-speculative 2113 // one if the speculative lock cannot be acquired. 2114 // We can succeed speculatively, non-speculatively, or fail. 2115 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { 2116 // First try to acquire the lock speculatively 2117 if (__kmp_should_speculate(lck, gtid) && 2118 __kmp_test_adaptive_lock_only(lck, gtid)) 2119 return 1; 2120 2121 // Speculative acquisition failed, so try to acquire it non-speculatively. 2122 // Count the non-speculative acquire attempt 2123 lck->lk.adaptive.acquire_attempts++; 2124 2125 // Use base, non-speculative lock. 2126 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { 2127 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2128 return 1; // Lock is acquired (non-speculatively) 2129 } else { 2130 return 0; // Failed to acquire the lock, it's already visibly locked. 
2131 } 2132 } 2133 2134 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2135 kmp_int32 gtid) { 2136 char const *const func = "omp_test_lock"; 2137 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2138 KMP_FATAL(LockIsUninitialized, func); 2139 } 2140 2141 int retval = __kmp_test_adaptive_lock(lck, gtid); 2142 2143 if (retval) { 2144 lck->lk.qlk.owner_id = gtid + 1; 2145 } 2146 return retval; 2147 } 2148 2149 // Block until we can acquire a speculative, adaptive lock. We check whether we 2150 // should be trying to speculate. If we should be, we check the real lock to see 2151 // if it is free, and, if not, pause without attempting to acquire it until it 2152 // is. Then we try the speculative acquire. This means that although we suffer 2153 // from lemmings a little (because we can't acquire the lock speculatively 2154 // until the queue of threads waiting has cleared), we don't get into a state 2155 // where we can never acquire the lock speculatively (because we force the queue 2156 // to clear by preventing new arrivals from entering the queue). This does mean 2157 // that when we're trying to break lemmings, the lock is no longer fair. However 2158 // OpenMP makes no guarantee that its locks are fair, so this isn't a real 2159 // problem. 2160 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck, 2161 kmp_int32 gtid) { 2162 if (__kmp_should_speculate(lck, gtid)) { 2163 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2164 if (__kmp_test_adaptive_lock_only(lck, gtid)) 2165 return; 2166 // We tried speculation and failed, so give up. 2167 } else { 2168 // We can't try speculation until the lock is free, so we pause here 2169 // (without suspending on the queuing lock) to allow it to drain, then 2170 // try again. All other threads will also see the same result for 2171 // __kmp_should_speculate(), so will be doing the same if they try to claim 2172 // the lock from now on. 2173 while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2174 KMP_INC_STAT(lck, lemmingYields); 2175 __kmp_yield(TRUE); 2176 } 2177 2178 if (__kmp_test_adaptive_lock_only(lck, gtid)) 2179 return; 2180 } 2181 } 2182 2183 // Speculative acquisition failed, so acquire it non-speculatively. 2184 // Count the non-speculative acquire attempt 2185 lck->lk.adaptive.acquire_attempts++; 2186 2187 __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid); 2188 // We have acquired the base lock, so count that. 2189 KMP_INC_STAT(lck, nonSpeculativeAcquires); 2190 ANNOTATE_QUEUING_ACQUIRED(lck); 2191 } 2192 2193 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2194 kmp_int32 gtid) { 2195 char const *const func = "omp_set_lock"; 2196 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2197 KMP_FATAL(LockIsUninitialized, func); 2198 } 2199 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) { 2200 KMP_FATAL(LockIsAlreadyOwned, func); 2201 } 2202 2203 __kmp_acquire_adaptive_lock(lck, gtid); 2204 2205 lck->lk.qlk.owner_id = gtid + 1; 2206 } 2207 2208 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck, 2209 kmp_int32 gtid) { 2210 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR( 2211 lck))) { // If the lock doesn't look claimed we must be speculating. 2212 // (Or the user's code is buggy and they're releasing without locking; 2213 // if we had XTEST we'd be able to check that case...)
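// Committing here is safe: a real (non-speculative) owner would make the
// queuing lock look claimed, so an unlocked-looking lock at release time
// means this thread is still inside the transaction it started in
// __kmp_test_adaptive_lock_only(), and _xend() simply commits it.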
2214 _xend(); // Exit speculation 2215 __kmp_update_badness_after_success(lck); 2216 } else { // Since the lock *is* visibly locked we're not speculating, 2217 // so should use the underlying lock's release scheme. 2218 __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); 2219 } 2220 return KMP_LOCK_RELEASED; 2221 } 2222 2223 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2224 kmp_int32 gtid) { 2225 char const *const func = "omp_unset_lock"; 2226 KMP_MB(); /* in case another processor initialized lock */ 2227 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2228 KMP_FATAL(LockIsUninitialized, func); 2229 } 2230 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { 2231 KMP_FATAL(LockUnsettingFree, func); 2232 } 2233 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { 2234 KMP_FATAL(LockUnsettingSetByAnother, func); 2235 } 2236 lck->lk.qlk.owner_id = 0; 2237 __kmp_release_adaptive_lock(lck, gtid); 2238 return KMP_LOCK_RELEASED; 2239 } 2240 2241 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { 2242 __kmp_init_queuing_lock(GET_QLK_PTR(lck)); 2243 lck->lk.adaptive.badness = 0; 2244 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; 2245 lck->lk.adaptive.max_soft_retries = 2246 __kmp_adaptive_backoff_params.max_soft_retries; 2247 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2248 #if KMP_DEBUG_ADAPTIVE_LOCKS 2249 __kmp_zero_speculative_stats(&lck->lk.adaptive); 2250 #endif 2251 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2252 } 2253 2254 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2255 __kmp_init_adaptive_lock(lck); 2256 } 2257 2258 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { 2259 #if KMP_DEBUG_ADAPTIVE_LOCKS 2260 __kmp_accumulate_speculative_stats(&lck->lk.adaptive); 2261 #endif 2262 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); 2263 // Nothing needed for the speculative part. 2264 } 2265 2266 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { 2267 char const *const func = "omp_destroy_lock"; 2268 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2269 KMP_FATAL(LockIsUninitialized, func); 2270 } 2271 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) { 2272 KMP_FATAL(LockStillOwned, func); 2273 } 2274 __kmp_destroy_adaptive_lock(lck); 2275 } 2276 2277 #endif // KMP_USE_ADAPTIVE_LOCKS 2278 2279 /* ------------------------------------------------------------------------ */ 2280 /* DRDPA ticket locks */ 2281 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ 2282 2283 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) { 2284 return TCR_4(lck->lk.owner_id) - 1; 2285 } 2286 2287 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) { 2288 return lck->lk.depth_locked != -1; 2289 } 2290 2291 __forceinline static int 2292 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2293 kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); 2294 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2295 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = 2296 (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)TCR_PTR( 2297 lck->lk.polls); // volatile load 2298 2299 #ifdef USE_LOCK_PROFILE 2300 if (TCR_8(polls[ticket & mask].poll) != ticket) 2301 __kmp_printf("LOCK CONTENTION: %p\n", lck); 2302 /* else __kmp_printf( "." 
);*/ 2303 #endif /* USE_LOCK_PROFILE */ 2304 2305 // Now spin-wait, but reload the polls pointer and mask, in case the 2306 // polling area has been reconfigured. Unless it is reconfigured, the 2307 // reloads stay in L1 cache and are cheap. 2308 // 2309 // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!! 2310 // 2311 // The current implementation of KMP_WAIT_YIELD doesn't allow for mask 2312 // and poll to be re-read every spin iteration. 2313 kmp_uint32 spins; 2314 2315 KMP_FSYNC_PREPARE(lck); 2316 KMP_INIT_YIELD(spins); 2317 while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load 2318 // If we are oversubscribed, 2319 // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. 2320 // CPU Pause is in the macros for yield. 2321 // 2322 KMP_YIELD(TCR_4(__kmp_nth) > 2323 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); 2324 KMP_YIELD_SPIN(spins); 2325 2326 // Re-read the mask and the poll pointer from the lock structure. 2327 // 2328 // Make certain that "mask" is read before "polls" !!! 2329 // 2330 // If another thread reconfigures the polling area and updates these 2331 // values, and we get the new value of mask and the old polls pointer, we 2332 // could access memory beyond the end of the old polling area. 2333 mask = TCR_8(lck->lk.mask); // volatile load 2334 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)TCR_PTR( 2335 lck->lk.polls); // volatile load 2336 } 2337 2338 // Critical section starts here 2339 KMP_FSYNC_ACQUIRED(lck); 2340 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", 2341 ticket, lck)); 2342 lck->lk.now_serving = ticket; // non-volatile store 2343 2344 // Deallocate a garbage polling area if we know that we are the last 2345 // thread that could possibly access it. 2346 // 2347 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup 2348 // ticket. 2349 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { 2350 __kmp_free((void *)lck->lk.old_polls); 2351 lck->lk.old_polls = NULL; 2352 lck->lk.cleanup_ticket = 0; 2353 } 2354 2355 // Check to see if we should reconfigure the polling area. 2356 // If there is still a garbage polling area to be deallocated from a 2357 // previous reconfiguration, let a later thread reconfigure it. 2358 if (lck->lk.old_polls == NULL) { 2359 bool reconfigure = false; 2360 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; 2361 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); 2362 2363 if (TCR_4(__kmp_nth) > 2364 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { 2365 // We are in oversubscription mode. Contract the polling area 2366 // down to a single location, if that hasn't been done already. 2367 if (num_polls > 1) { 2368 reconfigure = true; 2369 num_polls = TCR_4(lck->lk.num_polls); 2370 mask = 0; 2371 num_polls = 1; 2372 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2373 __kmp_allocate(num_polls * sizeof(*polls)); 2374 polls[0].poll = ticket; 2375 } 2376 } else { 2377 // We are in under/fully subscribed mode. Check the number of 2378 // threads waiting on the lock. The size of the polling area 2379 // should be at least the number of threads waiting.
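// Worked example (illustration only): with num_polls == 4 (mask == 3) and
// nine threads waiting, the doubling loop below runs twice, leaving
// num_polls == 16 and mask == 0xf, so every waiter can spin on a distinct
// poll entry.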
2380 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; 2381 if (num_waiting > num_polls) { 2382 kmp_uint32 old_num_polls = num_polls; 2383 reconfigure = true; 2384 do { 2385 mask = (mask << 1) | 1; 2386 num_polls *= 2; 2387 } while (num_polls <= num_waiting); 2388 2389 // Allocate the new polling area, and copy the relevant portion 2390 // of the old polling area to the new area. __kmp_allocate() 2391 // zeroes the memory it allocates, and most of the old area is 2392 // just zero padding, so we only copy the release counters. 2393 polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) 2394 __kmp_allocate(num_polls * sizeof(*polls)); 2395 kmp_uint32 i; 2396 for (i = 0; i < old_num_polls; i++) { 2397 polls[i].poll = old_polls[i].poll; 2398 } 2399 } 2400 } 2401 2402 if (reconfigure) { 2403 // Now write the updated fields back to the lock structure. 2404 // 2405 // Make certain that "polls" is written before "mask" !!! 2406 // 2407 // If another thread picks up the new value of mask and the old polls 2408 // pointer , it could access memory beyond the end of the old polling 2409 // area. 2410 // 2411 // On x86, we need memory fences. 2412 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " 2413 "lock %p to %d polls\n", 2414 ticket, lck, num_polls)); 2415 2416 lck->lk.old_polls = old_polls; // non-volatile store 2417 lck->lk.polls = polls; // volatile store 2418 2419 KMP_MB(); 2420 2421 lck->lk.num_polls = num_polls; // non-volatile store 2422 lck->lk.mask = mask; // volatile store 2423 2424 KMP_MB(); 2425 2426 // Only after the new polling area and mask have been flushed 2427 // to main memory can we update the cleanup ticket field. 2428 // 2429 // volatile load / non-volatile store 2430 lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); 2431 } 2432 } 2433 return KMP_LOCK_ACQUIRED_FIRST; 2434 } 2435 2436 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2437 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2438 ANNOTATE_DRDPA_ACQUIRED(lck); 2439 return retval; 2440 } 2441 2442 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2443 kmp_int32 gtid) { 2444 char const *const func = "omp_set_lock"; 2445 if (lck->lk.initialized != lck) { 2446 KMP_FATAL(LockIsUninitialized, func); 2447 } 2448 if (__kmp_is_drdpa_lock_nestable(lck)) { 2449 KMP_FATAL(LockNestableUsedAsSimple, func); 2450 } 2451 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { 2452 KMP_FATAL(LockIsAlreadyOwned, func); 2453 } 2454 2455 __kmp_acquire_drdpa_lock(lck, gtid); 2456 2457 lck->lk.owner_id = gtid + 1; 2458 return KMP_LOCK_ACQUIRED_FIRST; 2459 } 2460 2461 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2462 // First get a ticket, then read the polls pointer and the mask. 2463 // The polls pointer must be read before the mask!!! 
(See above) 2464 kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load 2465 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = 2466 (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)TCR_PTR( 2467 lck->lk.polls); // volatile load 2468 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2469 if (TCR_8(polls[ticket & mask].poll) == ticket) { 2470 kmp_uint64 next_ticket = ticket + 1; 2471 if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, ticket, 2472 next_ticket)) { 2473 KMP_FSYNC_ACQUIRED(lck); 2474 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2475 ticket, lck)); 2476 lck->lk.now_serving = ticket; // non-volatile store 2477 2478 // Since no threads are waiting, there is no possibility that we would 2479 // want to reconfigure the polling area. We might have the cleanup ticket 2480 // value (which says that it is now safe to deallocate old_polls), but 2481 // we'll let a later thread which calls __kmp_acquire_lock do that - this 2482 // routine isn't supposed to block, and we would risk blocks if we called 2483 // __kmp_free() to do the deallocation. 2484 return TRUE; 2485 } 2486 } 2487 return FALSE; 2488 } 2489 2490 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2491 kmp_int32 gtid) { 2492 char const *const func = "omp_test_lock"; 2493 if (lck->lk.initialized != lck) { 2494 KMP_FATAL(LockIsUninitialized, func); 2495 } 2496 if (__kmp_is_drdpa_lock_nestable(lck)) { 2497 KMP_FATAL(LockNestableUsedAsSimple, func); 2498 } 2499 2500 int retval = __kmp_test_drdpa_lock(lck, gtid); 2501 2502 if (retval) { 2503 lck->lk.owner_id = gtid + 1; 2504 } 2505 return retval; 2506 } 2507 2508 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2509 // Read the ticket value from the lock data struct, then the polls pointer and 2510 // the mask. The polls pointer must be read before the mask!!! 
(See above) 2511 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load 2512 volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = 2513 (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)TCR_PTR( 2514 lck->lk.polls); // volatile load 2515 kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load 2516 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2517 ticket - 1, lck)); 2518 KMP_FSYNC_RELEASING(lck); 2519 ANNOTATE_DRDPA_RELEASED(lck); 2520 KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store 2521 return KMP_LOCK_RELEASED; 2522 } 2523 2524 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2525 kmp_int32 gtid) { 2526 char const *const func = "omp_unset_lock"; 2527 KMP_MB(); /* in case another processor initialized lock */ 2528 if (lck->lk.initialized != lck) { 2529 KMP_FATAL(LockIsUninitialized, func); 2530 } 2531 if (__kmp_is_drdpa_lock_nestable(lck)) { 2532 KMP_FATAL(LockNestableUsedAsSimple, func); 2533 } 2534 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2535 KMP_FATAL(LockUnsettingFree, func); 2536 } 2537 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && 2538 (__kmp_get_drdpa_lock_owner(lck) != gtid)) { 2539 KMP_FATAL(LockUnsettingSetByAnother, func); 2540 } 2541 lck->lk.owner_id = 0; 2542 return __kmp_release_drdpa_lock(lck, gtid); 2543 } 2544 2545 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { 2546 lck->lk.location = NULL; 2547 lck->lk.mask = 0; 2548 lck->lk.num_polls = 1; 2549 lck->lk.polls = 2550 (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)__kmp_allocate( 2551 lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2552 lck->lk.cleanup_ticket = 0; 2553 lck->lk.old_polls = NULL; 2554 lck->lk.next_ticket = 0; 2555 lck->lk.now_serving = 0; 2556 lck->lk.owner_id = 0; // no thread owns the lock. 2557 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
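// Note: lk.initialized (written just below) is the self-pointer that the
// *_with_checks routines compare against, so the lock is not treated as
// valid until the fields above have been filled in.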
2558 lck->lk.initialized = lck; 2559 2560 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2561 } 2562 2563 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2564 __kmp_init_drdpa_lock(lck); 2565 } 2566 2567 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { 2568 lck->lk.initialized = NULL; 2569 lck->lk.location = NULL; 2570 if (lck->lk.polls != NULL) { 2571 __kmp_free((void *)lck->lk.polls); 2572 lck->lk.polls = NULL; 2573 } 2574 if (lck->lk.old_polls != NULL) { 2575 __kmp_free((void *)lck->lk.old_polls); 2576 lck->lk.old_polls = NULL; 2577 } 2578 lck->lk.mask = 0; 2579 lck->lk.num_polls = 0; 2580 lck->lk.cleanup_ticket = 0; 2581 lck->lk.next_ticket = 0; 2582 lck->lk.now_serving = 0; 2583 lck->lk.owner_id = 0; 2584 lck->lk.depth_locked = -1; 2585 } 2586 2587 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2588 char const *const func = "omp_destroy_lock"; 2589 if (lck->lk.initialized != lck) { 2590 KMP_FATAL(LockIsUninitialized, func); 2591 } 2592 if (__kmp_is_drdpa_lock_nestable(lck)) { 2593 KMP_FATAL(LockNestableUsedAsSimple, func); 2594 } 2595 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2596 KMP_FATAL(LockStillOwned, func); 2597 } 2598 __kmp_destroy_drdpa_lock(lck); 2599 } 2600 2601 // nested drdpa ticket locks 2602 2603 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2604 KMP_DEBUG_ASSERT(gtid >= 0); 2605 2606 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2607 lck->lk.depth_locked += 1; 2608 return KMP_LOCK_ACQUIRED_NEXT; 2609 } else { 2610 __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2611 ANNOTATE_DRDPA_ACQUIRED(lck); 2612 KMP_MB(); 2613 lck->lk.depth_locked = 1; 2614 KMP_MB(); 2615 lck->lk.owner_id = gtid + 1; 2616 return KMP_LOCK_ACQUIRED_FIRST; 2617 } 2618 } 2619 2620 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2621 kmp_int32 gtid) { 2622 char const *const func = "omp_set_nest_lock"; 2623 if (lck->lk.initialized != lck) { 2624 KMP_FATAL(LockIsUninitialized, func); 2625 } 2626 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2627 KMP_FATAL(LockSimpleUsedAsNestable, func); 2628 } 2629 __kmp_acquire_nested_drdpa_lock(lck, gtid); 2630 } 2631 2632 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2633 int retval; 2634 2635 KMP_DEBUG_ASSERT(gtid >= 0); 2636 2637 if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2638 retval = ++lck->lk.depth_locked; 2639 } else if (!__kmp_test_drdpa_lock(lck, gtid)) { 2640 retval = 0; 2641 } else { 2642 KMP_MB(); 2643 retval = lck->lk.depth_locked = 1; 2644 KMP_MB(); 2645 lck->lk.owner_id = gtid + 1; 2646 } 2647 return retval; 2648 } 2649 2650 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2651 kmp_int32 gtid) { 2652 char const *const func = "omp_test_nest_lock"; 2653 if (lck->lk.initialized != lck) { 2654 KMP_FATAL(LockIsUninitialized, func); 2655 } 2656 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2657 KMP_FATAL(LockSimpleUsedAsNestable, func); 2658 } 2659 return __kmp_test_nested_drdpa_lock(lck, gtid); 2660 } 2661 2662 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2663 KMP_DEBUG_ASSERT(gtid >= 0); 2664 2665 KMP_MB(); 2666 if (--(lck->lk.depth_locked) == 0) { 2667 KMP_MB(); 2668 lck->lk.owner_id = 0; 2669 __kmp_release_drdpa_lock(lck, gtid); 2670 return KMP_LOCK_RELEASED; 2671 } 2672 return KMP_LOCK_STILL_HELD; 2673 } 2674 2675 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2676 kmp_int32 gtid) { 2677 char 
const *const func = "omp_unset_nest_lock"; 2678 KMP_MB(); /* in case another processor initialized lock */ 2679 if (lck->lk.initialized != lck) { 2680 KMP_FATAL(LockIsUninitialized, func); 2681 } 2682 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2683 KMP_FATAL(LockSimpleUsedAsNestable, func); 2684 } 2685 if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2686 KMP_FATAL(LockUnsettingFree, func); 2687 } 2688 if (__kmp_get_drdpa_lock_owner(lck) != gtid) { 2689 KMP_FATAL(LockUnsettingSetByAnother, func); 2690 } 2691 return __kmp_release_nested_drdpa_lock(lck, gtid); 2692 } 2693 2694 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2695 __kmp_init_drdpa_lock(lck); 2696 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2697 } 2698 2699 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2700 __kmp_init_nested_drdpa_lock(lck); 2701 } 2702 2703 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2704 __kmp_destroy_drdpa_lock(lck); 2705 lck->lk.depth_locked = 0; 2706 } 2707 2708 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2709 char const *const func = "omp_destroy_nest_lock"; 2710 if (lck->lk.initialized != lck) { 2711 KMP_FATAL(LockIsUninitialized, func); 2712 } 2713 if (!__kmp_is_drdpa_lock_nestable(lck)) { 2714 KMP_FATAL(LockSimpleUsedAsNestable, func); 2715 } 2716 if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2717 KMP_FATAL(LockStillOwned, func); 2718 } 2719 __kmp_destroy_nested_drdpa_lock(lck); 2720 } 2721 2722 // access functions to fields which don't exist for all lock kinds. 2723 2724 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { 2725 return lck == lck->lk.initialized; 2726 } 2727 2728 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { 2729 return lck->lk.location; 2730 } 2731 2732 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, 2733 const ident_t *loc) { 2734 lck->lk.location = loc; 2735 } 2736 2737 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { 2738 return lck->lk.flags; 2739 } 2740 2741 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, 2742 kmp_lock_flags_t flags) { 2743 lck->lk.flags = flags; 2744 } 2745 2746 // Time stamp counter 2747 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2748 #define __kmp_tsc() __kmp_hardware_timestamp() 2749 // Runtime's default backoff parameters 2750 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; 2751 #else 2752 // Use nanoseconds for other platforms 2753 extern kmp_uint64 __kmp_now_nsec(); 2754 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; 2755 #define __kmp_tsc() __kmp_now_nsec() 2756 #endif 2757 2758 // A useful predicate for dealing with timestamps that may wrap. 2759 // Is a before b? Since the timestamps may wrap, this is asking whether it's 2760 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. 2761 // Times where going clockwise is less distance than going anti-clockwise 2762 // are in the future, others are in the past. e.g. 
a = MAX-1, b = MAX+1 (=0), 2763 // then a > b (true) does not mean a reached b; whereas signed(a) = -2, 2764 // signed(b) = 0 captures the actual difference 2765 static inline bool before(kmp_uint64 a, kmp_uint64 b) { 2766 return ((kmp_int64)b - (kmp_int64)a) > 0; 2767 } 2768 2769 // Truncated binary exponential backoff function 2770 void __kmp_spin_backoff(kmp_backoff_t *boff) { 2771 // We could flatten this loop, but making it a nested loop gives better result 2772 kmp_uint32 i; 2773 for (i = boff->step; i > 0; i--) { 2774 kmp_uint64 goal = __kmp_tsc() + boff->min_tick; 2775 do { 2776 KMP_CPU_PAUSE(); 2777 } while (before(__kmp_tsc(), goal)); 2778 } 2779 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); 2780 } 2781 2782 #if KMP_USE_DYNAMIC_LOCK 2783 2784 // Direct lock initializers. It simply writes a tag to the low 8 bits of the 2785 // lock word. 2786 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, 2787 kmp_dyna_lockseq_t seq) { 2788 TCW_4(*lck, KMP_GET_D_TAG(seq)); 2789 KA_TRACE( 2790 20, 2791 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); 2792 } 2793 2794 #if KMP_USE_TSX 2795 2796 // HLE lock functions - imported from the testbed runtime. 2797 #define HLE_ACQUIRE ".byte 0xf2;" 2798 #define HLE_RELEASE ".byte 0xf3;" 2799 2800 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { 2801 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); 2802 return v; 2803 } 2804 2805 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } 2806 2807 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2808 // Use gtid for KMP_LOCK_BUSY if necessary 2809 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { 2810 int delay = 1; 2811 do { 2812 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { 2813 for (int i = delay; i != 0; --i) 2814 KMP_CPU_PAUSE(); 2815 delay = ((delay << 1) | 1) & 7; 2816 } 2817 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); 2818 } 2819 } 2820 2821 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2822 kmp_int32 gtid) { 2823 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks 2824 } 2825 2826 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2827 __asm__ volatile(HLE_RELEASE "movl %1,%0" 2828 : "=m"(*lck) 2829 : "r"(KMP_LOCK_FREE(hle)) 2830 : "memory"); 2831 return KMP_LOCK_RELEASED; 2832 } 2833 2834 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2835 kmp_int32 gtid) { 2836 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks 2837 } 2838 2839 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { 2840 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); 2841 } 2842 2843 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, 2844 kmp_int32 gtid) { 2845 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks 2846 } 2847 2848 static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { 2849 __kmp_init_queuing_lock(lck); 2850 } 2851 2852 static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { 2853 __kmp_destroy_queuing_lock(lck); 2854 } 2855 2856 static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2857 unsigned retries = 3, status; 2858 do { 2859 status = _xbegin(); 2860 if (status == _XBEGIN_STARTED) { 2861 if (__kmp_is_unlocked_queuing_lock(lck)) 2862 return; 2863 _xabort(0xff); 2864 } 2865 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { 2866 // Wait until 
lock becomes free 2867 while (!__kmp_is_unlocked_queuing_lock(lck)) 2868 __kmp_yield(TRUE); 2869 } else if (!(status & _XABORT_RETRY)) 2870 break; 2871 } while (retries--); 2872 2873 // Fall-back non-speculative lock (xchg) 2874 __kmp_acquire_queuing_lock(lck, gtid); 2875 } 2876 2877 static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2878 kmp_int32 gtid) { 2879 __kmp_acquire_rtm_lock(lck, gtid); 2880 } 2881 2882 static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2883 if (__kmp_is_unlocked_queuing_lock(lck)) { 2884 // Releasing from speculation 2885 _xend(); 2886 } else { 2887 // Releasing from a real lock 2888 __kmp_release_queuing_lock(lck, gtid); 2889 } 2890 return KMP_LOCK_RELEASED; 2891 } 2892 2893 static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2894 kmp_int32 gtid) { 2895 return __kmp_release_rtm_lock(lck, gtid); 2896 } 2897 2898 static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 2899 unsigned retries = 3, status; 2900 do { 2901 status = _xbegin(); 2902 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { 2903 return 1; 2904 } 2905 if (!(status & _XABORT_RETRY)) 2906 break; 2907 } while (retries--); 2908 2909 return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0; 2910 } 2911 2912 static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, 2913 kmp_int32 gtid) { 2914 return __kmp_test_rtm_lock(lck, gtid); 2915 } 2916 2917 #endif // KMP_USE_TSX 2918 2919 // Entry functions for indirect locks (first element of direct lock jump tables) 2920 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, 2921 kmp_dyna_lockseq_t tag); 2922 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); 2923 static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2924 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2925 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); 2926 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2927 kmp_int32); 2928 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2929 kmp_int32); 2930 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 2931 kmp_int32); 2932 2933 // Jump tables for the indirect lock functions 2934 // Only fill in the odd entries, that avoids the need to shift out the low bit 2935 2936 // init functions 2937 #define expand(l, op) 0, __kmp_init_direct_lock, 2938 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { 2939 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; 2940 #undef expand 2941 2942 // destroy functions 2943 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, 2944 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) = { 2945 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; 2946 #undef expand 2947 2948 // set/acquire functions 2949 #define expand(l, op) \ 2950 0, (void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2951 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { 2952 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; 2953 #undef expand 2954 #define expand(l, op) \ 2955 0, (void (*)(kmp_dyna_lock_t *, \ 2956 kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2957 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2958 __kmp_set_indirect_lock_with_checks, 0, 2959 KMP_FOREACH_D_LOCK(expand, acquire)}; 2960 #undef expand 2961 2962 // unset/release and test functions 2963 
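// Layout note (applies to all the direct-lock jump tables in this block):
// slot 0 holds the *indirect* lock entry function, and the direct-lock
// functions occupy odd slots only, matching the odd tag values that
// KMP_GET_D_TAG() stores in direct lock words; indirect lock words are kept
// even (index << 1), so the low bit alone distinguishes the two cases.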
#define expand(l, op) \ 2964 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, 2965 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { 2966 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; 2967 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { 2968 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2969 #undef expand 2970 #define expand(l, op) \ 2971 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, 2972 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2973 __kmp_unset_indirect_lock_with_checks, 0, 2974 KMP_FOREACH_D_LOCK(expand, release)}; 2975 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { 2976 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; 2977 #undef expand 2978 2979 // Exposes only one set of jump tables (*lock or *lock_with_checks). 2980 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; 2981 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; 2982 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; 2983 2984 // Jump tables for the indirect lock functions 2985 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, 2986 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { 2987 KMP_FOREACH_I_LOCK(expand, init)}; 2988 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { 2989 KMP_FOREACH_I_LOCK(expand, destroy)}; 2990 #undef expand 2991 2992 // set/acquire functions 2993 #define expand(l, op) \ 2994 (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 2995 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { 2996 KMP_FOREACH_I_LOCK(expand, acquire)}; 2997 #undef expand 2998 #define expand(l, op) \ 2999 (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 3000 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { 3001 KMP_FOREACH_I_LOCK(expand, acquire)}; 3002 #undef expand 3003 3004 // unset/release and test functions 3005 #define expand(l, op) \ 3006 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, 3007 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { 3008 KMP_FOREACH_I_LOCK(expand, release)}; 3009 static int (*indirect_test[])(kmp_user_lock_p, 3010 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)}; 3011 #undef expand 3012 #define expand(l, op) \ 3013 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, 3014 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { 3015 KMP_FOREACH_I_LOCK(expand, release)}; 3016 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { 3017 KMP_FOREACH_I_LOCK(expand, test)}; 3018 #undef expand 3019 3020 // Exposes only one set of jump tables (*lock or *lock_with_checks). 3021 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; 3022 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; 3023 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; 3024 3025 // Lock index table. 3026 kmp_indirect_lock_table_t __kmp_i_lock_table; 3027 3028 // Size of indirect locks. 3029 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0}; 3030 3031 // Jump tables for lock accessor/modifier.
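// Usage illustration: after __kmp_init_dynamic_user_locks() fills these
// tables, a call such as
//
//   __kmp_indirect_set_location[locktag_ticket](lck, loc);
//
// dispatches to __kmp_set_ticket_lock_location() for a ticket lock.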
3032 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3033 const ident_t *) = {0}; 3034 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, 3035 kmp_lock_flags_t) = {0}; 3036 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])( 3037 kmp_user_lock_p) = {0}; 3038 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( 3039 kmp_user_lock_p) = {0}; 3040 3041 // Use different lock pools for different lock types. 3042 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0}; 3043 3044 // User lock allocator for dynamically dispatched indirect locks. Every entry of 3045 // the indirect lock table holds the address and type of the allocated indirect 3046 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is 3047 // full. A destroyed indirect lock object is returned to the reusable pool of 3048 // locks, unique to each lock type. 3049 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, 3050 kmp_int32 gtid, 3051 kmp_indirect_locktag_t tag) { 3052 kmp_indirect_lock_t *lck; 3053 kmp_lock_index_t idx; 3054 3055 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3056 3057 if (__kmp_indirect_lock_pool[tag] != NULL) { 3058 // Reuse the allocated and destroyed lock object 3059 lck = __kmp_indirect_lock_pool[tag]; 3060 if (OMP_LOCK_T_SIZE < sizeof(void *)) 3061 idx = lck->lock->pool.index; 3062 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; 3063 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", 3064 lck)); 3065 } else { 3066 idx = __kmp_i_lock_table.next; 3067 // Check capacity and double the size if it is full 3068 if (idx == __kmp_i_lock_table.size) { 3069 // Double up the space for block pointers 3070 int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; 3071 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; 3072 __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate( 3073 2 * row * sizeof(kmp_indirect_lock_t *)); 3074 KMP_MEMCPY(__kmp_i_lock_table.table, old_table, 3075 row * sizeof(kmp_indirect_lock_t *)); 3076 __kmp_free(old_table); 3077 // Allocate new objects in the new blocks 3078 for (int i = row; i < 2 * row; ++i) 3079 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate( 3080 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3081 __kmp_i_lock_table.size = 2 * idx; 3082 } 3083 __kmp_i_lock_table.next++; 3084 lck = KMP_GET_I_LOCK(idx); 3085 // Allocate a new base lock object 3086 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); 3087 KA_TRACE(20, 3088 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); 3089 } 3090 3091 __kmp_release_lock(&__kmp_global_lock, gtid); 3092 3093 lck->type = tag; 3094 3095 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3096 *((kmp_lock_index_t *)user_lock) = idx 3097 << 1; // indirect lock word must be even 3098 } else { 3099 *((kmp_indirect_lock_t **)user_lock) = lck; 3100 } 3101 3102 return lck; 3103 } 3104 3105 // User lock lookup for dynamically dispatched locks.
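// Encoding recap: __kmp_allocate_indirect_lock() stores either the table
// index shifted left by one (kept even, distinguishing it from the odd
// direct-lock tags) when OMP_LOCK_T_SIZE < sizeof(void *), or the
// kmp_indirect_lock_t pointer itself otherwise; the lookup below undoes
// whichever encoding was used.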
3106 static __forceinline kmp_indirect_lock_t * 3107 __kmp_lookup_indirect_lock(void **user_lock, const char *func) { 3108 if (__kmp_env_consistency_check) { 3109 kmp_indirect_lock_t *lck = NULL; 3110 if (user_lock == NULL) { 3111 KMP_FATAL(LockIsUninitialized, func); 3112 } 3113 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3114 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); 3115 if (idx >= __kmp_i_lock_table.size) { 3116 KMP_FATAL(LockIsUninitialized, func); 3117 } 3118 lck = KMP_GET_I_LOCK(idx); 3119 } else { 3120 lck = *((kmp_indirect_lock_t **)user_lock); 3121 } 3122 if (lck == NULL) { 3123 KMP_FATAL(LockIsUninitialized, func); 3124 } 3125 return lck; 3126 } else { 3127 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3128 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); 3129 } else { 3130 return *((kmp_indirect_lock_t **)user_lock); 3131 } 3132 } 3133 } 3134 3135 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, 3136 kmp_dyna_lockseq_t seq) { 3137 #if KMP_USE_ADAPTIVE_LOCKS 3138 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { 3139 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); 3140 seq = lockseq_queuing; 3141 } 3142 #endif 3143 #if KMP_USE_TSX 3144 if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { 3145 seq = lockseq_queuing; 3146 } 3147 #endif 3148 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); 3149 kmp_indirect_lock_t *l = 3150 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); 3151 KMP_I_LOCK_FUNC(l, init)(l->lock); 3152 KA_TRACE( 3153 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", 3154 seq)); 3155 } 3156 3157 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { 3158 kmp_uint32 gtid = __kmp_entry_gtid(); 3159 kmp_indirect_lock_t *l = 3160 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); 3161 KMP_I_LOCK_FUNC(l, destroy)(l->lock); 3162 kmp_indirect_locktag_t tag = l->type; 3163 3164 __kmp_acquire_lock(&__kmp_global_lock, gtid); 3165 3166 // Use the base lock's space to keep the pool chain. 
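// The destroyed lock is pushed onto a per-tag LIFO free list: pool.next (and
// pool.index, when locks are referenced by index) reuse the dead base lock's
// own storage, and __kmp_allocate_indirect_lock() pops from this list on the
// next allocation with the same tag.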
3167 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; 3168 if (OMP_LOCK_T_SIZE < sizeof(void *)) { 3169 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); 3170 } 3171 __kmp_indirect_lock_pool[tag] = l; 3172 3173 __kmp_release_lock(&__kmp_global_lock, gtid); 3174 } 3175 3176 static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3177 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3178 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3179 } 3180 3181 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3182 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3183 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3184 } 3185 3186 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { 3187 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); 3188 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3189 } 3190 3191 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3192 kmp_int32 gtid) { 3193 kmp_indirect_lock_t *l = 3194 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); 3195 KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); 3196 } 3197 3198 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3199 kmp_int32 gtid) { 3200 kmp_indirect_lock_t *l = 3201 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); 3202 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); 3203 } 3204 3205 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, 3206 kmp_int32 gtid) { 3207 kmp_indirect_lock_t *l = 3208 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); 3209 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); 3210 } 3211 3212 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; 3213 3214 // This is used only in kmp_error.cpp when consistency checking is on. 3215 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { 3216 switch (seq) { 3217 case lockseq_tas: 3218 case lockseq_nested_tas: 3219 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); 3220 #if KMP_USE_FUTEX 3221 case lockseq_futex: 3222 case lockseq_nested_futex: 3223 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); 3224 #endif 3225 case lockseq_ticket: 3226 case lockseq_nested_ticket: 3227 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); 3228 case lockseq_queuing: 3229 case lockseq_nested_queuing: 3230 #if KMP_USE_ADAPTIVE_LOCKS 3231 case lockseq_adaptive: 3232 #endif 3233 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); 3234 case lockseq_drdpa: 3235 case lockseq_nested_drdpa: 3236 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); 3237 default: 3238 return 0; 3239 } 3240 } 3241 3242 // Initializes data for dynamic user locks. 3243 void __kmp_init_dynamic_user_locks() { 3244 // Initialize jump table for the lock functions 3245 if (__kmp_env_consistency_check) { 3246 __kmp_direct_set = direct_set_check; 3247 __kmp_direct_unset = direct_unset_check; 3248 __kmp_direct_test = direct_test_check; 3249 __kmp_indirect_set = indirect_set_check; 3250 __kmp_indirect_unset = indirect_unset_check; 3251 __kmp_indirect_test = indirect_test_check; 3252 } else { 3253 __kmp_direct_set = direct_set; 3254 __kmp_direct_unset = direct_unset; 3255 __kmp_direct_test = direct_test; 3256 __kmp_indirect_set = indirect_set; 3257 __kmp_indirect_unset = indirect_unset; 3258 __kmp_indirect_test = indirect_test; 3259 } 3260 // If the user locks have already been initialized, then return. 
Allow the 3261 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate 3262 // new lock tables if they have already been allocated. 3263 if (__kmp_init_user_locks) 3264 return; 3265 3266 // Initialize lock index table 3267 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; 3268 __kmp_i_lock_table.table = 3269 (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); 3270 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( 3271 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); 3272 __kmp_i_lock_table.next = 0; 3273 3274 // Indirect lock size 3275 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); 3276 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); 3277 #if KMP_USE_ADAPTIVE_LOCKS 3278 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); 3279 #endif 3280 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); 3281 #if KMP_USE_TSX 3282 __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); 3283 #endif 3284 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); 3285 #if KMP_USE_FUTEX 3286 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); 3287 #endif 3288 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); 3289 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); 3290 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); 3291 3292 // Initialize lock accessor/modifier 3293 #define fill_jumps(table, expand, sep) \ 3294 { \ 3295 table[locktag##sep##ticket] = expand(ticket); \ 3296 table[locktag##sep##queuing] = expand(queuing); \ 3297 table[locktag##sep##drdpa] = expand(drdpa); \ 3298 } 3299 3300 #if KMP_USE_ADAPTIVE_LOCKS 3301 #define fill_table(table, expand) \ 3302 { \ 3303 fill_jumps(table, expand, _); \ 3304 table[locktag_adaptive] = expand(queuing); \ 3305 fill_jumps(table, expand, _nested_); \ 3306 } 3307 #else 3308 #define fill_table(table, expand) \ 3309 { \ 3310 fill_jumps(table, expand, _); \ 3311 fill_jumps(table, expand, _nested_); \ 3312 } 3313 #endif // KMP_USE_ADAPTIVE_LOCKS 3314 3315 #define expand(l) \ 3316 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location 3317 fill_table(__kmp_indirect_set_location, expand); 3318 #undef expand 3319 #define expand(l) \ 3320 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags 3321 fill_table(__kmp_indirect_set_flags, expand); 3322 #undef expand 3323 #define expand(l) \ 3324 (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location 3325 fill_table(__kmp_indirect_get_location, expand); 3326 #undef expand 3327 #define expand(l) \ 3328 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags 3329 fill_table(__kmp_indirect_get_flags, expand); 3330 #undef expand 3331 3332 __kmp_init_user_locks = TRUE; 3333 } 3334 3335 // Clean up the lock table. 3336 void __kmp_cleanup_indirect_user_locks() { 3337 kmp_lock_index_t i; 3338 int k; 3339 3340 // Clean up locks in the pools first (they were already destroyed before going 3341 // into the pools). 
  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
    while (l != NULL) {
      kmp_indirect_lock_t *ll = l;
      l = (kmp_indirect_lock_t *)l->lock->pool.next;
      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
                    ll));
      __kmp_free(ll->lock);
      ll->lock = NULL;
    }
    __kmp_indirect_lock_pool[k] = NULL;
  }
  // Clean up the remaining undestroyed locks.
  for (i = 0; i < __kmp_i_lock_table.next; i++) {
    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
    if (l->lock != NULL) {
      // Locks not destroyed explicitly need to be destroyed here.
      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
      KA_TRACE(
          20,
          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
           l));
      __kmp_free(l->lock);
    }
  }
  // Free the table
  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
    __kmp_free(__kmp_i_lock_table.table[i]);
  __kmp_free(__kmp_i_lock_table.table);

  __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
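// Layout sketch (added for clarity; see __kmp_lock_block_allocate() below):
// each block is a single __kmp_allocate'd buffer that holds
// __kmp_num_locks_in_block locks followed by its kmp_block_of_locks header,
// and blocks are chained through next_block:
//
//   [lock 0][lock 1]...[lock N-1][kmp_block_of_locks{next_block, locks}]
//    ^-- the header's "locks" pointer refers back to the start of the buffer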
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first element
    // of the new table. All the tables are organized into a list this way, and
    // can be freed when the library is shutting down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}

// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    // ANNOTATION: Found no good way to express the synchronisation
    // between allocation and usage, so ignore the allocation.
    ANNOTATE_IGNORE_WRITES_BEGIN();
    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }
    ANNOTATE_IGNORE_WRITES_END();

    // Insert the lock in the table so that it can be freed in __kmp_cleanup,
    // and so that the debugger has info on all allocated locks.
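    // Note (added for clarity): when OMP_LOCK_T_SIZE < sizeof(void *), the
    // index returned here is what gets stored into *user_lock below, and
    // __kmp_lookup_user_lock() later maps it back to the kmp_user_lock_p via
    // __kmp_user_lock_table.table[index]; otherwise the raw pointer is stored.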
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up a lock from the pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up

  return lck;
}

// Put the lock's memory into the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

void __kmp_cleanup_user_locks(void) {
  // Reset the lock pool. Don't worry about locks in the pool; we will free
  // them when iterating through the lock table (it includes all the locks,
  // dead or alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through the lock table and free all locks.
  // Do not free item [0]; it is reserved for the list of lock tables.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type
  // is currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will
  // fail for "pool" objects on the free list. This must happen, as the
  // "location" field of real user locks overlaps the "index" field of "pool"
  // objects.
  //
  // It would be better to run through the free list and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of the locks is consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Clean up internal lock dynamic resources (for drdpa locks in
      // particular).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK
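
// Usage sketch (illustrative only; the actual call sites live elsewhere in
// the runtime, and the local names user_lock/gtid here are placeholders):
// when KMP_USE_DYNAMIC_LOCK is off, the runtime first selects a lock kind and
// binds the function-pointer table, then allocates, looks up, and frees user
// locks through this file's helpers:
//
//   __kmp_set_user_lock_vptrs(lk_queuing);        // bind the vtable once
//   kmp_user_lock_p lck =
//       __kmp_user_lock_allocate(&user_lock, gtid, 0);
//   ...
//   lck = __kmp_lookup_user_lock(&user_lock, "omp_set_lock");
//   __kmp_user_lock_free(&user_lock, gtid, lck);
//   __kmp_cleanup_user_locks();                   // at library shutdown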