/*
 * kmp_lock.h -- lock header file
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#ifndef KMP_LOCK_H
#define KMP_LOCK_H

#include <limits.h>    // CHAR_BIT
#include <stddef.h>    // offsetof

#include "kmp_os.h"
#include "kmp_debug.h"

#ifdef __cplusplus
#include <atomic>

extern "C" {
#endif // __cplusplus

// ----------------------------------------------------------------------------
// Have to copy these definitions from kmp.h because kmp.h cannot be included
// due to circular dependencies.  Will undef these at end of file.

#define KMP_PAD(type, sz)   (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
#define KMP_GTID_DNE (-2)
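// Illustrative note (worked example for reference): KMP_PAD(type, sz) rounds
// sizeof(type) up to the next multiple of sz.  For example, with
// sizeof(type) == 40 and sz == 64 it evaluates to
// 40 + (64 - ((40 - 1) % 64) - 1) == 64.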

// Forward declaration of ident and ident_t

struct ident;
typedef struct ident ident_t;

// End of copied code.
// ----------------------------------------------------------------------------

//
// We need to know the size of the area we can assume that the compiler(s)
// allocated for objects of type omp_lock_t and omp_nest_lock_t.  The Intel
// compiler always allocates a pointer-sized area, as does visual studio.
//
// gcc however, only allocates 4 bytes for regular locks, even on 64-bit
// intel archs.  It allocates at least 8 bytes for a nested lock (more on
// recent versions), but we are bounded by the pointer-sized chunks that
// the Intel compiler allocates.
//

#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT)
# define OMP_LOCK_T_SIZE        sizeof(int)
# define OMP_NEST_LOCK_T_SIZE   sizeof(void *)
#else
# define OMP_LOCK_T_SIZE        sizeof(void *)
# define OMP_NEST_LOCK_T_SIZE   sizeof(void *)
#endif

//
// The Intel compiler allocates a 32-byte chunk for a critical section.
// Both gcc and visual studio only allocate enough space for a pointer.
// Sometimes we know that the space was allocated by the Intel compiler.
//
#define OMP_CRITICAL_SIZE       sizeof(void *)
#define INTEL_CRITICAL_SIZE     32

//
// lock flags
//
typedef kmp_uint32 kmp_lock_flags_t;

#define kmp_lf_critical_section 1

//
// When a lock table is used, the indices are of kmp_lock_index_t
//
typedef kmp_uint32 kmp_lock_index_t;

//
// When memory allocated for locks is on the lock pool (free list),
// it is treated as structs of this type.
//
struct kmp_lock_pool {
    union kmp_user_lock *next;
    kmp_lock_index_t index;
};

typedef struct kmp_lock_pool kmp_lock_pool_t;


extern void __kmp_validate_locks( void );


// ----------------------------------------------------------------------------
//
//  There are 5 lock implementations:
//
//       1. Test and set locks.
//       2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture)
//       3. Ticket (Lamport bakery) locks.
//       4. Queuing locks (with separate spin fields).
//       5. DRDPA (Dynamically Reconfigurable Distributed Polling Area) locks
//
//   and 3 lock purposes:
//
//       1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time.
//          These do not require non-negative global thread ID's.
//       2. Internal RTL locks -- Used everywhere else in the RTL
//       3. User locks (includes critical sections)
//
// ----------------------------------------------------------------------------


// ============================================================================
// Lock implementations.
// ============================================================================


// ----------------------------------------------------------------------------
// Test and set locks.
//
// Non-nested test and set locks differ from the other lock kinds (except
// futex) in that we use the memory allocated by the compiler for the lock,
// rather than a pointer to it.
//
// On lin32, lin_32e, and win_32, the space allocated may be as small as 4
// bytes, so we have to use a lock table for nested locks, and avoid accessing
// the depth_locked field for non-nested locks.
//
// Information normally available to the tools, such as lock location,
// lock usage (normal lock vs. critical section), etc. is not available with
// test and set locks.
// ----------------------------------------------------------------------------

struct kmp_base_tas_lock {
    volatile kmp_int32 poll;         // 0 => unlocked
                                     // locked: (gtid+1) of owning thread
    kmp_int32          depth_locked; // depth locked, for nested locks only
};

typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;

union kmp_tas_lock {
    kmp_base_tas_lock_t lk;
    kmp_lock_pool_t     pool;     // make certain struct is large enough
    double              lk_align; // use worst case alignment
                                  // no cache line padding
};

typedef union kmp_tas_lock kmp_tas_lock_t;
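// Illustrative note on the poll encoding above: poll == 0 means the lock is
// free, and poll == gtid + 1 means the thread with global thread id gtid owns
// it.  This is why the ownership checks later in this file compare
// lck->tas.lk.poll - 1 against gtid.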

//
// Static initializer for test and set lock variables. Usage:
//    kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
//
#define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } }

extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck );
extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck );

extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck );
extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck );

#define KMP_LOCK_RELEASED       1
#define KMP_LOCK_STILL_HELD     0
#define KMP_LOCK_ACQUIRED_FIRST 1
#define KMP_LOCK_ACQUIRED_NEXT  0

#define KMP_USE_FUTEX (KMP_OS_LINUX && !KMP_OS_CNK && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))

#if KMP_USE_FUTEX

// ----------------------------------------------------------------------------
// futex locks.  futex locks are only available on Linux* OS.
//
// Like non-nested test and set locks, non-nested futex locks use the memory
// allocated by the compiler for the lock, rather than a pointer to it.
//
// Information normally available to the tools, such as lock location,
// lock usage (normal lock vs. critical section), etc. is not available with
// test and set locks.  With non-nested futex locks, the lock owner is not
// even available.
// ----------------------------------------------------------------------------

struct kmp_base_futex_lock {
    volatile kmp_int32 poll;         // 0 => unlocked
                                     // locked: 2*(gtid+1) of owning thread
    kmp_int32          depth_locked; // depth locked, for nested locks only
};

typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;

union kmp_futex_lock {
    kmp_base_futex_lock_t lk;
    kmp_lock_pool_t       pool;     // make certain struct is large enough
    double                lk_align; // use worst case alignment
                                    // no cache line padding
};

typedef union kmp_futex_lock kmp_futex_lock_t;

//
// Static initializer for futex lock variables. Usage:
//    kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
//
#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } }

extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck );
extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck );

extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );

#endif // KMP_USE_FUTEX


// ----------------------------------------------------------------------------
// Ticket locks.
// ----------------------------------------------------------------------------

#ifdef __cplusplus

#ifdef _MSC_VER
// MSVC won't allow use of std::atomic<> in a union since it has a non-trivial copy constructor.

struct kmp_base_ticket_lock {
    // `initialized' must be the first entry in the lock data structure!
    std::atomic_bool      initialized;
    volatile union kmp_ticket_lock *self; // points to the lock union
    ident_t const *       location;     // Source code location of omp_init_lock().
    std::atomic_uint      next_ticket;  // ticket number to give to next thread which acquires
    std::atomic_uint      now_serving;  // ticket number for thread which holds the lock
    std::atomic_int       owner_id;     // (gtid+1) of owning thread, 0 if unlocked
    std::atomic_int       depth_locked; // depth locked, for nested locks only
    kmp_lock_flags_t      flags;        // lock specifics, e.g. critical section lock
};
#else
struct kmp_base_ticket_lock {
    // `initialized' must be the first entry in the lock data structure!
    std::atomic<bool>     initialized;
    volatile union kmp_ticket_lock *self; // points to the lock union
    ident_t const *       location;     // Source code location of omp_init_lock().
    std::atomic<unsigned> next_ticket;  // ticket number to give to next thread which acquires
    std::atomic<unsigned> now_serving;  // ticket number for thread which holds the lock
    std::atomic<int>      owner_id;     // (gtid+1) of owning thread, 0 if unlocked
    std::atomic<int>      depth_locked; // depth locked, for nested locks only
    kmp_lock_flags_t      flags;        // lock specifics, e.g. critical section lock
};
#endif

#else // __cplusplus

struct kmp_base_ticket_lock;

#endif // !__cplusplus

typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;

union KMP_ALIGN_CACHE kmp_ticket_lock {
    kmp_base_ticket_lock_t lk;       // This field must be first to allow static initializing.
    kmp_lock_pool_t        pool;
    double                 lk_align; // use worst case alignment
    char                   lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ];
};

typedef union kmp_ticket_lock kmp_ticket_lock_t;

//
// Static initializer for simple ticket lock variables. Usage:
//    kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
// Note the macro argument. It is important to make the variable properly initialized.
//
#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { ATOMIC_VAR_INIT(true), \
                                                &(lock),               \
                                                NULL,                  \
                                                ATOMIC_VAR_INIT(0U),   \
                                                ATOMIC_VAR_INIT(0U),   \
                                                ATOMIC_VAR_INIT(0),    \
                                                ATOMIC_VAR_INIT(-1) } }

extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck );
extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck );

extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck );
extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck );


// ----------------------------------------------------------------------------
// Queuing locks.
// ----------------------------------------------------------------------------

#if KMP_USE_ADAPTIVE_LOCKS

struct kmp_adaptive_lock_info;

typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;

#if KMP_DEBUG_ADAPTIVE_LOCKS

struct kmp_adaptive_lock_statistics {
    /* So we can get stats from locks that haven't been destroyed. */
    kmp_adaptive_lock_info_t * next;
    kmp_adaptive_lock_info_t * prev;

    /* Other statistics */
    kmp_uint32 successfulSpeculations;
    kmp_uint32 hardFailedSpeculations;
    kmp_uint32 softFailedSpeculations;
    kmp_uint32 nonSpeculativeAcquires;
    kmp_uint32 nonSpeculativeAcquireAttempts;
    kmp_uint32 lemmingYields;
};

typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;

extern void __kmp_print_speculative_stats();
extern void __kmp_init_speculative_stats();

#endif // KMP_DEBUG_ADAPTIVE_LOCKS

struct kmp_adaptive_lock_info
{
    /* Values used for adaptivity.
     * Although these are accessed from multiple threads we don't access them atomically,
     * because if we miss updates it probably doesn't matter much. (It just affects our
     * decision about whether to try speculation on the lock).
     */
    kmp_uint32 volatile badness;
    kmp_uint32 volatile acquire_attempts;
    /* Parameters of the lock. */
    kmp_uint32 max_badness;
    kmp_uint32 max_soft_retries;

#if KMP_DEBUG_ADAPTIVE_LOCKS
    kmp_adaptive_lock_statistics_t volatile stats;
#endif
};

#endif // KMP_USE_ADAPTIVE_LOCKS


struct kmp_base_queuing_lock {

    //  `initialized' must be the first entry in the lock data structure!
    volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state.

    ident_t const *      location;     // Source code location of omp_init_lock().

    KMP_ALIGN( 8 )                     // tail_id must be 8-byte aligned!

    volatile kmp_int32   tail_id;      // (gtid+1) of thread at tail of wait queue, 0 if empty
                                       // Must be no padding here since head/tail used in 8-byte CAS
    volatile kmp_int32   head_id;      // (gtid+1) of thread at head of wait queue, 0 if empty
                                       // Decl order assumes little endian
    // bakery-style lock
    volatile kmp_uint32  next_ticket;  // ticket number to give to next thread which acquires
    volatile kmp_uint32  now_serving;  // ticket number for thread which holds the lock
    volatile kmp_int32   owner_id;     // (gtid+1) of owning thread, 0 if unlocked
    kmp_int32            depth_locked; // depth locked, for nested locks only

    kmp_lock_flags_t     flags;        // lock specifics, e.g. critical section lock
};

typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;

KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 );

union KMP_ALIGN_CACHE kmp_queuing_lock {
    kmp_base_queuing_lock_t lk;       // This field must be first to allow static initializing.
    kmp_lock_pool_t         pool;
    double                  lk_align; // use worst case alignment
    char                    lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ];
};

typedef union kmp_queuing_lock kmp_queuing_lock_t;

extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck );
extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck );

extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck );
extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck );

#if KMP_USE_ADAPTIVE_LOCKS

// ----------------------------------------------------------------------------
// Adaptive locks.
// ----------------------------------------------------------------------------
struct kmp_base_adaptive_lock {
    kmp_base_queuing_lock qlk;
    KMP_ALIGN(CACHE_LINE)
    kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock
};

typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;

union KMP_ALIGN_CACHE kmp_adaptive_lock {
    kmp_base_adaptive_lock_t lk;
    kmp_lock_pool_t pool;
    double lk_align;
    char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ];
};
typedef union kmp_adaptive_lock kmp_adaptive_lock_t;

# define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk)

#endif // KMP_USE_ADAPTIVE_LOCKS

// ----------------------------------------------------------------------------
// DRDPA ticket locks.
// ----------------------------------------------------------------------------

struct kmp_base_drdpa_lock {
    //
    // All of the fields on the first cache line are only written when
    // initializing or reconfiguring the lock.  These are relatively rare
    // operations, so data from the first cache line will usually stay
    // resident in the cache of each thread trying to acquire the lock.
    //
    // initialized must be the first entry in the lock data structure!
    //
    KMP_ALIGN_CACHE

    volatile union kmp_drdpa_lock * initialized;    // points to the lock union if in initialized state
    ident_t const *                 location;       // Source code location of omp_init_lock().
    volatile struct kmp_lock_poll {
        kmp_uint64 poll;
    } * volatile                    polls;
    volatile kmp_uint64             mask;           // is 2**num_polls-1 for mod op
    kmp_uint64                      cleanup_ticket; // thread with cleanup ticket
    volatile struct kmp_lock_poll * old_polls;      // will deallocate old_polls
    kmp_uint32                      num_polls;      // must be power of 2

    //
    // next_ticket needs to exist in a separate cache line, as it is
    // invalidated every time a thread takes a new ticket.
    //
    KMP_ALIGN_CACHE

    volatile kmp_uint64             next_ticket;

    //
    // now_serving is used to store our ticket value while we hold the lock.
    // It has a slightly different meaning in the DRDPA ticket locks (where
    // it is written by the acquiring thread) than it does in the simple
    // ticket locks (where it is written by the releasing thread).
    //
    // Since now_serving is only read and written in the critical section,
    // it is non-volatile, but it needs to exist on a separate cache line,
    // as it is invalidated at every lock acquire.
    //
    // Likewise, the vars used for nested locks (owner_id and depth_locked)
    // are only written by the thread owning the lock, so they are put in
    // this cache line.  owner_id is read by other threads, so it must be
    // declared volatile.
    //
    KMP_ALIGN_CACHE

    kmp_uint64                      now_serving;    // doesn't have to be volatile
    volatile kmp_uint32             owner_id;       // (gtid+1) of owning thread, 0 if unlocked
    kmp_int32                       depth_locked;   // depth locked
    kmp_lock_flags_t                flags;          // lock specifics, e.g. critical section lock
};

typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;

union KMP_ALIGN_CACHE kmp_drdpa_lock {
    kmp_base_drdpa_lock_t lk;       // This field must be first to allow static initializing.
    kmp_lock_pool_t       pool;
    double                lk_align; // use worst case alignment
    char                  lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ];
};

typedef union kmp_drdpa_lock kmp_drdpa_lock_t;

extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck );
extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck );

extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck );


// ============================================================================
// Lock purposes.
// ============================================================================


// ----------------------------------------------------------------------------
// Bootstrap locks.
// ----------------------------------------------------------------------------

// Bootstrap locks -- very few locks used at library initialization time.
// Bootstrap locks are currently implemented as ticket locks.
// They could also be implemented as test and set locks, but cannot be
// implemented with other lock kinds as they require gtids which are not
// available at initialization time.

typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;

#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) )

static inline int
__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE );
}

static inline int
__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    return __kmp_test_ticket_lock( lck, KMP_GTID_DNE );
}

static inline void
__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    __kmp_release_ticket_lock( lck, KMP_GTID_DNE );
}

static inline void
__kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    __kmp_init_ticket_lock( lck );
}

static inline void
__kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
}
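// Illustrative usage sketch (the lock name below is hypothetical, not part of
// the runtime):
//
//   static kmp_bootstrap_lock_t example_lock =
//       KMP_BOOTSTRAP_LOCK_INITIALIZER( example_lock );
//   ...
//   __kmp_acquire_bootstrap_lock( &example_lock );
//   /* startup/shutdown-time work; no gtid is required */
//   __kmp_release_bootstrap_lock( &example_lock );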

// ----------------------------------------------------------------------------
// Internal RTL locks.
// ----------------------------------------------------------------------------

//
// Internal RTL locks are also implemented as ticket locks, for now.
//
// FIXME - We should go through and figure out which lock kind works best for
// each internal lock, and use the type declaration and function calls for
// that explicit lock kind (and get rid of this section).
//

typedef kmp_ticket_lock_t kmp_lock_t;

static inline int
__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_acquire_ticket_lock( lck, gtid );
}

static inline int
__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid )
{
    return __kmp_test_ticket_lock( lck, gtid );
}

static inline void
__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid )
{
    __kmp_release_ticket_lock( lck, gtid );
}

static inline void
__kmp_init_lock( kmp_lock_t *lck )
{
    __kmp_init_ticket_lock( lck );
}

static inline void
__kmp_destroy_lock( kmp_lock_t *lck )
{
    __kmp_destroy_ticket_lock( lck );
}


// ----------------------------------------------------------------------------
// User locks.
// ----------------------------------------------------------------------------

//
// Do not allocate objects of type union kmp_user_lock!!!
// This will waste space unless __kmp_user_lock_kind == lk_drdpa.
// Instead, check the value of __kmp_user_lock_kind and allocate objects of
// the type of the appropriate union member, and cast their addresses to
// kmp_user_lock_p.
//

enum kmp_lock_kind {
    lk_default = 0,
    lk_tas,
#if KMP_USE_FUTEX
    lk_futex,
#endif
#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
    lk_hle,
    lk_rtm,
#endif
    lk_ticket,
    lk_queuing,
    lk_drdpa,
#if KMP_USE_ADAPTIVE_LOCKS
    lk_adaptive
#endif // KMP_USE_ADAPTIVE_LOCKS
};

typedef enum kmp_lock_kind kmp_lock_kind_t;

extern kmp_lock_kind_t __kmp_user_lock_kind;

union kmp_user_lock {
    kmp_tas_lock_t      tas;
#if KMP_USE_FUTEX
    kmp_futex_lock_t    futex;
#endif
    kmp_ticket_lock_t   ticket;
    kmp_queuing_lock_t  queuing;
    kmp_drdpa_lock_t    drdpa;
#if KMP_USE_ADAPTIVE_LOCKS
    kmp_adaptive_lock_t adaptive;
#endif // KMP_USE_ADAPTIVE_LOCKS
    kmp_lock_pool_t     pool;
};

typedef union kmp_user_lock *kmp_user_lock_p;

#if ! KMP_USE_DYNAMIC_LOCK

extern size_t __kmp_base_user_lock_size;
extern size_t __kmp_user_lock_size;

extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck );

static inline kmp_int32
__kmp_get_user_lock_owner( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL );
    return ( *__kmp_get_user_lock_owner_ )( lck );
}

extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

#define __kmp_acquire_user_lock_with_checks(lck,gtid)                                           \
    if (__kmp_user_lock_kind == lk_tas) {                                                       \
        if ( __kmp_env_consistency_check ) {                                                    \
            char const * const func = "omp_set_lock";                                           \
            if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )                               \
                && lck->tas.lk.depth_locked != -1 ) {                                           \
                KMP_FATAL( LockNestableUsedAsSimple, func );                                    \
            }                                                                                   \
            if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) {                          \
                KMP_FATAL( LockIsAlreadyOwned, func );                                          \
            }                                                                                   \
        }                                                                                       \
        if ( ( lck->tas.lk.poll != 0 ) ||                                                       \
          ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) {             \
            kmp_uint32 spins;                                                                   \
            KMP_FSYNC_PREPARE( lck );                                                           \
            KMP_INIT_YIELD( spins );                                                            \
            if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) {     \
                KMP_YIELD( TRUE );                                                              \
            } else {                                                                            \
                KMP_YIELD_SPIN( spins );                                                        \
            }                                                                                   \
            while ( ( lck->tas.lk.poll != 0 ) ||                                                \
              ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) {         \
                if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
                    KMP_YIELD( TRUE );                                                          \
                } else {                                                                        \
                    KMP_YIELD_SPIN( spins );                                                    \
                }                                                                               \
            }                                                                                   \
        }                                                                                       \
        KMP_FSYNC_ACQUIRED( lck );                                                              \
    } else {                                                                                    \
        KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );                       \
        ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );                                 \
    }

#else
static inline int
__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
    return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );
}
#endif

extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

#include "kmp_i18n.h"                       /* AC: KMP_FATAL definition */
extern int __kmp_env_consistency_check;     /* AC: copy from kmp.h here */
static inline int
__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    if ( __kmp_user_lock_kind == lk_tas ) {
        if ( __kmp_env_consistency_check ) {
            char const * const func = "omp_test_lock";
            if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
                && lck->tas.lk.depth_locked != -1 ) {
                KMP_FATAL( LockNestableUsedAsSimple, func );
            }
        }
        return ( ( lck->tas.lk.poll == 0 ) &&
          KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
    } else {
        KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
        return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
    }
}
#else
static inline int
__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
    return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
}
#endif

extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

static inline void
__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL );
    ( *__kmp_release_user_lock_with_checks_ ) ( lck, gtid );
}

extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck );

static inline void
__kmp_init_user_lock_with_checks( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL );
    ( *__kmp_init_user_lock_with_checks_ )( lck );
}

//
// We need a non-checking version of destroy lock for when the RTL is
// doing the cleanup as it can't always tell if the lock is nested or not.
//
extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck );

static inline void
__kmp_destroy_user_lock( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL );
    ( *__kmp_destroy_user_lock_ )( lck );
}

extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck );

static inline void
__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL );
    ( *__kmp_destroy_user_lock_with_checks_ )( lck );
}

extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)

#define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth)                              \
    if (__kmp_user_lock_kind == lk_tas) {                                                       \
        if ( __kmp_env_consistency_check ) {                                                    \
            char const * const func = "omp_set_nest_lock";                                      \
            if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE )                          \
                && lck->tas.lk.depth_locked == -1 ) {                                           \
                KMP_FATAL( LockSimpleUsedAsNestable, func );                                    \
            }                                                                                   \
        }                                                                                       \
        if ( lck->tas.lk.poll - 1 == gtid ) {                                                   \
            lck->tas.lk.depth_locked += 1;                                                      \
            *depth = KMP_LOCK_ACQUIRED_NEXT;                                                    \
        } else {                                                                                \
            if ( ( lck->tas.lk.poll != 0 ) ||                                                   \
              ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) {         \
                kmp_uint32 spins;                                                               \
                KMP_FSYNC_PREPARE( lck );                                                       \
                KMP_INIT_YIELD( spins );                                                        \
                if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
                    KMP_YIELD( TRUE );                                                          \
                } else {                                                                        \
                    KMP_YIELD_SPIN( spins );                                                    \
                }                                                                               \
                while ( ( lck->tas.lk.poll != 0 ) ||                                            \
                  ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) {     \
                    if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
                        KMP_YIELD( TRUE );                                                      \
                    } else {                                                                    \
                        KMP_YIELD_SPIN( spins );                                                \
                    }                                                                           \
                }                                                                               \
            }                                                                                   \
            lck->tas.lk.depth_locked = 1;                                                       \
            *depth = KMP_LOCK_ACQUIRED_FIRST;                                                   \
        }                                                                                       \
        KMP_FSYNC_ACQUIRED( lck );                                                              \
    } else {                                                                                    \
        KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );                \
        *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid );                 \
    }

#else
static inline void
__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth )
{
    KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );
    *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid );
}
#endif

extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
static inline int
__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    if ( __kmp_user_lock_kind == lk_tas ) {
        int retval;
        if ( __kmp_env_consistency_check ) {
            char const * const func = "omp_test_nest_lock";
            if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE )
                && lck->tas.lk.depth_locked == -1 ) {
                KMP_FATAL( LockSimpleUsedAsNestable, func );
            }
        }
        KMP_DEBUG_ASSERT( gtid >= 0 );
        if ( lck->tas.lk.poll - 1 == gtid ) {   /* __kmp_get_tas_lock_owner( lck ) == gtid */
            return ++lck->tas.lk.depth_locked;  /* same owner, depth increased */
        }
        retval = ( ( lck->tas.lk.poll == 0 ) &&
          KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
        if ( retval ) {
            KMP_MB();
            lck->tas.lk.depth_locked = 1;
        }
        return retval;
    } else {
        KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
        return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
    }
}
#else
static inline int
__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
    return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
}
#endif

extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );

static inline int
__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
{
    KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL );
    return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid );
}

extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );

static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL );
    ( *__kmp_init_nested_user_lock_with_checks_ )( lck );
}

extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );

static inline void
__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck )
{
    KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL );
    ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck );
}

//
// user lock functions which do not necessarily exist for all lock kinds.
//
// The "set" functions usually have wrapper routines that check for a NULL set
// function pointer and call it if non-NULL.
//
// In some cases, it makes sense to have a "get" wrapper function check for a
// NULL get function pointer and return NULL / invalid value / error code if
// the function pointer is NULL.
//
// In other cases, the calling code really should differentiate between an
// unimplemented function and one that is implemented but returning NULL /
// invalid value.  If this is the case, no get function wrapper exists.
//

extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck );

// no set function; fields set during local allocation

extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck );

static inline const ident_t *
__kmp_get_user_lock_location( kmp_user_lock_p lck )
{
    if ( __kmp_get_user_lock_location_ != NULL ) {
        return ( *__kmp_get_user_lock_location_ )( lck );
    }
    else {
        return NULL;
    }
}

extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc );

static inline void
__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc )
{
    if ( __kmp_set_user_lock_location_ != NULL ) {
        ( *__kmp_set_user_lock_location_ )( lck, loc );
    }
}

extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck );

extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags );

static inline void
__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags )
{
    if ( __kmp_set_user_lock_flags_ != NULL ) {
        ( *__kmp_set_user_lock_flags_ )( lck, flags );
    }
}

//
// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
//
extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind );

//
// Macros for binding user lock functions.
//
#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) {                                       \
    __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) )     \
                                                   __kmp_acquire##nest##kind##_##suffix;        \
    __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) )     \
                                                   __kmp_release##nest##kind##_##suffix;        \
    __kmp_test##nest##user_lock_with_checks_    = ( int (*)( kmp_user_lock_p, kmp_int32 ) )     \
                                                   __kmp_test##nest##kind##_##suffix;           \
    __kmp_init##nest##user_lock_with_checks_    = ( void (*)( kmp_user_lock_p ) )               \
                                                   __kmp_init##nest##kind##_##suffix;           \
    __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) )               \
                                                   __kmp_destroy##nest##kind##_##suffix;        \
}

#define KMP_BIND_USER_LOCK(kind)                    KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind)        KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
#define KMP_BIND_NESTED_USER_LOCK(kind)             KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
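// Illustrative (mechanical) expansion, shown for reference:
// KMP_BIND_USER_LOCK_WITH_CHECKS(ticket) produces assignments of the form
//
//   __kmp_acquire_user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) )
//                                          __kmp_acquire_ticket_lock_with_checks;
//
// and likewise for the release/test/init/destroy entries.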

// ----------------------------------------------------------------------------
// User lock table & lock allocation
// ----------------------------------------------------------------------------

/*
    On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of memory for a lock
    variable, which is not enough to store a pointer, so we have to use lock indexes instead of
    pointers and maintain a lock table to map indexes to pointers.


    Note: The first element of the table is not a pointer to a lock! It is a pointer to the
    previously allocated table (or NULL if it is the first table).

    Usage:

        if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
            Lock table is fully utilized. User locks are indexes, so table is
            used on user lock operation.
            Note: it may be the case (lin_32) that we don't need to use a lock
            table for regular locks, but do need the table for nested locks.
        }
        else {
            Lock table initialized but not actually used.
        }
*/

struct kmp_lock_table {
    kmp_lock_index_t  used;      // Number of used elements
    kmp_lock_index_t  allocated; // Number of allocated elements
    kmp_user_lock_p * table;     // Lock table.
};

typedef struct kmp_lock_table kmp_lock_table_t;

extern kmp_lock_table_t __kmp_user_lock_table;
extern kmp_user_lock_p __kmp_lock_pool;

struct kmp_block_of_locks {
    struct kmp_block_of_locks * next_block;
    void *                      locks;
};

typedef struct kmp_block_of_locks kmp_block_of_locks_t;

extern kmp_block_of_locks_t *__kmp_lock_blocks;
extern int __kmp_num_locks_in_block;

extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags );
extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck );
extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func );
extern void __kmp_cleanup_user_locks();

#define KMP_CHECK_USER_LOCK_INIT() \
        {                                                               \
            if ( ! TCR_4( __kmp_init_user_locks ) ) {                   \
                __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );      \
                if ( ! TCR_4( __kmp_init_user_locks ) ) {               \
                    TCW_4( __kmp_init_user_locks, TRUE );               \
                }                                                       \
                __kmp_release_bootstrap_lock( &__kmp_initz_lock );      \
            }                                                           \
        }

#endif // KMP_USE_DYNAMIC_LOCK

#undef KMP_PAD
#undef KMP_GTID_DNE

#if KMP_USE_DYNAMIC_LOCK

//
// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current
// compatibility. The essential functionality of this new code is dynamic dispatch, but it also
// implements (or enables implementation of) hinted user locks and critical sections, which will
// be part of OMP 4.5 soon.
//
// The lock type is decided at creation time (i.e., lock initialization), and each subsequent
// lock function call on the created lock object requires extracting the type and dispatching
// through a jump table using the extracted type. This type information is stored in two
// different ways depending on the size of the lock object, and we differentiate lock types by
// this size requirement - direct and indirect locks.
//
// Direct locks:
// A direct lock object fits into the space created by the compiler for an omp_lock_t object;
// the TAS and futex locks fall into this category. We use the low byte of the lock object as
// storage for the lock type, so appropriate bit operations are required to access the data
// meaningful to the lock algorithms. Also, to differentiate a direct lock from an indirect lock,
// 1 is written to the LSB of the lock object. The newly introduced "hle" lock is also a direct
// lock.
//
// Indirect locks:
// An indirect lock object requires more space than the compiler-generated space, and it should
// be allocated from the heap. Depending on the size of the compiler-generated space for the lock
// (i.e., the size of omp_lock_t), this omp_lock_t object stores either the address of the
// heap-allocated indirect lock (when a void * fits in the object) or an index into the indirect
// lock table entry that holds the address. The ticket, queuing, DRDPA, and adaptive locks fall
// into this category, and the newly introduced "rtm" lock is also an indirect lock, implemented
// on top of the queuing lock. When the omp_lock_t object holds an index (not a lock address),
// 0 is written to the LSB to differentiate the lock from a direct lock, and the remaining bits
// are the actual index into the indirect lock table.
//

#include <stdint.h> // for uintptr_t

// Shortcuts
#define KMP_USE_INLINED_TAS   (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0

// List of lock definitions; all nested locks are indirect locks.
// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE.
// All nested locks are indirect lock types.
#if KMP_USE_TSX
# if KMP_USE_FUTEX
#  define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a)
#  define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
                                   m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a)         \
                                   m(nested_queuing, a) m(nested_drdpa, a)
# else
#  define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a)
#  define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
                                   m(nested_tas, a) m(nested_ticket, a)                            \
                                   m(nested_queuing, a) m(nested_drdpa, a)
# endif // KMP_USE_FUTEX
# define KMP_LAST_D_LOCK lockseq_hle
#else
# if KMP_USE_FUTEX
#  define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
#  define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)                          \
                                   m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a)         \
                                   m(nested_queuing, a) m(nested_drdpa, a)
#  define KMP_LAST_D_LOCK lockseq_futex
# else
#  define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
#  define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)                          \
                                   m(nested_tas, a) m(nested_ticket, a)                            \
                                   m(nested_queuing, a) m(nested_drdpa, a)
#  define KMP_LAST_D_LOCK lockseq_tas
# endif // KMP_USE_FUTEX
#endif // KMP_USE_TSX

// Information used in dynamic dispatch
#define KMP_LOCK_SHIFT   8 // number of low bits to be used as tag for direct locks
#define KMP_FIRST_D_LOCK lockseq_tas
#define KMP_FIRST_I_LOCK lockseq_ticket
#define KMP_LAST_I_LOCK  lockseq_nested_drdpa
#define KMP_NUM_I_LOCKS  (locktag_nested_drdpa+1) // number of indirect lock types

// Base type for dynamic locks.
typedef kmp_uint32 kmp_dyna_lock_t;

// Lock sequence that enumerates all lock kinds.
// Always make this enumeration consistent with kmp_lockseq_t in the include directory.
typedef enum {
    lockseq_indirect = 0,
#define expand_seq(l,a) lockseq_##l,
    KMP_FOREACH_D_LOCK(expand_seq, 0)
    KMP_FOREACH_I_LOCK(expand_seq, 0)
#undef expand_seq
} kmp_dyna_lockseq_t;

// Enumerates indirect lock tags.
typedef enum {
#define expand_tag(l,a) locktag_##l,
    KMP_FOREACH_I_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_indirect_locktag_t;
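// Illustrative example: in a build without TSX and futex support,
// KMP_FOREACH_D_LOCK(m, a) expands to m(tas, a) only, so the sequence enum
// becomes lockseq_indirect = 0, lockseq_tas = 1, lockseq_ticket = 2,
// lockseq_queuing = 3, lockseq_drdpa = 4, followed by the nested kinds, and
// the indirect tag enum starts over at locktag_ticket = 0.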

// Utility macros that extract information from lock sequences.
#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK)
#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1)

// Enumerates direct lock tags starting from indirect tag.
typedef enum {
#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
    KMP_FOREACH_D_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_direct_locktag_t;

// Indirect lock type
typedef struct {
    kmp_user_lock_p lock;
    kmp_indirect_locktag_t type;
} kmp_indirect_lock_t;

// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking.
extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *);
extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
extern int  (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
extern int  (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);

// Function tables for indirect locks. Set/unset/test differentiate functions with/without consistency checking.
extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
extern int  (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
extern int  (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);

// Extracts direct lock tag from a user lock pointer
#define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<<KMP_LOCK_SHIFT)-1) & -(*((kmp_dyna_lock_t *)(l)) & 1))

// Extracts indirect lock index from a user lock pointer
#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)

// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type).
#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]

// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type).
#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]

// Initializes a direct lock with the given lock pointer and lock sequence.
#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)

// Initializes an indirect lock with the given lock pointer and lock sequence.
#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)

// Returns "free" lock value for the given lock type.
#define KMP_LOCK_FREE(type) (locktag_##type)

// Returns "busy" lock value for the given lock type.
#define KMP_LOCK_BUSY(v, type) ((v)<<KMP_LOCK_SHIFT | locktag_##type)

// Returns lock value after removing (shifting) lock tag.
#define KMP_LOCK_STRIP(v) ((v)>>KMP_LOCK_SHIFT)
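// Illustrative example (derived from the macros above): tas is always the
// first direct lock, so lockseq_tas == 1 and locktag_tas == KMP_GET_D_TAG(1) == 3.
// An unlocked TAS lock word is KMP_LOCK_FREE(tas) == 3 (odd, so the LSB marks
// it as a direct lock), a lock held by gtid is KMP_LOCK_BUSY(gtid + 1, tas) ==
// ((gtid + 1) << KMP_LOCK_SHIFT) | 3, KMP_LOCK_STRIP() recovers gtid + 1, and
// KMP_EXTRACT_D_TAG() recovers the tag value 3.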

// Initializes global states and data structures for managing dynamic user locks.
extern void __kmp_init_dynamic_user_locks();

// Allocates and returns an indirect lock with the given indirect lock tag.
extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);

// Cleans up global states and data structures for managing dynamic user locks.
extern void __kmp_cleanup_indirect_user_locks();

// Default user lock sequence when not using hinted locks.
extern kmp_dyna_lockseq_t __kmp_user_lock_seq;

// Jump table for "set lock location", available only for indirect locks.
extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *);
#define KMP_SET_I_LOCK_LOCATION(lck, loc) {                            \
    if (__kmp_indirect_set_location[(lck)->type] != NULL)              \
        __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc);    \
}

// Jump table for "set lock flags", available only for indirect locks.
extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t);
#define KMP_SET_I_LOCK_FLAGS(lck, flag) {                              \
    if (__kmp_indirect_set_flags[(lck)->type] != NULL)                 \
        __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag);      \
}

// Jump table for "get lock location", available only for indirect locks.
extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL        \
                                       ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
                                       : NULL )

// Jump table for "get lock flags", available only for indirect locks.
extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL            \
                                    ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock)     \
                                    : NULL )

#define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together

// Lock table for indirect locks.
typedef struct kmp_indirect_lock_table {
    kmp_indirect_lock_t **table; // blocks of indirect locks allocated
    kmp_lock_index_t size;       // size of the indirect lock table
    kmp_lock_index_t next;       // index to the next lock to be allocated
} kmp_indirect_lock_table_t;

extern kmp_indirect_lock_table_t __kmp_i_lock_table;

// Returns the indirect lock associated with the given index.
#define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK)
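// Illustrative example: the table is an array of pointers to chunks of
// KMP_I_LOCK_CHUNK locks, so KMP_GET_I_LOCK(1500) reads
// *(__kmp_i_lock_table.table + 1) + 476, i.e. slot 476 of the second chunk.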

// Number of locks in a lock block, which is fixed to "1" now.
// TODO: No lock block implementation exists yet. If we add support, we will need to manage a
// lock block data structure for each indirect lock type.
extern int __kmp_num_locks_in_block;

// Fast lock table lookup without consistency checking
#define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *))            \
                               ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l))      \
                               : *((kmp_indirect_lock_t **)(l)) )

// Used once in kmp_error.cpp
extern kmp_int32
__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);

#else // KMP_USE_DYNAMIC_LOCK

# define KMP_LOCK_BUSY(v, type) (v)
# define KMP_LOCK_FREE(type)    0
# define KMP_LOCK_STRIP(v)      (v)

#endif // KMP_USE_DYNAMIC_LOCK

// Data structure for using backoff within spin locks.
typedef struct {
    kmp_uint32 step;        // current step
    kmp_uint32 max_backoff; // upper bound of outer delay loop
    kmp_uint32 min_tick;    // size of inner delay loop in ticks (machine-dependent)
} kmp_backoff_t;

// Runtime's default backoff parameters
extern kmp_backoff_t __kmp_spin_backoff_params;

// Backoff function
extern void __kmp_spin_backoff(kmp_backoff_t *);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

#endif /* KMP_LOCK_H */