1 /*! \file */ 2 /* 3 * kmp.h -- KPTS runtime header file. 4 */ 5 6 7 //===----------------------------------------------------------------------===// 8 // 9 // The LLVM Compiler Infrastructure 10 // 11 // This file is dual licensed under the MIT and the University of Illinois Open 12 // Source Licenses. See LICENSE.txt for details. 13 // 14 //===----------------------------------------------------------------------===// 15 16 17 #ifndef KMP_H 18 #define KMP_H 19 20 #include "kmp_config.h" 21 22 /* #define BUILD_PARALLEL_ORDERED 1 */ 23 24 /* This fix replaces gettimeofday with clock_gettime for better scalability on 25 the Altix. Requires user code to be linked with -lrt. 26 */ 27 //#define FIX_SGI_CLOCK 28 29 /* Defines for OpenMP 3.0 tasking and auto scheduling */ 30 31 # ifndef KMP_STATIC_STEAL_ENABLED 32 # define KMP_STATIC_STEAL_ENABLED 1 33 # endif 34 35 #define TASK_CURRENT_NOT_QUEUED 0 36 #define TASK_CURRENT_QUEUED 1 37 38 #ifdef BUILD_TIED_TASK_STACK 39 #define TASK_STACK_EMPTY 0 // entries when the stack is empty 40 41 #define TASK_STACK_BLOCK_BITS 5 // Used to define TASK_STACK_SIZE and TASK_STACK_MASK 42 #define TASK_STACK_BLOCK_SIZE ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array 43 #define TASK_STACK_INDEX_MASK ( TASK_STACK_BLOCK_SIZE - 1 ) // Mask for determining index into stack block 44 #endif // BUILD_TIED_TASK_STACK 45 46 #define TASK_NOT_PUSHED 1 47 #define TASK_SUCCESSFULLY_PUSHED 0 48 #define TASK_TIED 1 49 #define TASK_UNTIED 0 50 #define TASK_EXPLICIT 1 51 #define TASK_IMPLICIT 0 52 #define TASK_PROXY 1 53 #define TASK_FULL 0 54 55 #define KMP_CANCEL_THREADS 56 #define KMP_THREAD_ATTR 57 58 // Android does not have pthread_cancel. 
Undefine KMP_CANCEL_THREADS if being 59 // built on Android 60 #if defined(__ANDROID__) 61 #undef KMP_CANCEL_THREADS 62 #endif 63 64 #include <stdio.h> 65 #include <stdlib.h> 66 #include <stddef.h> 67 #include <stdarg.h> 68 #include <string.h> 69 #include <signal.h> 70 /* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad Microsoft library */ 71 /* some macros provided below to replace some of these functions */ 72 #ifndef __ABSOFT_WIN 73 #include <sys/types.h> 74 #endif 75 #include <limits.h> 76 #include <time.h> 77 78 #include <errno.h> 79 80 #include "kmp_os.h" 81 82 #include "kmp_safe_c_api.h" 83 84 #if KMP_STATS_ENABLED 85 class kmp_stats_list; 86 #endif 87 88 #if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED 89 # include "hwloc.h" 90 #endif 91 92 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 93 #include <xmmintrin.h> 94 #endif 95 96 #include "kmp_version.h" 97 #include "kmp_debug.h" 98 #include "kmp_lock.h" 99 #if USE_DEBUGGER 100 #include "kmp_debugger.h" 101 #endif 102 #include "kmp_i18n.h" 103 104 #define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS) 105 106 #include "kmp_wrapper_malloc.h" 107 #if KMP_OS_UNIX 108 # include <unistd.h> 109 # if !defined NSIG && defined _NSIG 110 # define NSIG _NSIG 111 # endif 112 #endif 113 114 #if KMP_OS_LINUX 115 # pragma weak clock_gettime 116 #endif 117 118 #if OMPT_SUPPORT 119 #include "ompt-internal.h" 120 #endif 121 122 /*Select data placement in NUMA memory */ 123 #define NO_FIRST_TOUCH 0 124 #define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */ 125 126 /* If not specified on compile command line, assume no first touch */ 127 #ifndef BUILD_MEMORY 128 #define BUILD_MEMORY NO_FIRST_TOUCH 129 #endif 130 131 // 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64. 132 // 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size. 
133 #ifndef USE_FAST_MEMORY 134 #define USE_FAST_MEMORY 3 135 #endif 136 137 #ifndef KMP_NESTED_HOT_TEAMS 138 # define KMP_NESTED_HOT_TEAMS 0 139 # define USE_NESTED_HOT_ARG(x) 140 #else 141 # if KMP_NESTED_HOT_TEAMS 142 # if OMP_40_ENABLED 143 # define USE_NESTED_HOT_ARG(x) ,x 144 # else 145 // Nested hot teams feature depends on omp 4.0, disable it for earlier versions 146 # undef KMP_NESTED_HOT_TEAMS 147 # define KMP_NESTED_HOT_TEAMS 0 148 # define USE_NESTED_HOT_ARG(x) 149 # endif 150 # else 151 # define USE_NESTED_HOT_ARG(x) 152 # endif 153 #endif 154 155 // Assume using BGET compare_exchange instruction instead of lock by default. 156 #ifndef USE_CMP_XCHG_FOR_BGET 157 #define USE_CMP_XCHG_FOR_BGET 1 158 #endif 159 160 // Test to see if queuing lock is better than bootstrap lock for bget 161 // #ifndef USE_QUEUING_LOCK_FOR_BGET 162 // #define USE_QUEUING_LOCK_FOR_BGET 163 // #endif 164 165 #define KMP_NSEC_PER_SEC 1000000000L 166 #define KMP_USEC_PER_SEC 1000000L 167 168 /*! 169 @ingroup BASIC_TYPES 170 @{ 171 */ 172 173 // FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous enum would do it...) 174 /*! 175 Values for bit flags used in the ident_t to describe the fields. 176 */ 177 /*! Use trampoline for internal microtasks */ 178 #define KMP_IDENT_IMB 0x01 179 /*! Use c-style ident structure */ 180 #define KMP_IDENT_KMPC 0x02 181 /* 0x04 is no longer used */ 182 /*! Entry point generated by auto-parallelization */ 183 #define KMP_IDENT_AUTOPAR 0x08 184 /*! Compiler generates atomic reduction option for kmpc_reduce* */ 185 #define KMP_IDENT_ATOMIC_REDUCE 0x10 186 /*! To mark a 'barrier' directive in user code */ 187 #define KMP_IDENT_BARRIER_EXPL 0x20 188 /*! To Mark implicit barriers. 
*/ 189 #define KMP_IDENT_BARRIER_IMPL 0x0040 190 #define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0 191 #define KMP_IDENT_BARRIER_IMPL_FOR 0x0040 192 #define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0 193 194 #define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140 195 #define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0 196 197 /*! 198 * The ident structure that describes a source location. 199 */ 200 typedef struct ident { 201 kmp_int32 reserved_1; /**< might be used in Fortran; see above */ 202 kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */ 203 kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ 204 #if USE_ITT_BUILD 205 /* but currently used for storing region-specific ITT */ 206 /* contextual information. */ 207 #endif /* USE_ITT_BUILD */ 208 kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ 209 char const *psource; /**< String describing the source location. 210 The string is composed of semi-colon separated fields which describe the source file, 211 the function and a pair of line numbers that delimit the construct. 212 */ 213 } ident_t; 214 /*! 215 @} 216 */ 217 218 // Some forward declarations. 219 220 typedef union kmp_team kmp_team_t; 221 typedef struct kmp_taskdata kmp_taskdata_t; 222 typedef union kmp_task_team kmp_task_team_t; 223 typedef union kmp_team kmp_team_p; 224 typedef union kmp_info kmp_info_p; 225 typedef union kmp_root kmp_root_p; 226 227 #ifdef __cplusplus 228 extern "C" { 229 #endif 230 231 /* ------------------------------------------------------------------------ */ 232 /* ------------------------------------------------------------------------ */ 233 234 /* Pack two 32-bit signed integers into a 64-bit signed integer */ 235 /* ToDo: Fix word ordering for big-endian machines. */ 236 #define KMP_PACK_64(HIGH_32,LOW_32) \ 237 ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) ) 238 239 240 /* 241 * Generic string manipulation macros. 
242 * Assume that _x is of type char * 243 */ 244 #define SKIP_WS(_x) { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; } 245 #define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; } 246 #define SKIP_TO(_x,_c) { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; } 247 248 /* ------------------------------------------------------------------------ */ 249 /* ------------------------------------------------------------------------ */ 250 251 #define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) ) 252 #define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) 253 254 /* ------------------------------------------------------------------------ */ 255 /* ------------------------------------------------------------------------ */ 256 257 258 /* Enumeration types */ 259 260 enum kmp_state_timer { 261 ts_stop, 262 ts_start, 263 ts_pause, 264 265 ts_last_state 266 }; 267 268 enum dynamic_mode { 269 dynamic_default, 270 #ifdef USE_LOAD_BALANCE 271 dynamic_load_balance, 272 #endif /* USE_LOAD_BALANCE */ 273 dynamic_random, 274 dynamic_thread_limit, 275 dynamic_max 276 }; 277 278 /* external schedule constants, duplicate enum omp_sched in omp.h in order to not include it here */ 279 #ifndef KMP_SCHED_TYPE_DEFINED 280 #define KMP_SCHED_TYPE_DEFINED 281 typedef enum kmp_sched { 282 kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check 283 // Note: need to adjust __kmp_sch_map global array in case this enum is changed 284 kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) 285 kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) 286 kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) 287 kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) 288 kmp_sched_upper_std = 5, // upper bound for standard schedules 289 kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules 290 kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39) 291 #if KMP_STATIC_STEAL_ENABLED 292 kmp_sched_static_steal = 102, // mapped to 
kmp_sch_static_steal (44) 293 #endif 294 kmp_sched_upper, 295 kmp_sched_default = kmp_sched_static // default scheduling 296 } kmp_sched_t; 297 #endif 298 299 /*! 300 @ingroup WORK_SHARING 301 * Describes the loop schedule to be used for a parallel for loop. 302 */ 303 enum sched_type { 304 kmp_sch_lower = 32, /**< lower bound for unordered values */ 305 kmp_sch_static_chunked = 33, 306 kmp_sch_static = 34, /**< static unspecialized */ 307 kmp_sch_dynamic_chunked = 35, 308 kmp_sch_guided_chunked = 36, /**< guided unspecialized */ 309 kmp_sch_runtime = 37, 310 kmp_sch_auto = 38, /**< auto */ 311 kmp_sch_trapezoidal = 39, 312 313 /* accessible only through KMP_SCHEDULE environment variable */ 314 kmp_sch_static_greedy = 40, 315 kmp_sch_static_balanced = 41, 316 /* accessible only through KMP_SCHEDULE environment variable */ 317 kmp_sch_guided_iterative_chunked = 42, 318 kmp_sch_guided_analytical_chunked = 43, 319 320 kmp_sch_static_steal = 44, /**< accessible only through KMP_SCHEDULE environment variable */ 321 322 #if OMP_45_ENABLED 323 kmp_sch_static_balanced_chunked = 45, /**< static with chunk adjustment (e.g., simd) */ 324 #endif 325 326 /* accessible only through KMP_SCHEDULE environment variable */ 327 kmp_sch_upper = 46, /**< upper bound for unordered values */ 328 329 kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */ 330 kmp_ord_static_chunked = 65, 331 kmp_ord_static = 66, /**< ordered static unspecialized */ 332 kmp_ord_dynamic_chunked = 67, 333 kmp_ord_guided_chunked = 68, 334 kmp_ord_runtime = 69, 335 kmp_ord_auto = 70, /**< ordered auto */ 336 kmp_ord_trapezoidal = 71, 337 kmp_ord_upper = 72, /**< upper bound for ordered values */ 338 339 #if OMP_40_ENABLED 340 /* Schedules for Distribute construct */ 341 kmp_distribute_static_chunked = 91, /**< distribute static chunked */ 342 kmp_distribute_static = 92, /**< distribute static unspecialized */ 343 #endif 344 345 /* 346 * For the "nomerge" versions, kmp_dispatch_next*() 
will always return 347 * a single iteration/chunk, even if the loop is serialized. For the 348 * schedule types listed above, the entire iteration vector is returned 349 * if the loop is serialized. This doesn't work for gcc/gcomp sections. 350 */ 351 kmp_nm_lower = 160, /**< lower bound for nomerge values */ 352 353 kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower), 354 kmp_nm_static = 162, /**< static unspecialized */ 355 kmp_nm_dynamic_chunked = 163, 356 kmp_nm_guided_chunked = 164, /**< guided unspecialized */ 357 kmp_nm_runtime = 165, 358 kmp_nm_auto = 166, /**< auto */ 359 kmp_nm_trapezoidal = 167, 360 361 /* accessible only through KMP_SCHEDULE environment variable */ 362 kmp_nm_static_greedy = 168, 363 kmp_nm_static_balanced = 169, 364 /* accessible only through KMP_SCHEDULE environment variable */ 365 kmp_nm_guided_iterative_chunked = 170, 366 kmp_nm_guided_analytical_chunked = 171, 367 kmp_nm_static_steal = 172, /* accessible only through OMP_SCHEDULE environment variable */ 368 369 kmp_nm_ord_static_chunked = 193, 370 kmp_nm_ord_static = 194, /**< ordered static unspecialized */ 371 kmp_nm_ord_dynamic_chunked = 195, 372 kmp_nm_ord_guided_chunked = 196, 373 kmp_nm_ord_runtime = 197, 374 kmp_nm_ord_auto = 198, /**< auto */ 375 kmp_nm_ord_trapezoidal = 199, 376 kmp_nm_upper = 200, /**< upper bound for nomerge values */ 377 378 #if OMP_45_ENABLED 379 /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 380 * Since we need to distinguish the three possible cases (no modifier, monotonic modifier, 381 * nonmonotonic modifier), we need separate bits for each modifier. 382 * The absence of monotonic does not imply nonmonotonic, especially since 4.5 says 383 * that the behaviour of the "no modifier" case is implementation defined in 4.5, 384 * but will become "nonmonotonic" in 5.0. 
385 * 386 * Since we're passing a full 32 bit value, we can use a couple of high bits for these 387 * flags; out of paranoia we avoid the sign bit. 388 * 389 * These modifiers can be or-ed into non-static schedules by the compiler to pass 390 * the additional information. 391 * They will be stripped early in the processing in __kmp_dispatch_init when setting up schedules, so 392 * most of the code won't ever see schedules with these bits set. 393 */ 394 kmp_sch_modifier_monotonic = (1<<29), /**< Set if the monotonic schedule modifier was present */ 395 kmp_sch_modifier_nonmonotonic = (1<<30), /**< Set if the nonmonotonic schedule modifier was present */ 396 397 # define SCHEDULE_WITHOUT_MODIFIERS(s) (enum sched_type)((s) & ~ (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) 398 # define SCHEDULE_HAS_MONOTONIC(s) (((s) & kmp_sch_modifier_monotonic) != 0) 399 # define SCHEDULE_HAS_NONMONOTONIC(s) (((s) & kmp_sch_modifier_nonmonotonic) != 0) 400 # define SCHEDULE_HAS_NO_MODIFIERS(s) (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0) 401 #else 402 /* By doing this we hope to avoid multiple tests on OMP_45_ENABLED. Compilers can now eliminate tests on compile time 403 * constants and dead code that results from them, so we can leave code guarded by such an if in place. 
404 */ 405 # define SCHEDULE_WITHOUT_MODIFIERS(s) (s) 406 # define SCHEDULE_HAS_MONOTONIC(s) false 407 # define SCHEDULE_HAS_NONMONOTONIC(s) false 408 # define SCHEDULE_HAS_NO_MODIFIERS(s) true 409 #endif 410 411 kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */ 412 }; 413 414 /* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */ 415 typedef struct kmp_r_sched { 416 enum sched_type r_sched_type; 417 int chunk; 418 } kmp_r_sched_t; 419 420 extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our internal schedule types 421 422 enum library_type { 423 library_none, 424 library_serial, 425 library_turnaround, 426 library_throughput 427 }; 428 429 #if KMP_OS_LINUX 430 enum clock_function_type { 431 clock_function_gettimeofday, 432 clock_function_clock_gettime 433 }; 434 #endif /* KMP_OS_LINUX */ 435 436 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 437 enum mic_type { 438 non_mic, 439 mic1, 440 mic2, 441 mic3, 442 dummy 443 }; 444 #endif 445 446 /* ------------------------------------------------------------------------ */ 447 /* -- fast reduction stuff ------------------------------------------------ */ 448 449 #undef KMP_FAST_REDUCTION_BARRIER 450 #define KMP_FAST_REDUCTION_BARRIER 1 451 452 #undef KMP_FAST_REDUCTION_CORE_DUO 453 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 454 #define KMP_FAST_REDUCTION_CORE_DUO 1 455 #endif 456 457 enum _reduction_method { 458 reduction_method_not_defined = 0, 459 critical_reduce_block = ( 1 << 8 ), 460 atomic_reduce_block = ( 2 << 8 ), 461 tree_reduce_block = ( 3 << 8 ), 462 empty_reduce_block = ( 4 << 8 ) 463 }; 464 465 // description of the packed_reduction_method variable 466 // the packed_reduction_method variable consists of two enum types variables that are packed together into 0-th byte and 1-st byte: 467 // 0: ( packed_reduction_method & 0x000000FF ) is a 'enum barrier_type' value of barrier that will be used in fast reduction: bs_plain_barrier or 
bs_reduction_barrier 468 // 1: ( packed_reduction_method & 0x0000FF00 ) is a reduction method that will be used in fast reduction; 469 // reduction method is of 'enum _reduction_method' type and it's defined the way so that the bits of 0-th byte are empty, 470 // so no need to execute a shift instruction while packing/unpacking 471 472 #if KMP_FAST_REDUCTION_BARRIER 473 #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \ 474 ( ( reduction_method ) | ( barrier_type ) ) 475 476 #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ 477 ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) ) 478 479 #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ 480 ( ( enum barrier_type )( ( packed_reduction_method ) & ( 0x000000FF ) ) ) 481 #else 482 #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \ 483 ( reduction_method ) 484 485 #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ 486 ( packed_reduction_method ) 487 488 #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ 489 ( bs_plain_barrier ) 490 #endif 491 492 #define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \ 493 ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == ( which_reduction_block ) ) 494 495 #if KMP_FAST_REDUCTION_BARRIER 496 #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \ 497 ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) ) 498 499 #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \ 500 ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) ) 501 #endif 502 503 typedef int PACKED_REDUCTION_METHOD_T; 504 505 /* -- end of fast reduction stuff ----------------------------------------- */ 506 507 /* ------------------------------------------------------------------------ */ 508 /* ------------------------------------------------------------------------ */ 509 510 #if KMP_OS_WINDOWS 511 # define USE_CBLKDATA 512 # pragma warning( push ) 
513 # pragma warning( disable: 271 310 ) 514 # include <windows.h> 515 # pragma warning( pop ) 516 #endif 517 518 #if KMP_OS_UNIX 519 # include <pthread.h> 520 # include <dlfcn.h> 521 #endif 522 523 /* ------------------------------------------------------------------------ */ 524 /* ------------------------------------------------------------------------ */ 525 526 /* 527 * Only Linux* OS and Windows* OS support thread affinity. 528 */ 529 #if KMP_AFFINITY_SUPPORTED 530 531 // GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). 532 # if KMP_OS_WINDOWS 533 # if _MSC_VER < 1600 534 typedef struct GROUP_AFFINITY { 535 KAFFINITY Mask; 536 WORD Group; 537 WORD Reserved[3]; 538 } GROUP_AFFINITY; 539 # endif /* _MSC_VER < 1600 */ 540 # if KMP_GROUP_AFFINITY 541 extern int __kmp_num_proc_groups; 542 # else 543 static const int __kmp_num_proc_groups = 1; 544 # endif /* KMP_GROUP_AFFINITY */ 545 typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); 546 extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; 547 548 typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); 549 extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount; 550 551 typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); 552 extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; 553 554 typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *); 555 extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; 556 # endif /* KMP_OS_WINDOWS */ 557 558 # if KMP_USE_HWLOC 559 extern hwloc_topology_t __kmp_hwloc_topology; 560 extern int __kmp_hwloc_error; 561 # endif 562 563 extern size_t __kmp_affin_mask_size; 564 # define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) 565 # define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) 566 # define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) 567 # define KMP_CPU_SET_ITERATE(i,mask) \ 568 for (i = 
(mask)->begin(); i != (mask)->end() ; i = (mask)->next(i)) 569 # define KMP_CPU_SET(i,mask) (mask)->set(i) 570 # define KMP_CPU_ISSET(i,mask) (mask)->is_set(i) 571 # define KMP_CPU_CLR(i,mask) (mask)->clear(i) 572 # define KMP_CPU_ZERO(mask) (mask)->zero() 573 # define KMP_CPU_COPY(dest, src) (dest)->copy(src) 574 # define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src) 575 # define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not() 576 # define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src) 577 # define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask()) 578 # define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr) 579 # define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) 580 # define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr) 581 # define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr) 582 # define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr) 583 # define KMP_CPU_INDEX(arr,i) __kmp_affinity_dispatch->index_mask_array(arr, i) 584 # define KMP_CPU_ALLOC_ARRAY(arr, n) (arr = __kmp_affinity_dispatch->allocate_mask_array(n)) 585 # define KMP_CPU_FREE_ARRAY(arr, n) __kmp_affinity_dispatch->deallocate_mask_array(arr) 586 # define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n) 587 # define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n) 588 # define __kmp_get_system_affinity(mask, abort_bool) (mask)->get_system_affinity(abort_bool) 589 # define __kmp_set_system_affinity(mask, abort_bool) (mask)->set_system_affinity(abort_bool) 590 # define __kmp_get_proc_group(mask) (mask)->get_proc_group() 591 592 class KMPAffinity { 593 public: 594 class Mask { 595 public: 596 void* operator new(size_t n); 597 void operator delete(void* p); 598 void* operator new[](size_t n); 599 void operator delete[](void* p); 600 virtual ~Mask() {} 601 // Set bit i to 1 602 virtual void set(int i) {} 603 // Return bit i 604 virtual bool is_set(int i) const { return false; } 605 // Set bit i to 0 606 virtual void 
clear(int i) {} 607 // Zero out entire mask 608 virtual void zero() {} 609 // Copy src into this mask 610 virtual void copy(const Mask* src) {} 611 // this &= rhs 612 virtual void bitwise_and(const Mask* rhs) {} 613 // this |= rhs 614 virtual void bitwise_or(const Mask* rhs) {} 615 // this = ~this 616 virtual void bitwise_not() {} 617 // API for iterating over an affinity mask 618 // for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) 619 virtual int begin() const { return 0; } 620 virtual int end() const { return 0; } 621 virtual int next(int previous) const { return 0; } 622 // Set the system's affinity to this affinity mask's value 623 virtual int set_system_affinity(bool abort_on_error) const { return -1; } 624 // Set this affinity mask to the current system affinity 625 virtual int get_system_affinity(bool abort_on_error) { return -1; } 626 // Only 1 DWORD in the mask should have any procs set. 627 // Return the appropriate index, or -1 for an invalid mask. 628 virtual int get_proc_group() const { return -1; } 629 }; 630 void* operator new(size_t n); 631 void operator delete(void* p); 632 // Determine if affinity is capable 633 virtual void determine_capable(const char* env_var) {} 634 // Bind the current thread to os proc 635 virtual void bind_thread(int proc) {} 636 // Factory functions to allocate/deallocate a mask 637 virtual Mask* allocate_mask() { return nullptr; } 638 virtual void deallocate_mask(Mask* m) { } 639 virtual Mask* allocate_mask_array(int num) { return nullptr; } 640 virtual void deallocate_mask_array(Mask* m) { } 641 virtual Mask* index_mask_array(Mask* m, int index) { return nullptr; } 642 static void pick_api(); 643 static void destroy_api(); 644 enum api_type { 645 NATIVE_OS 646 #if KMP_USE_HWLOC 647 , HWLOC 648 #endif 649 }; 650 virtual api_type get_api_type() const { KMP_ASSERT(0); return NATIVE_OS; }; 651 private: 652 static bool picked_api; 653 }; 654 655 typedef KMPAffinity::Mask kmp_affin_mask_t; 656 extern 
KMPAffinity* __kmp_affinity_dispatch; 657 658 // 659 // Declare local char buffers with this size for printing debug and info 660 // messages, using __kmp_affinity_print_mask(). 661 // 662 #define KMP_AFFIN_MASK_PRINT_LEN 1024 663 664 enum affinity_type { 665 affinity_none = 0, 666 affinity_physical, 667 affinity_logical, 668 affinity_compact, 669 affinity_scatter, 670 affinity_explicit, 671 affinity_balanced, 672 affinity_disabled, // not used outsize the env var parser 673 affinity_default 674 }; 675 676 enum affinity_gran { 677 affinity_gran_fine = 0, 678 affinity_gran_thread, 679 affinity_gran_core, 680 affinity_gran_package, 681 affinity_gran_node, 682 #if KMP_GROUP_AFFINITY 683 // 684 // The "group" granularity isn't necesssarily coarser than all of the 685 // other levels, but we put it last in the enum. 686 // 687 affinity_gran_group, 688 #endif /* KMP_GROUP_AFFINITY */ 689 affinity_gran_default 690 }; 691 692 enum affinity_top_method { 693 affinity_top_method_all = 0, // try all (supported) methods, in order 694 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 695 affinity_top_method_apicid, 696 affinity_top_method_x2apicid, 697 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 698 affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too 699 #if KMP_GROUP_AFFINITY 700 affinity_top_method_group, 701 #endif /* KMP_GROUP_AFFINITY */ 702 affinity_top_method_flat, 703 #if KMP_USE_HWLOC 704 affinity_top_method_hwloc, 705 #endif 706 affinity_top_method_default 707 }; 708 709 #define affinity_respect_mask_default (-1) 710 711 extern enum affinity_type __kmp_affinity_type; /* Affinity type */ 712 extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */ 713 extern int __kmp_affinity_gran_levels; /* corresponding int value */ 714 extern int __kmp_affinity_dups; /* Affinity duplicate masks */ 715 extern enum affinity_top_method __kmp_affinity_top_method; 716 extern int __kmp_affinity_compact; /* Affinity 'compact' value */ 717 extern int 
__kmp_affinity_offset; /* Affinity offset value */ 718 extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */ 719 extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */ 720 extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mask? */ 721 extern char * __kmp_affinity_proclist; /* proc ID list */ 722 extern kmp_affin_mask_t *__kmp_affinity_masks; 723 extern unsigned __kmp_affinity_num_masks; 724 extern void __kmp_affinity_bind_thread(int which); 725 726 extern kmp_affin_mask_t *__kmp_affin_fullMask; 727 extern char const * __kmp_cpuinfo_file; 728 729 #endif /* KMP_AFFINITY_SUPPORTED */ 730 731 #if OMP_40_ENABLED 732 733 // 734 // This needs to be kept in sync with the values in omp.h !!! 735 // 736 typedef enum kmp_proc_bind_t { 737 proc_bind_false = 0, 738 proc_bind_true, 739 proc_bind_master, 740 proc_bind_close, 741 proc_bind_spread, 742 proc_bind_intel, // use KMP_AFFINITY interface 743 proc_bind_default 744 } kmp_proc_bind_t; 745 746 typedef struct kmp_nested_proc_bind_t { 747 kmp_proc_bind_t *bind_types; 748 int size; 749 int used; 750 } kmp_nested_proc_bind_t; 751 752 extern kmp_nested_proc_bind_t __kmp_nested_proc_bind; 753 754 #endif /* OMP_40_ENABLED */ 755 756 # if KMP_AFFINITY_SUPPORTED 757 # define KMP_PLACE_ALL (-1) 758 # define KMP_PLACE_UNDEFINED (-2) 759 # endif /* KMP_AFFINITY_SUPPORTED */ 760 761 extern int __kmp_affinity_num_places; 762 763 764 #if OMP_40_ENABLED 765 typedef enum kmp_cancel_kind_t { 766 cancel_noreq = 0, 767 cancel_parallel = 1, 768 cancel_loop = 2, 769 cancel_sections = 3, 770 cancel_taskgroup = 4 771 } kmp_cancel_kind_t; 772 #endif // OMP_40_ENABLED 773 774 extern int __kmp_place_num_sockets; 775 extern int __kmp_place_socket_offset; 776 extern int __kmp_place_num_cores; 777 extern int __kmp_place_core_offset; 778 extern int __kmp_place_num_threads_per_core; 779 780 /* ------------------------------------------------------------------------ */ 781 /* 
------------------------------------------------------------------------ */ 782 783 #define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) 784 785 // 786 // We need to avoid using -1 as a GTID as +1 is added to the gtid 787 // when storing it in a lock, and the value 0 is reserved. 788 // 789 #define KMP_GTID_DNE (-2) /* Does not exist */ 790 #define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */ 791 #define KMP_GTID_MONITOR (-4) /* Monitor thread ID */ 792 #define KMP_GTID_UNKNOWN (-5) /* Is not known */ 793 #define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */ 794 795 #define __kmp_get_gtid() __kmp_get_global_thread_id() 796 #define __kmp_entry_gtid() __kmp_get_global_thread_id_reg() 797 798 #define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \ 799 __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid ) 800 801 #define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) ) 802 #define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \ 803 team -> t.t_threads[ (tid) ] -> th.th_info .ds.ds_gtid ) 804 805 #define __kmp_get_team() ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team ) 806 #define __kmp_team_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \ 807 __kmp_threads[ (gtid) ]-> th.th_team ) 808 809 #define __kmp_thread_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] ) 810 #define __kmp_get_thread() ( __kmp_thread_from_gtid( __kmp_get_gtid() ) ) 811 812 // Returns current thread (pointer to kmp_info_t). In contrast to __kmp_get_thread(), it works 813 // with registered and not-yet-registered threads. 814 #define __kmp_gtid_from_thread(thr) ( KMP_DEBUG_ASSERT( (thr) != NULL ), \ 815 (thr)->th.th_info.ds.ds_gtid ) 816 817 // AT: Which way is correct? 818 // AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc; 819 // AT: 2. 
nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc; 820 #define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc ) 821 822 823 /* ------------------------------------------------------------------------ */ 824 /* ------------------------------------------------------------------------ */ 825 826 #define KMP_UINT64_MAX (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1))) 827 828 #define KMP_MIN_NTH 1 829 830 #ifndef KMP_MAX_NTH 831 # if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX 832 # define KMP_MAX_NTH PTHREAD_THREADS_MAX 833 # else 834 # define KMP_MAX_NTH INT_MAX 835 # endif 836 #endif /* KMP_MAX_NTH */ 837 838 #ifdef PTHREAD_STACK_MIN 839 # define KMP_MIN_STKSIZE PTHREAD_STACK_MIN 840 #else 841 # define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) 842 #endif 843 844 #define KMP_MAX_STKSIZE (~((size_t)1<<((sizeof(size_t)*(1<<3))-1))) 845 846 #if KMP_ARCH_X86 847 # define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) 848 #elif KMP_ARCH_X86_64 849 # define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) 850 # define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024)) 851 #else 852 # define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024)) 853 #endif 854 855 #define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t) (1024 * 1024)) 856 #define KMP_MIN_MALLOC_POOL_INCR ((size_t) (4 * 1024)) 857 #define KMP_MAX_MALLOC_POOL_INCR (~((size_t)1<<((sizeof(size_t)*(1<<3))-1))) 858 859 #define KMP_MIN_STKOFFSET (0) 860 #define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE 861 #if KMP_OS_DARWIN 862 # define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET 863 #else 864 # define KMP_DEFAULT_STKOFFSET CACHE_LINE 865 #endif 866 867 #define KMP_MIN_STKPADDING (0) 868 #define KMP_MAX_STKPADDING (2 * 1024 * 1024) 869 870 #define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */ 871 #define KMP_MIN_BLOCKTIME (0) 872 #define KMP_MAX_BLOCKTIME (INT_MAX) /* Must be this for "infinite" setting the work */ 873 #define KMP_DEFAULT_BLOCKTIME (200) /* 
__kmp_blocktime is in milliseconds */ 874 875 #if KMP_USE_MONITOR 876 #define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024)) 877 #define KMP_MIN_MONITOR_WAKEUPS (1) /* min number of times monitor wakes up per second */ 878 #define KMP_MAX_MONITOR_WAKEUPS (1000) /* maximum number of times monitor can wake up per second */ 879 880 /* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */ 881 /* Only allow increasing number of wakeups */ 882 #define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ 883 ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \ 884 ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \ 885 ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \ 886 (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) ) 887 888 /* Calculate number of intervals for a specific block time based on monitor_wakeups */ 889 #define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ 890 ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \ 891 (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) ) 892 #endif // KMP_USE_MONITOR 893 894 #define KMP_MIN_STATSCOLS 40 895 #define KMP_MAX_STATSCOLS 4096 896 #define KMP_DEFAULT_STATSCOLS 80 897 898 #define KMP_MIN_INTERVAL 0 899 #define KMP_MAX_INTERVAL (INT_MAX-1) 900 #define KMP_DEFAULT_INTERVAL 0 901 902 #define KMP_MIN_CHUNK 1 903 #define KMP_MAX_CHUNK (INT_MAX-1) 904 #define KMP_DEFAULT_CHUNK 1 905 906 #define KMP_MIN_INIT_WAIT 1 907 #define KMP_MAX_INIT_WAIT (INT_MAX/2) 908 #define KMP_DEFAULT_INIT_WAIT 2048U 909 910 #define KMP_MIN_NEXT_WAIT 1 911 #define KMP_MAX_NEXT_WAIT (INT_MAX/2) 912 #define KMP_DEFAULT_NEXT_WAIT 1024U 913 914 #define KMP_DFLT_DISP_NUM_BUFF 7 915 #define KMP_MAX_ORDERED 8 916 917 #define KMP_MAX_FIELDS 32 918 919 #define KMP_MAX_BRANCH_BITS 31 920 921 #define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX 922 923 #define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX 924 925 #define 
KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX 926 927 /* Minimum number of threads before switch to TLS gtid (experimentally determined) */ 928 /* josh TODO: what about OS X* tuning? */ 929 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 930 # define KMP_TLS_GTID_MIN 5 931 #else 932 # define KMP_TLS_GTID_MIN INT_MAX 933 #endif 934 935 #define KMP_MASTER_TID(tid) ( (tid) == 0 ) 936 #define KMP_WORKER_TID(tid) ( (tid) != 0 ) 937 938 #define KMP_MASTER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) == 0 ) 939 #define KMP_WORKER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) != 0 ) 940 #define KMP_UBER_GTID(gtid) \ 941 ( \ 942 KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ), \ 943 KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ), \ 944 (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \ 945 (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\ 946 ) 947 #define KMP_INITIAL_GTID(gtid) ( (gtid) == 0 ) 948 949 #ifndef TRUE 950 #define FALSE 0 951 #define TRUE (! FALSE) 952 #endif 953 954 /* NOTE: all of the following constants must be even */ 955 956 #if KMP_OS_WINDOWS 957 # define KMP_INIT_WAIT 64U /* initial number of spin-tests */ 958 # define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */ 959 #elif KMP_OS_CNK 960 # define KMP_INIT_WAIT 16U /* initial number of spin-tests */ 961 # define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */ 962 #elif KMP_OS_LINUX 963 # define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 964 # define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 965 #elif KMP_OS_DARWIN 966 /* TODO: tune for KMP_OS_DARWIN */ 967 # define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 968 # define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 969 #elif KMP_OS_FREEBSD 970 /* TODO: tune for KMP_OS_FREEBSD */ 971 # define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 972 # define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 973 #elif KMP_OS_NETBSD 974 /* TODO: tune for KMP_OS_NETBSD */ 975 # define 
KMP_INIT_WAIT 1024U /* initial number of spin-tests */ 976 # define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ 977 #endif 978 979 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 980 typedef struct kmp_cpuid { 981 kmp_uint32 eax; 982 kmp_uint32 ebx; 983 kmp_uint32 ecx; 984 kmp_uint32 edx; 985 } kmp_cpuid_t; 986 extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); 987 # if KMP_ARCH_X86 988 extern void __kmp_x86_pause( void ); 989 # elif KMP_MIC 990 static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); } 991 # else 992 static void __kmp_x86_pause( void ) { _mm_pause(); } 993 # endif 994 # define KMP_CPU_PAUSE() __kmp_x86_pause() 995 #elif KMP_ARCH_PPC64 996 # define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1") 997 # define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2") 998 # define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory") 999 # define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0) 1000 #else 1001 # define KMP_CPU_PAUSE() /* nothing to do */ 1002 #endif 1003 1004 #define KMP_INIT_YIELD(count) { (count) = __kmp_yield_init; } 1005 1006 #define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); } 1007 1008 // Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround, 1009 // there should be no yielding since the starting value from KMP_INIT_YIELD() is odd. 1010 1011 #define KMP_YIELD_WHEN(cond,count) { KMP_CPU_PAUSE(); (count) -= 2; \ 1012 if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } } 1013 #define KMP_YIELD_SPIN(count) { KMP_CPU_PAUSE(); (count) -=2; \ 1014 if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } } 1015 1016 /* ------------------------------------------------------------------------ */ 1017 /* Support datatypes for the orphaned construct nesting checks. 
*/ 1018 /* ------------------------------------------------------------------------ */ 1019 1020 enum cons_type { 1021 ct_none, 1022 ct_parallel, 1023 ct_pdo, 1024 ct_pdo_ordered, 1025 ct_psections, 1026 ct_psingle, 1027 1028 /* the following must be left in order and not split up */ 1029 ct_taskq, 1030 ct_task, /* really task inside non-ordered taskq, considered a worksharing type */ 1031 ct_task_ordered, /* really task inside ordered taskq, considered a worksharing type */ 1032 /* the preceding must be left in order and not split up */ 1033 1034 ct_critical, 1035 ct_ordered_in_parallel, 1036 ct_ordered_in_pdo, 1037 ct_ordered_in_taskq, 1038 ct_master, 1039 ct_reduce, 1040 ct_barrier 1041 }; 1042 1043 /* test to see if we are in a taskq construct */ 1044 # define IS_CONS_TYPE_TASKQ( ct ) ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) ) 1045 # define IS_CONS_TYPE_ORDERED( ct ) ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered) 1046 1047 struct cons_data { 1048 ident_t const *ident; 1049 enum cons_type type; 1050 int prev; 1051 kmp_user_lock_p name; /* address exclusively for critical section name comparison */ 1052 }; 1053 1054 struct cons_header { 1055 int p_top, w_top, s_top; 1056 int stack_size, stack_top; 1057 struct cons_data *stack_data; 1058 }; 1059 1060 struct kmp_region_info { 1061 char *text; 1062 int offset[KMP_MAX_FIELDS]; 1063 int length[KMP_MAX_FIELDS]; 1064 }; 1065 1066 1067 /* ---------------------------------------------------------------------- */ 1068 /* ---------------------------------------------------------------------- */ 1069 1070 #if KMP_OS_WINDOWS 1071 typedef HANDLE kmp_thread_t; 1072 typedef DWORD kmp_key_t; 1073 #endif /* KMP_OS_WINDOWS */ 1074 1075 #if KMP_OS_UNIX 1076 typedef pthread_t kmp_thread_t; 1077 typedef pthread_key_t kmp_key_t; 1078 #endif 1079 1080 extern kmp_key_t __kmp_gtid_threadprivate_key; 1081 1082 typedef struct kmp_sys_info { 1083 long maxrss; /* the maximum resident set size utilized (in 
kilobytes) */ 1084 long minflt; /* the number of page faults serviced without any I/O */ 1085 long majflt; /* the number of page faults serviced that required I/O */ 1086 long nswap; /* the number of times a process was "swapped" out of memory */ 1087 long inblock; /* the number of times the file system had to perform input */ 1088 long oublock; /* the number of times the file system had to perform output */ 1089 long nvcsw; /* the number of times a context switch was voluntarily */ 1090 long nivcsw; /* the number of times a context switch was forced */ 1091 } kmp_sys_info_t; 1092 1093 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1094 typedef struct kmp_cpuinfo { 1095 int initialized; // If 0, other fields are not initialized. 1096 int signature; // CPUID(1).EAX 1097 int family; // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family ) 1098 int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model) 1099 int stepping; // CPUID(1).EAX[3:0] ( Stepping ) 1100 int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise. 1101 int rtm; // 0 if RTM instructions are not supported, 1 otherwise. 1102 int cpu_stackoffset; 1103 int apic_id; 1104 int physical_id; 1105 int logical_id; 1106 kmp_uint64 frequency; // Nominal CPU frequency in Hz. 1107 char name [3*sizeof (kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004) 1108 } kmp_cpuinfo_t; 1109 #endif 1110 1111 #ifdef BUILD_TV 1112 1113 struct tv_threadprivate { 1114 /* Record type #1 */ 1115 void *global_addr; 1116 void *thread_addr; 1117 }; 1118 1119 struct tv_data { 1120 struct tv_data *next; 1121 void *type; 1122 union tv_union { 1123 struct tv_threadprivate tp; 1124 } u; 1125 }; 1126 1127 extern kmp_key_t __kmp_tv_key; 1128 1129 #endif /* BUILD_TV */ 1130 1131 /* ------------------------------------------------------------------------ */ 1132 1133 #if USE_ITT_BUILD 1134 // We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here. 
1135 // Later we will check the type meets requirements. 1136 typedef int kmp_itt_mark_t; 1137 #define KMP_ITT_DEBUG 0 1138 #endif /* USE_ITT_BUILD */ 1139 1140 /* ------------------------------------------------------------------------ */ 1141 1142 /* 1143 * Taskq data structures 1144 */ 1145 1146 #define HIGH_WATER_MARK(nslots) (((nslots) * 3) / 4) 1147 #define __KMP_TASKQ_THUNKS_PER_TH 1 /* num thunks that each thread can simultaneously execute from a task queue */ 1148 1149 /* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags */ 1150 1151 #define TQF_IS_ORDERED 0x0001 /* __kmpc_taskq interface, taskq ordered */ 1152 #define TQF_IS_LASTPRIVATE 0x0002 /* __kmpc_taskq interface, taskq with lastprivate list */ 1153 #define TQF_IS_NOWAIT 0x0004 /* __kmpc_taskq interface, end taskq nowait */ 1154 #define TQF_HEURISTICS 0x0008 /* __kmpc_taskq interface, use heuristics to decide task queue size */ 1155 #define TQF_INTERFACE_RESERVED1 0x0010 /* __kmpc_taskq interface, reserved for future use */ 1156 #define TQF_INTERFACE_RESERVED2 0x0020 /* __kmpc_taskq interface, reserved for future use */ 1157 #define TQF_INTERFACE_RESERVED3 0x0040 /* __kmpc_taskq interface, reserved for future use */ 1158 #define TQF_INTERFACE_RESERVED4 0x0080 /* __kmpc_taskq interface, reserved for future use */ 1159 1160 #define TQF_INTERFACE_FLAGS 0x00ff /* all the __kmpc_taskq interface flags */ 1161 1162 #define TQF_IS_LAST_TASK 0x0100 /* internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE */ 1163 #define TQF_TASKQ_TASK 0x0200 /* internal use only; this thunk->th_task is the taskq_task */ 1164 #define TQF_RELEASE_WORKERS 0x0400 /* internal use only; must release worker threads once ANY queued task exists (global) */ 1165 #define TQF_ALL_TASKS_QUEUED 0x0800 /* internal use only; notify workers that master has finished enqueuing tasks */ 1166 #define TQF_PARALLEL_CONTEXT 0x1000 /* internal use only: this queue encountered in a parallel context: not 
serialized */ 1167 #define TQF_DEALLOCATED 0x2000 /* internal use only; this queue is on the freelist and not in use */ 1168 1169 #define TQF_INTERNAL_FLAGS 0x3f00 /* all the internal use only flags */ 1170 1171 typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t { 1172 kmp_int32 ai_data; 1173 } kmpc_aligned_int32_t; 1174 1175 typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t { 1176 struct kmpc_thunk_t *qs_thunk; 1177 } kmpc_aligned_queue_slot_t; 1178 1179 typedef struct kmpc_task_queue_t { 1180 /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */ 1181 kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and child ref counts */ 1182 union { 1183 struct kmpc_task_queue_t *tq_parent; /* pointer to parent taskq, not locked */ 1184 struct kmpc_task_queue_t *tq_next_free; /* for taskq internal freelists, locked with global taskq_freelist_lck */ 1185 } tq; 1186 volatile struct kmpc_task_queue_t *tq_first_child; /* pointer to linked-list of children, locked by tq's tq_link_lck */ 1187 struct kmpc_task_queue_t *tq_next_child; /* next child in linked-list, locked by parent tq's tq_link_lck */ 1188 struct kmpc_task_queue_t *tq_prev_child; /* previous child in linked-list, locked by parent tq's tq_link_lck */ 1189 volatile kmp_int32 tq_ref_count; /* reference count of threads with access to this task queue */ 1190 /* (other than the thread executing the kmpc_end_taskq call) */ 1191 /* locked by parent tq's tq_link_lck */ 1192 1193 /* shared data for task queue */ 1194 struct kmpc_aligned_shared_vars_t *tq_shareds; /* per-thread array of pointers to shared variable structures */ 1195 /* only one array element exists for all but outermost taskq */ 1196 1197 /* bookkeeping for ordered task queue */ 1198 kmp_uint32 tq_tasknum_queuing; /* ordered task number assigned while queuing tasks */ 1199 volatile kmp_uint32 tq_tasknum_serving; /* ordered number of next task to be served (executed) */ 1200 1201 /* thunk storage 
management for task queue */ 1202 kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */ 1203 struct kmpc_thunk_t *tq_free_thunks; /* thunk freelist, chained via th.th_next_free */ 1204 struct kmpc_thunk_t *tq_thunk_space; /* space allocated for thunks for this task queue */ 1205 1206 /* data fields for queue itself */ 1207 kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */ 1208 kmpc_aligned_queue_slot_t *tq_queue; /* array of queue slots to hold thunks for tasks */ 1209 volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task thunk, occupied if not NULL */ 1210 kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space) */ 1211 kmp_int32 tq_head; /* enqueue puts next item in here (index into tq_queue array) */ 1212 kmp_int32 tq_tail; /* dequeue takes next item out of here (index into tq_queue array) */ 1213 volatile kmp_int32 tq_nfull; /* # of occupied entries in task queue right now */ 1214 kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */ 1215 volatile kmp_int32 tq_flags; /* TQF_xxx */ 1216 1217 /* bookkeeping for outstanding thunks */ 1218 struct kmpc_aligned_int32_t *tq_th_thunks; /* per-thread array for # of regular thunks currently being executed */ 1219 kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */ 1220 1221 /* statistics library bookkeeping */ 1222 ident_t *tq_loc; /* source location information for taskq directive */ 1223 } kmpc_task_queue_t; 1224 1225 typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk); 1226 1227 /* sizeof_shareds passed as arg to __kmpc_taskq call */ 1228 typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */ 1229 kmpc_task_queue_t *sv_queue; 1230 /* (pointers to) shared vars */ 1231 } kmpc_shared_vars_t; 1232 1233 typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t { 1234 volatile struct kmpc_shared_vars_t *ai_data; 1235 
} kmpc_aligned_shared_vars_t; 1236 1237 /* sizeof_thunk passed as arg to kmpc_taskq call */ 1238 typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */ 1239 union { /* field used for internal freelists too */ 1240 kmpc_shared_vars_t *th_shareds; 1241 struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within queue, head at tq_free_thunks */ 1242 } th; 1243 kmpc_task_t th_task; /* taskq_task if flags & TQF_TASKQ_TASK */ 1244 struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk on this thread's call stack */ 1245 kmp_int32 th_flags; /* TQF_xxx (tq_flags interface plus possible internal flags) */ 1246 kmp_int32 th_status; 1247 kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for ordered sections */ 1248 /* private vars */ 1249 } kmpc_thunk_t; 1250 1251 typedef struct KMP_ALIGN_CACHE kmp_taskq { 1252 int tq_curr_thunk_capacity; 1253 1254 kmpc_task_queue_t *tq_root; 1255 kmp_int32 tq_global_flags; 1256 1257 kmp_lock_t tq_freelist_lck; 1258 kmpc_task_queue_t *tq_freelist; 1259 1260 kmpc_thunk_t **tq_curr_thunk; 1261 } kmp_taskq_t; 1262 1263 /* END Taskq data structures */ 1264 /* --------------------------------------------------------------------------- */ 1265 1266 typedef kmp_int32 kmp_critical_name[8]; 1267 1268 /*! 1269 @ingroup PARALLEL 1270 The type for a microtask which gets passed to @ref __kmpc_fork_call(). 1271 The arguments to the outlined function are 1272 @param global_tid the global thread identity of the thread executing the function. 1273 @param bound_tid the local identitiy of the thread executing the function 1274 @param ... pointers to shared variables accessed by the function. 1275 */ 1276 typedef void (*kmpc_micro) ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... ); 1277 typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid, kmp_int32 * bound_nth, ... ); 1278 1279 /*! 
1280 @ingroup THREADPRIVATE 1281 @{ 1282 */ 1283 /* --------------------------------------------------------------------------- */ 1284 /* Threadprivate initialization/finalization function declarations */ 1285 1286 /* for non-array objects: __kmpc_threadprivate_register() */ 1287 1288 /*! 1289 Pointer to the constructor function. 1290 The first argument is the <tt>this</tt> pointer 1291 */ 1292 typedef void *(*kmpc_ctor) (void *); 1293 1294 /*! 1295 Pointer to the destructor function. 1296 The first argument is the <tt>this</tt> pointer 1297 */ 1298 typedef void (*kmpc_dtor) (void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel compiler */ 1299 /*! 1300 Pointer to an alternate constructor. 1301 The first argument is the <tt>this</tt> pointer. 1302 */ 1303 typedef void *(*kmpc_cctor) (void *, void *); 1304 1305 /* for array objects: __kmpc_threadprivate_register_vec() */ 1306 /* First arg: "this" pointer */ 1307 /* Last arg: number of array elements */ 1308 /*! 1309 Array constructor. 1310 First argument is the <tt>this</tt> pointer 1311 Second argument the number of array elements. 1312 */ 1313 typedef void *(*kmpc_ctor_vec) (void *, size_t); 1314 /*! 1315 Pointer to the array destructor function. 1316 The first argument is the <tt>this</tt> pointer 1317 Second argument the number of array elements. 1318 */ 1319 typedef void (*kmpc_dtor_vec) (void *, size_t); 1320 /*! 1321 Array constructor. 1322 First argument is the <tt>this</tt> pointer 1323 Third argument the number of array elements. 1324 */ 1325 typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */ 1326 1327 /*! 
1328 @} 1329 */ 1330 1331 1332 /* ------------------------------------------------------------------------ */ 1333 1334 /* keeps tracked of threadprivate cache allocations for cleanup later */ 1335 typedef struct kmp_cached_addr { 1336 void **addr; /* address of allocated cache */ 1337 struct kmp_cached_addr *next; /* pointer to next cached address */ 1338 } kmp_cached_addr_t; 1339 1340 struct private_data { 1341 struct private_data *next; /* The next descriptor in the list */ 1342 void *data; /* The data buffer for this descriptor */ 1343 int more; /* The repeat count for this descriptor */ 1344 size_t size; /* The data size for this descriptor */ 1345 }; 1346 1347 struct private_common { 1348 struct private_common *next; 1349 struct private_common *link; 1350 void *gbl_addr; 1351 void *par_addr; /* par_addr == gbl_addr for MASTER thread */ 1352 size_t cmn_size; 1353 }; 1354 1355 struct shared_common 1356 { 1357 struct shared_common *next; 1358 struct private_data *pod_init; 1359 void *obj_init; 1360 void *gbl_addr; 1361 union { 1362 kmpc_ctor ctor; 1363 kmpc_ctor_vec ctorv; 1364 } ct; 1365 union { 1366 kmpc_cctor cctor; 1367 kmpc_cctor_vec cctorv; 1368 } cct; 1369 union { 1370 kmpc_dtor dtor; 1371 kmpc_dtor_vec dtorv; 1372 } dt; 1373 size_t vec_len; 1374 int is_vec; 1375 size_t cmn_size; 1376 }; 1377 1378 #define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */ 1379 #define KMP_HASH_TABLE_SIZE (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */ 1380 #define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */ 1381 #define KMP_HASH(x) ((((kmp_uintptr_t) x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1)) 1382 1383 struct common_table { 1384 struct private_common *data[ KMP_HASH_TABLE_SIZE ]; 1385 }; 1386 1387 struct shared_table { 1388 struct shared_common *data[ KMP_HASH_TABLE_SIZE ]; 1389 }; 1390 /* ------------------------------------------------------------------------ */ 1391 /* 
------------------------------------------------------------------------ */ 1392 1393 #if KMP_STATIC_STEAL_ENABLED 1394 typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { 1395 kmp_int32 count; 1396 kmp_int32 ub; 1397 /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ 1398 kmp_int32 lb; 1399 kmp_int32 st; 1400 kmp_int32 tc; 1401 kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put after ub */ 1402 1403 // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on ) 1404 // a) parm3 is properly aligned and 1405 // b) all parm1-4 are in the same cache line. 1406 // Because of parm1-4 are used together, performance seems to be better 1407 // if they are in the same line (not measured though). 1408 1409 struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template 1410 kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should 1411 kmp_int32 parm2; // make no real change at least while padding is off. 1412 kmp_int32 parm3; 1413 kmp_int32 parm4; 1414 }; 1415 1416 kmp_uint32 ordered_lower; 1417 kmp_uint32 ordered_upper; 1418 #if KMP_OS_WINDOWS 1419 // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'. 1420 // It would be nice to measure execution times. 1421 // Conditional if/endif can be removed at all. 
1422 kmp_int32 last_upper; 1423 #endif /* KMP_OS_WINDOWS */ 1424 } dispatch_private_info32_t; 1425 1426 typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { 1427 kmp_int64 count; /* current chunk number for static and static-steal scheduling*/ 1428 kmp_int64 ub; /* upper-bound */ 1429 /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ 1430 kmp_int64 lb; /* lower-bound */ 1431 kmp_int64 st; /* stride */ 1432 kmp_int64 tc; /* trip count (number of iterations) */ 1433 kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put after ub */ 1434 1435 /* parm[1-4] are used in different ways by different scheduling algorithms */ 1436 1437 // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) 1438 // a) parm3 is properly aligned and 1439 // b) all parm1-4 are in the same cache line. 1440 // Because of parm1-4 are used together, performance seems to be better 1441 // if they are in the same line (not measured though). 1442 1443 struct KMP_ALIGN( 32 ) { 1444 kmp_int64 parm1; 1445 kmp_int64 parm2; 1446 kmp_int64 parm3; 1447 kmp_int64 parm4; 1448 }; 1449 1450 kmp_uint64 ordered_lower; 1451 kmp_uint64 ordered_upper; 1452 #if KMP_OS_WINDOWS 1453 // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'. 1454 // It would be nice to measure execution times. 1455 // Conditional if/endif can be removed at all. 
1456 kmp_int64 last_upper; 1457 #endif /* KMP_OS_WINDOWS */ 1458 } dispatch_private_info64_t; 1459 #else /* KMP_STATIC_STEAL_ENABLED */ 1460 typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { 1461 kmp_int32 lb; 1462 kmp_int32 ub; 1463 kmp_int32 st; 1464 kmp_int32 tc; 1465 1466 kmp_int32 parm1; 1467 kmp_int32 parm2; 1468 kmp_int32 parm3; 1469 kmp_int32 parm4; 1470 1471 kmp_int32 count; 1472 1473 kmp_uint32 ordered_lower; 1474 kmp_uint32 ordered_upper; 1475 #if KMP_OS_WINDOWS 1476 kmp_int32 last_upper; 1477 #endif /* KMP_OS_WINDOWS */ 1478 } dispatch_private_info32_t; 1479 1480 typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { 1481 kmp_int64 lb; /* lower-bound */ 1482 kmp_int64 ub; /* upper-bound */ 1483 kmp_int64 st; /* stride */ 1484 kmp_int64 tc; /* trip count (number of iterations) */ 1485 1486 /* parm[1-4] are used in different ways by different scheduling algorithms */ 1487 kmp_int64 parm1; 1488 kmp_int64 parm2; 1489 kmp_int64 parm3; 1490 kmp_int64 parm4; 1491 1492 kmp_int64 count; /* current chunk number for static scheduling */ 1493 1494 kmp_uint64 ordered_lower; 1495 kmp_uint64 ordered_upper; 1496 #if KMP_OS_WINDOWS 1497 kmp_int64 last_upper; 1498 #endif /* KMP_OS_WINDOWS */ 1499 } dispatch_private_info64_t; 1500 #endif /* KMP_STATIC_STEAL_ENABLED */ 1501 1502 typedef struct KMP_ALIGN_CACHE dispatch_private_info { 1503 union private_info { 1504 dispatch_private_info32_t p32; 1505 dispatch_private_info64_t p64; 1506 } u; 1507 enum sched_type schedule; /* scheduling algorithm */ 1508 kmp_int32 ordered; /* ordered clause specified */ 1509 kmp_int32 ordered_bumped; 1510 kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar 1511 struct dispatch_private_info * next; /* stack of buffers for nest of serial regions */ 1512 kmp_int32 nomerge; /* don't merge iters if serialized */ 1513 kmp_int32 type_size; /* the size of types in private_info */ 1514 enum cons_type pushed_ws; 1515 } 
dispatch_private_info_t; 1516 1517 typedef struct dispatch_shared_info32 { 1518 /* chunk index under dynamic, number of idle threads under static-steal; 1519 iteration index otherwise */ 1520 volatile kmp_uint32 iteration; 1521 volatile kmp_uint32 num_done; 1522 volatile kmp_uint32 ordered_iteration; 1523 kmp_int32 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar 1524 } dispatch_shared_info32_t; 1525 1526 typedef struct dispatch_shared_info64 { 1527 /* chunk index under dynamic, number of idle threads under static-steal; 1528 iteration index otherwise */ 1529 volatile kmp_uint64 iteration; 1530 volatile kmp_uint64 num_done; 1531 volatile kmp_uint64 ordered_iteration; 1532 kmp_int64 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar 1533 } dispatch_shared_info64_t; 1534 1535 typedef struct dispatch_shared_info { 1536 union shared_info { 1537 dispatch_shared_info32_t s32; 1538 dispatch_shared_info64_t s64; 1539 } u; 1540 volatile kmp_uint32 buffer_index; 1541 #if OMP_45_ENABLED 1542 volatile kmp_int32 doacross_buf_idx; // teamwise index 1543 volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1) 1544 kmp_int32 doacross_num_done; // count finished threads 1545 #endif 1546 #if KMP_USE_HWLOC 1547 // When linking with libhwloc, the ORDERED EPCC test slows down on big 1548 // machines (> 48 cores). Performance analysis showed that a cache thrash 1549 // was occurring and this padding helps alleviate the problem. 
1550 char padding[64]; 1551 #endif 1552 } dispatch_shared_info_t; 1553 1554 typedef struct kmp_disp { 1555 /* Vector for ORDERED SECTION */ 1556 void (*th_deo_fcn)( int * gtid, int * cid, ident_t *); 1557 /* Vector for END ORDERED SECTION */ 1558 void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *); 1559 1560 dispatch_shared_info_t *th_dispatch_sh_current; 1561 dispatch_private_info_t *th_dispatch_pr_current; 1562 1563 dispatch_private_info_t *th_disp_buffer; 1564 kmp_int32 th_disp_index; 1565 #if OMP_45_ENABLED 1566 kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index 1567 volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags 1568 union { // we can use union here because doacross cannot be used in nonmonotonic loops 1569 kmp_int64 *th_doacross_info; // info on loop bounds 1570 kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) 1571 }; 1572 #else 1573 #if KMP_STATIC_STEAL_ENABLED 1574 kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) 1575 void* dummy_padding[1]; // make it 64 bytes on Intel(R) 64 1576 #else 1577 void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64 1578 #endif 1579 #endif 1580 #if KMP_USE_INTERNODE_ALIGNMENT 1581 char more_padding[INTERNODE_CACHE_LINE]; 1582 #endif 1583 } kmp_disp_t; 1584 1585 /* ------------------------------------------------------------------------ */ 1586 /* ------------------------------------------------------------------------ */ 1587 1588 /* Barrier stuff */ 1589 1590 /* constants for barrier state update */ 1591 #define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */ 1592 #define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */ 1593 #define KMP_BARRIER_UNUSED_BIT 1 /* bit that must never be set for valid state */ 1594 #define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */ 1595 1596 #define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT) 1597 #define 
KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT) 1598 #define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT) 1599 1600 #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT) 1601 # error "Barrier sleep bit must be smaller than barrier bump bit" 1602 #endif 1603 #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT) 1604 # error "Barrier unused bit must be smaller than barrier bump bit" 1605 #endif 1606 1607 // Constants for release barrier wait state: currently, hierarchical only 1608 #define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep 1609 #define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release 1610 #define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release 1611 #define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go 1612 #define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up 1613 1614 enum barrier_type { 1615 bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */ 1616 bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */ 1617 #if KMP_FAST_REDUCTION_BARRIER 1618 bs_reduction_barrier, /* 2, All barriers that are used in reduction */ 1619 #endif // KMP_FAST_REDUCTION_BARRIER 1620 bs_last_barrier /* Just a placeholder to mark the end */ 1621 }; 1622 1623 // to work with reduction barriers just like with plain barriers 1624 #if !KMP_FAST_REDUCTION_BARRIER 1625 #define bs_reduction_barrier bs_plain_barrier 1626 #endif // KMP_FAST_REDUCTION_BARRIER 1627 1628 typedef enum kmp_bar_pat { /* Barrier communication patterns */ 1629 bp_linear_bar = 0, /* Single level (degenerate) tree */ 1630 bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */ 1631 bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */ 1632 bp_hierarchical_bar = 3, /* Machine hierarchy tree */ 1633 bp_last_bar = 4 /* Placeholder to 
mark the end */ 1634 } kmp_bar_pat_e; 1635 1636 # define KMP_BARRIER_ICV_PUSH 1 1637 1638 /* Record for holding the values of the internal controls stack records */ 1639 typedef struct kmp_internal_control { 1640 int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */ 1641 kmp_int8 nested; /* internal control for nested parallelism (per thread) */ 1642 kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */ 1643 kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */ 1644 int blocktime; /* internal control for blocktime */ 1645 #if KMP_USE_MONITOR 1646 int bt_intervals; /* internal control for blocktime intervals */ 1647 #endif 1648 int nproc; /* internal control for #threads for next parallel region (per thread) */ 1649 int max_active_levels; /* internal control for max_active_levels */ 1650 kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */ 1651 #if OMP_40_ENABLED 1652 kmp_proc_bind_t proc_bind; /* internal control for affinity */ 1653 kmp_int32 default_device; /* internal control for default device */ 1654 #endif // OMP_40_ENABLED 1655 struct kmp_internal_control *next; 1656 } kmp_internal_control_t; 1657 1658 static inline void 1659 copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) { 1660 *dst = *src; 1661 } 1662 1663 /* Thread barrier needs volatile barrier fields */ 1664 typedef struct KMP_ALIGN_CACHE kmp_bstate { 1665 // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it). 1666 // It is not explicitly aligned below, because we *don't* want it to be padded -- instead, 1667 // we fit b_go into the same cache line with th_fixed_icvs, enabling NGO cache lines 1668 // stores in the hierarchical barrier. 
kmp_internal_control_t th_fixed_icvs;       // Initial ICVs for the thread
    // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store
    volatile kmp_uint64 b_go;                   // STATE => task should proceed (hierarchical)
    KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived;    // STATE => task reached synch point.
    // NOTE(review): the fields below have no comments in this header; judging by their names they
    // describe this thread's position in the hierarchical barrier tree -- confirm against the
    // barrier implementation before relying on that.
    kmp_uint32 *skip_per_level;
    kmp_uint32 my_level;
    kmp_int32 parent_tid;
    kmp_int32 old_tid;
    kmp_uint32 depth;
    struct kmp_bstate *parent_bar;
    kmp_team_t *team;
    kmp_uint64 leaf_state;
    kmp_uint32 nproc;
    kmp_uint8 base_leaf_kids;
    kmp_uint8 leaf_kids;
    kmp_uint8 offset;
    kmp_uint8 wait_flag;
    kmp_uint8 use_oncore_barrier;
#if USE_DEBUGGER
    // The following field is intended for the debugger solely. Only the worker thread itself accesses this
    // field: the worker increases it by 1 when it arrives to a barrier.
    KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
#endif /* USE_DEBUGGER */
} kmp_bstate_t;

/* Cache-aligned wrapper around kmp_bstate_t; b_pad sizes the union via KMP_PAD(kmp_bstate_t, CACHE_LINE). */
union KMP_ALIGN_CACHE kmp_barrier_union {
    double       b_align;        /* use worst case alignment */
    char         b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ];
    kmp_bstate_t bb;
};

typedef union kmp_barrier_union kmp_balign_t;

/* Team barrier needs only non-volatile arrived counter */
union KMP_ALIGN_CACHE kmp_barrier_team_union {
    double       b_align;        /* use worst case alignment */
    char         b_pad[ CACHE_LINE ];
    struct {
        kmp_uint64   b_arrived;       /* STATE => task reached synch point. */
#if USE_DEBUGGER
        // The following two fields are intended for the debugger solely. Only master of the team accesses
        // these fields: the first one is increased by 1 when master arrives to a barrier, the
        // second one is increased by one when all the threads arrived.
kmp_uint b_master_arrived;
        kmp_uint b_team_arrived;
#endif
    };
};

typedef union kmp_barrier_team_union kmp_balign_team_t;

/*
 * Padding for Linux* OS pthreads condition variables and mutexes used to signal
 * threads when a condition changes.  This is to workaround an NPTL bug
 * where padding was added to pthread_cond_t which caused the initialization
 * routine to write outside of the structure if compiled on pre-NPTL threads.
 *
 * NOTE(review): this remark describes the KMP_OS_UNIX unions further down
 * (kmp_cond_union / kmp_mutex_union), not the Windows* OS types that follow it.
 */

#if KMP_OS_WINDOWS
/* Mutex wrapper for Windows* OS: holds a single CRITICAL_SECTION. */
typedef struct kmp_win32_mutex
{
    /* The Lock */
    CRITICAL_SECTION cs;
} kmp_win32_mutex_t;

/* Condition-variable state for Windows* OS: counters, their lock, and one event handle. */
typedef struct kmp_win32_cond
{
    /* Count of the number of waiters. */
    int waiters_count_;

    /* Serialize access to <waiters_count_> */
    kmp_win32_mutex_t waiters_count_lock_;

    /* Number of threads to release via a <cond_broadcast> or a */
    /* <cond_signal> */
    int release_count_;

    /* Keeps track of the current "generation" so that we don't allow */
    /* one thread to steal all the "releases" from the broadcast. */
    int wait_generation_count_;

    /* A manual-reset event that's used to block and release waiting */
    /* threads. */
    HANDLE event_;
} kmp_win32_cond_t;
#endif

#if KMP_OS_UNIX

/* pthread_cond_t overlaid with a CACHE_LINE-sized pad and a double for alignment. */
union KMP_ALIGN_CACHE kmp_cond_union {
    double          c_align;
    char            c_pad[ CACHE_LINE ];
    pthread_cond_t  c_cond;
};

typedef union kmp_cond_union kmp_cond_align_t;

/* pthread_mutex_t overlaid with a CACHE_LINE-sized pad and a double for alignment. */
union KMP_ALIGN_CACHE kmp_mutex_union {
    double          m_align;
    char            m_pad[ CACHE_LINE ];
    pthread_mutex_t m_mutex;
};

typedef union kmp_mutex_union kmp_mutex_align_t;

#endif /* KMP_OS_UNIX */

/* Per-thread descriptor: stack base/size/growth, the OS thread object, and the thread's tid and gtid. */
typedef struct kmp_desc_base {
    void    *ds_stackbase;
    size_t            ds_stacksize;
    int               ds_stackgrow;
    kmp_thread_t      ds_thread;
    volatile int      ds_tid;
    int               ds_gtid;
#if KMP_OS_WINDOWS
    volatile int      ds_alive;
    DWORD             ds_thread_id;
        /*
            ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes. However,
            debugger support (libomp_db) cannot work with handles, because they are not comparable. For
            example, debugger requests info about thread with handle h. h is valid within debugger
            process, and meaningless within debuggee process. Even if h is duped by call to
            DuplicateHandle(), so the result h' is valid within debuggee process, but it is a *new*
            handle which does *not* equal to any other handle in debuggee... The only way to
            compare handles is convert them to system-wide ids. GetThreadId() function is
            available only in Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is
            available on all Windows* OS flavours (including Windows* 95). Thus, we have to get thread id by
            call to GetCurrentThreadId() from within the thread and save it to let libomp_db
            identify threads.
1798 */ 1799 #endif /* KMP_OS_WINDOWS */ 1800 } kmp_desc_base_t; 1801 1802 typedef union KMP_ALIGN_CACHE kmp_desc { 1803 double ds_align; /* use worst case alignment */ 1804 char ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ]; 1805 kmp_desc_base_t ds; 1806 } kmp_desc_t; 1807 1808 1809 typedef struct kmp_local { 1810 volatile int this_construct; /* count of single's encountered by thread */ 1811 void *reduce_data; 1812 #if KMP_USE_BGET 1813 void *bget_data; 1814 void *bget_list; 1815 #if ! USE_CMP_XCHG_FOR_BGET 1816 #ifdef USE_QUEUING_LOCK_FOR_BGET 1817 kmp_lock_t bget_lock; /* Lock for accessing bget free list */ 1818 #else 1819 kmp_bootstrap_lock_t bget_lock; /* Lock for accessing bget free list */ 1820 /* Must be bootstrap lock so we can use it at library shutdown */ 1821 #endif /* USE_LOCK_FOR_BGET */ 1822 #endif /* ! USE_CMP_XCHG_FOR_BGET */ 1823 #endif /* KMP_USE_BGET */ 1824 1825 #ifdef BUILD_TV 1826 struct tv_data *tv_data; 1827 #endif 1828 1829 PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */ 1830 1831 } kmp_local_t; 1832 1833 #define KMP_CHECK_UPDATE(a, b) if ((a) != (b)) (a) = (b) 1834 #define KMP_CHECK_UPDATE_SYNC(a, b) if ((a) != (b)) TCW_SYNC_PTR((a), (b)) 1835 1836 #define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) 1837 #define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) 1838 #if KMP_USE_MONITOR 1839 #define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) 1840 #endif 1841 1842 #define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested) 1843 #define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) 1844 #define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc) 1845 #define get__sched_2(xteam,xtid) 
((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched) 1846 1847 #define set__blocktime_team( xteam, xtid, xval ) \ 1848 ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) ) 1849 1850 #if KMP_USE_MONITOR 1851 #define set__bt_intervals_team( xteam, xtid, xval ) \ 1852 ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) ) 1853 #endif 1854 1855 #define set__bt_set_team( xteam, xtid, xval ) \ 1856 ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) ) 1857 1858 1859 #define set__nested( xthread, xval ) \ 1860 ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) ) 1861 #define get__nested( xthread ) \ 1862 ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) ) 1863 1864 #define set__dynamic( xthread, xval ) \ 1865 ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) ) 1866 #define get__dynamic( xthread ) \ 1867 ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? 
(FTN_TRUE) : (FTN_FALSE) ) 1868 1869 #define set__nproc( xthread, xval ) \ 1870 ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) ) 1871 1872 #define set__max_active_levels( xthread, xval ) \ 1873 ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) ) 1874 1875 #define set__sched( xthread, xval ) \ 1876 ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) ) 1877 1878 #if OMP_40_ENABLED 1879 1880 #define set__proc_bind( xthread, xval ) \ 1881 ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) ) 1882 #define get__proc_bind( xthread ) \ 1883 ( (xthread)->th.th_current_task->td_icvs.proc_bind ) 1884 1885 #endif /* OMP_40_ENABLED */ 1886 1887 1888 /* ------------------------------------------------------------------------ */ 1889 // OpenMP tasking data structures 1890 // 1891 1892 typedef enum kmp_tasking_mode { 1893 tskm_immediate_exec = 0, 1894 tskm_extra_barrier = 1, 1895 tskm_task_teams = 2, 1896 tskm_max = 2 1897 } kmp_tasking_mode_t; 1898 1899 extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */ 1900 extern kmp_int32 __kmp_task_stealing_constraint; 1901 #if OMP_40_ENABLED 1902 extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if specified, defaults to 0 otherwise 1903 #endif 1904 #if OMP_45_ENABLED 1905 extern kmp_int32 __kmp_max_task_priority; // Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise 1906 #endif 1907 1908 /* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with taskdata first */ 1909 #define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *) task) - 1) 1910 #define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1) 1911 1912 // The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and 1913 // queued since the previous barrier release. 1914 #define KMP_TASKING_ENABLED(task_team) \ 1915 (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) 1916 /*! 
@ingroup BASIC_TYPES
@{
*/

/*!
Pointer to a task routine: takes a kmp_int32 and a void * and returns a kmp_int32.
 */
typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * );

#if OMP_40_ENABLED || OMP_45_ENABLED
/* Compiler-supplied optional data: one slot holds either a priority or a destructors routine. */
typedef union kmp_cmplrdata {
#if OMP_45_ENABLED
    kmp_int32           priority;           /**< priority specified by user for the task */
#endif // OMP_45_ENABLED
#if OMP_40_ENABLED
    kmp_routine_entry_t destructors;        /* pointer to function to invoke destructors of firstprivate C++ objects */
#endif // OMP_40_ENABLED
    /* future data */
} kmp_cmplrdata_t;
#endif

/*  sizeof_kmp_task_t passed as arg to kmpc_omp_task call  */
/*!
Task descriptor: shared-variable block pointer, task routine and part id, followed (when
OMP_40_ENABLED or OMP_45_ENABLED) by two optional compiler data slots.
 */
typedef struct kmp_task {                   /* GEH: Shouldn't this be aligned somehow? */
    void *              shareds;            /**< pointer to block of pointers to shared vars   */
    kmp_routine_entry_t routine;            /**< pointer to routine to call for executing task */
    kmp_int32           part_id;            /**< part id for the task                          */
#if OMP_40_ENABLED || OMP_45_ENABLED
    kmp_cmplrdata_t data1;                  /* Two known optional additions: destructors and priority */
    kmp_cmplrdata_t data2;                  /* Process destructors first, priority second */
    /* future data */
#endif
    /*  private vars  */
} kmp_task_t;

/*!
1953 @} 1954 */ 1955 1956 #if OMP_40_ENABLED 1957 typedef struct kmp_taskgroup { 1958 kmp_uint32 count; // number of allocated and not yet complete tasks 1959 kmp_int32 cancel_request; // request for cancellation of this taskgroup 1960 struct kmp_taskgroup *parent; // parent taskgroup 1961 } kmp_taskgroup_t; 1962 1963 1964 // forward declarations 1965 typedef union kmp_depnode kmp_depnode_t; 1966 typedef struct kmp_depnode_list kmp_depnode_list_t; 1967 typedef struct kmp_dephash_entry kmp_dephash_entry_t; 1968 1969 typedef struct kmp_depend_info { 1970 kmp_intptr_t base_addr; 1971 size_t len; 1972 struct { 1973 bool in:1; 1974 bool out:1; 1975 } flags; 1976 } kmp_depend_info_t; 1977 1978 struct kmp_depnode_list { 1979 kmp_depnode_t * node; 1980 kmp_depnode_list_t * next; 1981 }; 1982 1983 typedef struct kmp_base_depnode { 1984 kmp_depnode_list_t * successors; 1985 kmp_task_t * task; 1986 1987 kmp_lock_t lock; 1988 1989 #if KMP_SUPPORT_GRAPH_OUTPUT 1990 kmp_uint32 id; 1991 #endif 1992 1993 volatile kmp_int32 npredecessors; 1994 volatile kmp_int32 nrefs; 1995 } kmp_base_depnode_t; 1996 1997 union KMP_ALIGN_CACHE kmp_depnode { 1998 double dn_align; /* use worst case alignment */ 1999 char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ]; 2000 kmp_base_depnode_t dn; 2001 }; 2002 2003 struct kmp_dephash_entry { 2004 kmp_intptr_t addr; 2005 kmp_depnode_t * last_out; 2006 kmp_depnode_list_t * last_ins; 2007 kmp_dephash_entry_t * next_in_bucket; 2008 }; 2009 2010 typedef struct kmp_dephash { 2011 kmp_dephash_entry_t ** buckets; 2012 size_t size; 2013 #ifdef KMP_DEBUG 2014 kmp_uint32 nelements; 2015 kmp_uint32 nconflicts; 2016 #endif 2017 } kmp_dephash_t; 2018 2019 #endif 2020 2021 #ifdef BUILD_TIED_TASK_STACK 2022 2023 /* Tied Task stack definitions */ 2024 typedef struct kmp_stack_block { 2025 kmp_taskdata_t * sb_block[ TASK_STACK_BLOCK_SIZE ]; 2026 struct kmp_stack_block * sb_next; 2027 struct kmp_stack_block * sb_prev; 2028 } kmp_stack_block_t; 2029 2030 typedef struct 
kmp_task_stack { 2031 kmp_stack_block_t ts_first_block; // first block of stack entries 2032 kmp_taskdata_t ** ts_top; // pointer to the top of stack 2033 kmp_int32 ts_entries; // number of entries on the stack 2034 } kmp_task_stack_t; 2035 2036 #endif // BUILD_TIED_TASK_STACK 2037 2038 typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ 2039 /* Compiler flags */ /* Total compiler flags must be 16 bits */ 2040 unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ 2041 unsigned final : 1; /* task is final(1) so execute immediately */ 2042 unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */ 2043 #if OMP_40_ENABLED 2044 unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */ 2045 #if OMP_45_ENABLED 2046 unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */ 2047 unsigned priority_specified :1; /* set if the compiler provides priority setting for the task */ 2048 unsigned reserved : 10; /* reserved for compiler use */ 2049 #else 2050 unsigned reserved : 12; /* reserved for compiler use */ 2051 #endif 2052 #else // OMP_40_ENABLED 2053 unsigned reserved : 13; /* reserved for compiler use */ 2054 #endif // OMP_40_ENABLED 2055 2056 /* Library flags */ /* Total library flags must be 16 bits */ 2057 unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ 2058 unsigned task_serial : 1; /* this task is executed immediately (1) or deferred (0) */ 2059 unsigned tasking_ser : 1; /* all tasks in team are either executed immediately (1) or may be deferred (0) */ 2060 unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */ 2061 /* If either team_serial or tasking_ser is set, task team may be NULL */ 2062 /* Task State Flags: */ 2063 unsigned started : 1; /* 1==started, 0==not started */ 2064 unsigned executing : 1; /* 1==executing, 0==not executing 
*/ 2065 unsigned complete : 1; /* 1==complete, 0==not complete */ 2066 unsigned freed : 1; /* 1==freed, 0==allocateed */ 2067 unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ 2068 unsigned reserved31 : 7; /* reserved for library use */ 2069 2070 } kmp_tasking_flags_t; 2071 2072 2073 struct kmp_taskdata { /* aligned during dynamic allocation */ 2074 kmp_int32 td_task_id; /* id, assigned by debugger */ 2075 kmp_tasking_flags_t td_flags; /* task flags */ 2076 kmp_team_t * td_team; /* team for this task */ 2077 kmp_info_p * td_alloc_thread; /* thread that allocated data structures */ 2078 /* Currently not used except for perhaps IDB */ 2079 kmp_taskdata_t * td_parent; /* parent task */ 2080 kmp_int32 td_level; /* task nesting level */ 2081 kmp_int32 td_untied_count; /* untied task active parts counter */ 2082 ident_t * td_ident; /* task identifier */ 2083 // Taskwait data. 2084 ident_t * td_taskwait_ident; 2085 kmp_uint32 td_taskwait_counter; 2086 kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */ 2087 KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */ 2088 KMP_ALIGN_CACHE volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */ 2089 volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */ 2090 #if OMP_40_ENABLED 2091 kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup 2092 kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here 2093 kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies 2094 #endif 2095 #if OMPT_SUPPORT 2096 ompt_task_info_t ompt_task_info; 2097 #endif 2098 #if OMP_45_ENABLED 2099 kmp_task_team_t * td_task_team; 2100 kmp_int32 td_size_alloc; // The size of task structure, including shareds etc. 
2101 #endif 2102 }; // struct kmp_taskdata 2103 2104 // Make sure padding above worked 2105 KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 ); 2106 2107 // Data for task team but per thread 2108 typedef struct kmp_base_thread_data { 2109 kmp_info_p * td_thr; // Pointer back to thread info 2110 // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued? 2111 kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque 2112 kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated 2113 kmp_int32 td_deque_size; // Size of deck 2114 kmp_uint32 td_deque_head; // Head of deque (will wrap) 2115 kmp_uint32 td_deque_tail; // Tail of deque (will wrap) 2116 kmp_int32 td_deque_ntasks; // Number of tasks in deque 2117 // GEH: shouldn't this be volatile since used in while-spin? 2118 kmp_int32 td_deque_last_stolen; // Thread number of last successful steal 2119 #ifdef BUILD_TIED_TASK_STACK 2120 kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task scheduling constraint 2121 #endif // BUILD_TIED_TASK_STACK 2122 } kmp_base_thread_data_t; 2123 2124 #define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE 2125 #define INITIAL_TASK_DEQUE_SIZE ( 1 << TASK_DEQUE_BITS ) 2126 2127 #define TASK_DEQUE_SIZE(td) ((td).td_deque_size) 2128 #define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1) 2129 2130 typedef union KMP_ALIGN_CACHE kmp_thread_data { 2131 kmp_base_thread_data_t td; 2132 double td_align; /* use worst case alignment */ 2133 char td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ]; 2134 } kmp_thread_data_t; 2135 2136 2137 // Data for task teams which are used when tasking is enabled for the team 2138 typedef struct kmp_base_task_team { 2139 kmp_bootstrap_lock_t tt_threads_lock; /* Lock used to allocate per-thread part of task team */ 2140 /* must be bootstrap lock since used at library shutdown*/ 2141 kmp_task_team_t * tt_next; /* For linking the task team free 
list */ 2142 kmp_thread_data_t * tt_threads_data; /* Array of per-thread structures for task team */ 2143 /* Data survives task team deallocation */ 2144 kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while executing this team? */ 2145 /* TRUE means tt_threads_data is set up and initialized */ 2146 kmp_int32 tt_nproc; /* #threads in team */ 2147 kmp_int32 tt_max_threads; /* number of entries allocated for threads_data array */ 2148 #if OMP_45_ENABLED 2149 kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */ 2150 #endif 2151 2152 KMP_ALIGN_CACHE 2153 volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */ 2154 2155 KMP_ALIGN_CACHE 2156 volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */ 2157 } kmp_base_task_team_t; 2158 2159 union KMP_ALIGN_CACHE kmp_task_team { 2160 kmp_base_task_team_t tt; 2161 double tt_align; /* use worst case alignment */ 2162 char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ]; 2163 }; 2164 2165 #if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 ) 2166 // Free lists keep same-size free memory slots for fast memory allocation routines 2167 typedef struct kmp_free_list { 2168 void *th_free_list_self; // Self-allocated tasks free list 2169 void *th_free_list_sync; // Self-allocated tasks stolen/returned by other threads 2170 void *th_free_list_other; // Non-self free list (to be returned to owner's sync list) 2171 } kmp_free_list_t; 2172 #endif 2173 #if KMP_NESTED_HOT_TEAMS 2174 // Hot teams array keeps hot teams and their sizes for given thread. 2175 // Hot teams are not put in teams pool, and they don't put threads in threads pool. 
2176 typedef struct kmp_hot_team_ptr { 2177 kmp_team_p *hot_team; // pointer to hot_team of given nesting level 2178 kmp_int32 hot_team_nth; // number of threads allocated for the hot_team 2179 } kmp_hot_team_ptr_t; 2180 #endif 2181 #if OMP_40_ENABLED 2182 typedef struct kmp_teams_size { 2183 kmp_int32 nteams; // number of teams in a league 2184 kmp_int32 nth; // number of threads in each team of the league 2185 } kmp_teams_size_t; 2186 #endif 2187 2188 /* ------------------------------------------------------------------------ */ 2189 // OpenMP thread data structures 2190 // 2191 2192 typedef struct KMP_ALIGN_CACHE kmp_base_info { 2193 /* 2194 * Start with the readonly data which is cache aligned and padded. 2195 * this is written before the thread starts working by the master. 2196 * (uber masters may update themselves later) 2197 * (usage does not consider serialized regions) 2198 */ 2199 kmp_desc_t th_info; 2200 kmp_team_p *th_team; /* team we belong to */ 2201 kmp_root_p *th_root; /* pointer to root of task hierarchy */ 2202 kmp_info_p *th_next_pool; /* next available thread in the pool */ 2203 kmp_disp_t *th_dispatch; /* thread's dispatch data */ 2204 int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */ 2205 2206 /* The following are cached from the team info structure */ 2207 /* TODO use these in more places as determined to be needed via profiling */ 2208 int th_team_nproc; /* number of threads in a team */ 2209 kmp_info_p *th_team_master; /* the team's master thread */ 2210 int th_team_serialized; /* team is serialized */ 2211 #if OMP_40_ENABLED 2212 microtask_t th_teams_microtask; /* save entry address for teams construct */ 2213 int th_teams_level; /* save initial level of teams construct */ 2214 /* it is 0 on device but may be any on host */ 2215 #endif 2216 2217 /* The blocktime info is copied from the team struct to the thread sruct */ 2218 /* at the start of a barrier, and the values stored in the team are used */ 2219 /* at points in the code 
where the team struct is no longer guaranteed */ 2220 /* to exist (from the POV of worker threads). */ 2221 #if KMP_USE_MONITOR 2222 int th_team_bt_intervals; 2223 #endif 2224 int th_team_bt_set; 2225 2226 2227 #if KMP_AFFINITY_SUPPORTED 2228 kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */ 2229 #endif 2230 2231 /* 2232 * The data set by the master at reinit, then R/W by the worker 2233 */ 2234 KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */ 2235 #if KMP_NESTED_HOT_TEAMS 2236 kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */ 2237 #endif 2238 #if OMP_40_ENABLED 2239 kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */ 2240 kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */ 2241 # if KMP_AFFINITY_SUPPORTED 2242 int th_current_place; /* place currently bound to */ 2243 int th_new_place; /* place to bind to in par reg */ 2244 int th_first_place; /* first place in partition */ 2245 int th_last_place; /* last place in partition */ 2246 # endif 2247 #endif 2248 #if USE_ITT_BUILD 2249 kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */ 2250 kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */ 2251 kmp_uint64 th_frame_time; /* frame timestamp */ 2252 #endif /* USE_ITT_BUILD */ 2253 kmp_local_t th_local; 2254 struct private_common *th_pri_head; 2255 2256 /* 2257 * Now the data only used by the worker (after initial allocation) 2258 */ 2259 /* TODO the first serial team should actually be stored in the info_t 2260 * structure. 
this will help reduce initial allocation overhead */ 2261 KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/ 2262 2263 #if OMPT_SUPPORT 2264 ompt_thread_info_t ompt_thread_info; 2265 #endif 2266 2267 /* The following are also read by the master during reinit */ 2268 struct common_table *th_pri_common; 2269 2270 volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */ 2271 /* while awaiting queuing lock acquire */ 2272 2273 volatile void *th_sleep_loc; // this points at a kmp_flag<T> 2274 2275 ident_t *th_ident; 2276 unsigned th_x; // Random number generator data 2277 unsigned th_a; // Random number generator data 2278 2279 /* 2280 * Tasking-related data for the thread 2281 */ 2282 kmp_task_team_t * th_task_team; // Task team struct 2283 kmp_taskdata_t * th_current_task; // Innermost Task being executed 2284 kmp_uint8 th_task_state; // alternating 0/1 for task team identification 2285 kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels 2286 kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack 2287 kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack 2288 2289 /* 2290 * More stuff for keeping track of active/sleeping threads 2291 * (this part is written by the worker thread) 2292 */ 2293 kmp_uint8 th_active_in_pool; // included in count of 2294 // #active threads in pool 2295 int th_active; // ! sleeping 2296 // 32 bits for TCR/TCW 2297 2298 struct cons_header * th_cons; // used for consistency check 2299 2300 /* 2301 * Add the syncronizing data which is cache aligned and padded. 
2302 */ 2303 KMP_ALIGN_CACHE kmp_balign_t th_bar[ bs_last_barrier ]; 2304 2305 KMP_ALIGN_CACHE volatile kmp_int32 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */ 2306 2307 #if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 ) 2308 #define NUM_LISTS 4 2309 kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory allocation routines 2310 #endif 2311 2312 #if KMP_OS_WINDOWS 2313 kmp_win32_cond_t th_suspend_cv; 2314 kmp_win32_mutex_t th_suspend_mx; 2315 int th_suspend_init; 2316 #endif 2317 #if KMP_OS_UNIX 2318 kmp_cond_align_t th_suspend_cv; 2319 kmp_mutex_align_t th_suspend_mx; 2320 int th_suspend_init_count; 2321 #endif 2322 2323 #if USE_ITT_BUILD 2324 kmp_itt_mark_t th_itt_mark_single; 2325 // alignment ??? 2326 #endif /* USE_ITT_BUILD */ 2327 #if KMP_STATS_ENABLED 2328 kmp_stats_list* th_stats; 2329 #endif 2330 } kmp_base_info_t; 2331 2332 typedef union KMP_ALIGN_CACHE kmp_info { 2333 double th_align; /* use worst case alignment */ 2334 char th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ]; 2335 kmp_base_info_t th; 2336 } kmp_info_t; 2337 2338 /* ------------------------------------------------------------------------ */ 2339 // OpenMP thread team data structures 2340 // 2341 typedef struct kmp_base_data { 2342 volatile kmp_uint32 t_value; 2343 } kmp_base_data_t; 2344 2345 typedef union KMP_ALIGN_CACHE kmp_sleep_team { 2346 double dt_align; /* use worst case alignment */ 2347 char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; 2348 kmp_base_data_t dt; 2349 } kmp_sleep_team_t; 2350 2351 typedef union KMP_ALIGN_CACHE kmp_ordered_team { 2352 double dt_align; /* use worst case alignment */ 2353 char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; 2354 kmp_base_data_t dt; 2355 } kmp_ordered_team_t; 2356 2357 typedef int (*launch_t)( int gtid ); 2358 2359 /* Minimum number of ARGV entries to malloc if necessary */ 2360 #define KMP_MIN_MALLOC_ARGV_ENTRIES 100 2361 2362 // Set up how many argv pointers will fit in cache lines 
containing t_inline_argv. Historically, we 2363 // have supported at least 96 bytes. Using a larger value for more space between the master write/worker 2364 // read section and read/write by all section seems to buy more performance on EPCC PARALLEL. 2365 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2366 # define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) ) 2367 #else 2368 # define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) ) 2369 #endif 2370 #define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP ) 2371 2372 typedef struct KMP_ALIGN_CACHE kmp_base_team { 2373 // Synchronization Data --------------------------------------------------------------------------------- 2374 KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered; 2375 kmp_balign_team_t t_bar[ bs_last_barrier ]; 2376 volatile int t_construct; // count of single directive encountered by team 2377 kmp_lock_t t_single_lock; // team specific lock 2378 2379 // Master only ----------------------------------------------------------------------------------------- 2380 KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team 2381 int t_master_this_cons; // "this_construct" single counter of master in parent team 2382 ident_t *t_ident; // if volatile, have to change too much other crud to volatile too 2383 kmp_team_p *t_parent; // parent team 2384 kmp_team_p *t_next_pool; // next free team in the team pool 2385 kmp_disp_t *t_dispatch; // thread's dispatch data 2386 kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2 2387 #if OMP_40_ENABLED 2388 kmp_proc_bind_t t_proc_bind; // bind type for par region 2389 #endif // OMP_40_ENABLED 2390 #if USE_ITT_BUILD 2391 kmp_uint64 t_region_time; // region begin timestamp 2392 #endif /* USE_ITT_BUILD */ 2393 2394 // Master write, workers read 
-------------------------------------------------------------------------- 2395 KMP_ALIGN_CACHE void **t_argv; 2396 int t_argc; 2397 int t_nproc; // number of threads in team 2398 microtask_t t_pkfn; 2399 launch_t t_invoke; // procedure to launch the microtask 2400 2401 #if OMPT_SUPPORT 2402 ompt_team_info_t ompt_team_info; 2403 ompt_lw_taskteam_t *ompt_serialized_team_info; 2404 #endif 2405 2406 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2407 kmp_int8 t_fp_control_saved; 2408 kmp_int8 t_pad2b; 2409 kmp_int16 t_x87_fpu_control_word; // FP control regs 2410 kmp_uint32 t_mxcsr; 2411 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 2412 2413 void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ]; 2414 2415 KMP_ALIGN_CACHE kmp_info_t **t_threads; 2416 kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task 2417 int t_level; // nested parallel level 2418 2419 KMP_ALIGN_CACHE int t_max_argc; 2420 int t_max_nproc; // maximum threads this team can handle (dynamicly expandable) 2421 int t_serialized; // levels deep of serialized teams 2422 dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system 2423 int t_id; // team's id, assigned by debugger. 2424 int t_active_level; // nested active parallel level 2425 kmp_r_sched_t t_sched; // run-time schedule for the team 2426 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 2427 int t_first_place; // first & last place in parent thread's partition. 2428 int t_last_place; // Restore these values to master after par region. 2429 #endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 2430 int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call 2431 2432 // Read/write by workers as well ----------------------------------------------------------------------- 2433 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 2434 // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel' 2435 // and 'barrier' on fxe256lin01. 
This extra padding serves to fix the performance of epcc 'parallel' 2436 // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding. 2437 char dummy_padding[1024]; 2438 #endif 2439 KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams. 2440 // for SERIALIZED teams nested 2 or more levels deep 2441 #if OMP_40_ENABLED 2442 kmp_int32 t_cancel_request; // typed flag to store request state of cancellation 2443 #endif 2444 int t_master_active; // save on fork, restore on join 2445 kmp_taskq_t t_taskq; // this team's task queue 2446 void *t_copypriv_data; // team specific pointer to copyprivate data array 2447 kmp_uint32 t_copyin_counter; 2448 #if USE_ITT_BUILD 2449 void *t_stack_id; // team specific stack stitching id (for ittnotify) 2450 #endif /* USE_ITT_BUILD */ 2451 } kmp_base_team_t; 2452 2453 union KMP_ALIGN_CACHE kmp_team { 2454 kmp_base_team_t t; 2455 double t_align; /* use worst case alignment */ 2456 char t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ]; 2457 }; 2458 2459 2460 typedef union KMP_ALIGN_CACHE kmp_time_global { 2461 double dt_align; /* use worst case alignment */ 2462 char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; 2463 kmp_base_data_t dt; 2464 } kmp_time_global_t; 2465 2466 typedef struct kmp_base_global { 2467 /* cache-aligned */ 2468 kmp_time_global_t g_time; 2469 2470 /* non cache-aligned */ 2471 volatile int g_abort; 2472 volatile int g_done; 2473 2474 int g_dynamic; 2475 enum dynamic_mode g_dynamic_mode; 2476 } kmp_base_global_t; 2477 2478 typedef union KMP_ALIGN_CACHE kmp_global { 2479 kmp_base_global_t g; 2480 double g_align; /* use worst case alignment */ 2481 char g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ]; 2482 } kmp_global_t; 2483 2484 2485 typedef struct kmp_base_root { 2486 // TODO: GEH - combine r_active with r_in_parallel then r_active == (r_in_parallel>= 0) 2487 // TODO: GEH - then replace r_active with t_active_levels if we can to 
reduce the synch 2488 // overhead or keeping r_active 2489 2490 volatile int r_active; /* TRUE if some region in a nest has > 1 thread */ 2491 // GEH: This is misnamed, should be r_in_parallel 2492 volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely. 2493 int r_in_parallel; /* keeps a count of active parallel regions per root */ 2494 // GEH: This is misnamed, should be r_active_levels 2495 kmp_team_t *r_root_team; 2496 kmp_team_t *r_hot_team; 2497 kmp_info_t *r_uber_thread; 2498 kmp_lock_t r_begin_lock; 2499 volatile int r_begin; 2500 int r_blocktime; /* blocktime for this root and descendants */ 2501 } kmp_base_root_t; 2502 2503 typedef union KMP_ALIGN_CACHE kmp_root { 2504 kmp_base_root_t r; 2505 double r_align; /* use worst case alignment */ 2506 char r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ]; 2507 } kmp_root_t; 2508 2509 struct fortran_inx_info { 2510 kmp_int32 data; 2511 }; 2512 2513 /* ------------------------------------------------------------------------ */ 2514 2515 /* ------------------------------------------------------------------------ */ 2516 /* ------------------------------------------------------------------------ */ 2517 2518 extern int __kmp_settings; 2519 extern int __kmp_duplicate_library_ok; 2520 #if USE_ITT_BUILD 2521 extern int __kmp_forkjoin_frames; 2522 extern int __kmp_forkjoin_frames_mode; 2523 #endif 2524 extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method; 2525 extern int __kmp_determ_red; 2526 2527 #ifdef KMP_DEBUG 2528 extern int kmp_a_debug; 2529 extern int kmp_b_debug; 2530 extern int kmp_c_debug; 2531 extern int kmp_d_debug; 2532 extern int kmp_e_debug; 2533 extern int kmp_f_debug; 2534 #endif /* KMP_DEBUG */ 2535 2536 /* For debug information logging using rotating buffer */ 2537 #define KMP_DEBUG_BUF_LINES_INIT 512 2538 #define KMP_DEBUG_BUF_LINES_MIN 1 2539 2540 #define KMP_DEBUG_BUF_CHARS_INIT 128 2541 #define KMP_DEBUG_BUF_CHARS_MIN 2 2542 2543 extern int __kmp_debug_buf; /* TRUE 
means use buffer, FALSE means print to stderr */ 2544 extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */ 2545 extern int __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */ 2546 extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer entry pointer */ 2547 2548 extern char *__kmp_debug_buffer; /* Debug buffer itself */ 2549 extern int __kmp_debug_count; /* Counter for number of lines printed in buffer so far */ 2550 extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */ 2551 /* end rotating debug buffer */ 2552 2553 #ifdef KMP_DEBUG 2554 extern int __kmp_par_range; /* +1 => only go par for constructs in range */ 2555 2556 #define KMP_PAR_RANGE_ROUTINE_LEN 1024 2557 extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN]; 2558 #define KMP_PAR_RANGE_FILENAME_LEN 1024 2559 extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN]; 2560 extern int __kmp_par_range_lb; 2561 extern int __kmp_par_range_ub; 2562 #endif 2563 2564 /* For printing out dynamic storage map for threads and teams */ 2565 extern int __kmp_storage_map; /* True means print storage map for threads and teams */ 2566 extern int __kmp_storage_map_verbose; /* True means storage map includes placement info */ 2567 extern int __kmp_storage_map_verbose_specified; 2568 2569 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2570 extern kmp_cpuinfo_t __kmp_cpuinfo; 2571 #endif 2572 2573 extern volatile int __kmp_init_serial; 2574 extern volatile int __kmp_init_gtid; 2575 extern volatile int __kmp_init_common; 2576 extern volatile int __kmp_init_middle; 2577 extern volatile int __kmp_init_parallel; 2578 #if KMP_USE_MONITOR 2579 extern volatile int __kmp_init_monitor; 2580 #endif 2581 extern volatile int __kmp_init_user_locks; 2582 extern int __kmp_init_counter; 2583 extern int __kmp_root_counter; 2584 extern int __kmp_version; 2585 2586 /* list of address of allocated caches for commons */ 2587 
extern kmp_cached_addr_t *__kmp_threadpriv_cache_list; 2588 2589 /* Barrier algorithm types and options */ 2590 extern kmp_uint32 __kmp_barrier_gather_bb_dflt; 2591 extern kmp_uint32 __kmp_barrier_release_bb_dflt; 2592 extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt; 2593 extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt; 2594 extern kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ]; 2595 extern kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ]; 2596 extern kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ]; 2597 extern kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ]; 2598 extern char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ]; 2599 extern char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ]; 2600 extern char const *__kmp_barrier_type_name [ bs_last_barrier ]; 2601 extern char const *__kmp_barrier_pattern_name [ bp_last_bar ]; 2602 2603 /* Global Locks */ 2604 extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */ 2605 extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ 2606 extern kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ 2607 #if KMP_USE_MONITOR 2608 extern kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ 2609 #endif 2610 extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */ 2611 2612 extern kmp_lock_t __kmp_global_lock; /* control OS/global access */ 2613 extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */ 2614 extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */ 2615 2616 /* used for yielding spin-waits */ 2617 extern unsigned int __kmp_init_wait; /* initial number of spin-tests */ 2618 extern unsigned int __kmp_next_wait; /* susequent number of spin-tests */ 2619 2620 extern enum library_type __kmp_library; 2621 2622 extern 
enum sched_type __kmp_sched; /* default runtime scheduling */ 2623 extern enum sched_type __kmp_static; /* default static scheduling method */ 2624 extern enum sched_type __kmp_guided; /* default guided scheduling method */ 2625 extern enum sched_type __kmp_auto; /* default auto scheduling method */ 2626 extern int __kmp_chunk; /* default runtime chunk size */ 2627 2628 extern size_t __kmp_stksize; /* stack size per thread */ 2629 #if KMP_USE_MONITOR 2630 extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */ 2631 #endif 2632 extern size_t __kmp_stkoffset; /* stack offset per thread */ 2633 extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */ 2634 2635 extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */ 2636 extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */ 2637 extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */ 2638 extern int __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */ 2639 extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */ 2640 extern int __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */ 2641 extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */ 2642 extern int __kmp_env_checks; /* was KMP_CHECKS specified? */ 2643 extern int __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */ 2644 extern int __kmp_generate_warnings; /* should we issue warnings? */ 2645 extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? 
*/ 2646 2647 #ifdef DEBUG_SUSPEND 2648 extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */ 2649 #endif 2650 2651 extern kmp_uint32 __kmp_yield_init; 2652 extern kmp_uint32 __kmp_yield_next; 2653 2654 #if KMP_USE_MONITOR 2655 extern kmp_uint32 __kmp_yielding_on; 2656 extern kmp_uint32 __kmp_yield_cycle; 2657 extern kmp_int32 __kmp_yield_on_count; 2658 extern kmp_int32 __kmp_yield_off_count; 2659 #endif 2660 2661 /* ------------------------------------------------------------------------- */ 2662 extern int __kmp_allThreadsSpecified; 2663 2664 extern size_t __kmp_align_alloc; 2665 /* following data protected by initialization routines */ 2666 extern int __kmp_xproc; /* number of processors in the system */ 2667 extern int __kmp_avail_proc; /* number of processors available to the process */ 2668 extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */ 2669 extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */ 2670 extern int __kmp_max_nth; /* maximum total number of concurrently-existing threads */ 2671 extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */ 2672 extern int __kmp_dflt_team_nth; /* default number of threads in a parallel region a la OMP_NUM_THREADS */ 2673 extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial initialization */ 2674 extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */ 2675 extern int __kmp_tp_cached; /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */ 2676 extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la OMP_NESTED */ 2677 extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */ 2678 #if KMP_USE_MONITOR 2679 extern int __kmp_monitor_wakeups;/* number of times monitor wakes up per second */ 2680 extern int __kmp_bt_intervals; /* number of monitor timestamp intervals 
before blocking */ 2681 #endif 2682 #ifdef KMP_ADJUST_BLOCKTIME 2683 extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */ 2684 #endif /* KMP_ADJUST_BLOCKTIME */ 2685 #ifdef KMP_DFLT_NTH_CORES 2686 extern int __kmp_ncores; /* Total number of cores for threads placement */ 2687 #endif 2688 extern int __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */ 2689 2690 extern int __kmp_need_register_atfork_specified; 2691 extern int __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */ 2692 extern int __kmp_gtid_mode; /* Method of getting gtid, values: 2693 0 - not set, will be set at runtime 2694 1 - using stack search 2695 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS)) 2696 3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only. 2697 */ 2698 extern int __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */ 2699 #ifdef KMP_TDATA_GTID 2700 #if KMP_OS_WINDOWS 2701 extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */ 2702 #else 2703 extern __thread int __kmp_gtid; 2704 #endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */ 2705 #endif 2706 extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */ 2707 extern int __kmp_foreign_tp; /* If true, separate TP var for each foreign thread */ 2708 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2709 extern int __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */ 2710 extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */ 2711 extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */ 2712 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 2713 2714 extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */ 2715 extern int __kmp_dispatch_num_buffers; /* max 
possible dynamic loops in concurrent execution per team */ 2716 #if KMP_NESTED_HOT_TEAMS 2717 extern int __kmp_hot_teams_mode; 2718 extern int __kmp_hot_teams_max_level; 2719 #endif 2720 2721 # if KMP_OS_LINUX 2722 extern enum clock_function_type __kmp_clock_function; 2723 extern int __kmp_clock_function_param; 2724 # endif /* KMP_OS_LINUX */ 2725 2726 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) 2727 extern enum mic_type __kmp_mic_type; 2728 #endif 2729 2730 # ifdef USE_LOAD_BALANCE 2731 extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */ 2732 # endif /* USE_LOAD_BALANCE */ 2733 2734 // OpenMP 3.1 - Nested num threads array 2735 typedef struct kmp_nested_nthreads_t { 2736 int * nth; 2737 int size; 2738 int used; 2739 } kmp_nested_nthreads_t; 2740 2741 extern kmp_nested_nthreads_t __kmp_nested_nth; 2742 2743 #if KMP_USE_ADAPTIVE_LOCKS 2744 2745 // Parameters for the speculative lock backoff system. 2746 struct kmp_adaptive_backoff_params_t { 2747 // Number of soft retries before it counts as a hard retry. 2748 kmp_uint32 max_soft_retries; 2749 // Badness is a bit mask : 0,1,3,7,15,... 
on each hard failure we move one to the right 2750 kmp_uint32 max_badness; 2751 }; 2752 2753 extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params; 2754 2755 #if KMP_DEBUG_ADAPTIVE_LOCKS 2756 extern char * __kmp_speculative_statsfile; 2757 #endif 2758 2759 #endif // KMP_USE_ADAPTIVE_LOCKS 2760 2761 #if OMP_40_ENABLED 2762 extern int __kmp_display_env; /* TRUE or FALSE */ 2763 extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */ 2764 extern int __kmp_omp_cancellation; /* TRUE or FALSE */ 2765 #endif 2766 2767 /* ------------------------------------------------------------------------- */ 2768 2769 /* --------------------------------------------------------------------------- */ 2770 /* the following are protected by the fork/join lock */ 2771 /* write: lock read: anytime */ 2772 extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */ 2773 /* read/write: lock */ 2774 extern volatile kmp_team_t * __kmp_team_pool; 2775 extern volatile kmp_info_t * __kmp_thread_pool; 2776 2777 /* total number of threads reachable from some root thread including all root threads*/ 2778 extern volatile int __kmp_nth; 2779 /* total number of threads reachable from some root thread including all root threads, 2780 and those in the thread pool */ 2781 extern volatile int __kmp_all_nth; 2782 extern int __kmp_thread_pool_nth; 2783 extern volatile int __kmp_thread_pool_active_nth; 2784 2785 extern kmp_root_t **__kmp_root; /* root of thread hierarchy */ 2786 /* end data protected by fork/join lock */ 2787 /* --------------------------------------------------------------------------- */ 2788 2789 extern kmp_global_t __kmp_global; /* global status */ 2790 2791 extern kmp_info_t __kmp_monitor; 2792 extern volatile kmp_uint32 __kmp_team_counter; // Used by Debugging Support Library. 2793 extern volatile kmp_uint32 __kmp_task_counter; // Used by Debugging Support Library. 
2794 2795 #if USE_DEBUGGER 2796 2797 #define _KMP_GEN_ID( counter ) \ 2798 ( \ 2799 __kmp_debugging \ 2800 ? \ 2801 KMP_TEST_THEN_INC32( (volatile kmp_int32 *) & counter ) + 1 \ 2802 : \ 2803 ~ 0 \ 2804 ) 2805 #else 2806 #define _KMP_GEN_ID( counter ) \ 2807 ( \ 2808 ~ 0 \ 2809 ) 2810 #endif /* USE_DEBUGGER */ 2811 2812 #define KMP_GEN_TASK_ID() _KMP_GEN_ID( __kmp_task_counter ) 2813 #define KMP_GEN_TEAM_ID() _KMP_GEN_ID( __kmp_team_counter ) 2814 2815 /* ------------------------------------------------------------------------ */ 2816 /* ------------------------------------------------------------------------ */ 2817 2818 extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... ); 2819 2820 extern void __kmp_serial_initialize( void ); 2821 extern void __kmp_middle_initialize( void ); 2822 extern void __kmp_parallel_initialize( void ); 2823 2824 extern void __kmp_internal_begin( void ); 2825 extern void __kmp_internal_end_library( int gtid ); 2826 extern void __kmp_internal_end_thread( int gtid ); 2827 extern void __kmp_internal_end_atexit( void ); 2828 extern void __kmp_internal_end_fini( void ); 2829 extern void __kmp_internal_end_dtor( void ); 2830 extern void __kmp_internal_end_dest( void* ); 2831 2832 extern int __kmp_register_root( int initial_thread ); 2833 extern void __kmp_unregister_root( int gtid ); 2834 2835 extern int __kmp_ignore_mppbeg( void ); 2836 extern int __kmp_ignore_mppend( void ); 2837 2838 extern int __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ); 2839 extern void __kmp_exit_single( int gtid ); 2840 2841 extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ); 2842 extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ); 2843 2844 #ifdef USE_LOAD_BALANCE 2845 extern int __kmp_get_load_balance( int ); 2846 #endif 2847 2848 #ifdef BUILD_TV 2849 extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void 
*thread_addr ); 2850 #endif 2851 2852 extern int __kmp_get_global_thread_id( void ); 2853 extern int __kmp_get_global_thread_id_reg( void ); 2854 extern void __kmp_exit_thread( int exit_status ); 2855 extern void __kmp_abort( char const * format, ... ); 2856 extern void __kmp_abort_thread( void ); 2857 extern void __kmp_abort_process( void ); 2858 extern void __kmp_warn( char const * format, ... ); 2859 2860 extern void __kmp_set_num_threads( int new_nth, int gtid ); 2861 2862 // Returns current thread (pointer to kmp_info_t). Current thread *must* be registered. 2863 static inline kmp_info_t * __kmp_entry_thread() 2864 { 2865 int gtid = __kmp_entry_gtid(); 2866 2867 return __kmp_threads[gtid]; 2868 } 2869 2870 extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels ); 2871 extern int __kmp_get_max_active_levels( int gtid ); 2872 extern int __kmp_get_ancestor_thread_num( int gtid, int level ); 2873 extern int __kmp_get_team_size( int gtid, int level ); 2874 extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk ); 2875 extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk ); 2876 2877 extern unsigned short __kmp_get_random( kmp_info_t * thread ); 2878 extern void __kmp_init_random( kmp_info_t * thread ); 2879 2880 extern kmp_r_sched_t __kmp_get_schedule_global( void ); 2881 extern void __kmp_adjust_num_threads( int new_nproc ); 2882 2883 extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL ); 2884 extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL ); 2885 extern void ___kmp_free( void * ptr KMP_SRC_LOC_DECL ); 2886 #define __kmp_allocate( size ) ___kmp_allocate( (size) KMP_SRC_LOC_CURR ) 2887 #define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR ) 2888 #define __kmp_free( ptr ) ___kmp_free( (ptr) KMP_SRC_LOC_CURR ) 2889 2890 #if USE_FAST_MEMORY 2891 extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL ); 2892 extern void 
___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL ); 2893 extern void __kmp_free_fast_memory( kmp_info_t *this_thr ); 2894 extern void __kmp_initialize_fast_memory( kmp_info_t *this_thr ); 2895 #define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR ) 2896 #define __kmp_fast_free( this_thr, ptr ) ___kmp_fast_free( (this_thr), (ptr) KMP_SRC_LOC_CURR ) 2897 #endif 2898 2899 extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL ); 2900 extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL ); 2901 extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL ); 2902 extern void ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL ); 2903 #define __kmp_thread_malloc( th, size ) ___kmp_thread_malloc( (th), (size) KMP_SRC_LOC_CURR ) 2904 #define __kmp_thread_calloc( th, nelem, elsize ) ___kmp_thread_calloc( (th), (nelem), (elsize) KMP_SRC_LOC_CURR ) 2905 #define __kmp_thread_realloc( th, ptr, size ) ___kmp_thread_realloc( (th), (ptr), (size) KMP_SRC_LOC_CURR ) 2906 #define __kmp_thread_free( th, ptr ) ___kmp_thread_free( (th), (ptr) KMP_SRC_LOC_CURR ) 2907 2908 #define KMP_INTERNAL_MALLOC(sz) malloc(sz) 2909 #define KMP_INTERNAL_FREE(p) free(p) 2910 #define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz)) 2911 #define KMP_INTERNAL_CALLOC(n,sz) calloc((n),(sz)) 2912 2913 extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads ); 2914 2915 #if OMP_40_ENABLED 2916 extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind ); 2917 extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num_threads ); 2918 #endif 2919 2920 extern void __kmp_yield( int cond ); 2921 2922 extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, 2923 enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, 2924 kmp_int32 chunk ); 2925 extern 
void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, 2926 enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, 2927 kmp_int32 chunk ); 2928 extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, 2929 enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, 2930 kmp_int64 chunk ); 2931 extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, 2932 enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, 2933 kmp_int64 chunk ); 2934 2935 extern int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, 2936 kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st ); 2937 extern int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, 2938 kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st ); 2939 extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, 2940 kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st ); 2941 extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, 2942 kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st ); 2943 2944 extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid ); 2945 extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid ); 2946 extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid ); 2947 extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid ); 2948 2949 2950 #ifdef KMP_GOMP_COMPAT 2951 2952 extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, 2953 enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, 2954 kmp_int32 chunk, int push_ws ); 2955 extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, 2956 enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, 2957 kmp_int32 chunk, int push_ws ); 2958 extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, 2959 enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, 2960 kmp_int64 chunk, int push_ws ); 2961 
extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, 2962 enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, 2963 kmp_int64 chunk, int push_ws ); 2964 extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid ); 2965 extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid ); 2966 extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid ); 2967 extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid ); 2968 2969 #endif /* KMP_GOMP_COMPAT */ 2970 2971 2972 extern kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker ); 2973 extern kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker ); 2974 extern kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker ); 2975 extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker ); 2976 extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker ); 2977 extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj ); 2978 extern void __kmp_wait_yield_4_ptr( void * spinner, kmp_uint32 checker, kmp_uint32 (* pred)( void *, kmp_uint32 ), void * obj ); 2979 2980 class kmp_flag_32; 2981 class kmp_flag_64; 2982 class kmp_flag_oncore; 2983 extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin 2984 #if USE_ITT_BUILD 2985 , void * itt_sync_obj 2986 #endif 2987 ); 2988 extern void __kmp_release_64(kmp_flag_64 *flag); 2989 2990 extern void __kmp_infinite_loop( void ); 2991 2992 extern void __kmp_cleanup( void ); 2993 2994 #if KMP_HANDLE_SIGNALS 2995 extern int __kmp_handle_signals; 2996 extern void __kmp_install_signals( int parallel_init ); 2997 extern void __kmp_remove_signals( void ); 2998 #endif 2999 3000 extern void __kmp_clear_system_time( void ); 3001 extern void __kmp_read_system_time( double *delta ); 3002 3003 extern void __kmp_check_stack_overlap( kmp_info_t *thr ); 3004 3005 
extern void __kmp_expand_host_name( char *buffer, size_t size ); 3006 extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern ); 3007 3008 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3009 extern void __kmp_initialize_system_tick( void ); /* Initialize timer tick value */ 3010 #endif 3011 3012 extern void __kmp_runtime_initialize( void ); /* machine specific initialization */ 3013 extern void __kmp_runtime_destroy( void ); 3014 3015 #if KMP_AFFINITY_SUPPORTED 3016 extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask); 3017 extern void __kmp_affinity_initialize(void); 3018 extern void __kmp_affinity_uninitialize(void); 3019 extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */ 3020 #if OMP_40_ENABLED 3021 extern void __kmp_affinity_set_place(int gtid); 3022 #endif 3023 extern void __kmp_affinity_determine_capable( const char *env_var ); 3024 extern int __kmp_aux_set_affinity(void **mask); 3025 extern int __kmp_aux_get_affinity(void **mask); 3026 extern int __kmp_aux_get_affinity_max_proc(); 3027 extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask); 3028 extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask); 3029 extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask); 3030 extern void __kmp_balanced_affinity( int tid, int team_size ); 3031 #endif /* KMP_AFFINITY_SUPPORTED */ 3032 3033 extern void __kmp_cleanup_hierarchy(); 3034 extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar); 3035 3036 #if KMP_USE_FUTEX 3037 3038 extern int __kmp_futex_determine_capable( void ); 3039 3040 #endif // KMP_USE_FUTEX 3041 3042 extern void __kmp_gtid_set_specific( int gtid ); 3043 extern int __kmp_gtid_get_specific( void ); 3044 3045 extern double __kmp_read_cpu_time( void ); 3046 3047 extern int __kmp_read_system_info( struct kmp_sys_info *info ); 3048 3049 #if KMP_USE_MONITOR 3050 extern void __kmp_create_monitor( 
kmp_info_t *th ); 3051 #endif 3052 3053 extern void *__kmp_launch_thread( kmp_info_t *thr ); 3054 3055 extern void __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size ); 3056 3057 #if KMP_OS_WINDOWS 3058 extern int __kmp_still_running(kmp_info_t *th); 3059 extern int __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val ); 3060 extern void __kmp_free_handle( kmp_thread_t tHandle ); 3061 #endif 3062 3063 #if KMP_USE_MONITOR 3064 extern void __kmp_reap_monitor( kmp_info_t *th ); 3065 #endif 3066 extern void __kmp_reap_worker( kmp_info_t *th ); 3067 extern void __kmp_terminate_thread( int gtid ); 3068 3069 extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag ); 3070 extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag ); 3071 extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag ); 3072 extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag ); 3073 extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag ); 3074 extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag ); 3075 3076 extern void __kmp_elapsed( double * ); 3077 extern void __kmp_elapsed_tick( double * ); 3078 3079 extern void __kmp_enable( int old_state ); 3080 extern void __kmp_disable( int *old_state ); 3081 3082 extern void __kmp_thread_sleep( int millis ); 3083 3084 extern void __kmp_common_initialize( void ); 3085 extern void __kmp_common_destroy( void ); 3086 extern void __kmp_common_destroy_gtid( int gtid ); 3087 3088 #if KMP_OS_UNIX 3089 extern void __kmp_register_atfork( void ); 3090 #endif 3091 extern void __kmp_suspend_initialize( void ); 3092 extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th ); 3093 3094 extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root, 3095 kmp_team_t *team, int tid); 3096 #if OMP_40_ENABLED 3097 extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, 3098 #if OMPT_SUPPORT 3099 ompt_parallel_id_t ompt_parallel_id, 3100 #endif 3101 
kmp_proc_bind_t proc_bind, 3102 kmp_internal_control_t *new_icvs, 3103 int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) ); 3104 #else 3105 extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, 3106 #if OMPT_SUPPORT 3107 ompt_parallel_id_t ompt_parallel_id, 3108 #endif 3109 kmp_internal_control_t *new_icvs, 3110 int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) ); 3111 #endif // OMP_40_ENABLED 3112 extern void __kmp_free_thread( kmp_info_t * ); 3113 extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) ); 3114 extern kmp_team_t * __kmp_reap_team( kmp_team_t * ); 3115 3116 /* ------------------------------------------------------------------------ */ 3117 3118 extern void __kmp_initialize_bget( kmp_info_t *th ); 3119 extern void __kmp_finalize_bget( kmp_info_t *th ); 3120 3121 KMP_EXPORT void *kmpc_malloc( size_t size ); 3122 KMP_EXPORT void *kmpc_aligned_malloc( size_t size, size_t alignment ); 3123 KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize ); 3124 KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size ); 3125 KMP_EXPORT void kmpc_free( void *ptr ); 3126 3127 /* ------------------------------------------------------------------------ */ 3128 /* declarations for internal use */ 3129 3130 extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split, 3131 size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) ); 3132 extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid ); 3133 3134 /*! 3135 * Tell the fork call which compiler generated the fork call, and therefore how to deal with the call. 3136 */ 3137 enum fork_context_e 3138 { 3139 fork_context_gnu, /**< Called from GNU generated code, so must not invoke the microtask internally. */ 3140 fork_context_intel, /**< Called from Intel generated code. 
*/ 3141 fork_context_last 3142 }; 3143 extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context, 3144 kmp_int32 argc, 3145 #if OMPT_SUPPORT 3146 void *unwrapped_task, 3147 #endif 3148 microtask_t microtask, launch_t invoker, 3149 /* TODO: revert workaround for Intel(R) 64 tracker #96 */ 3150 #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX 3151 va_list *ap 3152 #else 3153 va_list ap 3154 #endif 3155 ); 3156 3157 extern void __kmp_join_call( ident_t *loc, int gtid 3158 #if OMPT_SUPPORT 3159 , enum fork_context_e fork_context 3160 #endif 3161 #if OMP_40_ENABLED 3162 , int exit_teams = 0 3163 #endif 3164 ); 3165 3166 extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid); 3167 extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team ); 3168 extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team ); 3169 extern int __kmp_invoke_task_func( int gtid ); 3170 extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team ); 3171 extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team ); 3172 3173 // should never have been exported 3174 KMP_EXPORT int __kmpc_invoke_task_func( int gtid ); 3175 #if OMP_40_ENABLED 3176 extern int __kmp_invoke_teams_master( int gtid ); 3177 extern void __kmp_teams_master( int gtid ); 3178 #endif 3179 extern void __kmp_save_internal_controls( kmp_info_t * thread ); 3180 extern void __kmp_user_set_library (enum library_type arg); 3181 extern void __kmp_aux_set_library (enum library_type arg); 3182 extern void __kmp_aux_set_stacksize( size_t arg); 3183 extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid); 3184 extern void __kmp_aux_set_defaults( char const * str, int len ); 3185 3186 /* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */ 3187 void kmpc_set_blocktime (int arg); 3188 void ompc_set_nested( int flag ); 3189 void 
ompc_set_dynamic( int flag ); 3190 void ompc_set_num_threads( int arg ); 3191 3192 extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr, 3193 kmp_team_t *team, int tid ); 3194 extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr ); 3195 extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, 3196 kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 3197 kmp_routine_entry_t task_entry ); 3198 extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, 3199 kmp_team_t *team, int tid, int set_curr_task ); 3200 extern void __kmp_finish_implicit_task(kmp_info_t *this_thr); 3201 extern void __kmp_free_implicit_task(kmp_info_t *this_thr); 3202 3203 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, 3204 int *thread_finished, 3205 #if USE_ITT_BUILD 3206 void * itt_sync_obj, 3207 #endif /* USE_ITT_BUILD */ 3208 kmp_int32 is_constrained); 3209 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, 3210 int *thread_finished, 3211 #if USE_ITT_BUILD 3212 void * itt_sync_obj, 3213 #endif /* USE_ITT_BUILD */ 3214 kmp_int32 is_constrained); 3215 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, 3216 int *thread_finished, 3217 #if USE_ITT_BUILD 3218 void * itt_sync_obj, 3219 #endif /* USE_ITT_BUILD */ 3220 kmp_int32 is_constrained); 3221 3222 extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ); 3223 extern void __kmp_reap_task_teams( void ); 3224 extern void __kmp_wait_to_unref_task_teams( void ); 3225 extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always ); 3226 extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team ); 3227 extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team 3228 #if USE_ITT_BUILD 3229 , void * itt_sync_obj 3230 #endif /* 
USE_ITT_BUILD */ 3231 , int wait=1 3232 ); 3233 extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ); 3234 3235 extern int __kmp_is_address_mapped( void *addr ); 3236 extern kmp_uint64 __kmp_hardware_timestamp(void); 3237 3238 #if KMP_OS_UNIX 3239 extern int __kmp_read_from_file( char const *path, char const *format, ... ); 3240 #endif 3241 3242 /* ------------------------------------------------------------------------ */ 3243 // 3244 // Assembly routines that have no compiler intrinsic replacement 3245 // 3246 3247 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3248 3249 extern void __kmp_query_cpuid( kmp_cpuinfo_t *p ); 3250 3251 #define __kmp_load_mxcsr(p) _mm_setcsr(*(p)) 3252 static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); } 3253 3254 extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p ); 3255 extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p ); 3256 extern void __kmp_clear_x87_fpu_status_word(); 3257 # define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */ 3258 3259 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 3260 3261 extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[] 3262 #if OMPT_SUPPORT 3263 , void **exit_frame_ptr 3264 #endif 3265 ); 3266 3267 3268 /* ------------------------------------------------------------------------ */ 3269 3270 KMP_EXPORT void __kmpc_begin ( ident_t *, kmp_int32 flags ); 3271 KMP_EXPORT void __kmpc_end ( ident_t * ); 3272 3273 KMP_EXPORT void __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor, 3274 kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length ); 3275 KMP_EXPORT void __kmpc_threadprivate_register ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor ); 3276 KMP_EXPORT void * __kmpc_threadprivate ( ident_t *, kmp_int32 global_tid, void * data, size_t size ); 3277 3278 KMP_EXPORT kmp_int32 __kmpc_global_thread_num ( ident_t * ); 3279 KMP_EXPORT 
kmp_int32 __kmpc_global_num_threads ( ident_t * ); 3280 KMP_EXPORT kmp_int32 __kmpc_bound_thread_num ( ident_t * ); 3281 KMP_EXPORT kmp_int32 __kmpc_bound_num_threads ( ident_t * ); 3282 3283 KMP_EXPORT kmp_int32 __kmpc_ok_to_fork ( ident_t * ); 3284 KMP_EXPORT void __kmpc_fork_call ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... ); 3285 3286 KMP_EXPORT void __kmpc_serialized_parallel ( ident_t *, kmp_int32 global_tid ); 3287 KMP_EXPORT void __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid ); 3288 3289 KMP_EXPORT void __kmpc_flush ( ident_t *); 3290 KMP_EXPORT void __kmpc_barrier ( ident_t *, kmp_int32 global_tid ); 3291 KMP_EXPORT kmp_int32 __kmpc_master ( ident_t *, kmp_int32 global_tid ); 3292 KMP_EXPORT void __kmpc_end_master ( ident_t *, kmp_int32 global_tid ); 3293 KMP_EXPORT void __kmpc_ordered ( ident_t *, kmp_int32 global_tid ); 3294 KMP_EXPORT void __kmpc_end_ordered ( ident_t *, kmp_int32 global_tid ); 3295 KMP_EXPORT void __kmpc_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * ); 3296 KMP_EXPORT void __kmpc_end_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * ); 3297 3298 #if OMP_45_ENABLED 3299 KMP_EXPORT void __kmpc_critical_with_hint ( ident_t *, kmp_int32 global_tid, kmp_critical_name *, uintptr_t hint ); 3300 #endif 3301 3302 KMP_EXPORT kmp_int32 __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid ); 3303 KMP_EXPORT void __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid ); 3304 3305 KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid ); 3306 3307 KMP_EXPORT kmp_int32 __kmpc_single ( ident_t *, kmp_int32 global_tid ); 3308 KMP_EXPORT void __kmpc_end_single ( ident_t *, kmp_int32 global_tid ); 3309 3310 KMP_EXPORT void KMPC_FOR_STATIC_INIT ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter, 3311 kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk ); 3312 3313 KMP_EXPORT void 
__kmpc_for_static_fini ( ident_t *loc, kmp_int32 global_tid ); 3314 3315 KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit ); 3316 3317 extern void KMPC_SET_NUM_THREADS ( int arg ); 3318 extern void KMPC_SET_DYNAMIC ( int flag ); 3319 extern void KMPC_SET_NESTED ( int flag ); 3320 3321 /* --------------------------------------------------------------------------- */ 3322 3323 /* 3324 * Taskq interface routines 3325 */ 3326 3327 KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk, 3328 size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds); 3329 KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); 3330 KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); 3331 KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status); 3332 KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); 3333 KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task); 3334 3335 /* ------------------------------------------------------------------------ */ 3336 3337 /* 3338 * OMP 3.0 tasking interface routines 3339 */ 3340 3341 KMP_EXPORT kmp_int32 3342 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); 3343 KMP_EXPORT kmp_task_t* 3344 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, 3345 size_t sizeof_kmp_task_t, size_t sizeof_shareds, 3346 kmp_routine_entry_t task_entry ); 3347 KMP_EXPORT void 3348 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ); 3349 KMP_EXPORT void 3350 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ); 3351 KMP_EXPORT kmp_int32 3352 
__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); 3353 KMP_EXPORT kmp_int32 3354 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid ); 3355 3356 KMP_EXPORT kmp_int32 3357 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ); 3358 3359 #if TASK_UNUSED 3360 void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ); 3361 void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ); 3362 #endif // TASK_UNUSED 3363 3364 /* ------------------------------------------------------------------------ */ 3365 3366 #if OMP_40_ENABLED 3367 3368 KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid ); 3369 KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid ); 3370 3371 KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps( 3372 ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, 3373 kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 3374 kmp_depend_info_t *noalias_dep_list); 3375 KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, 3376 kmp_int32 ndeps, 3377 kmp_depend_info_t *dep_list, 3378 kmp_int32 ndeps_noalias, 3379 kmp_depend_info_t *noalias_dep_list); 3380 extern void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task); 3381 extern void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h); 3382 extern void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h); 3383 3384 extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate ); 3385 3386 KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); 3387 KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); 3388 KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid); 3389 KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind); 3390 3391 #if OMP_45_ENABLED 3392 3393 KMP_EXPORT void __kmpc_proxy_task_completed( 
kmp_int32 gtid, kmp_task_t *ptask ); 3394 KMP_EXPORT void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask ); 3395 KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task, kmp_int32 if_val, 3396 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, 3397 kmp_int32 nogroup, kmp_int32 sched, kmp_uint64 grainsize, void * task_dup ); 3398 #endif 3399 3400 #endif 3401 3402 3403 /* 3404 * Lock interface routines (fast versions with gtid passed in) 3405 */ 3406 KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3407 KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3408 KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3409 KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3410 KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3411 KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3412 KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3413 KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3414 KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3415 KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); 3416 3417 #if OMP_45_ENABLED 3418 KMP_EXPORT void __kmpc_init_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint ); 3419 KMP_EXPORT void __kmpc_init_nest_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint ); 3420 #endif 3421 3422 /* ------------------------------------------------------------------------ */ 3423 3424 /* 3425 * Interface to fast scalable reduce methods routines 3426 */ 3427 3428 KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid, 3429 kmp_int32 num_vars, size_t reduce_size, 3430 void *reduce_data, void 
(*reduce_func)(void *lhs_data, void *rhs_data), 3431 kmp_critical_name *lck ); 3432 KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ); 3433 KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid, 3434 kmp_int32 num_vars, size_t reduce_size, 3435 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 3436 kmp_critical_name *lck ); 3437 KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ); 3438 3439 /* 3440 * internal fast reduction routines 3441 */ 3442 3443 extern PACKED_REDUCTION_METHOD_T 3444 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, 3445 kmp_int32 num_vars, size_t reduce_size, 3446 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), 3447 kmp_critical_name *lck ); 3448 3449 // this function is for testing set/get/determine reduce method 3450 KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void ); 3451 3452 KMP_EXPORT kmp_uint64 __kmpc_get_taskid(); 3453 KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid(); 3454 3455 // this function exported for testing of KMP_PLACE_THREADS functionality 3456 KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int); 3457 3458 /* ------------------------------------------------------------------------ */ 3459 /* ------------------------------------------------------------------------ */ 3460 3461 // C++ port 3462 // missing 'extern "C"' declarations 3463 3464 KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc ); 3465 KMP_EXPORT void __kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid ); 3466 KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads ); 3467 3468 #if OMP_40_ENABLED 3469 KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind ); 3470 KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads ); 3471 
KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...); 3472 #endif 3473 #if OMP_45_ENABLED 3474 struct kmp_dim { // loop bounds info casted to kmp_int64 3475 kmp_int64 lo; // lower 3476 kmp_int64 up; // upper 3477 kmp_int64 st; // stride 3478 }; 3479 KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 num_dims, struct kmp_dim * dims); 3480 KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 *vec); 3481 KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 *vec); 3482 KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 3483 #endif 3484 3485 KMP_EXPORT void* 3486 __kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid, 3487 void * data, size_t size, void *** cache ); 3488 3489 // Symbols for MS mutual detection. 3490 extern int _You_must_link_with_exactly_one_OpenMP_library; 3491 extern int _You_must_link_with_Intel_OpenMP_library; 3492 #if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 ) 3493 extern int _You_must_link_with_Microsoft_OpenMP_library; 3494 #endif 3495 3496 3497 // The routines below are not exported. 3498 // Consider making them 'static' in corresponding source files. 3499 void 3500 kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); 3501 struct private_common * 3502 kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); 3503 3504 // 3505 // ompc_, kmpc_ entries moved from omp.h. 
3506 // 3507 #if KMP_OS_WINDOWS 3508 # define KMPC_CONVENTION __cdecl 3509 #else 3510 # define KMPC_CONVENTION 3511 #endif 3512 3513 #ifndef __OMP_H 3514 typedef enum omp_sched_t { 3515 omp_sched_static = 1, 3516 omp_sched_dynamic = 2, 3517 omp_sched_guided = 3, 3518 omp_sched_auto = 4 3519 } omp_sched_t; 3520 typedef void * kmp_affinity_mask_t; 3521 #endif 3522 3523 KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int); 3524 KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int); 3525 KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int); 3526 KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int); 3527 KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *); 3528 KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *); 3529 KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *); 3530 3531 KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int); 3532 KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t); 3533 KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int); 3534 KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *); 3535 KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int); 3536 3537 #ifdef __cplusplus 3538 } 3539 #endif 3540 3541 #endif /* KMP_H */ 3542 3543