1 /*
2  * z_Linux_util.cpp -- platform specific routines.
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_affinity.h"
18 #include "kmp_i18n.h"
19 #include "kmp_io.h"
20 #include "kmp_itt.h"
21 #include "kmp_lock.h"
22 #include "kmp_stats.h"
23 #include "kmp_str.h"
24 #include "kmp_wait_release.h"
25 #include "kmp_wrapper_getpid.h"
26 
27 #if !KMP_OS_FREEBSD && !KMP_OS_NETBSD
28 #include <alloca.h>
29 #endif
30 #include <math.h> // HUGE_VAL.
31 #include <sys/resource.h>
32 #include <sys/syscall.h>
33 #include <sys/time.h>
34 #include <sys/times.h>
35 #include <unistd.h>
36 
37 #if KMP_OS_LINUX && !KMP_OS_CNK
38 #include <sys/sysinfo.h>
39 #if KMP_USE_FUTEX
// We should really include <futex.h>, but that causes compatibility problems
// on different Linux* OS distributions that either require that you include
// (or break when you try to include) <pci/types.h>. Since all we need are the
// two macros below (which are part of the kernel ABI, so can't change), we
// just define the constants here and don't include <futex.h>.
45 #ifndef FUTEX_WAIT
46 #define FUTEX_WAIT 0
47 #endif
48 #ifndef FUTEX_WAKE
49 #define FUTEX_WAKE 1
50 #endif
51 #endif
52 #elif KMP_OS_DARWIN
53 #include <mach/mach.h>
54 #include <sys/sysctl.h>
55 #elif KMP_OS_FREEBSD
56 #include <pthread_np.h>
57 #endif
58 
59 #include <ctype.h>
60 #include <dirent.h>
61 #include <fcntl.h>
62 
63 #include "tsan_annotations.h"
64 
65 struct kmp_sys_timer {
66   struct timespec start;
67 };
68 
// Convert timespec to nanoseconds. The multiplier is cast to an integer type
// so the expression stays in integer arithmetic.
#define TS2NS(timespec)                                                        \
  (((timespec).tv_sec * (kmp_uint64)1e9) + (timespec).tv_nsec)
71 
72 static struct kmp_sys_timer __kmp_sys_timer_data;
73 
74 #if KMP_HANDLE_SIGNALS
75 typedef void (*sig_func_t)(int);
76 STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[NSIG];
77 static sigset_t __kmp_sigset;
78 #endif
79 
80 static int __kmp_init_runtime = FALSE;
81 
82 static int __kmp_fork_count = 0;
83 
84 static pthread_condattr_t __kmp_suspend_cond_attr;
85 static pthread_mutexattr_t __kmp_suspend_mutex_attr;
86 
87 static kmp_cond_align_t __kmp_wait_cv;
88 static kmp_mutex_align_t __kmp_wait_mx;
89 
90 kmp_uint64 __kmp_ticks_per_msec = 1000000;
91 
92 #ifdef DEBUG_SUSPEND
93 static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
94   KMP_SNPRINTF(buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))",
95                cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock,
96                cond->c_cond.__c_waiting);
97 }
98 #endif
99 
100 #if (KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)
101 
102 /* Affinity support */
103 
104 void __kmp_affinity_bind_thread(int which) {
105   KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
106               "Illegal set affinity operation when not capable");
107 
108   kmp_affin_mask_t *mask;
109   KMP_CPU_ALLOC_ON_STACK(mask);
110   KMP_CPU_ZERO(mask);
111   KMP_CPU_SET(which, mask);
112   __kmp_set_system_affinity(mask, TRUE);
113   KMP_CPU_FREE_FROM_STACK(mask);
114 }
115 
116 /* Determine if we can access affinity functionality on this version of
117  * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
118  * __kmp_affin_mask_size to the appropriate value (0 means not capable). */
119 void __kmp_affinity_determine_capable(const char *env_var) {
120 // Check and see if the OS supports thread affinity.
121 
122 #define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024)
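// KMP_CPU_SET_SIZE_LIMIT bounds the affinity mask size probed below: the
// search loop doubles the trial size until the OS accepts it or this limit
// is reached.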
123 
124   int gCode;
125   int sCode;
126   unsigned char *buf;
127   buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
128 
  // On Linux* OS, if the syscall fails or returns a suggestion for the size,
  // then we don't have to search for an appropriate size.
132   gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf);
133   KA_TRACE(30, ("__kmp_affinity_determine_capable: "
134                 "initial getaffinity call returned %d errno = %d\n",
135                 gCode, errno));
136 
137   // if ((gCode < 0) && (errno == ENOSYS))
138   if (gCode < 0) {
139     // System call not supported
140     if (__kmp_affinity_verbose ||
141         (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
142          (__kmp_affinity_type != affinity_default) &&
143          (__kmp_affinity_type != affinity_disabled))) {
144       int error = errno;
145       kmp_msg_t err_code = KMP_ERR(error);
146       __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
147                 err_code, __kmp_msg_null);
148       if (__kmp_generate_warnings == kmp_warnings_off) {
149         __kmp_str_free(&err_code.str);
150       }
151     }
152     KMP_AFFINITY_DISABLE();
153     KMP_INTERNAL_FREE(buf);
154     return;
155   }
156   if (gCode > 0) { // Linux* OS only
157     // The optimal situation: the OS returns the size of the buffer it expects.
158     //
    // A verification of correct behavior is that setaffinity on a NULL
    // buffer with the same size fails with errno set to EFAULT.
161     sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
162     KA_TRACE(30, ("__kmp_affinity_determine_capable: "
163                   "setaffinity for mask size %d returned %d errno = %d\n",
164                   gCode, sCode, errno));
165     if (sCode < 0) {
166       if (errno == ENOSYS) {
167         if (__kmp_affinity_verbose ||
168             (__kmp_affinity_warnings &&
169              (__kmp_affinity_type != affinity_none) &&
170              (__kmp_affinity_type != affinity_default) &&
171              (__kmp_affinity_type != affinity_disabled))) {
172           int error = errno;
173           kmp_msg_t err_code = KMP_ERR(error);
174           __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
175                     err_code, __kmp_msg_null);
176           if (__kmp_generate_warnings == kmp_warnings_off) {
177             __kmp_str_free(&err_code.str);
178           }
179         }
180         KMP_AFFINITY_DISABLE();
181         KMP_INTERNAL_FREE(buf);
182       }
183       if (errno == EFAULT) {
184         KMP_AFFINITY_ENABLE(gCode);
185         KA_TRACE(10, ("__kmp_affinity_determine_capable: "
186                       "affinity supported (mask size %d)\n",
187                       (int)__kmp_affin_mask_size));
188         KMP_INTERNAL_FREE(buf);
189         return;
190       }
191     }
192   }
193 
194   // Call the getaffinity system call repeatedly with increasing set sizes
195   // until we succeed, or reach an upper bound on the search.
196   KA_TRACE(30, ("__kmp_affinity_determine_capable: "
197                 "searching for proper set size\n"));
198   int size;
199   for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) {
200     gCode = syscall(__NR_sched_getaffinity, 0, size, buf);
201     KA_TRACE(30, ("__kmp_affinity_determine_capable: "
202                   "getaffinity for mask size %d returned %d errno = %d\n",
203                   size, gCode, errno));
204 
205     if (gCode < 0) {
206       if (errno == ENOSYS) {
207         // We shouldn't get here
208         KA_TRACE(30, ("__kmp_affinity_determine_capable: "
209                       "inconsistent OS call behavior: errno == ENOSYS for mask "
210                       "size %d\n",
211                       size));
212         if (__kmp_affinity_verbose ||
213             (__kmp_affinity_warnings &&
214              (__kmp_affinity_type != affinity_none) &&
215              (__kmp_affinity_type != affinity_default) &&
216              (__kmp_affinity_type != affinity_disabled))) {
217           int error = errno;
218           kmp_msg_t err_code = KMP_ERR(error);
219           __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
220                     err_code, __kmp_msg_null);
221           if (__kmp_generate_warnings == kmp_warnings_off) {
222             __kmp_str_free(&err_code.str);
223           }
224         }
225         KMP_AFFINITY_DISABLE();
226         KMP_INTERNAL_FREE(buf);
227         return;
228       }
229       continue;
230     }
231 
232     sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
233     KA_TRACE(30, ("__kmp_affinity_determine_capable: "
234                   "setaffinity for mask size %d returned %d errno = %d\n",
235                   gCode, sCode, errno));
236     if (sCode < 0) {
237       if (errno == ENOSYS) { // Linux* OS only
238         // We shouldn't get here
239         KA_TRACE(30, ("__kmp_affinity_determine_capable: "
240                       "inconsistent OS call behavior: errno == ENOSYS for mask "
241                       "size %d\n",
242                       size));
243         if (__kmp_affinity_verbose ||
244             (__kmp_affinity_warnings &&
245              (__kmp_affinity_type != affinity_none) &&
246              (__kmp_affinity_type != affinity_default) &&
247              (__kmp_affinity_type != affinity_disabled))) {
248           int error = errno;
249           kmp_msg_t err_code = KMP_ERR(error);
250           __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
251                     err_code, __kmp_msg_null);
252           if (__kmp_generate_warnings == kmp_warnings_off) {
253             __kmp_str_free(&err_code.str);
254           }
255         }
256         KMP_AFFINITY_DISABLE();
257         KMP_INTERNAL_FREE(buf);
258         return;
259       }
260       if (errno == EFAULT) {
261         KMP_AFFINITY_ENABLE(gCode);
262         KA_TRACE(10, ("__kmp_affinity_determine_capable: "
263                       "affinity supported (mask size %d)\n",
264                       (int)__kmp_affin_mask_size));
265         KMP_INTERNAL_FREE(buf);
266         return;
267       }
268     }
269   }
270   // save uncaught error code
271   // int error = errno;
272   KMP_INTERNAL_FREE(buf);
273   // restore uncaught error code, will be printed at the next KMP_WARNING below
274   // errno = error;
275 
276   // Affinity is not supported
277   KMP_AFFINITY_DISABLE();
278   KA_TRACE(10, ("__kmp_affinity_determine_capable: "
279                 "cannot determine mask size - affinity not supported\n"));
280   if (__kmp_affinity_verbose ||
281       (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
282        (__kmp_affinity_type != affinity_default) &&
283        (__kmp_affinity_type != affinity_disabled))) {
284     KMP_WARNING(AffCantGetMaskSize, env_var);
285   }
286 }
287 
288 #endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
289 
290 #if KMP_USE_FUTEX
291 
292 int __kmp_futex_determine_capable() {
293   int loc = 0;
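  // Probe for futex support by issuing FUTEX_WAKE on a dummy word; a kernel
  // without futex support fails the syscall with errno == ENOSYS.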
294   int rc = syscall(__NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0);
295   int retval = (rc == 0) || (errno != ENOSYS);
296 
297   KA_TRACE(10,
298            ("__kmp_futex_determine_capable: rc = %d errno = %d\n", rc, errno));
299   KA_TRACE(10, ("__kmp_futex_determine_capable: futex syscall%s supported\n",
300                 retval ? "" : " not"));
301 
302   return retval;
303 }
304 
305 #endif // KMP_USE_FUTEX
306 
307 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (!KMP_ASM_INTRINS)
/* The IA-32 architecture has only a 32-bit "add-exchange" instruction, so we
   use compare_and_store for these routines */
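
/* Pattern: read the old value, compute the new one, and retry the
   compare-and-store until no other thread has modified *p in between; the
   value observed before the update is returned ("test then op" semantics). */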
310 
311 kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) {
312   kmp_int8 old_value, new_value;
313 
314   old_value = TCR_1(*p);
315   new_value = old_value | d;
316 
317   while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
318     KMP_CPU_PAUSE();
319     old_value = TCR_1(*p);
320     new_value = old_value | d;
321   }
322   return old_value;
323 }
324 
325 kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) {
326   kmp_int8 old_value, new_value;
327 
328   old_value = TCR_1(*p);
329   new_value = old_value & d;
330 
331   while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
332     KMP_CPU_PAUSE();
333     old_value = TCR_1(*p);
334     new_value = old_value & d;
335   }
336   return old_value;
337 }
338 
339 kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) {
340   kmp_uint32 old_value, new_value;
341 
342   old_value = TCR_4(*p);
343   new_value = old_value | d;
344 
345   while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
346     KMP_CPU_PAUSE();
347     old_value = TCR_4(*p);
348     new_value = old_value | d;
349   }
350   return old_value;
351 }
352 
353 kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) {
354   kmp_uint32 old_value, new_value;
355 
356   old_value = TCR_4(*p);
357   new_value = old_value & d;
358 
359   while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
360     KMP_CPU_PAUSE();
361     old_value = TCR_4(*p);
362     new_value = old_value & d;
363   }
364   return old_value;
365 }
366 
367 #if KMP_ARCH_X86
368 kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) {
369   kmp_int8 old_value, new_value;
370 
371   old_value = TCR_1(*p);
372   new_value = old_value + d;
373 
374   while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
375     KMP_CPU_PAUSE();
376     old_value = TCR_1(*p);
377     new_value = old_value + d;
378   }
379   return old_value;
380 }
381 
382 kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) {
383   kmp_int64 old_value, new_value;
384 
385   old_value = TCR_8(*p);
386   new_value = old_value + d;
387 
388   while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
389     KMP_CPU_PAUSE();
390     old_value = TCR_8(*p);
391     new_value = old_value + d;
392   }
393   return old_value;
394 }
395 #endif /* KMP_ARCH_X86 */
396 
397 kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) {
398   kmp_uint64 old_value, new_value;
399 
400   old_value = TCR_8(*p);
401   new_value = old_value | d;
402   while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
403     KMP_CPU_PAUSE();
404     old_value = TCR_8(*p);
405     new_value = old_value | d;
406   }
407   return old_value;
408 }
409 
410 kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) {
411   kmp_uint64 old_value, new_value;
412 
413   old_value = TCR_8(*p);
414   new_value = old_value & d;
415   while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
416     KMP_CPU_PAUSE();
417     old_value = TCR_8(*p);
418     new_value = old_value & d;
419   }
420   return old_value;
421 }
422 
423 #endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */
424 
425 void __kmp_terminate_thread(int gtid) {
426   int status;
427   kmp_info_t *th = __kmp_threads[gtid];
428 
429   if (!th)
430     return;
431 
432 #ifdef KMP_CANCEL_THREADS
433   KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid));
434   status = pthread_cancel(th->th.th_info.ds.ds_thread);
435   if (status != 0 && status != ESRCH) {
436     __kmp_fatal(KMP_MSG(CantTerminateWorkerThread), KMP_ERR(status),
437                 __kmp_msg_null);
438   }; // if
439 #endif
440   __kmp_yield(TRUE);
441 } //
442 
443 /* Set thread stack info according to values returned by pthread_getattr_np().
444    If values are unreasonable, assume call failed and use incremental stack
445    refinement method instead. Returns TRUE if the stack parameters could be
446    determined exactly, FALSE if incremental refinement is necessary. */
447 static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
448   int stack_data;
449 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
  /* Linux*, FreeBSD* and NetBSD* OS only -- no pthread_getattr_np support on
     OS X* */
451   pthread_attr_t attr;
452   int status;
453   size_t size = 0;
454   void *addr = 0;
455 
456   /* Always do incremental stack refinement for ubermaster threads since the
457      initial thread stack range can be reduced by sibling thread creation so
458      pthread_attr_getstack may cause thread gtid aliasing */
459   if (!KMP_UBER_GTID(gtid)) {
460 
461     /* Fetch the real thread attributes */
462     status = pthread_attr_init(&attr);
463     KMP_CHECK_SYSFAIL("pthread_attr_init", status);
464 #if KMP_OS_FREEBSD || KMP_OS_NETBSD
465     status = pthread_attr_get_np(pthread_self(), &attr);
466     KMP_CHECK_SYSFAIL("pthread_attr_get_np", status);
467 #else
468     status = pthread_getattr_np(pthread_self(), &attr);
469     KMP_CHECK_SYSFAIL("pthread_getattr_np", status);
470 #endif
471     status = pthread_attr_getstack(&attr, &addr, &size);
472     KMP_CHECK_SYSFAIL("pthread_attr_getstack", status);
473     KA_TRACE(60,
474              ("__kmp_set_stack_info: T#%d pthread_attr_getstack returned size:"
475               " %lu, low addr: %p\n",
476               gtid, size, addr));
477     status = pthread_attr_destroy(&attr);
478     KMP_CHECK_SYSFAIL("pthread_attr_destroy", status);
479   }
480 
481   if (size != 0 && addr != 0) { // was stack parameter determination successful?
482     /* Store the correct base and size */
483     TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size));
484     TCW_PTR(th->th.th_info.ds.ds_stacksize, size);
485     TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
486     return TRUE;
487   }
488 #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD */
489   /* Use incremental refinement starting from initial conservative estimate */
490   TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
491   TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
492   TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
493   return FALSE;
494 }
495 
496 static void *__kmp_launch_worker(void *thr) {
497   int status, old_type, old_state;
498 #ifdef KMP_BLOCK_SIGNALS
499   sigset_t new_set, old_set;
500 #endif /* KMP_BLOCK_SIGNALS */
501   void *exit_val;
502 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
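  // volatile so the compiler cannot optimize away the dummy KMP_ALLOCA below.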
503   void *volatile padding = 0;
504 #endif
505   int gtid;
506 
507   gtid = ((kmp_info_t *)thr)->th.th_info.ds.ds_gtid;
508   __kmp_gtid_set_specific(gtid);
509 #ifdef KMP_TDATA_GTID
510   __kmp_gtid = gtid;
511 #endif
512 #if KMP_STATS_ENABLED
513   // set __thread local index to point to thread-specific stats
514   __kmp_stats_thread_ptr = ((kmp_info_t *)thr)->th.th_stats;
515   KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
516   KMP_SET_THREAD_STATE(IDLE);
517   KMP_INIT_PARTITIONED_TIMERS(OMP_idle);
518 #endif
519 
520 #if USE_ITT_BUILD
521   __kmp_itt_thread_name(gtid);
522 #endif /* USE_ITT_BUILD */
523 
524 #if KMP_AFFINITY_SUPPORTED
525   __kmp_affinity_set_init_mask(gtid, FALSE);
526 #endif
527 
528 #ifdef KMP_CANCEL_THREADS
529   status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
530   KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
531   // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
532   status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
533   KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
534 #endif
535 
536 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
537   // Set FP control regs to be a copy of the parallel initialization thread's.
538   __kmp_clear_x87_fpu_status_word();
539   __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
540   __kmp_load_mxcsr(&__kmp_init_mxcsr);
541 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
542 
543 #ifdef KMP_BLOCK_SIGNALS
544   status = sigfillset(&new_set);
545   KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
546   status = pthread_sigmask(SIG_BLOCK, &new_set, &old_set);
547   KMP_CHECK_SYSFAIL("pthread_sigmask", status);
548 #endif /* KMP_BLOCK_SIGNALS */
549 
550 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
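  /* Stagger the start of each worker's stack by gtid * __kmp_stkoffset bytes
     so thread stacks are less likely to map onto the same cache and TLB
     sets. */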
551   if (__kmp_stkoffset > 0 && gtid > 0) {
552     padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
553   }
554 #endif
555 
556   KMP_MB();
557   __kmp_set_stack_info(gtid, (kmp_info_t *)thr);
558 
559   __kmp_check_stack_overlap((kmp_info_t *)thr);
560 
561   exit_val = __kmp_launch_thread((kmp_info_t *)thr);
562 
563 #ifdef KMP_BLOCK_SIGNALS
564   status = pthread_sigmask(SIG_SETMASK, &old_set, NULL);
565   KMP_CHECK_SYSFAIL("pthread_sigmask", status);
566 #endif /* KMP_BLOCK_SIGNALS */
567 
568   return exit_val;
569 }
570 
571 #if KMP_USE_MONITOR
572 /* The monitor thread controls all of the threads in the complex */
573 
574 static void *__kmp_launch_monitor(void *thr) {
575   int status, old_type, old_state;
576 #ifdef KMP_BLOCK_SIGNALS
577   sigset_t new_set;
578 #endif /* KMP_BLOCK_SIGNALS */
579   struct timespec interval;
580   int yield_count;
581   int yield_cycles = 0;
582 
583   KMP_MB(); /* Flush all pending memory write invalidates.  */
584 
585   KA_TRACE(10, ("__kmp_launch_monitor: #1 launched\n"));
586 
587   /* register us as the monitor thread */
588   __kmp_gtid_set_specific(KMP_GTID_MONITOR);
589 #ifdef KMP_TDATA_GTID
590   __kmp_gtid = KMP_GTID_MONITOR;
591 #endif
592 
593   KMP_MB();
594 
595 #if USE_ITT_BUILD
596   // Instruct Intel(R) Threading Tools to ignore monitor thread.
597   __kmp_itt_thread_ignore();
598 #endif /* USE_ITT_BUILD */
599 
600   __kmp_set_stack_info(((kmp_info_t *)thr)->th.th_info.ds.ds_gtid,
601                        (kmp_info_t *)thr);
602 
603   __kmp_check_stack_overlap((kmp_info_t *)thr);
604 
605 #ifdef KMP_CANCEL_THREADS
606   status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
607   KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
608   // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
609   status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
610   KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
611 #endif
612 
613 #if KMP_REAL_TIME_FIX
  // This is a potential fix which allows applications with real-time
  // scheduling policy to work. However, a decision about the fix has not been
  // made yet, so it is disabled by default.
  { // Was the program started with a real-time scheduling policy?
618     int sched = sched_getscheduler(0);
619     if (sched == SCHED_FIFO || sched == SCHED_RR) {
      // Yes, we are part of a real-time application. Try to increase the
      // priority of the monitor.
622       struct sched_param param;
623       int max_priority = sched_get_priority_max(sched);
624       int rc;
625       KMP_WARNING(RealTimeSchedNotSupported);
626       sched_getparam(0, &param);
627       if (param.sched_priority < max_priority) {
628         param.sched_priority += 1;
629         rc = sched_setscheduler(0, sched, &param);
630         if (rc != 0) {
631           int error = errno;
632           kmp_msg_t err_code = KMP_ERR(error);
633           __kmp_msg(kmp_ms_warning, KMP_MSG(CantChangeMonitorPriority),
634                     err_code, KMP_MSG(MonitorWillStarve), __kmp_msg_null);
635           if (__kmp_generate_warnings == kmp_warnings_off) {
636             __kmp_str_free(&err_code.str);
637           }
638         }; // if
639       } else {
        // We cannot abort here, because the number of CPUs may be enough for
        // all the threads, including the monitor thread, so the application
        // could potentially work...
643         __kmp_msg(kmp_ms_warning, KMP_MSG(RunningAtMaxPriority),
644                   KMP_MSG(MonitorWillStarve), KMP_HNT(RunningAtMaxPriority),
645                   __kmp_msg_null);
646       }; // if
647     }; // if
    // AC: free the thread that waits for the monitor to start
649     TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
650   }
651 #endif // KMP_REAL_TIME_FIX
652 
653   KMP_MB(); /* Flush all pending memory write invalidates.  */
654 
655   if (__kmp_monitor_wakeups == 1) {
656     interval.tv_sec = 1;
657     interval.tv_nsec = 0;
658   } else {
659     interval.tv_sec = 0;
660     interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups);
661   }
662 
663   KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));
664 
665   if (__kmp_yield_cycle) {
666     __kmp_yielding_on = 0; /* Start out with yielding shut off */
667     yield_count = __kmp_yield_off_count;
668   } else {
669     __kmp_yielding_on = 1; /* Yielding is on permanently */
670   }
671 
672   while (!TCR_4(__kmp_global.g.g_done)) {
673     struct timespec now;
674     struct timeval tval;
675 
676     /*  This thread monitors the state of the system */
677 
678     KA_TRACE(15, ("__kmp_launch_monitor: update\n"));
679 
680     status = gettimeofday(&tval, NULL);
681     KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
682     TIMEVAL_TO_TIMESPEC(&tval, &now);
683 
684     now.tv_sec += interval.tv_sec;
685     now.tv_nsec += interval.tv_nsec;
686 
687     if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
688       now.tv_sec += 1;
689       now.tv_nsec -= KMP_NSEC_PER_SEC;
690     }
691 
692     status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
693     KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
694     // AC: the monitor should not fall asleep if g_done has been set
695     if (!TCR_4(__kmp_global.g.g_done)) { // check once more under mutex
696       status = pthread_cond_timedwait(&__kmp_wait_cv.c_cond,
697                                       &__kmp_wait_mx.m_mutex, &now);
698       if (status != 0) {
699         if (status != ETIMEDOUT && status != EINTR) {
700           KMP_SYSFAIL("pthread_cond_timedwait", status);
701         };
702       };
703     };
704     status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
705     KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
706 
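    /* Alternate yielding on and off: stay in each state for yield_count
       monitor wakeups (__kmp_yield_on_count / __kmp_yield_off_count). */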
707     if (__kmp_yield_cycle) {
708       yield_cycles++;
709       if ((yield_cycles % yield_count) == 0) {
710         if (__kmp_yielding_on) {
711           __kmp_yielding_on = 0; /* Turn it off now */
712           yield_count = __kmp_yield_off_count;
713         } else {
714           __kmp_yielding_on = 1; /* Turn it on now */
715           yield_count = __kmp_yield_on_count;
716         }
717         yield_cycles = 0;
718       }
719     } else {
720       __kmp_yielding_on = 1;
721     }
722 
723     TCW_4(__kmp_global.g.g_time.dt.t_value,
724           TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);
725 
726     KMP_MB(); /* Flush all pending memory write invalidates.  */
727   }
728 
729   KA_TRACE(10, ("__kmp_launch_monitor: #3 cleanup\n"));
730 
731 #ifdef KMP_BLOCK_SIGNALS
732   status = sigfillset(&new_set);
733   KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
734   status = pthread_sigmask(SIG_UNBLOCK, &new_set, NULL);
735   KMP_CHECK_SYSFAIL("pthread_sigmask", status);
736 #endif /* KMP_BLOCK_SIGNALS */
737 
738   KA_TRACE(10, ("__kmp_launch_monitor: #4 finished\n"));
739 
740   if (__kmp_global.g.g_abort != 0) {
741     /* now we need to terminate the worker threads  */
742     /* the value of t_abort is the signal we caught */
743 
744     int gtid;
745 
746     KA_TRACE(10, ("__kmp_launch_monitor: #5 terminate sig=%d\n",
747                   __kmp_global.g.g_abort));
748 
749     /* terminate the OpenMP worker threads */
    /* TODO: this is not valid for sibling threads!
       The uber master might not be 0 anymore. */
752     for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
753       __kmp_terminate_thread(gtid);
754 
755     __kmp_cleanup();
756 
757     KA_TRACE(10, ("__kmp_launch_monitor: #6 raise sig=%d\n",
758                   __kmp_global.g.g_abort));
759 
760     if (__kmp_global.g.g_abort > 0)
761       raise(__kmp_global.g.g_abort);
762   }
763 
764   KA_TRACE(10, ("__kmp_launch_monitor: #7 exit\n"));
765 
766   return thr;
767 }
768 #endif // KMP_USE_MONITOR
769 
770 void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
771   pthread_t handle;
772   pthread_attr_t thread_attr;
773   int status;
774 
775   th->th.th_info.ds.ds_gtid = gtid;
776 
777 #if KMP_STATS_ENABLED
778   // sets up worker thread stats
779   __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid);
780 
781   // th->th.th_stats is used to transfer thread-specific stats-pointer to
782   // __kmp_launch_worker. So when thread is created (goes into
783   // __kmp_launch_worker) it will set its __thread local pointer to
784   // th->th.th_stats
785   if (!KMP_UBER_GTID(gtid)) {
786     th->th.th_stats = __kmp_stats_list->push_back(gtid);
787   } else {
788     // For root threads, __kmp_stats_thread_ptr is set in __kmp_register_root(),
789     // so set the th->th.th_stats field to it.
790     th->th.th_stats = __kmp_stats_thread_ptr;
791   }
792   __kmp_release_tas_lock(&__kmp_stats_lock, gtid);
793 
794 #endif // KMP_STATS_ENABLED
795 
796   if (KMP_UBER_GTID(gtid)) {
797     KA_TRACE(10, ("__kmp_create_worker: uber thread (%d)\n", gtid));
798     th->th.th_info.ds.ds_thread = pthread_self();
799     __kmp_set_stack_info(gtid, th);
800     __kmp_check_stack_overlap(th);
801     return;
802   }; // if
803 
804   KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid));
805 
806   KMP_MB(); /* Flush all pending memory write invalidates.  */
807 
808 #ifdef KMP_THREAD_ATTR
809   status = pthread_attr_init(&thread_attr);
810   if (status != 0) {
811     __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
812   }; // if
813   status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
814   if (status != 0) {
815     __kmp_fatal(KMP_MSG(CantSetWorkerState), KMP_ERR(status), __kmp_msg_null);
816   }; // if
817 
818   /* Set stack size for this thread now.
819      The multiple of 2 is there because on some machines, requesting an unusual
820      stacksize causes the thread to have an offset before the dummy alloca()
821      takes place to create the offset.  Since we want the user to have a
822      sufficient stacksize AND support a stack offset, we alloca() twice the
823      offset so that the upcoming alloca() does not eliminate any premade offset,
824      and also gives the user the stack space they requested for all threads */
825   stack_size += gtid * __kmp_stkoffset * 2;
826 
827   KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
828                 "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
829                 gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
830 
831 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
832   status = pthread_attr_setstacksize(&thread_attr, stack_size);
833 #ifdef KMP_BACKUP_STKSIZE
834   if (status != 0) {
835     if (!__kmp_env_stksize) {
836       stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset;
837       __kmp_stksize = KMP_BACKUP_STKSIZE;
838       KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
839                     "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu "
840                     "bytes\n",
841                     gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
842       status = pthread_attr_setstacksize(&thread_attr, stack_size);
843     }; // if
844   }; // if
845 #endif /* KMP_BACKUP_STKSIZE */
846   if (status != 0) {
847     __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
848                 KMP_HNT(ChangeWorkerStackSize), __kmp_msg_null);
849   }; // if
850 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
851 
852 #endif /* KMP_THREAD_ATTR */
853 
854   status =
855       pthread_create(&handle, &thread_attr, __kmp_launch_worker, (void *)th);
856   if (status != 0 || !handle) { // ??? Why do we check handle??
857 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
858     if (status == EINVAL) {
859       __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
860                   KMP_HNT(IncreaseWorkerStackSize), __kmp_msg_null);
861     };
862     if (status == ENOMEM) {
863       __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
864                   KMP_HNT(DecreaseWorkerStackSize), __kmp_msg_null);
865     };
866 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
867     if (status == EAGAIN) {
868       __kmp_fatal(KMP_MSG(NoResourcesForWorkerThread), KMP_ERR(status),
869                   KMP_HNT(Decrease_NUM_THREADS), __kmp_msg_null);
870     }; // if
871     KMP_SYSFAIL("pthread_create", status);
872   }; // if
873 
874   th->th.th_info.ds.ds_thread = handle;
875 
876 #ifdef KMP_THREAD_ATTR
877   status = pthread_attr_destroy(&thread_attr);
878   if (status) {
879     kmp_msg_t err_code = KMP_ERR(status);
880     __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
881               __kmp_msg_null);
882     if (__kmp_generate_warnings == kmp_warnings_off) {
883       __kmp_str_free(&err_code.str);
884     }
885   }; // if
886 #endif /* KMP_THREAD_ATTR */
887 
888   KMP_MB(); /* Flush all pending memory write invalidates.  */
889 
890   KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid));
891 
892 } // __kmp_create_worker
893 
894 #if KMP_USE_MONITOR
895 void __kmp_create_monitor(kmp_info_t *th) {
896   pthread_t handle;
897   pthread_attr_t thread_attr;
898   size_t size;
899   int status;
900   int auto_adj_size = FALSE;
901 
902   if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    // We don't need a monitor thread in case of MAX_BLOCKTIME
904     KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of "
905                   "MAX blocktime\n"));
906     th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op
907     th->th.th_info.ds.ds_gtid = 0;
908     return;
909   }
910   KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n"));
911 
912   KMP_MB(); /* Flush all pending memory write invalidates.  */
913 
914   th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
915   th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
916 #if KMP_REAL_TIME_FIX
917   TCW_4(__kmp_global.g.g_time.dt.t_value,
918         -1); // Will use it for synchronization a bit later.
919 #else
920   TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
921 #endif // KMP_REAL_TIME_FIX
922 
923 #ifdef KMP_THREAD_ATTR
924   if (__kmp_monitor_stksize == 0) {
925     __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
926     auto_adj_size = TRUE;
927   }
928   status = pthread_attr_init(&thread_attr);
929   if (status != 0) {
930     __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
931   }; // if
932   status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
933   if (status != 0) {
934     __kmp_fatal(KMP_MSG(CantSetMonitorState), KMP_ERR(status), __kmp_msg_null);
935   }; // if
936 
937 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
938   status = pthread_attr_getstacksize(&thread_attr, &size);
939   KMP_CHECK_SYSFAIL("pthread_attr_getstacksize", status);
940 #else
941   size = __kmp_sys_min_stksize;
942 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
943 #endif /* KMP_THREAD_ATTR */
944 
945   if (__kmp_monitor_stksize == 0) {
946     __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
947   }
948   if (__kmp_monitor_stksize < __kmp_sys_min_stksize) {
949     __kmp_monitor_stksize = __kmp_sys_min_stksize;
950   }
951 
952   KA_TRACE(10, ("__kmp_create_monitor: default stacksize = %lu bytes,"
953                 "requested stacksize = %lu bytes\n",
954                 size, __kmp_monitor_stksize));
955 
956 retry:
957 
958 /* Set stack size for this thread now. */
959 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
960   KA_TRACE(10, ("__kmp_create_monitor: setting stacksize = %lu bytes,",
961                 __kmp_monitor_stksize));
962   status = pthread_attr_setstacksize(&thread_attr, __kmp_monitor_stksize);
963   if (status != 0) {
964     if (auto_adj_size) {
965       __kmp_monitor_stksize *= 2;
966       goto retry;
967     }
968     kmp_msg_t err_code = KMP_ERR(status);
969     __kmp_msg(kmp_ms_warning, // should this be fatal?  BB
970               KMP_MSG(CantSetMonitorStackSize, (long int)__kmp_monitor_stksize),
971               err_code, KMP_HNT(ChangeMonitorStackSize), __kmp_msg_null);
972     if (__kmp_generate_warnings == kmp_warnings_off) {
973       __kmp_str_free(&err_code.str);
974     }
975   }; // if
976 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
977 
978   status =
979       pthread_create(&handle, &thread_attr, __kmp_launch_monitor, (void *)th);
980 
981   if (status != 0) {
982 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
983     if (status == EINVAL) {
984       if (auto_adj_size && (__kmp_monitor_stksize < (size_t)0x40000000)) {
985         __kmp_monitor_stksize *= 2;
986         goto retry;
987       }
988       __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
989                   KMP_ERR(status), KMP_HNT(IncreaseMonitorStackSize),
990                   __kmp_msg_null);
991     }; // if
992     if (status == ENOMEM) {
993       __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
994                   KMP_ERR(status), KMP_HNT(DecreaseMonitorStackSize),
995                   __kmp_msg_null);
996     }; // if
997 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
998     if (status == EAGAIN) {
999       __kmp_fatal(KMP_MSG(NoResourcesForMonitorThread), KMP_ERR(status),
1000                   KMP_HNT(DecreaseNumberOfThreadsInUse), __kmp_msg_null);
1001     }; // if
1002     KMP_SYSFAIL("pthread_create", status);
1003   }; // if
1004 
1005   th->th.th_info.ds.ds_thread = handle;
1006 
1007 #if KMP_REAL_TIME_FIX
  // Wait until the monitor thread has really started and set its *priority*.
1009   KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
1010                    sizeof(__kmp_global.g.g_time.dt.t_value));
1011   __kmp_wait_yield_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value,
1012                      -1, &__kmp_neq_4, NULL);
1013 #endif // KMP_REAL_TIME_FIX
1014 
1015 #ifdef KMP_THREAD_ATTR
1016   status = pthread_attr_destroy(&thread_attr);
1017   if (status != 0) {
1018     kmp_msg_t err_code = KMP_ERR(status);
1019     __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
1020               __kmp_msg_null);
1021     if (__kmp_generate_warnings == kmp_warnings_off) {
1022       __kmp_str_free(&err_code.str);
1023     }
1024   }; // if
1025 #endif
1026 
1027   KMP_MB(); /* Flush all pending memory write invalidates.  */
1028 
1029   KA_TRACE(10, ("__kmp_create_monitor: monitor created %#.8lx\n",
1030                 th->th.th_info.ds.ds_thread));
1031 
1032 } // __kmp_create_monitor
1033 #endif // KMP_USE_MONITOR
1034 
1035 void __kmp_exit_thread(int exit_status) {
1036   pthread_exit((void *)(intptr_t)exit_status);
1037 } // __kmp_exit_thread
1038 
1039 #if KMP_USE_MONITOR
1040 void __kmp_resume_monitor();
1041 
1042 void __kmp_reap_monitor(kmp_info_t *th) {
1043   int status;
1044   void *exit_val;
1045 
1046   KA_TRACE(10, ("__kmp_reap_monitor: try to reap monitor thread with handle"
1047                 " %#.8lx\n",
1048                 th->th.th_info.ds.ds_thread));
1049 
1050   // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
1051   // If both tid and gtid are 0, it means the monitor did not ever start.
1052   // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
1053   KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid);
1054   if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) {
1055     KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n"));
1056     return;
1057   }; // if
1058 
1059   KMP_MB(); /* Flush all pending memory write invalidates.  */
1060 
  /* First, check to see whether the monitor thread exists to wake it up. This
     avoids a performance problem when the monitor sleeps during a
     blocktime-sized interval */
1064 
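  // pthread_kill with signal 0 delivers no signal; it only checks whether the
  // target thread still exists (ESRCH means it is already gone).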
1065   status = pthread_kill(th->th.th_info.ds.ds_thread, 0);
1066   if (status != ESRCH) {
1067     __kmp_resume_monitor(); // Wake up the monitor thread
1068   }
1069   KA_TRACE(10, ("__kmp_reap_monitor: try to join with monitor\n"));
1070   status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
1071   if (exit_val != th) {
1072     __kmp_fatal(KMP_MSG(ReapMonitorError), KMP_ERR(status), __kmp_msg_null);
1073   }
1074 
1075   th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
1076   th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
1077 
1078   KA_TRACE(10, ("__kmp_reap_monitor: done reaping monitor thread with handle"
1079                 " %#.8lx\n",
1080                 th->th.th_info.ds.ds_thread));
1081 
1082   KMP_MB(); /* Flush all pending memory write invalidates.  */
1083 }
1084 #endif // KMP_USE_MONITOR
1085 
1086 void __kmp_reap_worker(kmp_info_t *th) {
1087   int status;
1088   void *exit_val;
1089 
1090   KMP_MB(); /* Flush all pending memory write invalidates.  */
1091 
1092   KA_TRACE(
1093       10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid));
1094 
1095   status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
1096 #ifdef KMP_DEBUG
1097   /* Don't expose these to the user until we understand when they trigger */
1098   if (status != 0) {
1099     __kmp_fatal(KMP_MSG(ReapWorkerError), KMP_ERR(status), __kmp_msg_null);
1100   }
1101   if (exit_val != th) {
1102     KA_TRACE(10, ("__kmp_reap_worker: worker T#%d did not reap properly, "
1103                   "exit_val = %p\n",
1104                   th->th.th_info.ds.ds_gtid, exit_val));
1105   }
1106 #endif /* KMP_DEBUG */
1107 
1108   KA_TRACE(10, ("__kmp_reap_worker: done reaping T#%d\n",
1109                 th->th.th_info.ds.ds_gtid));
1110 
1111   KMP_MB(); /* Flush all pending memory write invalidates.  */
1112 }
1113 
1114 #if KMP_HANDLE_SIGNALS
1115 
1116 static void __kmp_null_handler(int signo) {
  // Do nothing; this is used for SIG_IGN-type actions.
1118 } // __kmp_null_handler
1119 
1120 static void __kmp_team_handler(int signo) {
1121   if (__kmp_global.g.g_abort == 0) {
1122 /* Stage 1 signal handler, let's shut down all of the threads */
1123 #ifdef KMP_DEBUG
1124     __kmp_debug_printf("__kmp_team_handler: caught signal = %d\n", signo);
1125 #endif
1126     switch (signo) {
1127     case SIGHUP:
1128     case SIGINT:
1129     case SIGQUIT:
1130     case SIGILL:
1131     case SIGABRT:
1132     case SIGFPE:
1133     case SIGBUS:
1134     case SIGSEGV:
1135 #ifdef SIGSYS
1136     case SIGSYS:
1137 #endif
1138     case SIGTERM:
1139       if (__kmp_debug_buf) {
1140         __kmp_dump_debug_buffer();
1141       }; // if
1142       KMP_MB(); // Flush all pending memory write invalidates.
1143       TCW_4(__kmp_global.g.g_abort, signo);
1144       KMP_MB(); // Flush all pending memory write invalidates.
1145       TCW_4(__kmp_global.g.g_done, TRUE);
1146       KMP_MB(); // Flush all pending memory write invalidates.
1147       break;
1148     default:
1149 #ifdef KMP_DEBUG
1150       __kmp_debug_printf("__kmp_team_handler: unknown signal type");
1151 #endif
1152       break;
1153     }; // switch
1154   }; // if
1155 } // __kmp_team_handler
1156 
1157 static void __kmp_sigaction(int signum, const struct sigaction *act,
1158                             struct sigaction *oldact) {
1159   int rc = sigaction(signum, act, oldact);
1160   KMP_CHECK_SYSFAIL_ERRNO("sigaction", rc);
1161 }
1162 
1163 static void __kmp_install_one_handler(int sig, sig_func_t handler_func,
1164                                       int parallel_init) {
1165   KMP_MB(); // Flush all pending memory write invalidates.
1166   KB_TRACE(60,
1167            ("__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init));
1168   if (parallel_init) {
1169     struct sigaction new_action;
1170     struct sigaction old_action;
1171     new_action.sa_handler = handler_func;
1172     new_action.sa_flags = 0;
1173     sigfillset(&new_action.sa_mask);
1174     __kmp_sigaction(sig, &new_action, &old_action);
1175     if (old_action.sa_handler == __kmp_sighldrs[sig].sa_handler) {
1176       sigaddset(&__kmp_sigset, sig);
1177     } else {
1178       // Restore/keep user's handler if one previously installed.
1179       __kmp_sigaction(sig, &old_action, NULL);
1180     }; // if
1181   } else {
1182     // Save initial/system signal handlers to see if user handlers installed.
1183     __kmp_sigaction(sig, NULL, &__kmp_sighldrs[sig]);
1184   }; // if
1185   KMP_MB(); // Flush all pending memory write invalidates.
1186 } // __kmp_install_one_handler
1187 
1188 static void __kmp_remove_one_handler(int sig) {
1189   KB_TRACE(60, ("__kmp_remove_one_handler( %d )\n", sig));
1190   if (sigismember(&__kmp_sigset, sig)) {
1191     struct sigaction old;
1192     KMP_MB(); // Flush all pending memory write invalidates.
1193     __kmp_sigaction(sig, &__kmp_sighldrs[sig], &old);
1194     if ((old.sa_handler != __kmp_team_handler) &&
1195         (old.sa_handler != __kmp_null_handler)) {
1196       // Restore the users signal handler.
1197       KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, "
1198                     "restoring: sig=%d\n",
1199                     sig));
1200       __kmp_sigaction(sig, &old, NULL);
1201     }; // if
1202     sigdelset(&__kmp_sigset, sig);
1203     KMP_MB(); // Flush all pending memory write invalidates.
1204   }; // if
1205 } // __kmp_remove_one_handler
1206 
1207 void __kmp_install_signals(int parallel_init) {
1208   KB_TRACE(10, ("__kmp_install_signals( %d )\n", parallel_init));
1209   if (__kmp_handle_signals || !parallel_init) {
    // If ! parallel_init, we do not install handlers, just save original
    // handlers. Let us do it even if __kmp_handle_signals is 0.
1212     sigemptyset(&__kmp_sigset);
1213     __kmp_install_one_handler(SIGHUP, __kmp_team_handler, parallel_init);
1214     __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init);
1215     __kmp_install_one_handler(SIGQUIT, __kmp_team_handler, parallel_init);
1216     __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init);
1217     __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init);
1218     __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init);
1219     __kmp_install_one_handler(SIGBUS, __kmp_team_handler, parallel_init);
1220     __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init);
1221 #ifdef SIGSYS
1222     __kmp_install_one_handler(SIGSYS, __kmp_team_handler, parallel_init);
1223 #endif // SIGSYS
1224     __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init);
1225 #ifdef SIGPIPE
1226     __kmp_install_one_handler(SIGPIPE, __kmp_team_handler, parallel_init);
1227 #endif // SIGPIPE
1228   }; // if
1229 } // __kmp_install_signals
1230 
1231 void __kmp_remove_signals(void) {
1232   int sig;
1233   KB_TRACE(10, ("__kmp_remove_signals()\n"));
1234   for (sig = 1; sig < NSIG; ++sig) {
1235     __kmp_remove_one_handler(sig);
1236   }; // for sig
1237 } // __kmp_remove_signals
1238 
1239 #endif // KMP_HANDLE_SIGNALS
1240 
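/* __kmp_enable and __kmp_disable re-enable and disable pthread cancellation
   around regions where asynchronous cancellation of the thread would be
   unsafe. */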
1241 void __kmp_enable(int new_state) {
1242 #ifdef KMP_CANCEL_THREADS
1243   int status, old_state;
1244   status = pthread_setcancelstate(new_state, &old_state);
1245   KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
1246   KMP_DEBUG_ASSERT(old_state == PTHREAD_CANCEL_DISABLE);
1247 #endif
1248 }
1249 
1250 void __kmp_disable(int *old_state) {
1251 #ifdef KMP_CANCEL_THREADS
1252   int status;
1253   status = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, old_state);
1254   KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
1255 #endif
1256 }
1257 
1258 static void __kmp_atfork_prepare(void) { /*  nothing to do  */
1259 }
1260 
1261 static void __kmp_atfork_parent(void) { /*  nothing to do  */
1262 }
1263 
1264 /* Reset the library so execution in the child starts "all over again" with
1265    clean data structures in initial states.  Don't worry about freeing memory
1266    allocated by parent, just abandon it to be safe. */
1267 static void __kmp_atfork_child(void) {
1268   /* TODO make sure this is done right for nested/sibling */
1269   // ATT:  Memory leaks are here? TODO: Check it and fix.
1270   /* KMP_ASSERT( 0 ); */
1271 
1272   ++__kmp_fork_count;
1273 
1274 #if KMP_AFFINITY_SUPPORTED
1275 #if KMP_OS_LINUX
1276   // reset the affinity in the child to the initial thread
1277   // affinity in the parent
1278   kmp_set_thread_affinity_mask_initial();
1279 #endif
  // Set default not to bind threads tightly in the child (we're expecting
  // over-subscription after the fork and this can improve things for
  // scripting languages that use OpenMP inside process-parallel code).
1283   __kmp_affinity_type = affinity_none;
1284 #if OMP_40_ENABLED
1285   if (__kmp_nested_proc_bind.bind_types != NULL) {
1286     __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
1287   }
1288 #endif // OMP_40_ENABLED
1289 #endif // KMP_AFFINITY_SUPPORTED
1290 
1291   __kmp_init_runtime = FALSE;
1292 #if KMP_USE_MONITOR
1293   __kmp_init_monitor = 0;
1294 #endif
1295   __kmp_init_parallel = FALSE;
1296   __kmp_init_middle = FALSE;
1297   __kmp_init_serial = FALSE;
1298   TCW_4(__kmp_init_gtid, FALSE);
1299   __kmp_init_common = FALSE;
1300 
1301   TCW_4(__kmp_init_user_locks, FALSE);
1302 #if !KMP_USE_DYNAMIC_LOCK
1303   __kmp_user_lock_table.used = 1;
1304   __kmp_user_lock_table.allocated = 0;
1305   __kmp_user_lock_table.table = NULL;
1306   __kmp_lock_blocks = NULL;
1307 #endif
1308 
1309   __kmp_all_nth = 0;
1310   TCW_4(__kmp_nth, 0);
1311 
1312   /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate
1313      here so threadprivate doesn't use stale data */
1314   KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
1315                 __kmp_threadpriv_cache_list));
1316 
1317   while (__kmp_threadpriv_cache_list != NULL) {
1318 
1319     if (*__kmp_threadpriv_cache_list->addr != NULL) {
1320       KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
1321                     &(*__kmp_threadpriv_cache_list->addr)));
1322 
1323       *__kmp_threadpriv_cache_list->addr = NULL;
1324     }
1325     __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
1326   }
1327 
1328   __kmp_init_runtime = FALSE;
1329 
1330   /* reset statically initialized locks */
1331   __kmp_init_bootstrap_lock(&__kmp_initz_lock);
1332   __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
1333   __kmp_init_bootstrap_lock(&__kmp_console_lock);
1334 
1335   /* This is necessary to make sure no stale data is left around */
  /* AC: customers complain that we use unsafe routines in the atfork
     handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
     in dynamic_link when checking for the presence of the shared tbbmalloc
     library. The suggestion is to make the library initialization lazier,
     similar to what is done for __kmpc_begin(). */
1341   // TODO: synchronize all static initializations with regular library
1342   //       startup; look at kmp_global.cpp and etc.
1343   //__kmp_internal_begin ();
1344 }
1345 
1346 void __kmp_register_atfork(void) {
1347   if (__kmp_need_register_atfork) {
1348     int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
1349                                 __kmp_atfork_child);
1350     KMP_CHECK_SYSFAIL("pthread_atfork", status);
1351     __kmp_need_register_atfork = FALSE;
1352   }
1353 }
1354 
1355 void __kmp_suspend_initialize(void) {
1356   int status;
1357   status = pthread_mutexattr_init(&__kmp_suspend_mutex_attr);
1358   KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
1359   status = pthread_condattr_init(&__kmp_suspend_cond_attr);
1360   KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
1361 }
1362 
1363 static void __kmp_suspend_initialize_thread(kmp_info_t *th) {
1364   ANNOTATE_HAPPENS_AFTER(&th->th.th_suspend_init_count);
1365   if (th->th.th_suspend_init_count <= __kmp_fork_count) {
1366     /* this means we haven't initialized the suspension pthread objects for this
1367        thread in this instance of the process */
1368     int status;
1369     status = pthread_cond_init(&th->th.th_suspend_cv.c_cond,
1370                                &__kmp_suspend_cond_attr);
1371     KMP_CHECK_SYSFAIL("pthread_cond_init", status);
1372     status = pthread_mutex_init(&th->th.th_suspend_mx.m_mutex,
1373                                 &__kmp_suspend_mutex_attr);
1374     KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
1375     *(volatile int *)&th->th.th_suspend_init_count = __kmp_fork_count + 1;
1376     ANNOTATE_HAPPENS_BEFORE(&th->th.th_suspend_init_count);
1377   };
1378 }
1379 
1380 void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {
1381   if (th->th.th_suspend_init_count > __kmp_fork_count) {
    /* this means we have initialized the suspension pthread objects for this
       thread in this instance of the process */
1384     int status;
1385 
1386     status = pthread_cond_destroy(&th->th.th_suspend_cv.c_cond);
1387     if (status != 0 && status != EBUSY) {
1388       KMP_SYSFAIL("pthread_cond_destroy", status);
1389     };
1390     status = pthread_mutex_destroy(&th->th.th_suspend_mx.m_mutex);
1391     if (status != 0 && status != EBUSY) {
1392       KMP_SYSFAIL("pthread_mutex_destroy", status);
1393     };
1394     --th->th.th_suspend_init_count;
1395     KMP_DEBUG_ASSERT(th->th.th_suspend_init_count == __kmp_fork_count);
1396   }
1397 }
1398 
1400 /* This routine puts the calling thread to sleep after setting the
1401    sleep bit for the indicated flag variable to true. */
1402 template <class C>
1403 static inline void __kmp_suspend_template(int th_gtid, C *flag) {
1404   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend);
1405   kmp_info_t *th = __kmp_threads[th_gtid];
1406   int status;
1407   typename C::flag_t old_spin;
1408 
1409   KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid,
1410                 flag->get()));
1411 
1412   __kmp_suspend_initialize_thread(th);
1413 
1414   status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
1415   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1416 
1417   KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
1418                 th_gtid, flag->get()));
1419 
1420   /* TODO: shouldn't this use release semantics to ensure that
1421      __kmp_suspend_initialize_thread gets called first? */
1422   old_spin = flag->set_sleeping();
1423 
1424   KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
1425                " was %x\n",
1426                th_gtid, flag->get(), *(flag->get()), old_spin));
1427 
1428   if (flag->done_check_val(old_spin)) {
1429     old_spin = flag->unset_sleeping();
1430     KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
1431                  "for spin(%p)\n",
1432                  th_gtid, flag->get()));
1433   } else {
1434     /* Encapsulate in a loop as the documentation states that this may
1435        "with low probability" return when the condition variable has
1436        not been signaled or broadcast */
1437     int deactivated = FALSE;
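    // Publish the flag location so a resuming thread can find which flag this
    // thread is sleeping on (see __kmp_resume_template).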
1438     TCW_PTR(th->th.th_sleep_loc, (void *)flag);
1439 
1440     while (flag->is_sleeping()) {
1441 #ifdef DEBUG_SUSPEND
1442       char buffer[128];
1443       __kmp_suspend_count++;
1444       __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1445       __kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid,
1446                    buffer);
1447 #endif
1448       // Mark the thread as no longer active (only in the first iteration of the
1449       // loop).
1450       if (!deactivated) {
1451         th->th.th_active = FALSE;
1452         if (th->th.th_active_in_pool) {
1453           th->th.th_active_in_pool = FALSE;
1454           KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
1455           KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
1456         }
1457         deactivated = TRUE;
1458       }
1459 
1460 #if USE_SUSPEND_TIMEOUT
1461       struct timespec now;
1462       struct timeval tval;
1463       int msecs;
1464 
1465       status = gettimeofday(&tval, NULL);
1466       KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1467       TIMEVAL_TO_TIMESPEC(&tval, &now);
1468 
      // Wake up after at most (4 x blocktime + 200) ms to recheck the flag.
      msecs = (4 * __kmp_dflt_blocktime) + 200;
      now.tv_sec += msecs / 1000;
      now.tv_nsec += (msecs % 1000) * 1000000; // ms remainder -> nanoseconds
      if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
        now.tv_sec += 1;
        now.tv_nsec -= KMP_NSEC_PER_SEC;
      }
1472 
1473       KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
1474                     "pthread_cond_timedwait\n",
1475                     th_gtid));
1476       status = pthread_cond_timedwait(&th->th.th_suspend_cv.c_cond,
1477                                       &th->th.th_suspend_mx.m_mutex, &now);
1478 #else
1479       KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform"
1480                     " pthread_cond_wait\n",
1481                     th_gtid));
1482       status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
1483                                  &th->th.th_suspend_mx.m_mutex);
1484 #endif
1485 
1486       if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
1487         KMP_SYSFAIL("pthread_cond_wait", status);
1488       }
1489 #ifdef KMP_DEBUG
1490       if (status == ETIMEDOUT) {
1491         if (flag->is_sleeping()) {
1492           KF_TRACE(100,
1493                    ("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid));
1494         } else {
1495           KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
1496                        "not set!\n",
1497                        th_gtid));
1498         }
1499       } else if (flag->is_sleeping()) {
1500         KF_TRACE(100,
1501                  ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
1502       }
1503 #endif
1504     } // while
1505 
    // Mark the thread as active again (if it was previously marked inactive).
1507     if (deactivated) {
1508       th->th.th_active = TRUE;
1509       if (TCR_4(th->th.th_in_pool)) {
1510         KMP_TEST_THEN_INC32(&__kmp_thread_pool_active_nth);
1511         th->th.th_active_in_pool = TRUE;
1512       }
1513     }
1514   }
1515 #ifdef DEBUG_SUSPEND
1516   {
1517     char buffer[128];
1518     __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1519     __kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid,
1520                  buffer);
1521   }
1522 #endif
1523 
1524   status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1525   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1526   KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
1527 }
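
// The loop above is the classic condition-variable predicate pattern. As a
// minimal, self-contained sketch (not part of the runtime; all names below
// are hypothetical stand-ins for flag->is_sleeping() and the per-thread
// th_suspend_mx/th_suspend_cv pair):
#if 0
#include <pthread.h>

static pthread_mutex_t mx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int sleeping = 0;

static void suspend_self(void) {
  pthread_mutex_lock(&mx);
  sleeping = 1;
  // Re-check the predicate after every wakeup: pthread_cond_wait() may
  // return spuriously, without a matching signal or broadcast.
  while (sleeping)
    pthread_cond_wait(&cv, &mx); // atomically releases mx while blocked
  pthread_mutex_unlock(&mx);
}
#endif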
1528 
1529 void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
1530   __kmp_suspend_template(th_gtid, flag);
1531 }
1532 void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
1533   __kmp_suspend_template(th_gtid, flag);
1534 }
1535 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
1536   __kmp_suspend_template(th_gtid, flag);
1537 }
1538 
1539 /* This routine signals the thread specified by target_gtid to wake up
1540    after setting the sleep bit indicated by the flag argument to FALSE.
1541    The target thread must already have called __kmp_suspend_template() */
1542 template <class C>
1543 static inline void __kmp_resume_template(int target_gtid, C *flag) {
1544   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1545   kmp_info_t *th = __kmp_threads[target_gtid];
1546   int status;
1547 
1548 #ifdef KMP_DEBUG
1549   int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1550 #endif
1551 
1552   KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
1553                 gtid, target_gtid));
1554   KMP_DEBUG_ASSERT(gtid != target_gtid);
1555 
1556   __kmp_suspend_initialize_thread(th);
1557 
1558   status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
1559   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1560 
1561   if (!flag) { // coming from __kmp_null_resume_wrapper
1562     flag = (C *)CCAST(void *, th->th.th_sleep_loc);
1563   }
1564 
  // First, check if the flag is null or its type has changed. If so, someone
  // else woke it up. (get_ptr_type() simply shows what the flag was cast to.)
  if (!flag || flag->get_type() != flag->get_ptr_type()) {
1570     KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1571                  "awake: flag(%p)\n",
1572                  gtid, target_gtid, NULL));
1573     status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1574     KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1575     return;
1576   } else { // if multiple threads are sleeping, flag should be internally
1577     // referring to a specific thread here
1578     typename C::flag_t old_spin = flag->unset_sleeping();
1579     if (!flag->is_sleeping_val(old_spin)) {
1580       KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1581                    "awake: flag(%p): "
1582                    "%u => %u\n",
1583                    gtid, target_gtid, flag->get(), old_spin, *flag->get()));
1584       status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1585       KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1586       return;
1587     }
1588     KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
1589                  "sleep bit for flag's loc(%p): "
1590                  "%u => %u\n",
1591                  gtid, target_gtid, flag->get(), old_spin, *flag->get()));
1592   }
1593   TCW_PTR(th->th.th_sleep_loc, NULL);
1594 
1595 #ifdef DEBUG_SUSPEND
1596   {
1597     char buffer[128];
1598     __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1599     __kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid,
1600                  target_gtid, buffer);
1601   }
1602 #endif
1603   status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond);
1604   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1605   status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1606   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1607   KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
1608                 " for T#%d\n",
1609                 gtid, target_gtid));
1610 }
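
// Companion sketch to the one after __kmp_suspend_template (same hypothetical
// names): the waker clears the predicate and signals while holding the same
// mutex, so a suspending thread cannot miss the wakeup between its predicate
// check and its call to pthread_cond_wait().
#if 0
static void resume_other(void) {
  pthread_mutex_lock(&mx);
  sleeping = 0;
  pthread_cond_signal(&cv);
  pthread_mutex_unlock(&mx);
}
#endif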
1611 
1612 void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
1613   __kmp_resume_template(target_gtid, flag);
1614 }
1615 void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
1616   __kmp_resume_template(target_gtid, flag);
1617 }
1618 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
1619   __kmp_resume_template(target_gtid, flag);
1620 }
1621 
1622 #if KMP_USE_MONITOR
1623 void __kmp_resume_monitor() {
1624   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1625   int status;
1626 #ifdef KMP_DEBUG
1627   int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1628   KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,
1629                 KMP_GTID_MONITOR));
1630   KMP_DEBUG_ASSERT(gtid != KMP_GTID_MONITOR);
1631 #endif
1632   status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
1633   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1634 #ifdef DEBUG_SUSPEND
1635   {
1636     char buffer[128];
1637     __kmp_print_cond(buffer, &__kmp_wait_cv.c_cond);
1638     __kmp_printf("__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid,
1639                  KMP_GTID_MONITOR, buffer);
1640   }
1641 #endif
1642   status = pthread_cond_signal(&__kmp_wait_cv.c_cond);
1643   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1644   status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
1645   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1646   KF_TRACE(30, ("__kmp_resume_monitor: T#%d exiting after signaling wake up"
1647                 " for T#%d\n",
1648                 gtid, KMP_GTID_MONITOR));
1649 }
1650 #endif // KMP_USE_MONITOR
1651 
1652 void __kmp_yield(int cond) {
1653   if (!cond)
1654     return;
1655 #if KMP_USE_MONITOR
1656   if (!__kmp_yielding_on)
1657     return;
1658 #else
1659   if (__kmp_yield_cycle && !KMP_YIELD_NOW())
1660     return;
1661 #endif
1662   sched_yield();
1663 }
1664 
1665 void __kmp_gtid_set_specific(int gtid) {
1666   if (__kmp_init_gtid) {
1667     int status;
1668     status = pthread_setspecific(__kmp_gtid_threadprivate_key,
1669                                  (void *)(intptr_t)(gtid + 1));
1670     KMP_CHECK_SYSFAIL("pthread_setspecific", status);
1671   } else {
1672     KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
1673   }
1674 }
1675 
1676 int __kmp_gtid_get_specific() {
1677   int gtid;
1678   if (!__kmp_init_gtid) {
1679     KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning "
1680                   "KMP_GTID_SHUTDOWN\n"));
1681     return KMP_GTID_SHUTDOWN;
1682   }
1683   gtid = (int)(size_t)pthread_getspecific(__kmp_gtid_threadprivate_key);
1684   if (gtid == 0) {
1685     gtid = KMP_GTID_DNE;
1686   } else {
1687     gtid--;
1688   }
1689   KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
1690                 __kmp_gtid_threadprivate_key, gtid));
1691   return gtid;
1692 }
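
// The +1/-1 dance above exists because pthread_getspecific() returns NULL
// (i.e. 0) for a key that was never set in this thread; storing gtid + 1
// lets 0 unambiguously mean "no gtid stored" while a valid gtid of 0 still
// round-trips. A minimal sketch of the encoding (hypothetical names):
#if 0
static pthread_key_t key; // assume pthread_key_create(&key, ...) succeeded

static void set_gtid(int gtid) {
  pthread_setspecific(key, (void *)(intptr_t)(gtid + 1));
}

static int get_gtid(void) { // returns -1 if no gtid was stored
  int v = (int)(intptr_t)pthread_getspecific(key);
  return v == 0 ? -1 : v - 1;
}
#endif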
1693 
double __kmp_read_cpu_time(void) {
  struct tms buffer;

  times(&buffer);

  // times() reports CPU time in clock ticks; sysconf(_SC_CLK_TCK) gives the
  // number of ticks per second. (CLOCKS_PER_SEC describes the unit of
  // clock(), not of times(), and is generally a different value.)
  return (buffer.tms_utime + buffer.tms_cutime) / (double)sysconf(_SC_CLK_TCK);
}
1702 
1703 int __kmp_read_system_info(struct kmp_sys_info *info) {
1704   int status;
1705   struct rusage r_usage;
1706 
1707   memset(info, 0, sizeof(*info));
1708 
1709   status = getrusage(RUSAGE_SELF, &r_usage);
1710   KMP_CHECK_SYSFAIL_ERRNO("getrusage", status);
1711 
1712   // The maximum resident set size utilized (in kilobytes)
1713   info->maxrss = r_usage.ru_maxrss;
1714   // The number of page faults serviced without any I/O
1715   info->minflt = r_usage.ru_minflt;
1716   // The number of page faults serviced that required I/O
1717   info->majflt = r_usage.ru_majflt;
1718   // The number of times a process was "swapped" out of memory
1719   info->nswap = r_usage.ru_nswap;
1720   // The number of times the file system had to perform input
1721   info->inblock = r_usage.ru_inblock;
1722   // The number of times the file system had to perform output
1723   info->oublock = r_usage.ru_oublock;
  // The number of voluntary context switches
  info->nvcsw = r_usage.ru_nvcsw;
  // The number of involuntary (forced) context switches
  info->nivcsw = r_usage.ru_nivcsw;
1728 
1729   return (status != 0);
1730 }
1731 
1732 void __kmp_read_system_time(double *delta) {
1733   double t_ns;
1734   struct timeval tval;
1735   struct timespec stop;
1736   int status;
1737 
1738   status = gettimeofday(&tval, NULL);
1739   KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1740   TIMEVAL_TO_TIMESPEC(&tval, &stop);
1741   t_ns = TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start);
1742   *delta = (t_ns * 1e-9);
1743 }
1744 
1745 void __kmp_clear_system_time(void) {
1746   struct timeval tval;
1747   int status;
1748   status = gettimeofday(&tval, NULL);
1749   KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1750   TIMEVAL_TO_TIMESPEC(&tval, &__kmp_sys_timer_data.start);
1751 }
1752 
1753 static int __kmp_get_xproc(void) {
1754 
1755   int r = 0;
1756 
1757 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
1758 
1759   r = sysconf(_SC_NPROCESSORS_ONLN);
1760 
1761 #elif KMP_OS_DARWIN
1762 
1763   // Bug C77011 High "OpenMP Threads and number of active cores".
1764 
1765   // Find the number of available CPUs.
1766   kern_return_t rc;
1767   host_basic_info_data_t info;
1768   mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT;
1769   rc = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&info, &num);
1770   if (rc == 0 && num == HOST_BASIC_INFO_COUNT) {
1771     // Cannot use KA_TRACE() here because this code works before trace support
1772     // is initialized.
1773     r = info.avail_cpus;
1774   } else {
1775     KMP_WARNING(CantGetNumAvailCPU);
1776     KMP_INFORM(AssumedNumCPU);
1777   }; // if
1778 
1779 #else
1780 
1781 #error "Unknown or unsupported OS."
1782 
1783 #endif
1784 
1785   return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */
1786 
1787 } // __kmp_get_xproc
1788 
int __kmp_read_from_file(char const *path, char const *format, ...) {
  int result;
  va_list args;

  va_start(args, format);
  FILE *f = fopen(path, "rb");
  if (f == NULL) {
    va_end(args); // every va_start needs a matching va_end
    return 0;
  }
  result = vfscanf(f, format, args);
  va_end(args);
  fclose(f);

  return result;
}
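
// Hypothetical usage sketch of __kmp_read_from_file (the path below is
// illustrative, not something the runtime necessarily reads): the return
// value is the vfscanf() result, i.e. the number of items converted.
#if 0
static int read_threads_max(void) {
  int n = 0;
  if (__kmp_read_from_file("/proc/sys/kernel/threads-max", "%d", &n) == 1)
    return n;
  return -1;
}
#endif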
1802 
1803 void __kmp_runtime_initialize(void) {
1804   int status;
1805   pthread_mutexattr_t mutex_attr;
1806   pthread_condattr_t cond_attr;
1807 
1808   if (__kmp_init_runtime) {
1809     return;
1810   }; // if
1811 
1812 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1813   if (!__kmp_cpuinfo.initialized) {
1814     __kmp_query_cpuid(&__kmp_cpuinfo);
1815   }; // if
1816 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1817 
1818   __kmp_xproc = __kmp_get_xproc();
1819 
1820   if (sysconf(_SC_THREADS)) {
1821 
1822     /* Query the maximum number of threads */
1823     __kmp_sys_max_nth = sysconf(_SC_THREAD_THREADS_MAX);
1824     if (__kmp_sys_max_nth == -1) {
1825       /* Unlimited threads for NPTL */
1826       __kmp_sys_max_nth = INT_MAX;
1827     } else if (__kmp_sys_max_nth <= 1) {
1828       /* Can't tell, just use PTHREAD_THREADS_MAX */
1829       __kmp_sys_max_nth = KMP_MAX_NTH;
1830     }
1831 
1832     /* Query the minimum stack size */
1833     __kmp_sys_min_stksize = sysconf(_SC_THREAD_STACK_MIN);
1834     if (__kmp_sys_min_stksize <= 1) {
1835       __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
1836     }
1837   }
1838 
1839   /* Set up minimum number of threads to switch to TLS gtid */
1840   __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
1841 
1842   status = pthread_key_create(&__kmp_gtid_threadprivate_key,
1843                               __kmp_internal_end_dest);
1844   KMP_CHECK_SYSFAIL("pthread_key_create", status);
1845   status = pthread_mutexattr_init(&mutex_attr);
1846   KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
1847   status = pthread_mutex_init(&__kmp_wait_mx.m_mutex, &mutex_attr);
1848   KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
1849   status = pthread_condattr_init(&cond_attr);
1850   KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
1851   status = pthread_cond_init(&__kmp_wait_cv.c_cond, &cond_attr);
1852   KMP_CHECK_SYSFAIL("pthread_cond_init", status);
1853 #if USE_ITT_BUILD
1854   __kmp_itt_initialize();
1855 #endif /* USE_ITT_BUILD */
1856 
1857   __kmp_init_runtime = TRUE;
1858 }
1859 
1860 void __kmp_runtime_destroy(void) {
1861   int status;
1862 
1863   if (!__kmp_init_runtime) {
1864     return; // Nothing to do.
1865   };
1866 
1867 #if USE_ITT_BUILD
1868   __kmp_itt_destroy();
1869 #endif /* USE_ITT_BUILD */
1870 
1871   status = pthread_key_delete(__kmp_gtid_threadprivate_key);
1872   KMP_CHECK_SYSFAIL("pthread_key_delete", status);
1873 
1874   status = pthread_mutex_destroy(&__kmp_wait_mx.m_mutex);
1875   if (status != 0 && status != EBUSY) {
1876     KMP_SYSFAIL("pthread_mutex_destroy", status);
1877   }
1878   status = pthread_cond_destroy(&__kmp_wait_cv.c_cond);
1879   if (status != 0 && status != EBUSY) {
1880     KMP_SYSFAIL("pthread_cond_destroy", status);
1881   }
1882 #if KMP_AFFINITY_SUPPORTED
1883   __kmp_affinity_uninitialize();
1884 #endif
1885 
1886   __kmp_init_runtime = FALSE;
1887 }
1888 
1889 /* Put the thread to sleep for a time period */
1890 /* NOTE: not currently used anywhere */
1891 void __kmp_thread_sleep(int millis) { sleep((millis + 500) / 1000); }
1892 
1893 /* Calculate the elapsed wall clock time for the user */
1894 void __kmp_elapsed(double *t) {
1895   int status;
1896 #ifdef FIX_SGI_CLOCK
1897   struct timespec ts;
1898 
1899   status = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
1900   KMP_CHECK_SYSFAIL_ERRNO("clock_gettime", status);
1901   *t =
1902       (double)ts.tv_nsec * (1.0 / (double)KMP_NSEC_PER_SEC) + (double)ts.tv_sec;
1903 #else
1904   struct timeval tv;
1905 
1906   status = gettimeofday(&tv, NULL);
1907   KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1908   *t =
1909       (double)tv.tv_usec * (1.0 / (double)KMP_USEC_PER_SEC) + (double)tv.tv_sec;
1910 #endif
1911 }
1912 
1913 /* Calculate the elapsed wall clock tick for the user */
1914 void __kmp_elapsed_tick(double *t) { *t = 1 / (double)CLOCKS_PER_SEC; }
1915 
1916 /* Return the current time stamp in nsec */
1917 kmp_uint64 __kmp_now_nsec() {
1918   struct timeval t;
1919   gettimeofday(&t, NULL);
1920   return KMP_NSEC_PER_SEC * t.tv_sec + 1000 * t.tv_usec;
1921 }
1922 
1923 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1924 /* Measure clock ticks per millisecond */
1925 void __kmp_initialize_system_tick() {
1926   kmp_uint64 delay = 100000; // 50~100 usec on most machines.
1927   kmp_uint64 nsec = __kmp_now_nsec();
1928   kmp_uint64 goal = __kmp_hardware_timestamp() + delay;
1929   kmp_uint64 now;
1930   while ((now = __kmp_hardware_timestamp()) < goal)
1931     ;
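  // The busy-wait overshoots the goal by (now - goal) ticks, so the total
  // ticks elapsed since the first timestamp is delay + (now - goal). Dividing
  // by the elapsed wall time in nsec gives ticks per nsec; the 1e6 factor
  // converts ticks/nsec to ticks/msec.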
1932   __kmp_ticks_per_msec =
1933       (kmp_uint64)(1e6 * (delay + (now - goal)) / (__kmp_now_nsec() - nsec));
1934 }
1935 #endif
1936 
1937 /* Determine whether the given address is mapped into the current address
1938    space. */
1939 
1940 int __kmp_is_address_mapped(void *addr) {
1941 
1942   int found = 0;
1943   int rc;
1944 
1945 #if KMP_OS_LINUX || KMP_OS_FREEBSD
1946 
1947   /* On Linux* OS, read the /proc/<pid>/maps pseudo-file to get all the address
1948      ranges mapped into the address space. */
1949 
1950   char *name = __kmp_str_format("/proc/%d/maps", getpid());
1951   FILE *file = NULL;
1952 
1953   file = fopen(name, "r");
1954   KMP_ASSERT(file != NULL);
1955 
1956   for (;;) {
1957 
1958     void *beginning = NULL;
1959     void *ending = NULL;
1960     char perms[5];
1961 
1962     rc = fscanf(file, "%p-%p %4s %*[^\n]\n", &beginning, &ending, perms);
1963     if (rc == EOF) {
1964       break;
1965     }; // if
1966     KMP_ASSERT(rc == 3 &&
1967                KMP_STRLEN(perms) == 4); // Make sure all fields are read.
1968 
1969     // Ending address is not included in the region, but beginning is.
1970     if ((addr >= beginning) && (addr < ending)) {
      perms[2] = 0; // 3rd and 4th characters do not matter.
1972       if (strcmp(perms, "rw") == 0) {
1973         // Memory we are looking for should be readable and writable.
1974         found = 1;
1975       }; // if
1976       break;
1977     }; // if
1978 
1979   }; // forever
1980 
1981   // Free resources.
1982   fclose(file);
1983   KMP_INTERNAL_FREE(name);
1984 
1985 #elif KMP_OS_DARWIN
1986 
1987   /* On OS X*, /proc pseudo filesystem is not available. Try to read memory
1988      using vm interface. */
1989 
1990   int buffer;
1991   vm_size_t count;
1992   rc = vm_read_overwrite(
1993       mach_task_self(), // Task to read memory of.
1994       (vm_address_t)(addr), // Address to read from.
1995       1, // Number of bytes to be read.
1996       (vm_address_t)(&buffer), // Address of buffer to save read bytes in.
1997       &count // Address of var to save number of read bytes in.
1998       );
1999   if (rc == 0) {
2000     // Memory successfully read.
2001     found = 1;
2002   }; // if
2003 
2004 #elif KMP_OS_FREEBSD || KMP_OS_NETBSD
2005 
2006   // FIXME(FreeBSD, NetBSD): Implement this
2007   found = 1;
2008 
2009 #else
2010 
2011 #error "Unknown or unsupported OS"
2012 
2013 #endif
2014 
2015   return found;
2016 
2017 } // __kmp_is_address_mapped
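
// Hypothetical usage sketch: probing an address known to be mapped
// read-write (a local variable) should yield 1 on the /proc-based paths.
#if 0
static int probe_example(void) {
  int local = 0;
  return __kmp_is_address_mapped(&local); // expected: 1
}
#endif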
2018 
2019 #ifdef USE_LOAD_BALANCE
2020 
2021 #if KMP_OS_DARWIN
2022 
// The function returns the rounded value of the system load average over
// the given time interval, which depends on the value of the
// __kmp_load_balance_interval variable (default is 60 sec; other values
// may be 300 sec or 900 sec).
// It returns -1 in case of error.
2028 int __kmp_get_load_balance(int max) {
2029   double averages[3];
2030   int ret_avg = 0;
2031 
2032   int res = getloadavg(averages, 3);
2033 
  // Check __kmp_load_balance_interval to determine which of the averages to
  // use. getloadavg() may return fewer samples than requested, i.e. fewer
  // than 3.
2037   if (__kmp_load_balance_interval < 180 && (res >= 1)) {
2038     ret_avg = averages[0]; // 1 min
2039   } else if ((__kmp_load_balance_interval >= 180 &&
2040               __kmp_load_balance_interval < 600) &&
2041              (res >= 2)) {
2042     ret_avg = averages[1]; // 5 min
2043   } else if ((__kmp_load_balance_interval >= 600) && (res == 3)) {
2044     ret_avg = averages[2]; // 15 min
2045   } else { // Error occurred
2046     return -1;
2047   }
2048 
2049   return ret_avg;
2050 }
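
// Minimal usage sketch of getloadavg(), which underlies the function above:
// it fills up to three averages (1, 5, and 15 minutes) and returns how many
// samples it actually retrieved, or -1 on error. Names are illustrative.
#if 0
static int one_minute_load(double *out) {
  double avgs[3];
  int n = getloadavg(avgs, 3);
  if (n >= 1)
    *out = avgs[0];
  return n;
}
#endif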
2051 
2052 #else // Linux* OS
2053 
// The function returns the number of running (not sleeping) threads, or -1
// in case of error. An error can be reported if the Linux* OS kernel is too
// old (without "/proc" support). Counting of running threads stops if max
// running threads are encountered.
2058 int __kmp_get_load_balance(int max) {
2059   static int permanent_error = 0;
  static int glb_running_threads = 0; // Saved count of the running threads for
  // the thread balance algorithm
2062   static double glb_call_time = 0; /* Thread balance algorithm call time */
2063 
2064   int running_threads = 0; // Number of running threads in the system.
2065 
2066   DIR *proc_dir = NULL; // Handle of "/proc/" directory.
2067   struct dirent *proc_entry = NULL;
2068 
2069   kmp_str_buf_t task_path; // "/proc/<pid>/task/<tid>/" path.
2070   DIR *task_dir = NULL; // Handle of "/proc/<pid>/task/<tid>/" directory.
2071   struct dirent *task_entry = NULL;
2072   int task_path_fixed_len;
2073 
2074   kmp_str_buf_t stat_path; // "/proc/<pid>/task/<tid>/stat" path.
2075   int stat_file = -1;
2076   int stat_path_fixed_len;
2077 
2078   int total_processes = 0; // Total number of processes in system.
2079   int total_threads = 0; // Total number of threads in system.
2080 
2081   double call_time = 0.0;
2082 
2083   __kmp_str_buf_init(&task_path);
2084   __kmp_str_buf_init(&stat_path);
2085 
2086   __kmp_elapsed(&call_time);
2087 
2088   if (glb_call_time &&
2089       (call_time - glb_call_time < __kmp_load_balance_interval)) {
2090     running_threads = glb_running_threads;
2091     goto finish;
2092   }
2093 
2094   glb_call_time = call_time;
2095 
2096   // Do not spend time on scanning "/proc/" if we have a permanent error.
2097   if (permanent_error) {
2098     running_threads = -1;
2099     goto finish;
2100   }; // if
2101 
2102   if (max <= 0) {
2103     max = INT_MAX;
2104   }; // if
2105 
2106   // Open "/proc/" directory.
2107   proc_dir = opendir("/proc");
2108   if (proc_dir == NULL) {
    // Cannot open "/proc/". Probably the kernel does not support it. Return an
    // error now and in subsequent calls.
2111     running_threads = -1;
2112     permanent_error = 1;
2113     goto finish;
2114   }; // if
2115 
2116   // Initialize fixed part of task_path. This part will not change.
2117   __kmp_str_buf_cat(&task_path, "/proc/", 6);
2118   task_path_fixed_len = task_path.used; // Remember number of used characters.
2119 
2120   proc_entry = readdir(proc_dir);
2121   while (proc_entry != NULL) {
    // Proc entry is a directory whose name starts with a digit. Assume it is
    // a process' directory.
2124     if (proc_entry->d_type == DT_DIR && isdigit(proc_entry->d_name[0])) {
2125 
2126       ++total_processes;
2127       // Make sure init process is the very first in "/proc", so we can replace
2128       // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes ==
2129       // 1. We are going to check that total_processes == 1 => d_name == "1" is
2130       // true (where "=>" is implication). Since C++ does not have => operator,
2131       // let us replace it with its equivalent: a => b == ! a || b.
2132       KMP_DEBUG_ASSERT(total_processes != 1 ||
2133                        strcmp(proc_entry->d_name, "1") == 0);
2134 
2135       // Construct task_path.
2136       task_path.used = task_path_fixed_len; // Reset task_path to "/proc/".
2137       __kmp_str_buf_cat(&task_path, proc_entry->d_name,
2138                         KMP_STRLEN(proc_entry->d_name));
2139       __kmp_str_buf_cat(&task_path, "/task", 5);
2140 
2141       task_dir = opendir(task_path.str);
2142       if (task_dir == NULL) {
        // A process can finish between reading a "/proc/" directory entry and
        // opening the process' "task/" directory. So, in the general case we
        // should not complain, but just skip this process and read the next
        // one. But on systems with no "task/" support we would spend a lot of
        // time scanning the "/proc/" tree again and again without any benefit.
        // The "init" process (its pid is 1) should always exist, so if we
        // cannot open "/proc/1/task/", it means "task/" is not supported by
        // the kernel. Report an error now and in the future.
2151         if (strcmp(proc_entry->d_name, "1") == 0) {
2152           running_threads = -1;
2153           permanent_error = 1;
2154           goto finish;
2155         }; // if
2156       } else {
2157         // Construct fixed part of stat file path.
2158         __kmp_str_buf_clear(&stat_path);
2159         __kmp_str_buf_cat(&stat_path, task_path.str, task_path.used);
2160         __kmp_str_buf_cat(&stat_path, "/", 1);
2161         stat_path_fixed_len = stat_path.used;
2162 
2163         task_entry = readdir(task_dir);
2164         while (task_entry != NULL) {
2165           // It is a directory and name starts with a digit.
          if (task_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) {
2167             ++total_threads;
2168 
            // Construct complete stat file path. The easiest way would be:
            //  __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str,
            //  task_entry->d_name );
            // but a series of __kmp_str_buf_cat calls works a bit faster.
2173             stat_path.used =
2174                 stat_path_fixed_len; // Reset stat path to its fixed part.
2175             __kmp_str_buf_cat(&stat_path, task_entry->d_name,
2176                               KMP_STRLEN(task_entry->d_name));
2177             __kmp_str_buf_cat(&stat_path, "/stat", 5);
2178 
            // Note: Low-level API (open/read/close) is used. High-level API
            // (fopen/fclose) works ~30% slower.
2181             stat_file = open(stat_path.str, O_RDONLY);
2182             if (stat_file == -1) {
2183               // We cannot report an error because task (thread) can terminate
2184               // just before reading this file.
2185             } else {
              /* Content of "stat" file looks like:
                 24285 (program) S ...

                 It is a single line (if the program name does not include
                 funny symbols). The first number is a thread id, then the
                 name of the executable file in parentheses, then the state
                 of the thread. We need just the thread state.

                 Good news: The length of the program name is 15 characters
                 max. Longer names are truncated.

                 Thus, we need a rather short buffer: ~7 digits of tid + 15
                 chars for the program name + 2 parentheses + 3 spaces = 27.

                 Bad news: The program name may contain special symbols like
                 space, closing parenthesis, or even newline. This makes
                 parsing the "stat" file not 100% reliable. In case of funny
                 program names parsing may fail (report an incorrect thread
                 state).

                 Parsing the "status" file looks more promising (due to a
                 different file structure and escaping of special symbols)
                 but reading and parsing of the "status" file works slower.
                  -- ln
              */
2210               char buffer[65];
2211               int len;
2212               len = read(stat_file, buffer, sizeof(buffer) - 1);
2213               if (len >= 0) {
2214                 buffer[len] = 0;
2215                 // Using scanf:
2216                 //     sscanf( buffer, "%*d (%*s) %c ", & state );
2217                 // looks very nice, but searching for a closing parenthesis
2218                 // works a bit faster.
2219                 char *close_parent = strstr(buffer, ") ");
2220                 if (close_parent != NULL) {
2221                   char state = *(close_parent + 2);
2222                   if (state == 'R') {
2223                     ++running_threads;
2224                     if (running_threads >= max) {
2225                       goto finish;
2226                     }; // if
2227                   }; // if
2228                 }; // if
2229               }; // if
2230               close(stat_file);
2231               stat_file = -1;
2232             }; // if
2233           }; // if
2234           task_entry = readdir(task_dir);
2235         }; // while
2236         closedir(task_dir);
2237         task_dir = NULL;
2238       }; // if
2239     }; // if
2240     proc_entry = readdir(proc_dir);
2241   }; // while
2242 
  // There _might_ be a timing hole where the thread executing this
  // code gets skipped in the load balance, and running_threads is 0.
  // Assert in the debug builds only!!!
2246   KMP_DEBUG_ASSERT(running_threads > 0);
2247   if (running_threads <= 0) {
2248     running_threads = 1;
2249   }
2250 
2251 finish: // Clean up and exit.
2252   if (proc_dir != NULL) {
2253     closedir(proc_dir);
2254   }; // if
2255   __kmp_str_buf_free(&task_path);
2256   if (task_dir != NULL) {
2257     closedir(task_dir);
2258   }; // if
2259   __kmp_str_buf_free(&stat_path);
2260   if (stat_file != -1) {
2261     close(stat_file);
2262   }; // if
2263 
2264   glb_running_threads = running_threads;
2265 
2266   return running_threads;
2267 
2268 } // __kmp_get_load_balance
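
// Standalone sketch of the "stat" parsing done above (the path argument and
// buffer size are illustrative): skip past the ") " closing the command name,
// then read the single-letter state; 'R' means running.
#if 0
static char read_thread_state(const char *stat_path) {
  char buf[65];
  int fd = open(stat_path, O_RDONLY);
  if (fd == -1)
    return '?';
  int len = (int)read(fd, buf, sizeof(buf) - 1);
  close(fd);
  if (len <= 0)
    return '?';
  buf[len] = 0;
  char *p = strstr(buf, ") ");
  return p != NULL ? *(p + 2) : '?';
}
#endif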
2269 
2270 #endif // KMP_OS_DARWIN
2271 
2272 #endif // USE_LOAD_BALANCE
2273 
2274 #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC ||                            \
2275       ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64)
2276 
// We really only need the case with 1 argument, because Clang always builds
// a struct of pointers to the shared variables referenced in the outlined
// function.
2279 int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
2280                            void *p_argv[]
2281 #if OMPT_SUPPORT
2282                            ,
2283                            void **exit_frame_ptr
2284 #endif
2285                            ) {
2286 #if OMPT_SUPPORT
2287   *exit_frame_ptr = __builtin_frame_address(0);
2288 #endif
2289 
2290   switch (argc) {
2291   default:
2292     fprintf(stderr, "Too many args to microtask: %d!\n", argc);
2293     fflush(stderr);
2294     exit(-1);
2295   case 0:
2296     (*pkfn)(&gtid, &tid);
2297     break;
2298   case 1:
2299     (*pkfn)(&gtid, &tid, p_argv[0]);
2300     break;
2301   case 2:
2302     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
2303     break;
2304   case 3:
2305     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
2306     break;
2307   case 4:
2308     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
2309     break;
2310   case 5:
2311     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
2312     break;
2313   case 6:
2314     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2315             p_argv[5]);
2316     break;
2317   case 7:
2318     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2319             p_argv[5], p_argv[6]);
2320     break;
2321   case 8:
2322     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2323             p_argv[5], p_argv[6], p_argv[7]);
2324     break;
2325   case 9:
2326     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2327             p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
2328     break;
2329   case 10:
2330     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2331             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
2332     break;
2333   case 11:
2334     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2335             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
2336     break;
2337   case 12:
2338     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2339             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2340             p_argv[11]);
2341     break;
2342   case 13:
2343     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2344             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2345             p_argv[11], p_argv[12]);
2346     break;
2347   case 14:
2348     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2349             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2350             p_argv[11], p_argv[12], p_argv[13]);
2351     break;
2352   case 15:
2353     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2354             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2355             p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
2356     break;
2357   }
2358 
2359 #if OMPT_SUPPORT
2360   *exit_frame_ptr = 0;
2361 #endif
2362 
2363   return 1;
2364 }
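
// Illustrative sketch of the comment above (hypothetical names): Clang
// outlines a parallel region into a microtask whose single extra argument
// points at a struct of captured variables, so only the argc == 1 case is
// exercised by Clang-compiled code.
#if 0
struct shared_vars {
  int *counter; // pointers to variables shared with the enclosing scope
};

static void outlined_region(int *gtid, int *tid, void *ctx) {
  struct shared_vars *vars = (struct shared_vars *)ctx;
  (void)gtid;
  (void)tid;
  *vars->counter += 1; // body of the parallel region
}
// The runtime would invoke it through the argc == 1 case above:
//   (*pkfn)(&gtid, &tid, p_argv[0]); // p_argv[0] == &vars
#endif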
2365 
2366 #endif
2367 
2368 // end of file //
2369