1 /*
2  * z_Linux_util.cpp -- platform specific routines.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_affinity.h"
15 #include "kmp_i18n.h"
16 #include "kmp_io.h"
17 #include "kmp_itt.h"
18 #include "kmp_lock.h"
19 #include "kmp_stats.h"
20 #include "kmp_str.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23 
24 #if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD
25 #include <alloca.h>
26 #endif
27 #include <math.h> // HUGE_VAL.
28 #include <sys/resource.h>
29 #include <sys/syscall.h>
30 #include <sys/time.h>
31 #include <sys/times.h>
32 #include <unistd.h>
33 
34 #if KMP_OS_LINUX
35 #include <sys/sysinfo.h>
36 #if KMP_USE_FUTEX
// We should really include <futex.h>, but that causes compatibility problems
// on different Linux* OS distributions that either require you to include (or
// break when you try to include) <pci/types.h>. Since all we need is the two
// macros below (which are part of the kernel ABI, so can't change), we just
// define the constants here and don't include <futex.h>.
42 #ifndef FUTEX_WAIT
43 #define FUTEX_WAIT 0
44 #endif
45 #ifndef FUTEX_WAKE
46 #define FUTEX_WAKE 1
47 #endif
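// A minimal sketch of the kernel ABI these constants drive (illustrative
// only, not part of the build):
//   int word = 0;
//   // Block the caller while the word still holds the expected value 0:
//   syscall(__NR_futex, &word, FUTEX_WAIT, 0, NULL, NULL, 0);
//   // From another thread, wake at most one waiter sleeping on the word:
//   syscall(__NR_futex, &word, FUTEX_WAKE, 1, NULL, NULL, 0);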
48 #endif
49 #elif KMP_OS_DARWIN
50 #include <mach/mach.h>
51 #include <sys/sysctl.h>
52 #elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
53 #include <sys/types.h>
54 #include <sys/sysctl.h>
55 #include <sys/user.h>
56 #include <pthread_np.h>
57 #elif KMP_OS_NETBSD || KMP_OS_OPENBSD
58 #include <sys/types.h>
59 #include <sys/sysctl.h>
60 #endif
61 
62 #include <ctype.h>
63 #include <dirent.h>
64 #include <fcntl.h>
65 
66 #include "tsan_annotations.h"
67 
68 struct kmp_sys_timer {
69   struct timespec start;
70 };
71 
72 // Convert timespec to nanoseconds.
73 #define TS2NS(timespec)                                                        \
74   (((timespec).tv_sec * (long int)1e9) + (timespec).tv_nsec)
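// Illustrative use (not part of the build; assumes a CLOCK_MONOTONIC read):
//   struct timespec ts;
//   clock_gettime(CLOCK_MONOTONIC, &ts);
//   kmp_uint64 ns = TS2NS(ts); // ts.tv_sec * 10^9 + ts.tv_nsec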
75 
76 static struct kmp_sys_timer __kmp_sys_timer_data;
77 
78 #if KMP_HANDLE_SIGNALS
79 typedef void (*sig_func_t)(int);
80 STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[NSIG];
81 static sigset_t __kmp_sigset;
82 #endif
83 
84 static int __kmp_init_runtime = FALSE;
85 
86 static int __kmp_fork_count = 0;
87 
88 static pthread_condattr_t __kmp_suspend_cond_attr;
89 static pthread_mutexattr_t __kmp_suspend_mutex_attr;
90 
91 static kmp_cond_align_t __kmp_wait_cv;
92 static kmp_mutex_align_t __kmp_wait_mx;
93 
94 kmp_uint64 __kmp_ticks_per_msec = 1000000;
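// The default of 1000000 ticks per msec corresponds to a nominal 1 GHz
// timestamp source (10^6 ticks/ms == 10^9 ticks/s, i.e. a 1 ns tick).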
95 
96 #ifdef DEBUG_SUSPEND
97 static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
98   KMP_SNPRINTF(buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))",
99                cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock,
100                cond->c_cond.__c_waiting);
101 }
102 #endif
103 
104 #if ((KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED)
105 
106 /* Affinity support */
107 
108 void __kmp_affinity_bind_thread(int which) {
109   KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
110               "Illegal set affinity operation when not capable");
111 
112   kmp_affin_mask_t *mask;
113   KMP_CPU_ALLOC_ON_STACK(mask);
114   KMP_CPU_ZERO(mask);
115   KMP_CPU_SET(which, mask);
116   __kmp_set_system_affinity(mask, TRUE);
117   KMP_CPU_FREE_FROM_STACK(mask);
118 }
119 
120 /* Determine if we can access affinity functionality on this version of
121  * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
122  * __kmp_affin_mask_size to the appropriate value (0 means not capable). */
123 void __kmp_affinity_determine_capable(const char *env_var) {
124 // Check and see if the OS supports thread affinity.
125 
126 #if KMP_OS_LINUX
127 #define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024)
128 #elif KMP_OS_FREEBSD
129 #define KMP_CPU_SET_SIZE_LIMIT (sizeof(cpuset_t))
#endif

133 #if KMP_OS_LINUX
134   // If Linux* OS:
135   // If the syscall fails or returns a suggestion for the size,
136   // then we don't have to search for an appropriate size.
137   long gCode;
138   long sCode;
139   unsigned char *buf;
140   buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
141   gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf);
142   KA_TRACE(30, ("__kmp_affinity_determine_capable: "
143                 "initial getaffinity call returned %ld errno = %d\n",
144                 gCode, errno));
145 
146   // if ((gCode < 0) && (errno == ENOSYS))
147   if (gCode < 0) {
148     // System call not supported
149     if (__kmp_affinity_verbose ||
150         (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
151          (__kmp_affinity_type != affinity_default) &&
152          (__kmp_affinity_type != affinity_disabled))) {
153       int error = errno;
154       kmp_msg_t err_code = KMP_ERR(error);
155       __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
156                 err_code, __kmp_msg_null);
157       if (__kmp_generate_warnings == kmp_warnings_off) {
158         __kmp_str_free(&err_code.str);
159       }
160     }
161     KMP_AFFINITY_DISABLE();
162     KMP_INTERNAL_FREE(buf);
163     return;
164   }
165   if (gCode > 0) { // Linux* OS only
166     // The optimal situation: the OS returns the size of the buffer it expects.
167     //
168     // A verification of correct behavior is that setaffinity on a NULL
169     // buffer with the same size fails with errno set to EFAULT.
170     sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
171     KA_TRACE(30, ("__kmp_affinity_determine_capable: "
172                   "setaffinity for mask size %ld returned %ld errno = %d\n",
173                   gCode, sCode, errno));
174     if (sCode < 0) {
175       if (errno == ENOSYS) {
176         if (__kmp_affinity_verbose ||
177             (__kmp_affinity_warnings &&
178              (__kmp_affinity_type != affinity_none) &&
179              (__kmp_affinity_type != affinity_default) &&
180              (__kmp_affinity_type != affinity_disabled))) {
181           int error = errno;
182           kmp_msg_t err_code = KMP_ERR(error);
183           __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
184                     err_code, __kmp_msg_null);
185           if (__kmp_generate_warnings == kmp_warnings_off) {
186             __kmp_str_free(&err_code.str);
187           }
188         }
        KMP_AFFINITY_DISABLE();
        KMP_INTERNAL_FREE(buf);
        return; // buf was just freed; do not fall through to the search loop.
      }
192       if (errno == EFAULT) {
193         KMP_AFFINITY_ENABLE(gCode);
194         KA_TRACE(10, ("__kmp_affinity_determine_capable: "
195                       "affinity supported (mask size %d)\n",
196                       (int)__kmp_affin_mask_size));
197         KMP_INTERNAL_FREE(buf);
198         return;
199       }
200     }
201   }
202 
203   // Call the getaffinity system call repeatedly with increasing set sizes
204   // until we succeed, or reach an upper bound on the search.
205   KA_TRACE(30, ("__kmp_affinity_determine_capable: "
206                 "searching for proper set size\n"));
207   int size;
208   for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) {
209     gCode = syscall(__NR_sched_getaffinity, 0, size, buf);
210     KA_TRACE(30, ("__kmp_affinity_determine_capable: "
211                   "getaffinity for mask size %ld returned %ld errno = %d\n",
212                   size, gCode, errno));
213 
214     if (gCode < 0) {
215       if (errno == ENOSYS) {
216         // We shouldn't get here
217         KA_TRACE(30, ("__kmp_affinity_determine_capable: "
218                       "inconsistent OS call behavior: errno == ENOSYS for mask "
219                       "size %d\n",
220                       size));
221         if (__kmp_affinity_verbose ||
222             (__kmp_affinity_warnings &&
223              (__kmp_affinity_type != affinity_none) &&
224              (__kmp_affinity_type != affinity_default) &&
225              (__kmp_affinity_type != affinity_disabled))) {
226           int error = errno;
227           kmp_msg_t err_code = KMP_ERR(error);
228           __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
229                     err_code, __kmp_msg_null);
230           if (__kmp_generate_warnings == kmp_warnings_off) {
231             __kmp_str_free(&err_code.str);
232           }
233         }
234         KMP_AFFINITY_DISABLE();
235         KMP_INTERNAL_FREE(buf);
236         return;
237       }
238       continue;
239     }
240 
241     sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
242     KA_TRACE(30, ("__kmp_affinity_determine_capable: "
243                   "setaffinity for mask size %ld returned %ld errno = %d\n",
244                   gCode, sCode, errno));
245     if (sCode < 0) {
246       if (errno == ENOSYS) { // Linux* OS only
247         // We shouldn't get here
248         KA_TRACE(30, ("__kmp_affinity_determine_capable: "
249                       "inconsistent OS call behavior: errno == ENOSYS for mask "
250                       "size %d\n",
251                       size));
252         if (__kmp_affinity_verbose ||
253             (__kmp_affinity_warnings &&
254              (__kmp_affinity_type != affinity_none) &&
255              (__kmp_affinity_type != affinity_default) &&
256              (__kmp_affinity_type != affinity_disabled))) {
257           int error = errno;
258           kmp_msg_t err_code = KMP_ERR(error);
259           __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
260                     err_code, __kmp_msg_null);
261           if (__kmp_generate_warnings == kmp_warnings_off) {
262             __kmp_str_free(&err_code.str);
263           }
264         }
265         KMP_AFFINITY_DISABLE();
266         KMP_INTERNAL_FREE(buf);
267         return;
268       }
269       if (errno == EFAULT) {
270         KMP_AFFINITY_ENABLE(gCode);
271         KA_TRACE(10, ("__kmp_affinity_determine_capable: "
272                       "affinity supported (mask size %d)\n",
273                       (int)__kmp_affin_mask_size));
274         KMP_INTERNAL_FREE(buf);
275         return;
276       }
277     }
278   }
279 #elif KMP_OS_FREEBSD
280   long gCode;
281   unsigned char *buf;
282   buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
  gCode = pthread_getaffinity_np(pthread_self(), KMP_CPU_SET_SIZE_LIMIT,
                                 reinterpret_cast<cpuset_t *>(buf));
  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
                "initial getaffinity call returned %ld errno = %d\n",
                gCode, errno));
287   if (gCode == 0) {
288     KMP_AFFINITY_ENABLE(KMP_CPU_SET_SIZE_LIMIT);
289     KA_TRACE(10, ("__kmp_affinity_determine_capable: "
290                   "affinity supported (mask size %d)\n",
                  (int)__kmp_affin_mask_size));
292     KMP_INTERNAL_FREE(buf);
293     return;
294   }
295 #endif
296   // save uncaught error code
297   // int error = errno;
298   KMP_INTERNAL_FREE(buf);
299   // restore uncaught error code, will be printed at the next KMP_WARNING below
300   // errno = error;
301 
302   // Affinity is not supported
303   KMP_AFFINITY_DISABLE();
304   KA_TRACE(10, ("__kmp_affinity_determine_capable: "
305                 "cannot determine mask size - affinity not supported\n"));
306   if (__kmp_affinity_verbose ||
307       (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
308        (__kmp_affinity_type != affinity_default) &&
309        (__kmp_affinity_type != affinity_disabled))) {
310     KMP_WARNING(AffCantGetMaskSize, env_var);
311   }
312 }
313 
#endif // (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
315 
316 #if KMP_USE_FUTEX
317 
318 int __kmp_futex_determine_capable() {
319   int loc = 0;
320   long rc = syscall(__NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0);
321   int retval = (rc == 0) || (errno != ENOSYS);
322 
  KA_TRACE(10,
           ("__kmp_futex_determine_capable: rc = %ld errno = %d\n", rc, errno));
325   KA_TRACE(10, ("__kmp_futex_determine_capable: futex syscall%s supported\n",
326                 retval ? "" : " not"));
327 
328   return retval;
329 }
330 
331 #endif // KMP_USE_FUTEX
332 
333 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (!KMP_ASM_INTRINS)
/* The IA-32 architecture provides only a 32-bit "add-exchange" instruction,
   so we use compare_and_store retry loops for these routines. */
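
// For reference, each routine below is a classic compare-and-swap retry loop;
// in portable C++11 terms it is roughly equivalent to (illustrative only):
//   std::atomic<kmp_uint32> a;
//   kmp_uint32 old_value = a.fetch_or(d, std::memory_order_release);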
336 
337 kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) {
338   kmp_int8 old_value, new_value;
339 
340   old_value = TCR_1(*p);
341   new_value = old_value | d;
342 
343   while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
344     KMP_CPU_PAUSE();
345     old_value = TCR_1(*p);
346     new_value = old_value | d;
347   }
348   return old_value;
349 }
350 
351 kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) {
352   kmp_int8 old_value, new_value;
353 
354   old_value = TCR_1(*p);
355   new_value = old_value & d;
356 
357   while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
358     KMP_CPU_PAUSE();
359     old_value = TCR_1(*p);
360     new_value = old_value & d;
361   }
362   return old_value;
363 }
364 
365 kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) {
366   kmp_uint32 old_value, new_value;
367 
368   old_value = TCR_4(*p);
369   new_value = old_value | d;
370 
371   while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
372     KMP_CPU_PAUSE();
373     old_value = TCR_4(*p);
374     new_value = old_value | d;
375   }
376   return old_value;
377 }
378 
379 kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) {
380   kmp_uint32 old_value, new_value;
381 
382   old_value = TCR_4(*p);
383   new_value = old_value & d;
384 
385   while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
386     KMP_CPU_PAUSE();
387     old_value = TCR_4(*p);
388     new_value = old_value & d;
389   }
390   return old_value;
391 }
392 
393 #if KMP_ARCH_X86
394 kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) {
395   kmp_int8 old_value, new_value;
396 
397   old_value = TCR_1(*p);
398   new_value = old_value + d;
399 
400   while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
401     KMP_CPU_PAUSE();
402     old_value = TCR_1(*p);
403     new_value = old_value + d;
404   }
405   return old_value;
406 }
407 
408 kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) {
409   kmp_int64 old_value, new_value;
410 
411   old_value = TCR_8(*p);
412   new_value = old_value + d;
413 
414   while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
415     KMP_CPU_PAUSE();
416     old_value = TCR_8(*p);
417     new_value = old_value + d;
418   }
419   return old_value;
420 }
421 #endif /* KMP_ARCH_X86 */
422 
423 kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) {
424   kmp_uint64 old_value, new_value;
425 
426   old_value = TCR_8(*p);
427   new_value = old_value | d;
428   while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
429     KMP_CPU_PAUSE();
430     old_value = TCR_8(*p);
431     new_value = old_value | d;
432   }
433   return old_value;
434 }
435 
436 kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) {
437   kmp_uint64 old_value, new_value;
438 
439   old_value = TCR_8(*p);
440   new_value = old_value & d;
441   while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
442     KMP_CPU_PAUSE();
443     old_value = TCR_8(*p);
444     new_value = old_value & d;
445   }
446   return old_value;
447 }
448 
449 #endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */
450 
451 void __kmp_terminate_thread(int gtid) {
452   int status;
453   kmp_info_t *th = __kmp_threads[gtid];
454 
455   if (!th)
456     return;
457 
458 #ifdef KMP_CANCEL_THREADS
459   KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid));
460   status = pthread_cancel(th->th.th_info.ds.ds_thread);
461   if (status != 0 && status != ESRCH) {
462     __kmp_fatal(KMP_MSG(CantTerminateWorkerThread), KMP_ERR(status),
463                 __kmp_msg_null);
464   }
465 #endif
466   KMP_YIELD(TRUE);
467 } //
468 
469 /* Set thread stack info according to values returned by pthread_getattr_np().
470    If values are unreasonable, assume call failed and use incremental stack
471    refinement method instead. Returns TRUE if the stack parameters could be
472    determined exactly, FALSE if incremental refinement is necessary. */
473 static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
474   int stack_data;
475 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
476         KMP_OS_HURD
477   pthread_attr_t attr;
478   int status;
479   size_t size = 0;
480   void *addr = 0;
481 
  /* Always do incremental stack refinement for ubermaster threads since the
     initial thread stack range can be reduced by sibling thread creation, so
     pthread_attr_getstack may cause thread gtid aliasing */
485   if (!KMP_UBER_GTID(gtid)) {
486 
487     /* Fetch the real thread attributes */
488     status = pthread_attr_init(&attr);
489     KMP_CHECK_SYSFAIL("pthread_attr_init", status);
490 #if KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD
491     status = pthread_attr_get_np(pthread_self(), &attr);
492     KMP_CHECK_SYSFAIL("pthread_attr_get_np", status);
493 #else
494     status = pthread_getattr_np(pthread_self(), &attr);
495     KMP_CHECK_SYSFAIL("pthread_getattr_np", status);
496 #endif
497     status = pthread_attr_getstack(&attr, &addr, &size);
498     KMP_CHECK_SYSFAIL("pthread_attr_getstack", status);
499     KA_TRACE(60,
500              ("__kmp_set_stack_info: T#%d pthread_attr_getstack returned size:"
501               " %lu, low addr: %p\n",
502               gtid, size, addr));
503     status = pthread_attr_destroy(&attr);
504     KMP_CHECK_SYSFAIL("pthread_attr_destroy", status);
505   }
506 
507   if (size != 0 && addr != 0) { // was stack parameter determination successful?
508     /* Store the correct base and size */
509     TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size));
510     TCW_PTR(th->th.th_info.ds.ds_stacksize, size);
511     TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
512     return TRUE;
513   }
514 #endif /* KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
515               KMP_OS_HURD */
516   /* Use incremental refinement starting from initial conservative estimate */
517   TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
518   TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
519   TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
520   return FALSE;
521 }
522 
523 static void *__kmp_launch_worker(void *thr) {
524   int status, old_type, old_state;
525 #ifdef KMP_BLOCK_SIGNALS
526   sigset_t new_set, old_set;
527 #endif /* KMP_BLOCK_SIGNALS */
528   void *exit_val;
529 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
530         KMP_OS_OPENBSD || KMP_OS_HURD
531   void *volatile padding = 0;
532 #endif
533   int gtid;
534 
535   gtid = ((kmp_info_t *)thr)->th.th_info.ds.ds_gtid;
536   __kmp_gtid_set_specific(gtid);
537 #ifdef KMP_TDATA_GTID
538   __kmp_gtid = gtid;
539 #endif
540 #if KMP_STATS_ENABLED
541   // set thread local index to point to thread-specific stats
542   __kmp_stats_thread_ptr = ((kmp_info_t *)thr)->th.th_stats;
543   __kmp_stats_thread_ptr->startLife();
544   KMP_SET_THREAD_STATE(IDLE);
545   KMP_INIT_PARTITIONED_TIMERS(OMP_idle);
546 #endif
547 
548 #if USE_ITT_BUILD
549   __kmp_itt_thread_name(gtid);
550 #endif /* USE_ITT_BUILD */
551 
552 #if KMP_AFFINITY_SUPPORTED
553   __kmp_affinity_set_init_mask(gtid, FALSE);
554 #endif
555 
556 #ifdef KMP_CANCEL_THREADS
557   status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
558   KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
559   // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
560   status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
561   KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
562 #endif
563 
564 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
565   // Set FP control regs to be a copy of the parallel initialization thread's.
566   __kmp_clear_x87_fpu_status_word();
567   __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
568   __kmp_load_mxcsr(&__kmp_init_mxcsr);
569 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
570 
571 #ifdef KMP_BLOCK_SIGNALS
572   status = sigfillset(&new_set);
573   KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
574   status = pthread_sigmask(SIG_BLOCK, &new_set, &old_set);
575   KMP_CHECK_SYSFAIL("pthread_sigmask", status);
576 #endif /* KMP_BLOCK_SIGNALS */
577 
578 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
579         KMP_OS_OPENBSD
580   if (__kmp_stkoffset > 0 && gtid > 0) {
581     padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
582   }
583 #endif
584 
585   KMP_MB();
586   __kmp_set_stack_info(gtid, (kmp_info_t *)thr);
587 
588   __kmp_check_stack_overlap((kmp_info_t *)thr);
589 
590   exit_val = __kmp_launch_thread((kmp_info_t *)thr);
591 
592 #ifdef KMP_BLOCK_SIGNALS
593   status = pthread_sigmask(SIG_SETMASK, &old_set, NULL);
594   KMP_CHECK_SYSFAIL("pthread_sigmask", status);
595 #endif /* KMP_BLOCK_SIGNALS */
596 
597   return exit_val;
598 }
599 
600 #if KMP_USE_MONITOR
601 /* The monitor thread controls all of the threads in the complex */
602 
603 static void *__kmp_launch_monitor(void *thr) {
604   int status, old_type, old_state;
605 #ifdef KMP_BLOCK_SIGNALS
606   sigset_t new_set;
607 #endif /* KMP_BLOCK_SIGNALS */
608   struct timespec interval;
609 
610   KMP_MB(); /* Flush all pending memory write invalidates.  */
611 
612   KA_TRACE(10, ("__kmp_launch_monitor: #1 launched\n"));
613 
614   /* register us as the monitor thread */
615   __kmp_gtid_set_specific(KMP_GTID_MONITOR);
616 #ifdef KMP_TDATA_GTID
617   __kmp_gtid = KMP_GTID_MONITOR;
618 #endif
619 
620   KMP_MB();
621 
622 #if USE_ITT_BUILD
623   // Instruct Intel(R) Threading Tools to ignore monitor thread.
624   __kmp_itt_thread_ignore();
625 #endif /* USE_ITT_BUILD */
626 
627   __kmp_set_stack_info(((kmp_info_t *)thr)->th.th_info.ds.ds_gtid,
628                        (kmp_info_t *)thr);
629 
630   __kmp_check_stack_overlap((kmp_info_t *)thr);
631 
632 #ifdef KMP_CANCEL_THREADS
633   status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
634   KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
635   // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
636   status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
637   KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
638 #endif
639 
640 #if KMP_REAL_TIME_FIX
  // This is a potential fix which allows applications with a real-time
  // scheduling policy to work. However, the decision about the fix has not
  // been made yet, so it is disabled by default.
  { // Was the program started with a real-time scheduling policy?
645     int sched = sched_getscheduler(0);
    if (sched == SCHED_FIFO || sched == SCHED_RR) {
      // Yes, we are part of a real-time application. Try to increase the
      // priority of the monitor.
649       struct sched_param param;
650       int max_priority = sched_get_priority_max(sched);
651       int rc;
652       KMP_WARNING(RealTimeSchedNotSupported);
653       sched_getparam(0, &param);
654       if (param.sched_priority < max_priority) {
655         param.sched_priority += 1;
656         rc = sched_setscheduler(0, sched, &param);
657         if (rc != 0) {
658           int error = errno;
659           kmp_msg_t err_code = KMP_ERR(error);
660           __kmp_msg(kmp_ms_warning, KMP_MSG(CantChangeMonitorPriority),
661                     err_code, KMP_MSG(MonitorWillStarve), __kmp_msg_null);
662           if (__kmp_generate_warnings == kmp_warnings_off) {
663             __kmp_str_free(&err_code.str);
664           }
665         }
      } else {
        // We cannot abort here, because the number of CPUs may be enough for
        // all the threads, including the monitor thread, so the application
        // could potentially work...
670         __kmp_msg(kmp_ms_warning, KMP_MSG(RunningAtMaxPriority),
671                   KMP_MSG(MonitorWillStarve), KMP_HNT(RunningAtMaxPriority),
672                   __kmp_msg_null);
673       }
674     }
    // AC: release the thread that waits for the monitor to start
676     TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
677   }
678 #endif // KMP_REAL_TIME_FIX
679 
680   KMP_MB(); /* Flush all pending memory write invalidates.  */
681 
682   if (__kmp_monitor_wakeups == 1) {
683     interval.tv_sec = 1;
684     interval.tv_nsec = 0;
685   } else {
686     interval.tv_sec = 0;
687     interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups);
688   }
689 
690   KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));
691 
692   while (!TCR_4(__kmp_global.g.g_done)) {
693     struct timespec now;
694     struct timeval tval;
695 
696     /*  This thread monitors the state of the system */
697 
698     KA_TRACE(15, ("__kmp_launch_monitor: update\n"));
699 
700     status = gettimeofday(&tval, NULL);
701     KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
702     TIMEVAL_TO_TIMESPEC(&tval, &now);
703 
704     now.tv_sec += interval.tv_sec;
705     now.tv_nsec += interval.tv_nsec;
706 
707     if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
708       now.tv_sec += 1;
709       now.tv_nsec -= KMP_NSEC_PER_SEC;
710     }
711 
712     status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
713     KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
714     // AC: the monitor should not fall asleep if g_done has been set
715     if (!TCR_4(__kmp_global.g.g_done)) { // check once more under mutex
716       status = pthread_cond_timedwait(&__kmp_wait_cv.c_cond,
717                                       &__kmp_wait_mx.m_mutex, &now);
718       if (status != 0) {
719         if (status != ETIMEDOUT && status != EINTR) {
720           KMP_SYSFAIL("pthread_cond_timedwait", status);
721         }
722       }
723     }
724     status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
725     KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
726 
727     TCW_4(__kmp_global.g.g_time.dt.t_value,
728           TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);
729 
730     KMP_MB(); /* Flush all pending memory write invalidates.  */
731   }
732 
733   KA_TRACE(10, ("__kmp_launch_monitor: #3 cleanup\n"));
734 
735 #ifdef KMP_BLOCK_SIGNALS
736   status = sigfillset(&new_set);
737   KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
738   status = pthread_sigmask(SIG_UNBLOCK, &new_set, NULL);
739   KMP_CHECK_SYSFAIL("pthread_sigmask", status);
740 #endif /* KMP_BLOCK_SIGNALS */
741 
742   KA_TRACE(10, ("__kmp_launch_monitor: #4 finished\n"));
743 
744   if (__kmp_global.g.g_abort != 0) {
745     /* now we need to terminate the worker threads  */
746     /* the value of t_abort is the signal we caught */
747 
748     int gtid;
749 
750     KA_TRACE(10, ("__kmp_launch_monitor: #5 terminate sig=%d\n",
751                   __kmp_global.g.g_abort));
752 
753     /* terminate the OpenMP worker threads */
    /* TODO: this is not valid for sibling threads!
     * The uber master might not be 0 anymore... */
756     for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
757       __kmp_terminate_thread(gtid);
758 
759     __kmp_cleanup();
760 
761     KA_TRACE(10, ("__kmp_launch_monitor: #6 raise sig=%d\n",
762                   __kmp_global.g.g_abort));
763 
764     if (__kmp_global.g.g_abort > 0)
765       raise(__kmp_global.g.g_abort);
766   }
767 
768   KA_TRACE(10, ("__kmp_launch_monitor: #7 exit\n"));
769 
770   return thr;
771 }
772 #endif // KMP_USE_MONITOR
773 
774 void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
775   pthread_t handle;
776   pthread_attr_t thread_attr;
777   int status;
778 
779   th->th.th_info.ds.ds_gtid = gtid;
780 
781 #if KMP_STATS_ENABLED
782   // sets up worker thread stats
783   __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid);
784 
  // th->th.th_stats is used to transfer the thread-specific stats pointer to
  // __kmp_launch_worker. So when the thread is created (and goes into
  // __kmp_launch_worker), it will set its thread-local pointer to
  // th->th.th_stats.
789   if (!KMP_UBER_GTID(gtid)) {
790     th->th.th_stats = __kmp_stats_list->push_back(gtid);
791   } else {
792     // For root threads, __kmp_stats_thread_ptr is set in __kmp_register_root(),
793     // so set the th->th.th_stats field to it.
794     th->th.th_stats = __kmp_stats_thread_ptr;
795   }
796   __kmp_release_tas_lock(&__kmp_stats_lock, gtid);
797 
798 #endif // KMP_STATS_ENABLED
799 
800   if (KMP_UBER_GTID(gtid)) {
801     KA_TRACE(10, ("__kmp_create_worker: uber thread (%d)\n", gtid));
802     th->th.th_info.ds.ds_thread = pthread_self();
803     __kmp_set_stack_info(gtid, th);
804     __kmp_check_stack_overlap(th);
805     return;
806   }
807 
808   KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid));
809 
810   KMP_MB(); /* Flush all pending memory write invalidates.  */
811 
812 #ifdef KMP_THREAD_ATTR
813   status = pthread_attr_init(&thread_attr);
814   if (status != 0) {
815     __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
816   }
817   status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
818   if (status != 0) {
819     __kmp_fatal(KMP_MSG(CantSetWorkerState), KMP_ERR(status), __kmp_msg_null);
820   }
821 
822   /* Set stack size for this thread now.
823      The multiple of 2 is there because on some machines, requesting an unusual
824      stacksize causes the thread to have an offset before the dummy alloca()
825      takes place to create the offset.  Since we want the user to have a
826      sufficient stacksize AND support a stack offset, we alloca() twice the
827      offset so that the upcoming alloca() does not eliminate any premade offset,
828      and also gives the user the stack space they requested for all threads */
829   stack_size += gtid * __kmp_stkoffset * 2;
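  // For example (hypothetical values): with __kmp_stkoffset == 8 KiB and
  // gtid == 3, this adds 3 * 8 KiB * 2 = 48 KiB on top of the requested size.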
830 
#if defined(__ANDROID__) && __ANDROID_API__ < 19
  // Round the stack size to a multiple of the page size. Older versions of
  // Android (until KitKat) would fail pthread_attr_setstacksize with EINVAL
  // if the stack size was not a multiple of the page size.
  stack_size = (stack_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
#endif
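  // The rounding above, e.g. with PAGE_SIZE == 4096: a requested 0x100001
  // bytes rounds up to 0x101000, while an exact multiple such as 0x100000
  // is left unchanged.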
837 
838   KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
839                 "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
840                 gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
841 
842 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
843   status = pthread_attr_setstacksize(&thread_attr, stack_size);
844 #ifdef KMP_BACKUP_STKSIZE
845   if (status != 0) {
846     if (!__kmp_env_stksize) {
847       stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset;
848       __kmp_stksize = KMP_BACKUP_STKSIZE;
849       KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
850                     "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu "
851                     "bytes\n",
852                     gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
853       status = pthread_attr_setstacksize(&thread_attr, stack_size);
854     }
855   }
856 #endif /* KMP_BACKUP_STKSIZE */
857   if (status != 0) {
858     __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
859                 KMP_HNT(ChangeWorkerStackSize), __kmp_msg_null);
860   }
861 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
862 
863 #endif /* KMP_THREAD_ATTR */
864 
865   status =
866       pthread_create(&handle, &thread_attr, __kmp_launch_worker, (void *)th);
867   if (status != 0 || !handle) { // ??? Why do we check handle??
868 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
869     if (status == EINVAL) {
870       __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
871                   KMP_HNT(IncreaseWorkerStackSize), __kmp_msg_null);
872     }
873     if (status == ENOMEM) {
874       __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
875                   KMP_HNT(DecreaseWorkerStackSize), __kmp_msg_null);
876     }
877 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
878     if (status == EAGAIN) {
879       __kmp_fatal(KMP_MSG(NoResourcesForWorkerThread), KMP_ERR(status),
880                   KMP_HNT(Decrease_NUM_THREADS), __kmp_msg_null);
881     }
882     KMP_SYSFAIL("pthread_create", status);
883   }
884 
885   th->th.th_info.ds.ds_thread = handle;
886 
887 #ifdef KMP_THREAD_ATTR
888   status = pthread_attr_destroy(&thread_attr);
889   if (status) {
890     kmp_msg_t err_code = KMP_ERR(status);
891     __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
892               __kmp_msg_null);
893     if (__kmp_generate_warnings == kmp_warnings_off) {
894       __kmp_str_free(&err_code.str);
895     }
896   }
897 #endif /* KMP_THREAD_ATTR */
898 
899   KMP_MB(); /* Flush all pending memory write invalidates.  */
900 
901   KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid));
902 
903 } // __kmp_create_worker
904 
905 #if KMP_USE_MONITOR
906 void __kmp_create_monitor(kmp_info_t *th) {
907   pthread_t handle;
908   pthread_attr_t thread_attr;
909   size_t size;
910   int status;
911   int auto_adj_size = FALSE;
912 
913   if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    // We don't need the monitor thread in case of MAX_BLOCKTIME.
915     KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of "
916                   "MAX blocktime\n"));
917     th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op
918     th->th.th_info.ds.ds_gtid = 0;
919     return;
920   }
921   KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n"));
922 
923   KMP_MB(); /* Flush all pending memory write invalidates.  */
924 
925   th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
926   th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
927 #if KMP_REAL_TIME_FIX
928   TCW_4(__kmp_global.g.g_time.dt.t_value,
929         -1); // Will use it for synchronization a bit later.
930 #else
931   TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
932 #endif // KMP_REAL_TIME_FIX
933 
934 #ifdef KMP_THREAD_ATTR
935   if (__kmp_monitor_stksize == 0) {
936     __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
937     auto_adj_size = TRUE;
938   }
939   status = pthread_attr_init(&thread_attr);
940   if (status != 0) {
941     __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
942   }
943   status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
944   if (status != 0) {
945     __kmp_fatal(KMP_MSG(CantSetMonitorState), KMP_ERR(status), __kmp_msg_null);
946   }
947 
948 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
949   status = pthread_attr_getstacksize(&thread_attr, &size);
950   KMP_CHECK_SYSFAIL("pthread_attr_getstacksize", status);
951 #else
952   size = __kmp_sys_min_stksize;
953 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
954 #endif /* KMP_THREAD_ATTR */
955 
956   if (__kmp_monitor_stksize == 0) {
957     __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
958   }
959   if (__kmp_monitor_stksize < __kmp_sys_min_stksize) {
960     __kmp_monitor_stksize = __kmp_sys_min_stksize;
961   }
962 
963   KA_TRACE(10, ("__kmp_create_monitor: default stacksize = %lu bytes,"
964                 "requested stacksize = %lu bytes\n",
965                 size, __kmp_monitor_stksize));
966 
967 retry:
968 
969 /* Set stack size for this thread now. */
970 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
971   KA_TRACE(10, ("__kmp_create_monitor: setting stacksize = %lu bytes,",
972                 __kmp_monitor_stksize));
973   status = pthread_attr_setstacksize(&thread_attr, __kmp_monitor_stksize);
974   if (status != 0) {
975     if (auto_adj_size) {
976       __kmp_monitor_stksize *= 2;
977       goto retry;
978     }
979     kmp_msg_t err_code = KMP_ERR(status);
980     __kmp_msg(kmp_ms_warning, // should this be fatal?  BB
981               KMP_MSG(CantSetMonitorStackSize, (long int)__kmp_monitor_stksize),
982               err_code, KMP_HNT(ChangeMonitorStackSize), __kmp_msg_null);
983     if (__kmp_generate_warnings == kmp_warnings_off) {
984       __kmp_str_free(&err_code.str);
985     }
986   }
987 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
988 
989   status =
990       pthread_create(&handle, &thread_attr, __kmp_launch_monitor, (void *)th);
991 
992   if (status != 0) {
993 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
994     if (status == EINVAL) {
995       if (auto_adj_size && (__kmp_monitor_stksize < (size_t)0x40000000)) {
996         __kmp_monitor_stksize *= 2;
997         goto retry;
998       }
999       __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
1000                   KMP_ERR(status), KMP_HNT(IncreaseMonitorStackSize),
1001                   __kmp_msg_null);
1002     }
1003     if (status == ENOMEM) {
1004       __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
1005                   KMP_ERR(status), KMP_HNT(DecreaseMonitorStackSize),
1006                   __kmp_msg_null);
1007     }
1008 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
1009     if (status == EAGAIN) {
1010       __kmp_fatal(KMP_MSG(NoResourcesForMonitorThread), KMP_ERR(status),
1011                   KMP_HNT(DecreaseNumberOfThreadsInUse), __kmp_msg_null);
1012     }
1013     KMP_SYSFAIL("pthread_create", status);
1014   }
1015 
1016   th->th.th_info.ds.ds_thread = handle;
1017 
1018 #if KMP_REAL_TIME_FIX
  // Wait until the monitor thread has really started and set its *priority*.
1020   KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
1021                    sizeof(__kmp_global.g.g_time.dt.t_value));
1022   __kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
1023                &__kmp_neq_4, NULL);
1024 #endif // KMP_REAL_TIME_FIX
1025 
1026 #ifdef KMP_THREAD_ATTR
1027   status = pthread_attr_destroy(&thread_attr);
1028   if (status != 0) {
1029     kmp_msg_t err_code = KMP_ERR(status);
1030     __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
1031               __kmp_msg_null);
1032     if (__kmp_generate_warnings == kmp_warnings_off) {
1033       __kmp_str_free(&err_code.str);
1034     }
1035   }
1036 #endif
1037 
1038   KMP_MB(); /* Flush all pending memory write invalidates.  */
1039 
1040   KA_TRACE(10, ("__kmp_create_monitor: monitor created %#.8lx\n",
1041                 th->th.th_info.ds.ds_thread));
1042 
1043 } // __kmp_create_monitor
1044 #endif // KMP_USE_MONITOR
1045 
1046 void __kmp_exit_thread(int exit_status) {
1047   pthread_exit((void *)(intptr_t)exit_status);
1048 } // __kmp_exit_thread
1049 
1050 #if KMP_USE_MONITOR
1051 void __kmp_resume_monitor();
1052 
1053 void __kmp_reap_monitor(kmp_info_t *th) {
1054   int status;
1055   void *exit_val;
1056 
1057   KA_TRACE(10, ("__kmp_reap_monitor: try to reap monitor thread with handle"
1058                 " %#.8lx\n",
1059                 th->th.th_info.ds.ds_thread));
1060 
1061   // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
1062   // If both tid and gtid are 0, it means the monitor did not ever start.
1063   // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
1064   KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid);
1065   if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) {
1066     KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n"));
1067     return;
1068   }
1069 
1070   KMP_MB(); /* Flush all pending memory write invalidates.  */
1071 
  /* First, check to see whether the monitor thread exists to wake it up. This
     is to avoid a performance problem when the monitor sleeps during a
     blocktime-sized interval */
1075 
1076   status = pthread_kill(th->th.th_info.ds.ds_thread, 0);
1077   if (status != ESRCH) {
1078     __kmp_resume_monitor(); // Wake up the monitor thread
1079   }
1080   KA_TRACE(10, ("__kmp_reap_monitor: try to join with monitor\n"));
1081   status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
1082   if (exit_val != th) {
1083     __kmp_fatal(KMP_MSG(ReapMonitorError), KMP_ERR(status), __kmp_msg_null);
1084   }
1085 
1086   th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
1087   th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
1088 
1089   KA_TRACE(10, ("__kmp_reap_monitor: done reaping monitor thread with handle"
1090                 " %#.8lx\n",
1091                 th->th.th_info.ds.ds_thread));
1092 
1093   KMP_MB(); /* Flush all pending memory write invalidates.  */
1094 }
1095 #endif // KMP_USE_MONITOR
1096 
1097 void __kmp_reap_worker(kmp_info_t *th) {
1098   int status;
1099   void *exit_val;
1100 
1101   KMP_MB(); /* Flush all pending memory write invalidates.  */
1102 
1103   KA_TRACE(
1104       10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid));
1105 
1106   status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
1107 #ifdef KMP_DEBUG
1108   /* Don't expose these to the user until we understand when they trigger */
1109   if (status != 0) {
1110     __kmp_fatal(KMP_MSG(ReapWorkerError), KMP_ERR(status), __kmp_msg_null);
1111   }
1112   if (exit_val != th) {
1113     KA_TRACE(10, ("__kmp_reap_worker: worker T#%d did not reap properly, "
1114                   "exit_val = %p\n",
1115                   th->th.th_info.ds.ds_gtid, exit_val));
1116   }
1117 #endif /* KMP_DEBUG */
1118 
1119   KA_TRACE(10, ("__kmp_reap_worker: done reaping T#%d\n",
1120                 th->th.th_info.ds.ds_gtid));
1121 
1122   KMP_MB(); /* Flush all pending memory write invalidates.  */
1123 }
1124 
1125 #if KMP_HANDLE_SIGNALS
1126 
1127 static void __kmp_null_handler(int signo) {
  //  Do nothing; used to implement SIG_IGN-type actions.
1129 } // __kmp_null_handler
1130 
1131 static void __kmp_team_handler(int signo) {
1132   if (__kmp_global.g.g_abort == 0) {
1133 /* Stage 1 signal handler, let's shut down all of the threads */
1134 #ifdef KMP_DEBUG
1135     __kmp_debug_printf("__kmp_team_handler: caught signal = %d\n", signo);
1136 #endif
1137     switch (signo) {
1138     case SIGHUP:
1139     case SIGINT:
1140     case SIGQUIT:
1141     case SIGILL:
1142     case SIGABRT:
1143     case SIGFPE:
1144     case SIGBUS:
1145     case SIGSEGV:
1146 #ifdef SIGSYS
1147     case SIGSYS:
1148 #endif
1149     case SIGTERM:
1150       if (__kmp_debug_buf) {
1151         __kmp_dump_debug_buffer();
1152       }
1153       __kmp_unregister_library(); // cleanup shared memory
1154       KMP_MB(); // Flush all pending memory write invalidates.
1155       TCW_4(__kmp_global.g.g_abort, signo);
1156       KMP_MB(); // Flush all pending memory write invalidates.
1157       TCW_4(__kmp_global.g.g_done, TRUE);
1158       KMP_MB(); // Flush all pending memory write invalidates.
1159       break;
1160     default:
1161 #ifdef KMP_DEBUG
1162       __kmp_debug_printf("__kmp_team_handler: unknown signal type");
1163 #endif
1164       break;
1165     }
1166   }
1167 } // __kmp_team_handler
1168 
1169 static void __kmp_sigaction(int signum, const struct sigaction *act,
1170                             struct sigaction *oldact) {
1171   int rc = sigaction(signum, act, oldact);
1172   KMP_CHECK_SYSFAIL_ERRNO("sigaction", rc);
1173 }
1174 
1175 static void __kmp_install_one_handler(int sig, sig_func_t handler_func,
1176                                       int parallel_init) {
1177   KMP_MB(); // Flush all pending memory write invalidates.
1178   KB_TRACE(60,
1179            ("__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init));
1180   if (parallel_init) {
1181     struct sigaction new_action;
1182     struct sigaction old_action;
1183     new_action.sa_handler = handler_func;
1184     new_action.sa_flags = 0;
1185     sigfillset(&new_action.sa_mask);
1186     __kmp_sigaction(sig, &new_action, &old_action);
1187     if (old_action.sa_handler == __kmp_sighldrs[sig].sa_handler) {
1188       sigaddset(&__kmp_sigset, sig);
1189     } else {
1190       // Restore/keep user's handler if one previously installed.
1191       __kmp_sigaction(sig, &old_action, NULL);
1192     }
1193   } else {
1194     // Save initial/system signal handlers to see if user handlers installed.
1195     __kmp_sigaction(sig, NULL, &__kmp_sighldrs[sig]);
1196   }
1197   KMP_MB(); // Flush all pending memory write invalidates.
1198 } // __kmp_install_one_handler
1199 
1200 static void __kmp_remove_one_handler(int sig) {
1201   KB_TRACE(60, ("__kmp_remove_one_handler( %d )\n", sig));
1202   if (sigismember(&__kmp_sigset, sig)) {
1203     struct sigaction old;
1204     KMP_MB(); // Flush all pending memory write invalidates.
1205     __kmp_sigaction(sig, &__kmp_sighldrs[sig], &old);
1206     if ((old.sa_handler != __kmp_team_handler) &&
1207         (old.sa_handler != __kmp_null_handler)) {
      // Restore the user's signal handler.
1209       KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, "
1210                     "restoring: sig=%d\n",
1211                     sig));
1212       __kmp_sigaction(sig, &old, NULL);
1213     }
1214     sigdelset(&__kmp_sigset, sig);
1215     KMP_MB(); // Flush all pending memory write invalidates.
1216   }
1217 } // __kmp_remove_one_handler
1218 
1219 void __kmp_install_signals(int parallel_init) {
1220   KB_TRACE(10, ("__kmp_install_signals( %d )\n", parallel_init));
1221   if (__kmp_handle_signals || !parallel_init) {
    // If !parallel_init, we do not install handlers, just save the original
    // handlers. Let us do it even if __kmp_handle_signals is 0.
1224     sigemptyset(&__kmp_sigset);
1225     __kmp_install_one_handler(SIGHUP, __kmp_team_handler, parallel_init);
1226     __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init);
1227     __kmp_install_one_handler(SIGQUIT, __kmp_team_handler, parallel_init);
1228     __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init);
1229     __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init);
1230     __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init);
1231     __kmp_install_one_handler(SIGBUS, __kmp_team_handler, parallel_init);
1232     __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init);
1233 #ifdef SIGSYS
1234     __kmp_install_one_handler(SIGSYS, __kmp_team_handler, parallel_init);
1235 #endif // SIGSYS
1236     __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init);
1237 #ifdef SIGPIPE
1238     __kmp_install_one_handler(SIGPIPE, __kmp_team_handler, parallel_init);
1239 #endif // SIGPIPE
1240   }
1241 } // __kmp_install_signals
1242 
1243 void __kmp_remove_signals(void) {
1244   int sig;
1245   KB_TRACE(10, ("__kmp_remove_signals()\n"));
1246   for (sig = 1; sig < NSIG; ++sig) {
1247     __kmp_remove_one_handler(sig);
1248   }
1249 } // __kmp_remove_signals
1250 
1251 #endif // KMP_HANDLE_SIGNALS
1252 
1253 void __kmp_enable(int new_state) {
1254 #ifdef KMP_CANCEL_THREADS
1255   int status, old_state;
1256   status = pthread_setcancelstate(new_state, &old_state);
1257   KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
1258   KMP_DEBUG_ASSERT(old_state == PTHREAD_CANCEL_DISABLE);
1259 #endif
1260 }
1261 
1262 void __kmp_disable(int *old_state) {
1263 #ifdef KMP_CANCEL_THREADS
1264   int status;
1265   status = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, old_state);
1266   KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
1267 #endif
1268 }
1269 
1270 static void __kmp_atfork_prepare(void) {
1271   __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
1272   __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1273 }
1274 
1275 static void __kmp_atfork_parent(void) {
1276   __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1277   __kmp_release_bootstrap_lock(&__kmp_initz_lock);
1278 }
1279 
1280 /* Reset the library so execution in the child starts "all over again" with
1281    clean data structures in initial states.  Don't worry about freeing memory
1282    allocated by parent, just abandon it to be safe. */
1283 static void __kmp_atfork_child(void) {
1284   __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1285   __kmp_release_bootstrap_lock(&__kmp_initz_lock);
1286   /* TODO make sure this is done right for nested/sibling */
  // ATT: Are there memory leaks here? TODO: check it and fix.
1288   /* KMP_ASSERT( 0 ); */
1289 
1290   ++__kmp_fork_count;
1291 
1292 #if KMP_AFFINITY_SUPPORTED
1293 #if KMP_OS_LINUX || KMP_OS_FREEBSD
1294   // reset the affinity in the child to the initial thread
1295   // affinity in the parent
1296   kmp_set_thread_affinity_mask_initial();
1297 #endif
  // Set default not to bind threads tightly in the child (we're expecting
  // over-subscription after the fork and this can improve things for
  // scripting languages that use OpenMP inside process-parallel code).
1301   __kmp_affinity_type = affinity_none;
1302   if (__kmp_nested_proc_bind.bind_types != NULL) {
1303     __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
1304   }
1305 #endif // KMP_AFFINITY_SUPPORTED
1306 
1307 #if KMP_USE_MONITOR
1308   __kmp_init_monitor = 0;
1309 #endif
1310   __kmp_init_parallel = FALSE;
1311   __kmp_init_middle = FALSE;
1312   __kmp_init_serial = FALSE;
1313   TCW_4(__kmp_init_gtid, FALSE);
1314   __kmp_init_common = FALSE;
1315 
1316   TCW_4(__kmp_init_user_locks, FALSE);
1317 #if !KMP_USE_DYNAMIC_LOCK
1318   __kmp_user_lock_table.used = 1;
1319   __kmp_user_lock_table.allocated = 0;
1320   __kmp_user_lock_table.table = NULL;
1321   __kmp_lock_blocks = NULL;
1322 #endif
1323 
1324   __kmp_all_nth = 0;
1325   TCW_4(__kmp_nth, 0);
1326 
1327   __kmp_thread_pool = NULL;
1328   __kmp_thread_pool_insert_pt = NULL;
1329   __kmp_team_pool = NULL;
1330 
1331   /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate
1332      here so threadprivate doesn't use stale data */
1333   KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
1334                 __kmp_threadpriv_cache_list));
1335 
1336   while (__kmp_threadpriv_cache_list != NULL) {
1337 
1338     if (*__kmp_threadpriv_cache_list->addr != NULL) {
1339       KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
1340                     &(*__kmp_threadpriv_cache_list->addr)));
1341 
1342       *__kmp_threadpriv_cache_list->addr = NULL;
1343     }
1344     __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
1345   }
1346 
1347   __kmp_init_runtime = FALSE;
1348 
1349   /* reset statically initialized locks */
1350   __kmp_init_bootstrap_lock(&__kmp_initz_lock);
1351   __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
1352   __kmp_init_bootstrap_lock(&__kmp_console_lock);
1353   __kmp_init_bootstrap_lock(&__kmp_task_team_lock);
1354 
1355 #if USE_ITT_BUILD
1356   __kmp_itt_reset(); // reset ITT's global state
1357 #endif /* USE_ITT_BUILD */
1358 
1359   __kmp_serial_initialize();
1360 
  /* This is necessary to make sure no stale data is left around */
  /* AC: customers complain that we use unsafe routines in the atfork
     handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
     in dynamic_link when checking for the presence of the shared tbbmalloc
     library. The suggestion is to make the library initialization lazier,
     similar to what is done for __kmpc_begin(). */
1367   // TODO: synchronize all static initializations with regular library
1368   //       startup; look at kmp_global.cpp and etc.
1369   //__kmp_internal_begin ();
1370 }
1371 
1372 void __kmp_register_atfork(void) {
1373   if (__kmp_need_register_atfork) {
1374     int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
1375                                 __kmp_atfork_child);
1376     KMP_CHECK_SYSFAIL("pthread_atfork", status);
1377     __kmp_need_register_atfork = FALSE;
1378   }
1379 }
1380 
1381 void __kmp_suspend_initialize(void) {
1382   int status;
1383   status = pthread_mutexattr_init(&__kmp_suspend_mutex_attr);
1384   KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
1385   status = pthread_condattr_init(&__kmp_suspend_cond_attr);
1386   KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
1387 }
1388 
1389 void __kmp_suspend_initialize_thread(kmp_info_t *th) {
1390   ANNOTATE_HAPPENS_AFTER(&th->th.th_suspend_init_count);
1391   int old_value = KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count);
1392   int new_value = __kmp_fork_count + 1;
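  // th_suspend_init_count acts as a small per-fork-epoch state machine:
  //   == __kmp_fork_count + 1 : already initialized for this process epoch
  //   == -1                   : another thread is initializing right now
  //   anything else           : not yet initialized; try to claim it below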
1393   // Return if already initialized
1394   if (old_value == new_value)
1395     return;
1396   // Wait, then return if being initialized
1397   if (old_value == -1 ||
1398       !__kmp_atomic_compare_store(&th->th.th_suspend_init_count, old_value,
1399                                   -1)) {
1400     while (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) != new_value) {
1401       KMP_CPU_PAUSE();
1402     }
1403   } else {
1404     // Claim to be the initializer and do initializations
1405     int status;
1406     status = pthread_cond_init(&th->th.th_suspend_cv.c_cond,
1407                                &__kmp_suspend_cond_attr);
1408     KMP_CHECK_SYSFAIL("pthread_cond_init", status);
1409     status = pthread_mutex_init(&th->th.th_suspend_mx.m_mutex,
1410                                 &__kmp_suspend_mutex_attr);
1411     KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
1412     KMP_ATOMIC_ST_REL(&th->th.th_suspend_init_count, new_value);
1413     ANNOTATE_HAPPENS_BEFORE(&th->th.th_suspend_init_count);
1414   }
1415 }
1416 
1417 void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {
1418   if (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) > __kmp_fork_count) {
    /* this means we have initialized the suspension pthread objects for this
       thread in this instance of the process */
1421     int status;
1422 
1423     status = pthread_cond_destroy(&th->th.th_suspend_cv.c_cond);
1424     if (status != 0 && status != EBUSY) {
1425       KMP_SYSFAIL("pthread_cond_destroy", status);
1426     }
1427     status = pthread_mutex_destroy(&th->th.th_suspend_mx.m_mutex);
1428     if (status != 0 && status != EBUSY) {
1429       KMP_SYSFAIL("pthread_mutex_destroy", status);
1430     }
1431     --th->th.th_suspend_init_count;
1432     KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count) ==
1433                      __kmp_fork_count);
1434   }
1435 }
1436 
1437 // return true if lock obtained, false otherwise
1438 int __kmp_try_suspend_mx(kmp_info_t *th) {
1439   return (pthread_mutex_trylock(&th->th.th_suspend_mx.m_mutex) == 0);
1440 }
1441 
1442 void __kmp_lock_suspend_mx(kmp_info_t *th) {
1443   int status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
1444   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1445 }
1446 
1447 void __kmp_unlock_suspend_mx(kmp_info_t *th) {
1448   int status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1449   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1450 }
1451 
1452 /* This routine puts the calling thread to sleep after setting the
1453    sleep bit for the indicated flag variable to true. */
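/* Protocol sketch (matches the code below): set the sleep bit while holding
   th_suspend_mx, re-check the flag to close the race with a concurrent
   release, then wait on th_suspend_cv in a loop until the releasing thread
   clears the sleep bit and signals the condition variable. */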
1454 template <class C>
1455 static inline void __kmp_suspend_template(int th_gtid, C *flag) {
1456   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend);
1457   kmp_info_t *th = __kmp_threads[th_gtid];
1458   int status;
1459   typename C::flag_t old_spin;
1460 
1461   KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid,
1462                 flag->get()));
1463 
1464   __kmp_suspend_initialize_thread(th);
1465 
1466   __kmp_lock_suspend_mx(th);
1467 
1468   KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
1469                 th_gtid, flag->get()));
1470 
1471   /* TODO: shouldn't this use release semantics to ensure that
1472      __kmp_suspend_initialize_thread gets called first? */
1473   old_spin = flag->set_sleeping();
1474   if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1475       __kmp_pause_status != kmp_soft_paused) {
1476     flag->unset_sleeping();
1477     __kmp_unlock_suspend_mx(th);
1478     return;
1479   }
1480   KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
1481                " was %x\n",
1482                th_gtid, flag->get(), flag->load(), old_spin));
1483 
1484   if (flag->done_check_val(old_spin)) {
1485     old_spin = flag->unset_sleeping();
1486     KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
1487                  "for spin(%p)\n",
1488                  th_gtid, flag->get()));
1489   } else {
    /* Wait in a loop, as the documentation states that pthread_cond_wait may
       (with low probability) return even when the condition variable has
       not been signaled or broadcast. */
1493     int deactivated = FALSE;
1494     TCW_PTR(th->th.th_sleep_loc, (void *)flag);
1495 
1496     while (flag->is_sleeping()) {
1497 #ifdef DEBUG_SUSPEND
1498       char buffer[128];
1499       __kmp_suspend_count++;
1500       __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1501       __kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid,
1502                    buffer);
1503 #endif
1504       // Mark the thread as no longer active (only in the first iteration of the
1505       // loop).
1506       if (!deactivated) {
1507         th->th.th_active = FALSE;
1508         if (th->th.th_active_in_pool) {
1509           th->th.th_active_in_pool = FALSE;
1510           KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
1511           KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
1512         }
1513         deactivated = TRUE;
1514       }
1515 
1516 #if USE_SUSPEND_TIMEOUT
1517       struct timespec now;
1518       struct timeval tval;
1519       int msecs;
1520 
1521       status = gettimeofday(&tval, NULL);
1522       KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1523       TIMEVAL_TO_TIMESPEC(&tval, &now);
1524 
      msecs = (4 * __kmp_dflt_blocktime) + 200;
      now.tv_sec += msecs / 1000;
      now.tv_nsec += (msecs % 1000) * 1000 * 1000; // msec -> nsec
      if (now.tv_nsec >= KMP_NSEC_PER_SEC) { // normalize the timespec
        now.tv_sec += 1;
        now.tv_nsec -= KMP_NSEC_PER_SEC;
      }
1528 
1529       KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
1530                     "pthread_cond_timedwait\n",
1531                     th_gtid));
1532       status = pthread_cond_timedwait(&th->th.th_suspend_cv.c_cond,
1533                                       &th->th.th_suspend_mx.m_mutex, &now);
1534 #else
1535       KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform"
1536                     " pthread_cond_wait\n",
1537                     th_gtid));
1538       status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
1539                                  &th->th.th_suspend_mx.m_mutex);
1540 #endif // USE_SUSPEND_TIMEOUT
1541 
1542       if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
1543         KMP_SYSFAIL("pthread_cond_wait", status);
1544       }
1545 #ifdef KMP_DEBUG
1546       if (status == ETIMEDOUT) {
1547         if (flag->is_sleeping()) {
1548           KF_TRACE(100,
1549                    ("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid));
1550         } else {
1551           KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
1552                        "not set!\n",
1553                        th_gtid));
1554         }
1555       } else if (flag->is_sleeping()) {
1556         KF_TRACE(100,
1557                  ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
1558       }
1559 #endif
1560     } // while
1561 
    // Mark the thread as active again (if it was previously marked as inactive)
1563     if (deactivated) {
1564       th->th.th_active = TRUE;
1565       if (TCR_4(th->th.th_in_pool)) {
1566         KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
1567         th->th.th_active_in_pool = TRUE;
1568       }
1569     }
1570   }
1571 #ifdef DEBUG_SUSPEND
1572   {
1573     char buffer[128];
1574     __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1575     __kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid,
1576                  buffer);
1577   }
1578 #endif
1579 
1580   __kmp_unlock_suspend_mx(th);
1581   KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
1582 }
1583 
1584 template <bool C, bool S>
1585 void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {
1586   __kmp_suspend_template(th_gtid, flag);
1587 }
1588 template <bool C, bool S>
1589 void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
1590   __kmp_suspend_template(th_gtid, flag);
1591 }
1592 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
1593   __kmp_suspend_template(th_gtid, flag);
1594 }
1595 
1596 template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
1597 template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
1598 template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
1599 
1600 /* This routine signals the thread specified by target_gtid to wake up
1601    after setting the sleep bit indicated by the flag argument to FALSE.
1602    The target thread must already have called __kmp_suspend_template() */
1603 template <class C>
1604 static inline void __kmp_resume_template(int target_gtid, C *flag) {
1605   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1606   kmp_info_t *th = __kmp_threads[target_gtid];
1607   int status;
1608 
1609 #ifdef KMP_DEBUG
1610   int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1611 #endif
1612 
1613   KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
1614                 gtid, target_gtid));
1615   KMP_DEBUG_ASSERT(gtid != target_gtid);
1616 
1617   __kmp_suspend_initialize_thread(th);
1618 
1619   __kmp_lock_suspend_mx(th);
1620 
1621   if (!flag) { // coming from __kmp_null_resume_wrapper
1622     flag = (C *)CCAST(void *, th->th.th_sleep_loc);
1623   }
1624 
1625   // First, check if the flag is null or its type has changed. If so, someone
1626   // else woke it up.
  if (!flag || flag->get_type() != flag->get_ptr_type()) {
    // get_ptr_type simply shows what type the flag was cast to
1629     KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1630                  "awake: flag(%p)\n",
1631                  gtid, target_gtid, NULL));
1632     __kmp_unlock_suspend_mx(th);
1633     return;
  } else { // if multiple threads are sleeping, the flag should refer to a
    // specific thread here
1636     typename C::flag_t old_spin = flag->unset_sleeping();
1637     if (!flag->is_sleeping_val(old_spin)) {
1638       KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1639                    "awake: flag(%p): "
1640                    "%u => %u\n",
1641                    gtid, target_gtid, flag->get(), old_spin, flag->load()));
1642       __kmp_unlock_suspend_mx(th);
1643       return;
1644     }
1645     KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
1646                  "sleep bit for flag's loc(%p): "
1647                  "%u => %u\n",
1648                  gtid, target_gtid, flag->get(), old_spin, flag->load()));
1649   }
1650   TCW_PTR(th->th.th_sleep_loc, NULL);
1651 
1652 #ifdef DEBUG_SUSPEND
1653   {
1654     char buffer[128];
1655     __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1656     __kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid,
1657                  target_gtid, buffer);
1658   }
1659 #endif
1660   status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond);
1661   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1662   __kmp_unlock_suspend_mx(th);
1663   KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
1664                 " for T#%d\n",
1665                 gtid, target_gtid));
1666 }
1667 
1668 template <bool C, bool S>
1669 void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {
1670   __kmp_resume_template(target_gtid, flag);
1671 }
1672 template <bool C, bool S>
1673 void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
1674   __kmp_resume_template(target_gtid, flag);
1675 }
1676 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
1677   __kmp_resume_template(target_gtid, flag);
1678 }
1679 
1680 template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
1681 template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
1682 
1683 #if KMP_USE_MONITOR
1684 void __kmp_resume_monitor() {
1685   KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1686   int status;
1687 #ifdef KMP_DEBUG
1688   int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1689   KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,
1690                 KMP_GTID_MONITOR));
1691   KMP_DEBUG_ASSERT(gtid != KMP_GTID_MONITOR);
1692 #endif
1693   status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
1694   KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1695 #ifdef DEBUG_SUSPEND
1696   {
1697     char buffer[128];
1698     __kmp_print_cond(buffer, &__kmp_wait_cv.c_cond);
1699     __kmp_printf("__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid,
1700                  KMP_GTID_MONITOR, buffer);
1701   }
1702 #endif
1703   status = pthread_cond_signal(&__kmp_wait_cv.c_cond);
1704   KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1705   status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
1706   KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1707   KF_TRACE(30, ("__kmp_resume_monitor: T#%d exiting after signaling wake up"
1708                 " for T#%d\n",
1709                 gtid, KMP_GTID_MONITOR));
1710 }
1711 #endif // KMP_USE_MONITOR
1712 
1713 void __kmp_yield() { sched_yield(); }
1714 
1715 void __kmp_gtid_set_specific(int gtid) {
1716   if (__kmp_init_gtid) {
1717     int status;
1718     status = pthread_setspecific(__kmp_gtid_threadprivate_key,
1719                                  (void *)(intptr_t)(gtid + 1));
1720     KMP_CHECK_SYSFAIL("pthread_setspecific", status);
1721   } else {
1722     KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
1723   }
1724 }
1725 
1726 int __kmp_gtid_get_specific() {
1727   int gtid;
1728   if (!__kmp_init_gtid) {
1729     KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning "
1730                   "KMP_GTID_SHUTDOWN\n"));
1731     return KMP_GTID_SHUTDOWN;
1732   }
1733   gtid = (int)(size_t)pthread_getspecific(__kmp_gtid_threadprivate_key);
1734   if (gtid == 0) {
1735     gtid = KMP_GTID_DNE;
1736   } else {
1737     gtid--;
1738   }
1739   KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
1740                 __kmp_gtid_threadprivate_key, gtid));
1741   return gtid;
1742 }
1743 
double __kmp_read_cpu_time(void) {
  struct tms buffer;

  times(&buffer);

  // times() reports CPU time in clock ticks of sysconf(_SC_CLK_TCK);
  // CLOCKS_PER_SEC describes the scale of clock(), not times().
  return (double)(buffer.tms_utime + buffer.tms_cutime) /
         (double)sysconf(_SC_CLK_TCK);
}
1753 
1754 int __kmp_read_system_info(struct kmp_sys_info *info) {
1755   int status;
1756   struct rusage r_usage;
1757 
1758   memset(info, 0, sizeof(*info));
1759 
1760   status = getrusage(RUSAGE_SELF, &r_usage);
1761   KMP_CHECK_SYSFAIL_ERRNO("getrusage", status);
1762 
1763   // The maximum resident set size utilized (in kilobytes)
1764   info->maxrss = r_usage.ru_maxrss;
1765   // The number of page faults serviced without any I/O
1766   info->minflt = r_usage.ru_minflt;
1767   // The number of page faults serviced that required I/O
1768   info->majflt = r_usage.ru_majflt;
1769   // The number of times a process was "swapped" out of memory
1770   info->nswap = r_usage.ru_nswap;
1771   // The number of times the file system had to perform input
1772   info->inblock = r_usage.ru_inblock;
1773   // The number of times the file system had to perform output
1774   info->oublock = r_usage.ru_oublock;
1775   // The number of times a context switch was voluntarily
1776   info->nvcsw = r_usage.ru_nvcsw;
1777   // The number of times a context switch was forced
1778   info->nivcsw = r_usage.ru_nivcsw;
1779 
1780   return (status != 0);
1781 }
1782 
1783 void __kmp_read_system_time(double *delta) {
1784   double t_ns;
1785   struct timeval tval;
1786   struct timespec stop;
1787   int status;
1788 
1789   status = gettimeofday(&tval, NULL);
1790   KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1791   TIMEVAL_TO_TIMESPEC(&tval, &stop);
1792   t_ns = (double)(TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start));
1793   *delta = (t_ns * 1e-9);
1794 }
1795 
1796 void __kmp_clear_system_time(void) {
1797   struct timeval tval;
1798   int status;
1799   status = gettimeofday(&tval, NULL);
1800   KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1801   TIMEVAL_TO_TIMESPEC(&tval, &__kmp_sys_timer_data.start);
1802 }
1803 
1804 static int __kmp_get_xproc(void) {
1805 
1806   int r = 0;
1807 
1808 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
1809         KMP_OS_OPENBSD || KMP_OS_HURD
1810 
1811   __kmp_type_convert(sysconf(_SC_NPROCESSORS_ONLN), &(r));
1812 
1813 #elif KMP_OS_DARWIN
1814 
1815   // Bug C77011 High "OpenMP Threads and number of active cores".
1816 
1817   // Find the number of available CPUs.
1818   kern_return_t rc;
1819   host_basic_info_data_t info;
1820   mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT;
1821   rc = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&info, &num);
1822   if (rc == 0 && num == HOST_BASIC_INFO_COUNT) {
1823     // Cannot use KA_TRACE() here because this code works before trace support
1824     // is initialized.
1825     r = info.avail_cpus;
1826   } else {
1827     KMP_WARNING(CantGetNumAvailCPU);
1828     KMP_INFORM(AssumedNumCPU);
1829   }
1830 
1831 #else
1832 
1833 #error "Unknown or unsupported OS."
1834 
1835 #endif
1836 
1837   return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */
1838 
1839 } // __kmp_get_xproc
1840 
int __kmp_read_from_file(char const *path, char const *format, ...) {
  int result;
  va_list args;

  va_start(args, format);
  FILE *f = fopen(path, "rb");
  if (f == NULL) {
    va_end(args);
    return 0;
  }
  result = vfscanf(f, format, args);
  fclose(f);
  va_end(args);

  return result;
}
1854 
1855 void __kmp_runtime_initialize(void) {
1856   int status;
1857   pthread_mutexattr_t mutex_attr;
1858   pthread_condattr_t cond_attr;
1859 
1860   if (__kmp_init_runtime) {
1861     return;
1862   }
1863 
1864 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1865   if (!__kmp_cpuinfo.initialized) {
1866     __kmp_query_cpuid(&__kmp_cpuinfo);
1867   }
1868 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1869 
1870   __kmp_xproc = __kmp_get_xproc();
1871 
#if !KMP_32_BIT_ARCH
1873   struct rlimit rlim;
1874   // read stack size of calling thread, save it as default for worker threads;
1875   // this should be done before reading environment variables
1876   status = getrlimit(RLIMIT_STACK, &rlim);
1877   if (status == 0) { // success?
1878     __kmp_stksize = rlim.rlim_cur;
1879     __kmp_check_stksize(&__kmp_stksize); // check value and adjust if needed
1880   }
#endif /* !KMP_32_BIT_ARCH */
1882 
1883   if (sysconf(_SC_THREADS)) {
1884 
1885     /* Query the maximum number of threads */
1886     __kmp_type_convert(sysconf(_SC_THREAD_THREADS_MAX), &(__kmp_sys_max_nth));
1887     if (__kmp_sys_max_nth == -1) {
1888       /* Unlimited threads for NPTL */
1889       __kmp_sys_max_nth = INT_MAX;
1890     } else if (__kmp_sys_max_nth <= 1) {
1891       /* Can't tell, just use PTHREAD_THREADS_MAX */
1892       __kmp_sys_max_nth = KMP_MAX_NTH;
1893     }
1894 
1895     /* Query the minimum stack size */
1896     __kmp_sys_min_stksize = sysconf(_SC_THREAD_STACK_MIN);
1897     if (__kmp_sys_min_stksize <= 1) {
1898       __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
1899     }
1900   }
1901 
1902   /* Set up minimum number of threads to switch to TLS gtid */
1903   __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
1904 
1905   status = pthread_key_create(&__kmp_gtid_threadprivate_key,
1906                               __kmp_internal_end_dest);
1907   KMP_CHECK_SYSFAIL("pthread_key_create", status);
1908   status = pthread_mutexattr_init(&mutex_attr);
1909   KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
1910   status = pthread_mutex_init(&__kmp_wait_mx.m_mutex, &mutex_attr);
1911   KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
1912   status = pthread_condattr_init(&cond_attr);
1913   KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
1914   status = pthread_cond_init(&__kmp_wait_cv.c_cond, &cond_attr);
1915   KMP_CHECK_SYSFAIL("pthread_cond_init", status);
1916 #if USE_ITT_BUILD
1917   __kmp_itt_initialize();
1918 #endif /* USE_ITT_BUILD */
1919 
1920   __kmp_init_runtime = TRUE;
1921 }
1922 
1923 void __kmp_runtime_destroy(void) {
1924   int status;
1925 
1926   if (!__kmp_init_runtime) {
1927     return; // Nothing to do.
1928   }
1929 
1930 #if USE_ITT_BUILD
1931   __kmp_itt_destroy();
1932 #endif /* USE_ITT_BUILD */
1933 
1934   status = pthread_key_delete(__kmp_gtid_threadprivate_key);
1935   KMP_CHECK_SYSFAIL("pthread_key_delete", status);
1936 
1937   status = pthread_mutex_destroy(&__kmp_wait_mx.m_mutex);
1938   if (status != 0 && status != EBUSY) {
1939     KMP_SYSFAIL("pthread_mutex_destroy", status);
1940   }
1941   status = pthread_cond_destroy(&__kmp_wait_cv.c_cond);
1942   if (status != 0 && status != EBUSY) {
1943     KMP_SYSFAIL("pthread_cond_destroy", status);
1944   }
1945 #if KMP_AFFINITY_SUPPORTED
1946   __kmp_affinity_uninitialize();
1947 #endif
1948 
1949   __kmp_init_runtime = FALSE;
1950 }
1951 
1952 /* Put the thread to sleep for a time period */
1953 /* NOTE: not currently used anywhere */
1954 void __kmp_thread_sleep(int millis) { sleep((millis + 500) / 1000); }
1955 
1956 /* Calculate the elapsed wall clock time for the user */
1957 void __kmp_elapsed(double *t) {
1958   int status;
1959 #ifdef FIX_SGI_CLOCK
1960   struct timespec ts;
1961 
1962   status = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
1963   KMP_CHECK_SYSFAIL_ERRNO("clock_gettime", status);
1964   *t =
1965       (double)ts.tv_nsec * (1.0 / (double)KMP_NSEC_PER_SEC) + (double)ts.tv_sec;
1966 #else
1967   struct timeval tv;
1968 
1969   status = gettimeofday(&tv, NULL);
1970   KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1971   *t =
1972       (double)tv.tv_usec * (1.0 / (double)KMP_USEC_PER_SEC) + (double)tv.tv_sec;
1973 #endif
1974 }
1975 
1976 /* Calculate the elapsed wall clock tick for the user */
1977 void __kmp_elapsed_tick(double *t) { *t = 1 / (double)CLOCKS_PER_SEC; }
1978 
1979 /* Return the current time stamp in nsec */
1980 kmp_uint64 __kmp_now_nsec() {
1981   struct timeval t;
1982   gettimeofday(&t, NULL);
1983   kmp_uint64 nsec = (kmp_uint64)KMP_NSEC_PER_SEC * (kmp_uint64)t.tv_sec +
1984                     (kmp_uint64)1000 * (kmp_uint64)t.tv_usec;
1985   return nsec;
1986 }
1987 
1988 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1989 /* Measure clock ticks per millisecond */
1990 void __kmp_initialize_system_tick() {
1991   kmp_uint64 now, nsec2, diff;
1992   kmp_uint64 delay = 100000; // 50~100 usec on most machines.
1993   kmp_uint64 nsec = __kmp_now_nsec();
1994   kmp_uint64 goal = __kmp_hardware_timestamp() + delay;
1995   while ((now = __kmp_hardware_timestamp()) < goal)
1996     ;
1997   nsec2 = __kmp_now_nsec();
1998   diff = nsec2 - nsec;
1999   if (diff > 0) {
2000     kmp_uint64 tpms = ((kmp_uint64)1e6 * (delay + (now - goal)) / diff);
2001     if (tpms > 0)
2002       __kmp_ticks_per_msec = tpms;
2003   }
2004 }
2005 #endif
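
/* Worked example of the calibration above (illustrative numbers): if the
   busy-wait retired delay + (now - goal) = 250,000 timestamp ticks while
   diff = nsec2 - nsec = 100,000 ns of wall clock elapsed, then
   ticks/msec = 1e6 * 250,000 / 100,000 = 2,500,000, i.e. a 2.5 GHz
   timestamp counter. */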
2006 
2007 /* Determine whether the given address is mapped into the current address
2008    space. */
2009 
2010 int __kmp_is_address_mapped(void *addr) {
2011 
2012   int found = 0;
2013   int rc;
2014 
2015 #if KMP_OS_LINUX || KMP_OS_HURD
2016 
2017   /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the address
2018      ranges mapped into the address space. */
2019 
2020   char *name = __kmp_str_format("/proc/%d/maps", getpid());
2021   FILE *file = NULL;
2022 
2023   file = fopen(name, "r");
2024   KMP_ASSERT(file != NULL);
2025 
2026   for (;;) {
2027 
2028     void *beginning = NULL;
2029     void *ending = NULL;
2030     char perms[5];
2031 
2032     rc = fscanf(file, "%p-%p %4s %*[^\n]\n", &beginning, &ending, perms);
2033     if (rc == EOF) {
2034       break;
2035     }
2036     KMP_ASSERT(rc == 3 &&
2037                KMP_STRLEN(perms) == 4); // Make sure all fields are read.
2038 
2039     // Ending address is not included in the region, but beginning is.
2040     if ((addr >= beginning) && (addr < ending)) {
      perms[2] = 0; // 3rd and 4th characters do not matter.
2042       if (strcmp(perms, "rw") == 0) {
2043         // Memory we are looking for should be readable and writable.
2044         found = 1;
2045       }
2046       break;
2047     }
2048   }
2049 
2050   // Free resources.
2051   fclose(file);
2052   KMP_INTERNAL_FREE(name);
2053 #elif KMP_OS_FREEBSD
2054   char *buf;
2055   size_t lstsz;
2056   int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
  rc = sysctl(mib, 4, NULL, &lstsz, NULL, 0);
  if (rc < 0)
    return 0;
  // The sysctl above returns the required size of the vm entry list in
  // lstsz; pad it by a third in case entries are added before the next call.
  lstsz = lstsz * 4 / 3;
  buf = reinterpret_cast<char *>(kmpc_malloc(lstsz));
  rc = sysctl(mib, 4, buf, &lstsz, NULL, 0);
  if (rc < 0) {
    kmpc_free(buf);
    return 0;
  }

  char *lw = buf;
  char *up = buf + lstsz;

  while (lw < up) {
    struct kinfo_vmentry *cur = reinterpret_cast<struct kinfo_vmentry *>(lw);
    size_t cursz = cur->kve_structsize;
    if (cursz == 0)
      break;
    void *start = reinterpret_cast<void *>(cur->kve_start);
    void *end = reinterpret_cast<void *>(cur->kve_end);
    // Readable/writable addresses within the current map entry
    if ((addr >= start) && (addr < end)) {
      if ((cur->kve_protection & KVME_PROT_READ) != 0 &&
          (cur->kve_protection & KVME_PROT_WRITE) != 0) {
        found = 1;
        break;
      }
    }
    lw += cursz;
  }
2090   kmpc_free(buf);
2091 
2092 #elif KMP_OS_DARWIN
2093 
  /* On OS X*, the /proc pseudo-filesystem is not available. Try to read the
     memory using the vm interface instead. */
2096 
2097   int buffer;
2098   vm_size_t count;
2099   rc = vm_read_overwrite(
2100       mach_task_self(), // Task to read memory of.
2101       (vm_address_t)(addr), // Address to read from.
2102       1, // Number of bytes to be read.
2103       (vm_address_t)(&buffer), // Address of buffer to save read bytes in.
2104       &count // Address of var to save number of read bytes in.
2105       );
2106   if (rc == 0) {
2107     // Memory successfully read.
2108     found = 1;
2109   }
2110 
2111 #elif KMP_OS_NETBSD
2112 
2113   int mib[5];
2114   mib[0] = CTL_VM;
2115   mib[1] = VM_PROC;
2116   mib[2] = VM_PROC_MAP;
2117   mib[3] = getpid();
2118   mib[4] = sizeof(struct kinfo_vmentry);
2119 
2120   size_t size;
2121   rc = sysctl(mib, __arraycount(mib), NULL, &size, NULL, 0);
2122   KMP_ASSERT(!rc);
2123   KMP_ASSERT(size);
2124 
2125   size = size * 4 / 3;
2126   struct kinfo_vmentry *kiv = (struct kinfo_vmentry *)KMP_INTERNAL_MALLOC(size);
2127   KMP_ASSERT(kiv);
2128 
2129   rc = sysctl(mib, __arraycount(mib), kiv, &size, NULL, 0);
2130   KMP_ASSERT(!rc);
2131   KMP_ASSERT(size);
2132 
  // size is in bytes; iterate over the returned kinfo_vmentry records.
  for (size_t i = 0; i < size / sizeof(struct kinfo_vmentry); i++) {
    // The address is mapped if it falls inside [kve_start, kve_end).
    if ((uint64_t)addr >= kiv[i].kve_start &&
        (uint64_t)addr < kiv[i].kve_end) {
2136       found = 1;
2137       break;
2138     }
2139   }
2140   KMP_INTERNAL_FREE(kiv);
2141 #elif KMP_OS_OPENBSD
2142 
2143   int mib[3];
2144   mib[0] = CTL_KERN;
2145   mib[1] = KERN_PROC_VMMAP;
2146   mib[2] = getpid();
2147 
2148   size_t size;
2149   uint64_t end;
2150   rc = sysctl(mib, 3, NULL, &size, NULL, 0);
2151   KMP_ASSERT(!rc);
2152   KMP_ASSERT(size);
2153   end = size;
2154 
2155   struct kinfo_vmentry kiv = {.kve_start = 0};
2156 
2157   while ((rc = sysctl(mib, 3, &kiv, &size, NULL, 0)) == 0) {
2158     KMP_ASSERT(size);
2159     if (kiv.kve_end == end)
2160       break;
2161 
    // The address is mapped if it falls inside [kve_start, kve_end).
    if ((uint64_t)addr >= kiv.kve_start && (uint64_t)addr < kiv.kve_end) {
2163       found = 1;
2164       break;
2165     }
2166     kiv.kve_start += 1;
2167   }
2168 #elif KMP_OS_DRAGONFLY
2169 
2170   // FIXME(DragonFly): Implement this
2171   found = 1;
2172 
2173 #else
2174 
2175 #error "Unknown or unsupported OS"
2176 
2177 #endif
2178 
2179   return found;
2180 
2181 } // __kmp_is_address_mapped
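
/* Example (hypothetical): a caller can use this predicate to validate a
   pointer before dereferencing it:

     int on_stack;
     KMP_DEBUG_ASSERT(__kmp_is_address_mapped(&on_stack)); // stack is mapped
     // __kmp_is_address_mapped(NULL) is expected to be 0 on these OSes.
*/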
2182 
2183 #ifdef USE_LOAD_BALANCE
2184 
2185 #if KMP_OS_DARWIN || KMP_OS_NETBSD
2186 
// The function returns the rounded value of the system load average during a
// given time interval, which depends on the value of the
// __kmp_load_balance_interval variable (default is 60 sec; other values may
// be 300 sec or 900 sec).
// It returns -1 in case of error.
2192 int __kmp_get_load_balance(int max) {
2193   double averages[3];
2194   int ret_avg = 0;
2195 
2196   int res = getloadavg(averages, 3);
2197 
  // Check __kmp_load_balance_interval to determine which of the averages to
  // use. getloadavg() may return fewer samples than requested, that is, fewer
  // than 3.
2201   if (__kmp_load_balance_interval < 180 && (res >= 1)) {
2202     ret_avg = (int)averages[0]; // 1 min
2203   } else if ((__kmp_load_balance_interval >= 180 &&
2204               __kmp_load_balance_interval < 600) &&
2205              (res >= 2)) {
2206     ret_avg = (int)averages[1]; // 5 min
2207   } else if ((__kmp_load_balance_interval >= 600) && (res == 3)) {
2208     ret_avg = (int)averages[2]; // 15 min
2209   } else { // Error occurred
2210     return -1;
2211   }
2212 
2213   return ret_avg;
2214 }
2215 
2216 #else // Linux* OS
2217 
// The function returns the number of running (not sleeping) threads, or -1 in
// case of error. An error can be reported if the Linux* OS kernel is too old
// (no "/proc" support). Counting of running threads stops once max running
// threads are encountered.
2222 int __kmp_get_load_balance(int max) {
2223   static int permanent_error = 0;
2224   static int glb_running_threads = 0; // Saved count of the running threads for
2225   // the thread balance algorithm
2226   static double glb_call_time = 0; /* Thread balance algorithm call time */
2227 
2228   int running_threads = 0; // Number of running threads in the system.
2229 
2230   DIR *proc_dir = NULL; // Handle of "/proc/" directory.
2231   struct dirent *proc_entry = NULL;
2232 
2233   kmp_str_buf_t task_path; // "/proc/<pid>/task/<tid>/" path.
2234   DIR *task_dir = NULL; // Handle of "/proc/<pid>/task/<tid>/" directory.
2235   struct dirent *task_entry = NULL;
2236   int task_path_fixed_len;
2237 
2238   kmp_str_buf_t stat_path; // "/proc/<pid>/task/<tid>/stat" path.
2239   int stat_file = -1;
2240   int stat_path_fixed_len;
2241 
2242   int total_processes = 0; // Total number of processes in system.
2243   int total_threads = 0; // Total number of threads in system.
2244 
2245   double call_time = 0.0;
2246 
2247   __kmp_str_buf_init(&task_path);
2248   __kmp_str_buf_init(&stat_path);
2249 
2250   __kmp_elapsed(&call_time);
2251 
2252   if (glb_call_time &&
2253       (call_time - glb_call_time < __kmp_load_balance_interval)) {
2254     running_threads = glb_running_threads;
2255     goto finish;
2256   }
2257 
2258   glb_call_time = call_time;
2259 
2260   // Do not spend time on scanning "/proc/" if we have a permanent error.
2261   if (permanent_error) {
2262     running_threads = -1;
2263     goto finish;
2264   }
2265 
2266   if (max <= 0) {
2267     max = INT_MAX;
2268   }
2269 
2270   // Open "/proc/" directory.
2271   proc_dir = opendir("/proc");
2272   if (proc_dir == NULL) {
    // Cannot open "/proc/". Probably the kernel does not support it. Return an
2274     // error now and in subsequent calls.
2275     running_threads = -1;
2276     permanent_error = 1;
2277     goto finish;
2278   }
2279 
2280   // Initialize fixed part of task_path. This part will not change.
2281   __kmp_str_buf_cat(&task_path, "/proc/", 6);
2282   task_path_fixed_len = task_path.used; // Remember number of used characters.
2283 
2284   proc_entry = readdir(proc_dir);
2285   while (proc_entry != NULL) {
2286     // Proc entry is a directory and name starts with a digit. Assume it is a
2287     // process' directory.
2288     if (proc_entry->d_type == DT_DIR && isdigit(proc_entry->d_name[0])) {
2289 
2290       ++total_processes;
2291       // Make sure init process is the very first in "/proc", so we can replace
2292       // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes ==
2293       // 1. We are going to check that total_processes == 1 => d_name == "1" is
2294       // true (where "=>" is implication). Since C++ does not have => operator,
2295       // let us replace it with its equivalent: a => b == ! a || b.
2296       KMP_DEBUG_ASSERT(total_processes != 1 ||
2297                        strcmp(proc_entry->d_name, "1") == 0);
2298 
2299       // Construct task_path.
2300       task_path.used = task_path_fixed_len; // Reset task_path to "/proc/".
2301       __kmp_str_buf_cat(&task_path, proc_entry->d_name,
2302                         KMP_STRLEN(proc_entry->d_name));
2303       __kmp_str_buf_cat(&task_path, "/task", 5);
2304 
2305       task_dir = opendir(task_path.str);
2306       if (task_dir == NULL) {
        // A process can finish between reading the "/proc/" directory entry
        // and opening the process's "task/" directory. So, in the general
        // case we should not complain, but have to skip this process and
        // read the next one. But on systems with no "task/" support we would
        // spend a lot of time scanning the "/proc/" tree again and again
        // without any benefit. The "init" process (its pid is 1) should
        // always exist, so if we cannot open the "/proc/1/task/" directory,
        // it means "task/" is not supported by the kernel. Report an error
        // now and in the future.
2315         if (strcmp(proc_entry->d_name, "1") == 0) {
2316           running_threads = -1;
2317           permanent_error = 1;
2318           goto finish;
2319         }
2320       } else {
2321         // Construct fixed part of stat file path.
2322         __kmp_str_buf_clear(&stat_path);
2323         __kmp_str_buf_cat(&stat_path, task_path.str, task_path.used);
2324         __kmp_str_buf_cat(&stat_path, "/", 1);
2325         stat_path_fixed_len = stat_path.used;
2326 
2327         task_entry = readdir(task_dir);
2328         while (task_entry != NULL) {
2329           // It is a directory and name starts with a digit.
          if (task_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) {
2331             ++total_threads;
2332 
2333             // Construct complete stat file path. Easiest way would be:
2334             //  __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str,
2335             //  task_entry->d_name );
            // but a series of __kmp_str_buf_cat calls works a bit faster.
2337             stat_path.used =
2338                 stat_path_fixed_len; // Reset stat path to its fixed part.
2339             __kmp_str_buf_cat(&stat_path, task_entry->d_name,
2340                               KMP_STRLEN(task_entry->d_name));
2341             __kmp_str_buf_cat(&stat_path, "/stat", 5);
2342 
2343             // Note: Low-level API (open/read/close) is used. High-level API
            // (fopen/fclose) works ~30% slower.
2345             stat_file = open(stat_path.str, O_RDONLY);
2346             if (stat_file == -1) {
2347               // We cannot report an error because task (thread) can terminate
2348               // just before reading this file.
2349             } else {
              /* Content of "stat" file looks like:
                 24285 (program) S ...

                 It is a single line (if the program name does not include
                 funny symbols). The first number is the thread id, then the
                 name of the executable file in parentheses, then the state of
                 the thread. We need just the thread state.

                 Good news: Length of program name is 15 characters max. Longer
                 names are truncated.

                 Thus, we need a rather short buffer: 15 chars for program name
                 + 2 parentheses + 3 spaces + ~7 digits of pid = 37.

                 Bad news: Program name may contain special symbols like space,
                 closing parenthesis, or even new line. This makes parsing of
                 the "stat" file not 100% reliable. In case of funny program
                 names, parsing may fail (and report an incorrect thread
                 state).

                 Parsing the "status" file looks more promising (due to a
                 different file structure and escaping of special symbols), but
                 reading and parsing the "status" file works slower.
                  -- ln
              */
2374               char buffer[65];
2375               ssize_t len;
2376               len = read(stat_file, buffer, sizeof(buffer) - 1);
2377               if (len >= 0) {
2378                 buffer[len] = 0;
2379                 // Using scanf:
2380                 //     sscanf( buffer, "%*d (%*s) %c ", & state );
2381                 // looks very nice, but searching for a closing parenthesis
2382                 // works a bit faster.
2383                 char *close_parent = strstr(buffer, ") ");
2384                 if (close_parent != NULL) {
2385                   char state = *(close_parent + 2);
2386                   if (state == 'R') {
2387                     ++running_threads;
2388                     if (running_threads >= max) {
2389                       goto finish;
2390                     }
2391                   }
2392                 }
2393               }
2394               close(stat_file);
2395               stat_file = -1;
2396             }
2397           }
2398           task_entry = readdir(task_dir);
2399         }
2400         closedir(task_dir);
2401         task_dir = NULL;
2402       }
2403     }
2404     proc_entry = readdir(proc_dir);
2405   }
2406 
  // There _might_ be a timing hole where the thread executing this
  // code gets skipped in the load balance, and running_threads is 0.
  // Assert in debug builds only!!!
2410   KMP_DEBUG_ASSERT(running_threads > 0);
2411   if (running_threads <= 0) {
2412     running_threads = 1;
2413   }
2414 
2415 finish: // Clean up and exit.
2416   if (proc_dir != NULL) {
2417     closedir(proc_dir);
2418   }
2419   __kmp_str_buf_free(&task_path);
2420   if (task_dir != NULL) {
2421     closedir(task_dir);
2422   }
2423   __kmp_str_buf_free(&stat_path);
2424   if (stat_file != -1) {
2425     close(stat_file);
2426   }
2427 
2428   glb_running_threads = running_threads;
2429 
2430   return running_threads;
2431 
2432 } // __kmp_get_load_balance
2433 
#endif // KMP_OS_DARWIN || KMP_OS_NETBSD
2435 
2436 #endif // USE_LOAD_BALANCE
2437 
2438 #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC ||                            \
2439       ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) ||                 \
2440       KMP_ARCH_PPC64 || KMP_ARCH_RISCV64)
2441 
// We really only need the case with 1 argument, because Clang always builds
// a struct of pointers to the shared variables referenced in the outlined
// function.
2444 int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
2445                            void *p_argv[]
2446 #if OMPT_SUPPORT
2447                            ,
2448                            void **exit_frame_ptr
2449 #endif
2450                            ) {
2451 #if OMPT_SUPPORT
2452   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
2453 #endif
2454 
2455   switch (argc) {
2456   default:
2457     fprintf(stderr, "Too many args to microtask: %d!\n", argc);
2458     fflush(stderr);
2459     exit(-1);
2460   case 0:
2461     (*pkfn)(&gtid, &tid);
2462     break;
2463   case 1:
2464     (*pkfn)(&gtid, &tid, p_argv[0]);
2465     break;
2466   case 2:
2467     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
2468     break;
2469   case 3:
2470     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
2471     break;
2472   case 4:
2473     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
2474     break;
2475   case 5:
2476     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
2477     break;
2478   case 6:
2479     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2480             p_argv[5]);
2481     break;
2482   case 7:
2483     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2484             p_argv[5], p_argv[6]);
2485     break;
2486   case 8:
2487     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2488             p_argv[5], p_argv[6], p_argv[7]);
2489     break;
2490   case 9:
2491     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2492             p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
2493     break;
2494   case 10:
2495     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2496             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
2497     break;
2498   case 11:
2499     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2500             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
2501     break;
2502   case 12:
2503     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2504             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2505             p_argv[11]);
2506     break;
2507   case 13:
2508     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2509             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2510             p_argv[11], p_argv[12]);
2511     break;
2512   case 14:
2513     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2514             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2515             p_argv[11], p_argv[12], p_argv[13]);
2516     break;
2517   case 15:
2518     (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2519             p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2520             p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
2521     break;
2522   }
2523 
2524   return 1;
2525 }
2526 
2527 #endif
2528 
2529 // end of file //
2530