1 /*
2  * kmp_gsupport.cpp
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_atomic.h"
15 
16 #if OMPT_SUPPORT
17 #include "ompt-specific.h"
18 #endif
19 
// Task flag bits as encoded by the GOMP task entry points
// (untied / final / has-dependences).
enum {
  KMP_GOMP_TASK_UNTIED_FLAG = 1,
  KMP_GOMP_TASK_FINAL_FLAG = 2,
  KMP_GOMP_TASK_DEPENDS_FLAG = 8
};
25 
// Dependence type codes stored in the second element of a GOMP depobj pair
// (see the depobj handling in kmp_gomp_depends_info_t below).
enum {
  KMP_GOMP_DEPOBJ_IN = 1,
  KMP_GOMP_DEPOBJ_OUT = 2,
  KMP_GOMP_DEPOBJ_INOUT = 3,
  KMP_GOMP_DEPOBJ_MTXINOUTSET = 4
};
32 
33 // This class helps convert gomp dependency info into
34 // kmp_depend_info_t structures
35 class kmp_gomp_depends_info_t {
36   void **depend;
37   kmp_int32 num_deps;
38   size_t num_out, num_mutexinout, num_in, num_depobj;
39   size_t offset;
40 
41 public:
42   kmp_gomp_depends_info_t(void **depend) : depend(depend) {
43     size_t ndeps = (kmp_intptr_t)depend[0];
44     // GOMP taskdep structure:
45     // if depend[0] != 0:
46     // depend =  [ ndeps | nout | &out | ... | &out | &in | ... | &in ]
47     //
48     // if depend[0] == 0:
49     // depend = [ 0 | ndeps | nout | nmtx | nin | &out | ... | &out | &mtx |
50     //            ... | &mtx | &in   | ...  | &in  | &depobj | ... | &depobj ]
51     if (ndeps) {
52       num_out = (kmp_intptr_t)depend[1];
53       num_in = ndeps - num_out;
54       num_mutexinout = num_depobj = 0;
55       offset = 2;
56     } else {
57       ndeps = (kmp_intptr_t)depend[1];
58       num_out = (kmp_intptr_t)depend[2];
59       num_mutexinout = (kmp_intptr_t)depend[3];
60       num_in = (kmp_intptr_t)depend[4];
61       num_depobj = ndeps - num_out - num_mutexinout - num_in;
62       KMP_ASSERT(num_depobj <= ndeps);
63       offset = 5;
64     }
65     num_deps = static_cast<kmp_int32>(ndeps);
66   }
67   kmp_int32 get_num_deps() const { return num_deps; }
68   kmp_depend_info_t get_kmp_depend(size_t index) const {
69     kmp_depend_info_t retval;
70     memset(&retval, '\0', sizeof(retval));
71     KMP_ASSERT(index < (size_t)num_deps);
72     retval.len = 0;
73     // Because inout and out are logically equivalent,
74     // use inout and in dependency flags. GOMP does not provide a
75     // way to distinguish if user specified out vs. inout.
76     if (index < num_out) {
77       retval.flags.in = 1;
78       retval.flags.out = 1;
79       retval.base_addr = (kmp_intptr_t)depend[offset + index];
80     } else if (index >= num_out && index < (num_out + num_mutexinout)) {
81       retval.flags.mtx = 1;
82       retval.base_addr = (kmp_intptr_t)depend[offset + index];
83     } else if (index >= (num_out + num_mutexinout) &&
84                index < (num_out + num_mutexinout + num_in)) {
85       retval.flags.in = 1;
86       retval.base_addr = (kmp_intptr_t)depend[offset + index];
87     } else {
88       // depobj is a two element array (size of elements are size of pointer)
89       // depobj[0] = base_addr
90       // depobj[1] = type (in, out, inout, mutexinoutset, etc.)
91       kmp_intptr_t *depobj = (kmp_intptr_t *)depend[offset + index];
92       retval.base_addr = depobj[0];
93       switch (depobj[1]) {
94       case KMP_GOMP_DEPOBJ_IN:
95         retval.flags.in = 1;
96         break;
97       case KMP_GOMP_DEPOBJ_OUT:
98         retval.flags.out = 1;
99         break;
100       case KMP_GOMP_DEPOBJ_INOUT:
101         retval.flags.in = 1;
102         retval.flags.out = 1;
103         break;
104       case KMP_GOMP_DEPOBJ_MTXINOUTSET:
105         retval.flags.mtx = 1;
106         break;
107       default:
108         KMP_FATAL(GompFeatureNotSupported, "Unknown depobj type");
109       }
110     }
111     return retval;
112   }
113 };
114 
115 #ifdef __cplusplus
116 extern "C" {
117 #endif // __cplusplus
118 
// Build a file-scope ident_t for a GOMP entry point.  GOMP entry points
// receive no source-location information from the compiler, so the location
// string is "unknown".  (The 'routine' argument is currently unused.)
#define MKLOC(loc, routine)                                                    \
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
121 
122 #include "kmp_ftn_os.h"
123 
// GOMP_barrier: explicit barrier across the current team.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_barrier");
  KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    // Publish this routine's frame so OMPT tools can unwind past the
    // runtime while the thread waits in the barrier.
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmpc_barrier(&loc, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    // ompt_frame was set above under the same ompt_enabled condition.
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
143 
144 // Mutual exclusion
145 
// The symbol that icc/ifort generates for unnamed critical sections
// - .gomp_critical_user_ - is defined using .comm in any object that
// references it. We can't reference it directly here in C code, as the
// symbol contains a ".".
//
// The RTL contains an assembly language definition of .gomp_critical_user_
// with another symbol __kmp_unnamed_critical_addr initialized with its
// address.
153 extern kmp_critical_name *__kmp_unnamed_critical_addr;
154 
// GOMP_critical_start: enter the unnamed critical section.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_START)(void) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_critical_start");
  KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);
}
164 
// GOMP_critical_end: leave the unnamed critical section.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_END)(void) {
  int gtid = __kmp_get_gtid();
  MKLOC(loc, "GOMP_critical_end");
  KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);
}
174 
// GOMP_critical_name_start: enter a named critical section; 'pptr' is the
// compiler-generated per-name lock storage.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_critical_name_start");
  KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));
  __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);
}
181 
// GOMP_critical_name_end: leave the named critical section for 'pptr'.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) {
  int gtid = __kmp_get_gtid();
  MKLOC(loc, "GOMP_critical_name_end");
  KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));
  __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);
}
188 
189 // The Gnu codegen tries to use locked operations to perform atomic updates
190 // inline.  If it can't, then it calls GOMP_atomic_start() before performing
191 // the update and GOMP_atomic_end() afterward, regardless of the data type.
// GOMP_atomic_start: acquire the single global atomic lock (fallback path
// when the compiler cannot inline the atomic operation).
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_START)(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));

#if OMPT_SUPPORT
  __ompt_thread_assign_wait_id(0);
#endif

  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}
202 
// GOMP_atomic_end: release the global atomic lock.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_END)(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
208 
// GOMP_single_start: returns nonzero on the one thread chosen to execute the
// single region; all other threads return 0.
int KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_START)(void) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_single_start");
  KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  // 3rd parameter == FALSE prevents kmp_enter_single from pushing a
  // workshare when USE_CHECKS is defined.  We need to avoid the push,
  // as there is no corresponding GOMP_single_end() call.
  kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);

  if (ompt_enabled.enabled) {
    if (rc) {
      // Executor: report only scope_begin here; the region is still running
      // when this routine returns.
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      // Non-executor: the thread skips the region entirely, so report
      // begin and end back-to-back.
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}
256 
// GOMP_single_copy_start: single construct with copyprivate.  Returns NULL
// on the executing thread (which will later call GOMP_single_copy_end with
// the data pointer); all other threads wait and return that pointer.
void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
  void *retval;
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_single_copy_start");
  KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  // If this is the first thread to enter, return NULL.  The generated code will
  // then call GOMP_single_copy_end() for this thread only, with the
  // copyprivate data pointer as an argument.
  if (__kmp_enter_single(gtid, &loc, FALSE))
    return NULL;

    // Wait for the first thread to set the copyprivate data pointer,
    // and for all other threads to reach this point.

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  // Retrieve the value of the copyprivate data point, and wait for all
  // threads to do likewise, then return.
  retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;
  {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    // Second barrier: ensures every thread has read t_copypriv_data before
    // it can be reused.
    __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
  return retval;
}
302 
// GOMP_single_copy_end: called only by the thread that executed the single
// region, to broadcast its copyprivate data pointer to the team.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid));

  // Set the copyprivate data pointer for the team, then hit the barrier so
  // that the other threads will continue on and read it.  Hit another barrier
  // before continuing, so that they know that the copyprivate data pointer has
  // been propagated to all threads before trying to reuse the t_copypriv_data
  // field.
  __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
333 
// GOMP_ordered_start: enter an ordered region within a loop worksharing
// construct.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_START)(void) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_ordered_start");
  KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmpc_ordered(&loc, gtid);
}
343 
344 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) {
345   int gtid = __kmp_get_gtid();
346   MKLOC(loc, "GOMP_ordered_end");
347   KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));
348 #if OMPT_SUPPORT && OMPT_OPTIONAL
349   OMPT_STORE_RETURN_ADDRESS(gtid);
350 #endif
351   __kmpc_end_ordered(&loc, gtid);
352 }
353 
354 // Dispatch macro defs
355 //
356 // They come in two flavors: 64-bit unsigned, and either 32-bit signed
357 // (IA-32 architecture) or 64-bit signed (Intel(R) 64).
358 
#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4
#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4
#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4
#else
#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8
#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8
#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8
#endif /* KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS */

// 64-bit unsigned flavor, used by the *_ull loop entry points.
#define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u
#define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
#define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u
372 
373 // The parallel construct
374 
// Microtask adapter invoked by the KMP fork machinery on each team thread:
// calls the GOMP outlined function task(data), maintaining OMPT thread state
// and the task exit frame around the invocation.  (npr — the thread count —
// is unused here.)
#ifndef KMP_DEBUG
static
#endif /* KMP_DEBUG */
    void
    __kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *),
                                 void *data) {
#if OMPT_SUPPORT
  kmp_info_t *thr;
  ompt_frame_t *ompt_frame;
  ompt_state_t enclosing_state;

  if (ompt_enabled.enabled) {
    // get pointer to thread data structure
    thr = __kmp_threads[*gtid];

    // save enclosing task state; set current state for task
    enclosing_state = thr->th.ompt_thread_info.state;
    thr->th.ompt_thread_info.state = ompt_state_work_parallel;

    // set task frame
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif

  task(data);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    // clear task frame
    ompt_frame->exit_frame = ompt_data_none;

    // restore enclosing state
    thr->th.ompt_thread_info.state = enclosing_state;
  }
#endif
}
412 
// Microtask adapter for combined parallel+loop constructs: initializes the
// loop worksharing dispatcher with the given schedule and bounds before
// invoking the outlined function, with the same OMPT state bookkeeping as
// __kmp_GOMP_microtask_wrapper.
#ifndef KMP_DEBUG
static
#endif /* KMP_DEBUG */
    void
    __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,
                                          void (*task)(void *), void *data,
                                          unsigned num_threads, ident_t *loc,
                                          enum sched_type schedule, long start,
                                          long end, long incr,
                                          long chunk_size) {
  // Initialize the loop worksharing construct.

  KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
                    schedule != kmp_sch_static);

#if OMPT_SUPPORT
  kmp_info_t *thr;
  ompt_frame_t *ompt_frame;
  ompt_state_t enclosing_state;

  if (ompt_enabled.enabled) {
    thr = __kmp_threads[*gtid];
    // save enclosing task state; set current state for task
    enclosing_state = thr->th.ompt_thread_info.state;
    thr->th.ompt_thread_info.state = ompt_state_work_parallel;

    // set task frame
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif

  // Now invoke the microtask.
  task(data);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    // clear task frame
    ompt_frame->exit_frame = ompt_data_none;

    // reset enclosing state
    thr->th.ompt_thread_info.state = enclosing_state;
  }
#endif
}
458 
// Common fork helper for the GOMP parallel entry points: pushes the
// requested thread count and proc-bind policy (when nonzero), forks the team
// with the given microtask wrapper, and — when __kmp_fork_call returns
// nonzero — runs the before-invoke hook so this thread can execute the task.
// Fires the OMPT implicit-task-begin callback for the calling thread.
// The variadic arguments are the wrapper's parameters (forwarded via va_list).
static void __kmp_GOMP_fork_call(ident_t *loc, int gtid, unsigned num_threads,
                                 unsigned flags, void (*unwrapped_task)(void *),
                                 microtask_t wrapper, int argc, ...) {
  int rc;
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);

  va_list ap;
  va_start(ap, argc);

  if (num_threads != 0)
    __kmp_push_num_threads(loc, gtid, num_threads);
  if (flags != 0)
    __kmp_push_proc_bind(loc, gtid, (kmp_proc_bind_t)flags);
  rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper,
                       __kmp_invoke_task_func, kmp_va_addr_of(ap));

  va_end(ap);

  if (rc) {
    __kmp_run_before_invoked_task(gtid, tid, thr, team);
  }

#if OMPT_SUPPORT
  int ompt_team_size;
  if (ompt_enabled.enabled) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);

    // implicit task callback
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid),
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
      task_info->thread_num = __kmp_tid_from_gtid(gtid);
    }
    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
  }
#endif
}
502 
// GOMP_parallel_start (legacy split-form ABI): fork the team and return;
// the calling thread then invokes the outlined function itself and later
// calls GOMP_parallel_end to join.  num_threads == 0 means "use default".
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
                                                       void *data,
                                                       unsigned num_threads) {
  int gtid = __kmp_entry_gtid();

#if OMPT_SUPPORT
  ompt_frame_t *parent_frame, *frame;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
    parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  MKLOC(loc, "GOMP_parallel_start");
  KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));
  __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
                       (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
                       data);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
    frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();
#endif
}
534 
// GOMP_parallel_end: counterpart of GOMP_parallel_start — runs the
// after-invoke hook (unless the region was serialized) and joins the team.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
  int gtid = __kmp_get_gtid();
  kmp_info_t *thr;

  thr = __kmp_threads[gtid];

  MKLOC(loc, "GOMP_parallel_end");
  KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));

  if (!thr->th.th_team->t.t_serialized) {
    __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
                                 thr->th.th_team);
  }
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    // Implicit task is finished here, in the barrier we might schedule
    // deferred tasks,
    // these don't see the implicit task on the stack
    OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
  }
#endif

  __kmp_join_call(&loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_gnu
#endif
  );
#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_end();
#endif
}
568 
569 // Loop worksharing constructs
570 
571 // The Gnu codegen passes in an exclusive upper bound for the overall range,
572 // but the libguide dispatch code expects an inclusive upper bound, hence the
573 // "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th
574 // argument to __kmp_GOMP_fork_call).
575 //
// Conversely, KMP_DISPATCH_NEXT returns an inclusive upper bound in *p_ub,
577 // but the Gnu codegen expects an exclusive upper bound, so the adjustment
578 // "*p_ub += stride" compensates for the discrepancy.
579 //
580 // Correction: the gnu codegen always adjusts the upper bound by +-1, not the
581 // stride value.  We adjust the dispatch parameters accordingly (by +-1), but
582 // we still adjust p_ub by the actual stride value.
583 //
584 // The "runtime" versions do not take a chunk_sz parameter.
585 //
586 // The profile lib cannot support construct checking of unordered loops that
587 // are predetermined by the compiler to be statically scheduled, as the gcc
588 // codegen will not always emit calls to GOMP_loop_static_next() to get the
// next iteration.  Instead, it emits inline code to call omp_get_thread_num()
// and calculate the iteration space using the result.  It doesn't do this
// with ordered static loops, so they can be checked.
592 
593 #if OMPT_SUPPORT
594 #define IF_OMPT_SUPPORT(code) code
595 #else
596 #define IF_OMPT_SUPPORT(code)
597 #endif
598 
// LOOP_START expands to a GOMP_loop_*_start entry point: it initializes the
// dispatcher for the half-open range [lb, ub) with stride str and chunk size
// chunk_sz, fetches the first chunk into *p_lb/*p_ub, and returns nonzero
// iff a chunk was assigned.  The dispatcher works with inclusive upper
// bounds, hence the +-1 adjustments (see the block comment above).
#define LOOP_START(func, schedule)                                             \
  int func(long lb, long ub, long str, long chunk_sz, long *p_lb,              \
           long *p_ub) {                                                       \
    int status;                                                                \
    long stride;                                                               \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n",  \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      {                                                                        \
        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                      \
        KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                          \
                          (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,      \
                          (schedule) != kmp_sch_static);                       \
      }                                                                        \
      {                                                                        \
        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                      \
        status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,          \
                                   (kmp_int *)p_ub, (kmp_int *)&stride);       \
      }                                                                        \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }
639 
// LOOP_RUNTIME_START: like LOOP_START but for runtime-scheduled loops, which
// take no chunk_sz parameter (chunk is forced to 0 and the dispatcher's
// push_ws argument is always TRUE).
#define LOOP_RUNTIME_START(func, schedule)                                     \
  int func(long lb, long ub, long str, long *p_lb, long *p_ub) {               \
    int status;                                                                \
    long stride;                                                               \
    long chunk_sz = 0;                                                         \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      {                                                                        \
        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                      \
        KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                          \
                          (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,      \
                          TRUE);                                               \
      }                                                                        \
      {                                                                        \
        IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                      \
        status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,          \
                                   (kmp_int *)p_ub, (kmp_int *)&stride);       \
      }                                                                        \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }
679 
// When the loop is exhausted (status == 0) and doacross bookkeeping is
// active for this thread, finalize it.
#define KMP_DOACROSS_FINI(status, gtid)                                        \
  if (!status && __kmp_threads[gtid]->th.th_dispatch->th_doacross_flags) {     \
    __kmpc_doacross_fini(NULL, gtid);                                          \
  }
684 
// LOOP_NEXT expands to a GOMP_loop_*_next entry point: runs fini_code first
// (used by the ordered variants to finish the previous chunk), then fetches
// the next chunk into *p_lb/*p_ub.  Returns nonzero iff a chunk was
// assigned; converts the dispatcher's inclusive bound back to exclusive.
#define LOOP_NEXT(func, fini_code)                                             \
  int func(long *p_lb, long *p_ub) {                                           \
    int status;                                                                \
    long stride;                                                               \
    int gtid = __kmp_get_gtid();                                               \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid));                            \
                                                                               \
    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                          \
    fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,    \
                                         (kmp_int *)p_ub, (kmp_int *)&stride); \
    if (status) {                                                              \
      *p_ub += (stride > 0) ? 1 : -1;                                          \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid)                                            \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \
                       "returning %d\n",                                       \
         gtid, *p_lb, *p_ub, stride, status));                                 \
    return status;                                                             \
  }
708 
// Expand the exported entry points for the unordered 'long' loop
// schedules: each LOOP_START invocation defines a GOMP_loop_<sched>_start
// function and each LOOP_NEXT a GOMP_loop_<sched>_next function.  The
// NONMONOTONIC and MAYBE_NONMONOTONIC variants map onto the same kmp
// schedules as their plain counterparts.
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START),
           kmp_sch_dynamic_chunked)
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START),
           kmp_sch_dynamic_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT), {})
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START),
           kmp_sch_guided_chunked)
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START),
           kmp_sch_guided_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {})
LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START),
                   kmp_sch_runtime)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
LOOP_RUNTIME_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START),
    kmp_sch_runtime)
LOOP_RUNTIME_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START),
    kmp_sch_runtime)
LOOP_NEXT(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT), {})
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT), {})
735 
// Ordered variants: same expansion pattern, but with the kmp_ord_*
// schedules, and the _next entry points pass KMP_DISPATCH_FINI_CHUNK as
// their fini_code so the previous ordered chunk is released before the
// next one is fetched.
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START),
           kmp_ord_static)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT),
          { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START),
           kmp_ord_dynamic_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT),
          { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START),
           kmp_ord_guided_chunked)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT),
          { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
LOOP_RUNTIME_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START),
    kmp_ord_runtime)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT),
          { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })
753 
// LOOP_DOACROSS_START(func, schedule) expands to a
// GOMP_loop_doacross_<sched>_start entry point.  'counts' holds the
// extents of the ncounts collapsed loops; each dimension is registered
// with __kmpc_doacross_init as [0, counts[i]-1] step 1, after which the
// outermost loop (extent counts[0]) is handed to the dispatcher and the
// first chunk is returned through *p_lb/*p_ub (the dispatcher's
// inclusive upper bound is widened back by one on the way out).
// KMP_DOACROSS_FINI runs when dispatch reports no work, and the kmp_dim
// scratch array is freed on every path before returning.
#define LOOP_DOACROSS_START(func, schedule)                                    \
  bool func(unsigned ncounts, long *counts, long chunk_sz, long *p_lb,         \
            long *p_ub) {                                                      \
    int status;                                                                \
    long stride, lb, ub, str;                                                  \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned i = 0; i < ncounts; ++i) {                                   \
      dims[i].lo = 0;                                                          \
      dims[i].up = counts[i] - 1;                                              \
      dims[i].st = 1;                                                          \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, ncounts %u, lb 0x%lx, ub 0x%lx, str " \
                                "0x%lx, chunk_sz "                             \
                                "0x%lx\n",                                     \
                  gtid, ncounts, lb, ub, str, chunk_sz));                      \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                        (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,        \
                        (schedule) != kmp_sch_static);                         \
      status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                 (kmp_int *)p_ub, (kmp_int *)&stride);         \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }
800 
// LOOP_DOACROSS_RUNTIME_START(func, schedule) expands to the
// GOMP_loop_doacross_runtime_start entry point.  Identical to
// LOOP_DOACROSS_START except that chunk_sz is fixed at 0 and the
// dispatcher push flag is forced TRUE, since the actual schedule/chunk
// are taken from the runtime (kmp_sch_runtime).  Each of the ncounts
// collapsed loops is registered with __kmpc_doacross_init, then the
// outermost loop (extent counts[0]) is dispatched and its first chunk
// returned via *p_lb/*p_ub.  The kmp_dim scratch array is freed on
// every return path.
#define LOOP_DOACROSS_RUNTIME_START(func, schedule)                            \
  int func(unsigned ncounts, long *counts, long *p_lb, long *p_ub) {           \
    int status;                                                                \
    long stride, lb, ub, str;                                                  \
    long chunk_sz = 0;                                                         \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned i = 0; i < ncounts; ++i) {                                   \
      dims[i].lo = 0;                                                          \
      dims[i].up = counts[i] - 1;                                              \
      dims[i].st = 1;                                                          \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
    /* chunk_sz is 'long', so print it with %ld: the previous %d was a  */     \
    /* varargs type mismatch (undefined behavior on LP64 targets).      */     \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz "      \
                       "%ld\n",                                                \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                        (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
      status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb,            \
                                 (kmp_int *)p_ub, (kmp_int *)&stride);         \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n",    \
         gtid, *p_lb, *p_ub, status));                                         \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }
846 
// Expand the GOMP_loop_doacross_*_start entry points for the 'long'
// doacross schedules.
LOOP_DOACROSS_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START),
    kmp_sch_static)
LOOP_DOACROSS_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START),
    kmp_sch_dynamic_chunked)
LOOP_DOACROSS_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START),
    kmp_sch_guided_chunked)
LOOP_DOACROSS_RUNTIME_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START),
    kmp_sch_runtime)
859 
// GOMP_loop_end: called by every thread at the end of a worksharing
// loop; joins the team at a plain barrier.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END)(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    // Publish this frame's address and the return address to the tool
    // for the duration of the barrier.
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    // Clear the published frame once the barrier completes.
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))
}
881 
// GOMP_loop_end_nowait: end of a 'nowait' worksharing loop — no barrier
// is needed, so this only emits a trace record.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) {
  KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))
}
885 
886 // Unsigned long long loop worksharing constructs
887 //
888 // These are new with gcc 4.4
889 
// LOOP_START_ULL(func, schedule) expands to a GOMP_loop_ull_<sched>_start
// entry point for unsigned-long-long loops.  'up' carries the loop
// direction: str2 is +str for ascending and -str for descending loops,
// and is what the dispatcher receives.  On success the dispatcher's
// inclusive upper bound is widened back by one into *p_ub.
// NOTE(review): 'str' itself is unsigned, so the (str > 0) tests reduce
// to str != 0 — the direction handling appears to rely on str2/up;
// confirm against the libgomp caller's contract.
#define LOOP_START_ULL(func, schedule)                                         \
  int func(int up, unsigned long long lb, unsigned long long ub,               \
           unsigned long long str, unsigned long long chunk_sz,                \
           unsigned long long *p_lb, unsigned long long *p_ub) {               \
    int status;                                                                \
    long long str2 = up ? ((long long)str) : -((long long)str);                \
    long long stride;                                                          \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str "    \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, up, lb, ub, str, chunk_sz));                           \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz,  \
                            (schedule) != kmp_sch_static);                     \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str2);                                      \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }
926 
// LOOP_RUNTIME_START_ULL(func, schedule) — as LOOP_START_ULL, but with
// chunk_sz fixed at 0 and the dispatcher push flag forced TRUE, because
// the actual schedule/chunk come from the runtime (kmp_sch_runtime).
#define LOOP_RUNTIME_START_ULL(func, schedule)                                 \
  int func(int up, unsigned long long lb, unsigned long long ub,               \
           unsigned long long str, unsigned long long *p_lb,                   \
           unsigned long long *p_ub) {                                         \
    int status;                                                                \
    long long str2 = up ? ((long long)str) : -((long long)str);                \
    unsigned long long stride;                                                 \
    unsigned long long chunk_sz = 0;                                           \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str "    \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, up, lb, ub, str, chunk_sz));                           \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz,  \
                            TRUE);                                             \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT((long long)stride == str2);                           \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    return status;                                                             \
  }
964 
// LOOP_NEXT_ULL(func, fini_code) expands to GOMP_loop_ull_<sched>_next.
// 'fini_code' is spliced in textually just before the dispatch call —
// the ordered instantiations pass a KMP_DISPATCH_FINI_CHUNK_ULL(...)
// block there to release the previous ordered chunk; the unordered ones
// pass {}.  On success the dispatcher's inclusive upper bound is
// widened by one (in the direction of 'stride') into *p_ub.
#define LOOP_NEXT_ULL(func, fini_code)                                         \
  int func(unsigned long long *p_lb, unsigned long long *p_ub) {               \
    int status;                                                                \
    long long stride;                                                          \
    int gtid = __kmp_get_gtid();                                               \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid));                            \
                                                                               \
    fini_code status =                                                         \
        KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,            \
                              (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);       \
    if (status) {                                                              \
      *p_ub += (stride > 0) ? 1 : -1;                                          \
    }                                                                          \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, "  \
                   "returning %d\n",                                           \
         gtid, *p_lb, *p_ub, stride, status));                                 \
    return status;                                                             \
  }
988 
// Expand the GOMP_loop_ull_* entry points (unordered first, ordered
// after the gap).  As with the 'long' versions, the NONMONOTONIC and
// MAYBE_NONMONOTONIC names reuse the same kmp schedules, and the
// ordered _next functions release the prior chunk via
// KMP_DISPATCH_FINI_CHUNK_ULL.
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START),
               kmp_sch_static)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {})
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START),
               kmp_sch_dynamic_chunked)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START),
               kmp_sch_guided_chunked)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})
LOOP_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START),
    kmp_sch_dynamic_chunked)
LOOP_NEXT_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT), {})
LOOP_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START),
    kmp_sch_guided_chunked)
LOOP_NEXT_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT), {})
LOOP_RUNTIME_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
LOOP_RUNTIME_START_ULL(
    KMP_EXPAND_NAME(
        KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START),
    kmp_sch_runtime)
LOOP_RUNTIME_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START),
    kmp_sch_runtime)
LOOP_NEXT_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT),
    {})
LOOP_NEXT_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT), {})

LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START),
               kmp_ord_static)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT),
              { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
LOOP_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START),
    kmp_ord_dynamic_chunked)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT),
              { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START),
               kmp_ord_guided_chunked)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT),
              { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
LOOP_RUNTIME_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START),
    kmp_ord_runtime)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT),
              { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })
1042 
// LOOP_DOACROSS_START_ULL(func, schedule) — unsigned-long-long analogue
// of LOOP_DOACROSS_START: registers the ncounts collapsed loops with
// __kmpc_doacross_init ([0, counts[i]-1] step 1), then dispatches the
// outermost loop (extent counts[0]) and returns its first chunk through
// *p_lb/*p_ub.  The kmp_dim scratch array is freed before returning.
#define LOOP_DOACROSS_START_ULL(func, schedule)                                \
  int func(unsigned ncounts, unsigned long long *counts,                       \
           unsigned long long chunk_sz, unsigned long long *p_lb,              \
           unsigned long long *p_ub) {                                         \
    int status;                                                                \
    long long stride, str, lb, ub;                                             \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned i = 0; i < ncounts; ++i) {                                   \
      dims[i].lo = 0;                                                          \
      dims[i].up = counts[i] - 1;                                              \
      dims[i].st = 1;                                                          \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str "           \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, lb, ub, str, chunk_sz));                               \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,    \
                            (schedule) != kmp_sch_static);                     \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }
1091 
// LOOP_DOACROSS_RUNTIME_START_ULL(func, schedule) — as
// LOOP_DOACROSS_START_ULL, but chunk_sz is fixed at 0 and the dispatcher
// push flag is forced TRUE because the actual schedule comes from the
// runtime (kmp_sch_runtime).
#define LOOP_DOACROSS_RUNTIME_START_ULL(func, schedule)                        \
  int func(unsigned ncounts, unsigned long long *counts,                       \
           unsigned long long *p_lb, unsigned long long *p_ub) {               \
    int status;                                                                \
    unsigned long long stride, str, lb, ub;                                    \
    unsigned long long chunk_sz = 0;                                           \
    int gtid = __kmp_entry_gtid();                                             \
    struct kmp_dim *dims =                                                     \
        (struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts);    \
    MKLOC(loc, KMP_STR(func));                                                 \
    for (unsigned i = 0; i < ncounts; ++i) {                                   \
      dims[i].lo = 0;                                                          \
      dims[i].up = counts[i] - 1;                                              \
      dims[i].st = 1;                                                          \
    }                                                                          \
    __kmpc_doacross_init(&loc, gtid, (int)ncounts, dims);                      \
    lb = 0;                                                                    \
    ub = counts[0];                                                            \
    str = 1;                                                                   \
    KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str "           \
                                "0x%llx, chunk_sz 0x%llx\n",                   \
                  gtid, lb, ub, str, chunk_sz));                               \
                                                                               \
    if ((str > 0) ? (lb < ub) : (lb > ub)) {                                   \
      KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb,                        \
                            (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,    \
                            TRUE);                                             \
      status =                                                                 \
          KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb,          \
                                (kmp_uint64 *)p_ub, (kmp_int64 *)&stride);     \
      if (status) {                                                            \
        KMP_DEBUG_ASSERT(stride == str);                                       \
        *p_ub += (str > 0) ? 1 : -1;                                           \
      }                                                                        \
    } else {                                                                   \
      status = 0;                                                              \
    }                                                                          \
    KMP_DOACROSS_FINI(status, gtid);                                           \
                                                                               \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n",  \
         gtid, *p_lb, *p_ub, status));                                         \
    __kmp_free(dims);                                                          \
    return status;                                                             \
  }
1139 
// Expand the GOMP_loop_ull_doacross_*_start entry points.
LOOP_DOACROSS_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START),
    kmp_sch_static)
LOOP_DOACROSS_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START),
    kmp_sch_dynamic_chunked)
LOOP_DOACROSS_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START),
    kmp_sch_guided_chunked)
LOOP_DOACROSS_RUNTIME_START_ULL(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START),
    kmp_sch_runtime)
1152 
1153 // Combined parallel / loop worksharing constructs
1154 //
1155 // There are no ull versions (yet).
1156 
// PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) expands to a
// combined GOMP_parallel_loop_<sched>_start entry point: it forks a team
// whose workers run __kmp_GOMP_parallel_microtask_wrapper with the loop
// bounds/schedule as arguments, then initializes the dispatcher for the
// calling (primary) thread with the same bounds so it participates in
// the loop too.  ompt_pre()/ompt_post() expand to the OMPT frame
// bookkeeping when tool support is compiled in, and to nothing otherwise.
#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post)               \
  void func(void (*task)(void *), void *data, unsigned num_threads, long lb,   \
            long ub, long str, long chunk_sz) {                                \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n",  \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    ompt_pre();                                                                \
                                                                               \
    __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,                    \
                         (microtask_t)__kmp_GOMP_parallel_microtask_wrapper,   \
                         9, task, data, num_threads, &loc, (schedule), lb,     \
                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);      \
    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid));                          \
                                                                               \
    KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                              \
                      (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,          \
                      (schedule) != kmp_sch_static);                           \
                                                                               \
    ompt_post();                                                               \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid));                       \
  }
1184 
#if OMPT_SUPPORT && OMPT_OPTIONAL

// Pre-fork OMPT hook used by the parallel-loop entry points: record the
// parent task's enter frame and the runtime-entry return address so tools
// can unwind across the runtime. Relies on "gtid" being in scope at the
// expansion site.
#define OMPT_LOOP_PRE()                                                        \
  ompt_frame_t *parent_frame;                                                  \
  if (ompt_enabled.enabled) {                                                  \
    __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);   \
    parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);                 \
    OMPT_STORE_RETURN_ADDRESS(gtid);                                           \
  }

// Post-construct OMPT hook: clear the frame recorded by OMPT_LOOP_PRE().
#define OMPT_LOOP_POST()                                                       \
  if (ompt_enabled.enabled) {                                                  \
    parent_frame->enter_frame = ompt_data_none;                                \
  }

#else

// OMPT support compiled out: the hooks expand to nothing.
#define OMPT_LOOP_PRE()

#define OMPT_LOOP_POST()

#endif
1207 
// Legacy GOMP_parallel_loop_*_start entry points, one per schedule kind.
PARALLEL_LOOP_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START),
    kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START),
    kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START),
    kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP_START(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START),
    kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1220 
1221 // Tasking constructs
1222 
// Implementation of GOMP_task: create an explicit task and either enqueue it
// or, when the "if" clause is false, execute it immediately on this thread.
//   func/data    task entry routine and its argument block
//   copy_func    optional firstprivate copy constructor (NULL => bitwise copy)
//   arg_size     size of the argument block; arg_align its required alignment
//   if_cond      "if" clause value; false forces undeferred execution
//   gomp_flags   GOMP task flag bits (see KMP_GOMP_TASK_*_FLAG above)
//   depend       GOMP dependence array; valid only when the depends flag set
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
                                             void (*copy_func)(void *, void *),
                                             long arg_size, long arg_align,
                                             bool if_cond, unsigned gomp_flags,
                                             void **depend) {
  MKLOC(loc, "GOMP_task");
  int gtid = __kmp_entry_gtid();
  kmp_int32 flags = 0;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;

  KA_TRACE(20, ("GOMP_task: T#%d\n", gtid));

  // The low-order bit is the "untied" flag
  if (!(gomp_flags & KMP_GOMP_TASK_UNTIED_FLAG)) {
    input_flags->tiedness = TASK_TIED;
  }
  // The second low-order bit is the "final" flag
  if (gomp_flags & KMP_GOMP_TASK_FINAL_FLAG) {
    input_flags->final = 1;
  }
  input_flags->native = 1;
  // __kmp_task_alloc() sets up all other flags

  if (!if_cond) {
    // Undeferred task: it runs immediately on this thread below, so no
    // private copy of the argument block is allocated.
    arg_size = 0;
  }

  // Over-allocate by (arg_align - 1) so shareds can be aligned up below.
  kmp_task_t *task = __kmp_task_alloc(
      &loc, gtid, input_flags, sizeof(kmp_task_t),
      arg_size ? arg_size + arg_align - 1 : 0, (kmp_routine_entry_t)func);

  if (arg_size > 0) {
    // Round the shareds pointer up to the requested alignment.
    if (arg_align > 0) {
      task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /
                               arg_align * arg_align);
    }
    // else error??

    if (copy_func) {
      (*copy_func)(task->shareds, data);
    } else {
      KMP_MEMCPY(task->shareds, data, arg_size);
    }
  }

#if OMPT_SUPPORT
  kmp_taskdata_t *current_task;
  if (ompt_enabled.enabled) {
    current_task = __kmp_threads[gtid]->th.th_current_task;
    current_task->ompt_task_info.frame.enter_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  if (if_cond) {
    // Deferred path: hand the task to the runtime, with dependences if the
    // GOMP depends flag was set.
    if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {
      KMP_ASSERT(depend);
      kmp_gomp_depends_info_t gomp_depends(depend);
      kmp_int32 ndeps = gomp_depends.get_num_deps();
      kmp_depend_info_t dep_list[ndeps];
      for (kmp_int32 i = 0; i < ndeps; i++)
        dep_list[i] = gomp_depends.get_kmp_depend(i);
      kmp_int32 ndeps_cnv;
      __kmp_type_convert(ndeps, &ndeps_cnv);
      __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps_cnv, dep_list, 0, NULL);
    } else {
      __kmpc_omp_task(&loc, gtid, task);
    }
  } else {
    // Undeferred path: wait for dependences if any, then run the task body
    // inline on this thread between begin_if0/complete_if0.
#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t *thread;
    kmp_taskdata_t *taskdata;
    if (ompt_enabled.enabled) {
      // Store the threads states and restore them after the task
      thread = __kmp_threads[gtid];
      taskdata = KMP_TASK_TO_TASKDATA(task);
      oldInfo = thread->th.ompt_thread_info;
      thread->th.ompt_thread_info.wait_id = 0;
      thread->th.ompt_thread_info.state = ompt_state_work_parallel;
      taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {
      KMP_ASSERT(depend);
      kmp_gomp_depends_info_t gomp_depends(depend);
      kmp_int32 ndeps = gomp_depends.get_num_deps();
      kmp_depend_info_t dep_list[ndeps];
      for (kmp_int32 i = 0; i < ndeps; i++)
        dep_list[i] = gomp_depends.get_kmp_depend(i);
      __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
    }

    __kmpc_omp_task_begin_if0(&loc, gtid, task);
    func(data);
    __kmpc_omp_task_complete_if0(&loc, gtid, task);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      thread->th.ompt_thread_info = oldInfo;
      taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    }
#endif
  }
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif

  KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));
}
1337 
1338 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT)(void) {
1339   MKLOC(loc, "GOMP_taskwait");
1340   int gtid = __kmp_entry_gtid();
1341 
1342 #if OMPT_SUPPORT
1343   OMPT_STORE_RETURN_ADDRESS(gtid);
1344 #endif
1345 
1346   KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));
1347 
1348   __kmpc_omp_taskwait(&loc, gtid);
1349 
1350   KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));
1351 }
1352 
1353 // Sections worksharing constructs
1354 //
1355 // For the sections construct, we initialize a dynamically scheduled loop
1356 // worksharing construct with lb 1 and stride 1, and use the iteration #'s
// that it returns as section ids.
1358 //
1359 // There are no special entry points for ordered sections, so we always use
1360 // the dynamically scheduled workshare, even if the sections aren't ordered.
1361 
1362 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) {
1363   int status;
1364   kmp_int lb, ub, stride;
1365   int gtid = __kmp_entry_gtid();
1366   MKLOC(loc, "GOMP_sections_start");
1367   KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid));
1368 
1369   KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
1370 
1371   status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
1372   if (status) {
1373     KMP_DEBUG_ASSERT(stride == 1);
1374     KMP_DEBUG_ASSERT(lb > 0);
1375     KMP_ASSERT(lb == ub);
1376   } else {
1377     lb = 0;
1378   }
1379 
1380   KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid,
1381                 (unsigned)lb));
1382   return (unsigned)lb;
1383 }
1384 
1385 unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) {
1386   int status;
1387   kmp_int lb, ub, stride;
1388   int gtid = __kmp_get_gtid();
1389   MKLOC(loc, "GOMP_sections_next");
1390   KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid));
1391 
1392 #if OMPT_SUPPORT
1393   OMPT_STORE_RETURN_ADDRESS(gtid);
1394 #endif
1395 
1396   status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);
1397   if (status) {
1398     KMP_DEBUG_ASSERT(stride == 1);
1399     KMP_DEBUG_ASSERT(lb > 0);
1400     KMP_ASSERT(lb == ub);
1401   } else {
1402     lb = 0;
1403   }
1404 
1405   KA_TRACE(
1406       20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, (unsigned)lb));
1407   return (unsigned)lb;
1408 }
1409 
// Legacy combined entry point: fork a team for a sections region
// (implemented as a dynamic loop over [1, count]) and initialize the
// dispatcher on the encountering thread.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(
    void (*task)(void *), void *data, unsigned num_threads, unsigned count) {
  int gtid = __kmp_entry_gtid();

#if OMPT_SUPPORT
  ompt_frame_t *parent_frame;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
    parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  MKLOC(loc, "GOMP_parallel_sections_start");
  KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));

  __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
                       (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
                       task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
                       (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    parent_frame->enter_frame = ompt_data_none;
  }
#endif

  // Initialize this (primary) thread's dispatcher for the sections loop.
  KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);

  KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid));
}
1442 
// End a sections region: join the team at the implicit barrier, bracketing
// the barrier with OMPT enter-frame bookkeeping when tools are attached.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END)(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))
}
1464 
// End a sections region without the closing barrier; nothing to do beyond
// tracing (the gtid lookup only happens when tracing is compiled in).
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) {
  KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))
}
1468 
// libgomp has an empty function for GOMP_taskyield as of 2013-10-10
// This runtime mirrors that: a no-op apart from the trace output.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKYIELD)(void) {
  KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid()))
  return;
}
1474 
// GOMP_parallel: fork a team, execute the region on the encountering
// (primary) thread as well, then join via GOMP_parallel_end.
//   task/data     outlined parallel region and its shared-argument block
//   num_threads   requested team size (0 => runtime default)
//   flags         GOMP parallel flags, passed through to the fork machinery
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),
                                                 void *data,
                                                 unsigned num_threads,
                                                 unsigned int flags) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_parallel");
  KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid));

#if OMPT_SUPPORT
  ompt_task_info_t *parent_task_info, *task_info;
  if (ompt_enabled.enabled) {
    parent_task_info = __ompt_get_task_info_object(0);
    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
                       (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
                       data);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    task_info = __ompt_get_task_info_object(0);
    task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
  // The primary thread runs its share of the region directly.
  task(data);
  {
#if OMPT_SUPPORT
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
  }
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    task_info->frame.exit_frame = ompt_data_none;
    parent_task_info->frame.enter_frame = ompt_data_none;
  }
#endif
}
1514 
// GOMP_parallel_sections: combined construct — fork a team for a sections
// region (dynamic loop over [1, count]), run the region on the primary
// thread, then join. "flags" is passed through to the fork machinery.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
                                                          void *data,
                                                          unsigned num_threads,
                                                          unsigned count,
                                                          unsigned flags) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_parallel_sections");
  KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));

#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
                       (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
                       task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
                       (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);

  {
#if OMPT_SUPPORT
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

    // Initialize the primary thread's dispatcher for the sections loop.
    KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
  }
  task(data);
  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
  KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
}
1544 
// Expands to an entry point "func" for a GOMP 4.0+ combined parallel loop:
// fork the team, initialize the dispatcher, run the region on the primary
// thread, and join. Differs from PARALLEL_LOOP_START by taking the GOMP
// "flags" argument and executing task(data) itself. The +/-1 bound
// adjustment converts GOMP's exclusive upper bound to kmp's inclusive one.
#define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post)                     \
  void func(void (*task)(void *), void *data, unsigned num_threads, long lb,   \
            long ub, long str, long chunk_sz, unsigned flags) {                \
    int gtid = __kmp_entry_gtid();                                             \
    MKLOC(loc, KMP_STR(func));                                                 \
    KA_TRACE(                                                                  \
        20,                                                                    \
        (KMP_STR(                                                              \
             func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n",  \
         gtid, lb, ub, str, chunk_sz));                                        \
                                                                               \
    ompt_pre();                                                                \
    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                          \
    __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,                 \
                         (microtask_t)__kmp_GOMP_parallel_microtask_wrapper,   \
                         9, task, data, num_threads, &loc, (schedule), lb,     \
                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);      \
                                                                               \
    {                                                                          \
      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                        \
      KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                            \
                        (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,        \
                        (schedule) != kmp_sch_static);                         \
    }                                                                          \
    task(data);                                                                \
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();                         \
    ompt_post();                                                               \
                                                                               \
    KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid));                       \
  }
1575 
// GOMP_parallel_loop_* (combined parallel + loop) entry points. The
// NONMONOTONIC / MAYBE_NONMONOTONIC variants map to the same schedule as
// their monotonic counterparts in this runtime.
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC),
              kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC),
              kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED),
    kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC),
    kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED),
              kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME),
              kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME),
    kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME),
    kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
1596 
1597 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_START)(void) {
1598   int gtid = __kmp_entry_gtid();
1599   MKLOC(loc, "GOMP_taskgroup_start");
1600   KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid));
1601 
1602 #if OMPT_SUPPORT
1603   OMPT_STORE_RETURN_ADDRESS(gtid);
1604 #endif
1605 
1606   __kmpc_taskgroup(&loc, gtid);
1607 
1608   return;
1609 }
1610 
1611 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_END)(void) {
1612   int gtid = __kmp_get_gtid();
1613   MKLOC(loc, "GOMP_taskgroup_end");
1614   KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid));
1615 
1616 #if OMPT_SUPPORT
1617   OMPT_STORE_RETURN_ADDRESS(gtid);
1618 #endif
1619 
1620   __kmpc_end_taskgroup(&loc, gtid);
1621 
1622   return;
1623 }
1624 
1625 static kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) {
1626   kmp_int32 cncl_kind = 0;
1627   switch (gomp_kind) {
1628   case 1:
1629     cncl_kind = cancel_parallel;
1630     break;
1631   case 2:
1632     cncl_kind = cancel_loop;
1633     break;
1634   case 4:
1635     cncl_kind = cancel_sections;
1636     break;
1637   case 8:
1638     cncl_kind = cancel_taskgroup;
1639     break;
1640   }
1641   return cncl_kind;
1642 }
1643 
1644 // Return true if cancellation should take place, false otherwise
1645 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) {
1646   int gtid = __kmp_get_gtid();
1647   MKLOC(loc, "GOMP_cancellation_point");
1648   KA_TRACE(20, ("GOMP_cancellation_point: T#%d which:%d\n", gtid, which));
1649   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
1650   return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
1651 }
1652 
1653 // Return true if cancellation should take place, false otherwise
1654 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) {
1655   int gtid = __kmp_get_gtid();
1656   MKLOC(loc, "GOMP_cancel");
1657   KA_TRACE(20, ("GOMP_cancel: T#%d which:%d do_cancel:%d\n", gtid, which,
1658                 (int)do_cancel));
1659   kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);
1660 
1661   if (do_cancel == FALSE) {
1662     return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);
1663   } else {
1664     return __kmpc_cancel(&loc, gtid, cncl_kind);
1665   }
1666 }
1667 
1668 // Return true if cancellation should take place, false otherwise
1669 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) {
1670   int gtid = __kmp_get_gtid();
1671   KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid));
1672   return __kmp_barrier_gomp_cancel(gtid);
1673 }
1674 
1675 // Return true if cancellation should take place, false otherwise
1676 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) {
1677   int gtid = __kmp_get_gtid();
1678   KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid));
1679   return __kmp_barrier_gomp_cancel(gtid);
1680 }
1681 
1682 // Return true if cancellation should take place, false otherwise
1683 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) {
1684   int gtid = __kmp_get_gtid();
1685   KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid));
1686   return __kmp_barrier_gomp_cancel(gtid);
1687 }
1688 
1689 // All target functions are empty as of 2014-05-29
// Stub: device offload is not implemented here, so target regions fall back
// to host execution and this entry point has nothing to do.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn)(void *),
                                               const void *openmp_target,
                                               size_t mapnum, void **hostaddrs,
                                               size_t *sizes,
                                               unsigned char *kinds) {
  return;
}
1697 
// Stub: target data regions are ignored (host execution only).
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_DATA)(
    int device, const void *openmp_target, size_t mapnum, void **hostaddrs,
    size_t *sizes, unsigned char *kinds) {
  return;
}
1703 
// Stub: nothing to tear down since target data regions are ignored.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) { return; }
1705 
// Stub: target update has no effect without device data environments.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_UPDATE)(
    int device, const void *openmp_target, size_t mapnum, void **hostaddrs,
    size_t *sizes, unsigned char *kinds) {
  return;
}
1711 
// Stub: legacy GOMP_teams entry point is a no-op in this runtime
// (see GOMP_teams_reg below for the host teams implementation).
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams,
                                              unsigned int thread_limit) {
  return;
}
1716 
1717 // Task duplication function which copies src to dest (both are
1718 // preallocated task structures)
1719 static void __kmp_gomp_task_dup(kmp_task_t *dest, kmp_task_t *src,
1720                                 kmp_int32 last_private) {
1721   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(src);
1722   if (taskdata->td_copy_func) {
1723     (taskdata->td_copy_func)(dest->shareds, src->shareds);
1724   }
1725 }
1726 
1727 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(
1728     uintptr_t *);
1729 
1730 #ifdef __cplusplus
1731 } // extern "C"
1732 #endif
1733 
// Common implementation behind GOMP_taskloop and GOMP_taskloop_ull.
// T is the loop induction type (long or unsigned long long). The GOMP
// argument block "data" begins with the loop bounds; they are rewritten in
// the task's shareds below before the task is handed to __kmpc_taskloop.
template <typename T>
void __GOMP_taskloop(void (*func)(void *), void *data,
                     void (*copy_func)(void *, void *), long arg_size,
                     long arg_align, unsigned gomp_flags,
                     unsigned long num_tasks, int priority, T start, T end,
                     T step) {
  typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
  MKLOC(loc, "GOMP_taskloop");
  int sched;
  T *loop_bounds;
  int gtid = __kmp_entry_gtid();
  kmp_int32 flags = 0;
  // Decode the GOMP flag bits this implementation uses.
  int if_val = gomp_flags & (1u << 10);
  int nogroup = gomp_flags & (1u << 11);
  int up = gomp_flags & (1u << 8);
  int reductions = gomp_flags & (1u << 12);
  p_task_dup_t task_dup = NULL;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
#ifdef KMP_DEBUG
  {
    char *buff;
    buff = __kmp_str_format(
        "GOMP_taskloop: T#%%d: func:%%p data:%%p copy_func:%%p "
        "arg_size:%%ld arg_align:%%ld gomp_flags:0x%%x num_tasks:%%lu "
        "priority:%%d start:%%%s end:%%%s step:%%%s\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
    KA_TRACE(20, (buff, gtid, func, data, copy_func, arg_size, arg_align,
                  gomp_flags, num_tasks, priority, start, end, step));
    __kmp_str_free(&buff);
  }
#endif
  // The argument block must at least hold the two loop bounds.
  KMP_ASSERT((size_t)arg_size >= 2 * sizeof(T));
  KMP_ASSERT(arg_align > 0);
  // The low-order bit is the "untied" flag
  if (!(gomp_flags & 1)) {
    input_flags->tiedness = TASK_TIED;
  }
  // The second low-order bit is the "final" flag
  if (gomp_flags & 2) {
    input_flags->final = 1;
  }
  // Negative step flag
  if (!up) {
    // If step is flagged as negative, but isn't properly sign extended
    // Then manually sign extend it.  Could be a short, int, char embedded
    // in a long.  So cannot assume any cast.
    if (step > 0) {
      // Fill every bit above the highest set bit with 1s.
      for (int i = sizeof(T) * CHAR_BIT - 1; i >= 0L; --i) {
        // break at the first 1 bit
        if (step & ((T)1 << i))
          break;
        step |= ((T)1 << i);
      }
    }
  }
  input_flags->native = 1;
  // Figure out if none/grainsize/num_tasks clause specified
  if (num_tasks > 0) {
    if (gomp_flags & (1u << 9))
      sched = 1; // grainsize specified
    else
      sched = 2; // num_tasks specified
    // neither grainsize nor num_tasks specified
  } else {
    sched = 0;
  }

  // __kmp_task_alloc() sets up all other flags
  kmp_task_t *task =
      __kmp_task_alloc(&loc, gtid, input_flags, sizeof(kmp_task_t),
                       arg_size + arg_align - 1, (kmp_routine_entry_t)func);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  taskdata->td_copy_func = copy_func;
  taskdata->td_size_loop_bounds = sizeof(T);

  // re-align shareds if needed and setup firstprivate copy constructors
  // through the task_dup mechanism
  task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /
                           arg_align * arg_align);
  if (copy_func) {
    task_dup = __kmp_gomp_task_dup;
  }
  KMP_MEMCPY(task->shareds, data, arg_size);

  // Store inclusive bounds: GOMP's "end" is exclusive, kmp's is inclusive.
  loop_bounds = (T *)task->shareds;
  loop_bounds[0] = start;
  loop_bounds[1] = end + (up ? -1 : 1);

  if (!nogroup) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(&loc, gtid);
    if (reductions) {
      // The data pointer points to lb, ub, then reduction data
      struct data_t {
        T a, b;
        uintptr_t *d;
      };
      uintptr_t *d = ((data_t *)data)->d;
      KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(d);
    }
  }
  __kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),
                  (kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, 1, sched,
                  (kmp_uint64)num_tasks, (void *)task_dup);
  if (!nogroup) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(&loc, gtid);
  }
}
1847 
1848 // 4 byte version of GOMP_doacross_post
// This version needs to create a temporary array which converts 4 byte
1850 // integers into 8 byte integers
1851 template <typename T, bool need_conversion = (sizeof(long) == 4)>
1852 void __kmp_GOMP_doacross_post(T *count);
1853 
1854 template <> void __kmp_GOMP_doacross_post<long, true>(long *count) {
1855   int gtid = __kmp_entry_gtid();
1856   kmp_info_t *th = __kmp_threads[gtid];
1857   MKLOC(loc, "GOMP_doacross_post");
1858   kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
1859   kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc(
1860       th, (size_t)(sizeof(kmp_int64) * num_dims));
1861   for (kmp_int64 i = 0; i < num_dims; ++i) {
1862     vec[i] = (kmp_int64)count[i];
1863   }
1864   __kmpc_doacross_post(&loc, gtid, vec);
1865   __kmp_thread_free(th, vec);
1866 }
1867 
1868 // 8 byte versions of GOMP_doacross_post
1869 // This version can just pass in the count array directly instead of creating
1870 // a temporary array
1871 template <> void __kmp_GOMP_doacross_post<long, false>(long *count) {
1872   int gtid = __kmp_entry_gtid();
1873   MKLOC(loc, "GOMP_doacross_post");
1874   __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
1875 }
1876 
1877 template <typename T> void __kmp_GOMP_doacross_wait(T first, va_list args) {
1878   int gtid = __kmp_entry_gtid();
1879   kmp_info_t *th = __kmp_threads[gtid];
1880   MKLOC(loc, "GOMP_doacross_wait");
1881   kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];
1882   kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc(
1883       th, (size_t)(sizeof(kmp_int64) * num_dims));
1884   vec[0] = (kmp_int64)first;
1885   for (kmp_int64 i = 1; i < num_dims; ++i) {
1886     T item = va_arg(args, T);
1887     vec[i] = (kmp_int64)item;
1888   }
1889   __kmpc_doacross_wait(&loc, gtid, vec);
1890   __kmp_thread_free(th, vec);
1891   return;
1892 }
1893 
1894 #ifdef __cplusplus
1895 extern "C" {
1896 #endif // __cplusplus
1897 
// GOMP_taskloop entry point for signed "long" induction variables; thin
// wrapper over the shared template implementation above.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP)(
    void (*func)(void *), void *data, void (*copy_func)(void *, void *),
    long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,
    int priority, long start, long end, long step) {
  __GOMP_taskloop<long>(func, data, copy_func, arg_size, arg_align, gomp_flags,
                        num_tasks, priority, start, end, step);
}
1905 
// GOMP_taskloop_ull entry point for unsigned long long induction variables;
// thin wrapper over the shared template implementation above.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP_ULL)(
    void (*func)(void *), void *data, void (*copy_func)(void *, void *),
    long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,
    int priority, unsigned long long start, unsigned long long end,
    unsigned long long step) {
  __GOMP_taskloop<unsigned long long>(func, data, copy_func, arg_size,
                                      arg_align, gomp_flags, num_tasks,
                                      priority, start, end, step);
}
1915 
// Post completion of this thread's iteration in a doacross loop nest;
// dispatches to the size-appropriate __kmp_GOMP_doacross_post
// specialization (conversion needed only when long is 4 bytes).
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_POST)(long *count) {
  __kmp_GOMP_doacross_post(count);
}
1919 
// Wait on doacross dependences; the variadic arguments carry one "long"
// iteration number per remaining dimension after "first".
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_WAIT)(long first, ...) {
  va_list args;
  va_start(args, first);
  __kmp_GOMP_doacross_wait<long>(first, args);
  va_end(args);
}
1926 
// unsigned long long variant of doacross post: the element size already
// matches kmp_int64, so the array is passed through without conversion.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_POST)(
    unsigned long long *count) {
  int gtid = __kmp_entry_gtid();
  MKLOC(loc, "GOMP_doacross_ull_post");
  __kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));
}
1933 
// unsigned long long variant of doacross wait; variadic arguments carry one
// iteration number per remaining dimension after "first".
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)(
    unsigned long long first, ...) {
  va_list args;
  va_start(args, first);
  __kmp_GOMP_doacross_wait<unsigned long long>(first, args);
  va_end(args);
}
1941 
1942 // fn: the function each primary thread of new team will call
1943 // data: argument to fn
1944 // num_teams, thread_limit: max bounds on respective ICV
1945 // flags: unused
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS_REG)(void (*fn)(void *),
                                                  void *data,
                                                  unsigned num_teams,
                                                  unsigned thread_limit,
                                                  unsigned flags) {
  MKLOC(loc, "GOMP_teams_reg");
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("GOMP_teams_reg: T#%d num_teams=%u thread_limit=%u flag=%u\n",
                gtid, num_teams, thread_limit, flags));
  // Publish the team-count/thread-limit ICVs, then fork the teams construct
  // with fn wrapped as a microtask.
  __kmpc_push_num_teams(&loc, gtid, num_teams, thread_limit);
  __kmpc_fork_teams(&loc, 2, (microtask_t)__kmp_GOMP_microtask_wrapper, fn,
                    data);
  KA_TRACE(20, ("GOMP_teams_reg exit: T#%d\n", gtid));
}
1960 
// taskwait with a depend clause: convert the GOMP dependence array and wait
// until the listed dependences are satisfied.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT_DEPEND)(void **depend) {
  MKLOC(loc, "GOMP_taskwait_depend");
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("GOMP_taskwait_depend: T#%d\n", gtid));
  // Translate GOMP's packed dependence array into kmp_depend_info_t records.
  kmp_gomp_depends_info_t gomp_depends(depend);
  kmp_int32 ndeps = gomp_depends.get_num_deps();
  kmp_depend_info_t dep_list[ndeps];
  for (kmp_int32 i = 0; i < ndeps; i++)
    dep_list[i] = gomp_depends.get_kmp_depend(i);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);
  KA_TRACE(20, ("GOMP_taskwait_depend exit: T#%d\n", gtid));
}
1976 
1977 static inline void
1978 __kmp_GOMP_taskgroup_reduction_register(uintptr_t *data, kmp_taskgroup_t *tg,
1979                                         int nthreads,
1980                                         uintptr_t *allocated = nullptr) {
1981   KMP_ASSERT(data);
1982   KMP_ASSERT(nthreads > 0);
1983   // Have private copy pointers point to previously allocated
1984   // reduction data or allocate new data here
1985   if (allocated) {
1986     data[2] = allocated[2];
1987     data[6] = allocated[6];
1988   } else {
1989     data[2] = (uintptr_t)__kmp_allocate(nthreads * data[1]);
1990     data[6] = data[2] + (nthreads * data[1]);
1991   }
1992   if (tg)
1993     tg->gomp_data = data;
1994 }
1995 
1996 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(
1997     uintptr_t *data) {
1998   int gtid = __kmp_entry_gtid();
1999   KA_TRACE(20, ("GOMP_taskgroup_reduction_register: T#%d\n", gtid));
2000   kmp_info_t *thread = __kmp_threads[gtid];
2001   kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2002   int nthreads = thread->th.th_team_nproc;
2003   __kmp_GOMP_taskgroup_reduction_register(data, tg, nthreads);
2004 }
2005 
// Implements GOMP_taskgroup_reduction_unregister (GOMP_5.0): release the
// privatized reduction copies whose base was stored in data[2] at
// registration time.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)(
    uintptr_t *data) {
  KA_TRACE(20,
           ("GOMP_taskgroup_reduction_unregister: T#%d\n", __kmp_get_gtid()));
  KMP_ASSERT(data && data[2]);
  __kmp_free((void *)data[2]);
}
2013 
// Search through reduction data and set ptrs[] elements
// to proper privatized copy address.
// GOMP descriptor layout as read here: gomp_data[0] = number of reduction
// vars, gomp_data[1] = per-thread block size, gomp_data[2] = base of the
// privatized copies, gomp_data[6] = end of that range, and per-variable
// triples starting at gomp_data + 7 where entry[0] is the original variable
// address and entry[1] its offset within a thread's block.
// Each input pointer may be either an original variable address or an
// address inside some thread's privatized block; both are remapped to this
// thread's copy. For the first cntorig entries the original ("propagated")
// address is additionally stored at ptrs[cnt + i].
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP)(size_t cnt,
                                                             size_t cntorig,
                                                             void **ptrs) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("GOMP_task_reduction_remap: T#%d\n", gtid));
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 tid = __kmp_get_tid();
  for (size_t i = 0; i < cnt; ++i) {
    uintptr_t address = (uintptr_t)ptrs[i];
    void *propagated_address = NULL;
    void *mapped_address = NULL;
    // Check taskgroups reduce data, walking up the taskgroup chain
    kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
    while (tg) {
      uintptr_t *gomp_data = tg->gomp_data;
      if (!gomp_data) {
        tg = tg->parent;
        continue;
      }
      // Check the shared addresses list
      size_t num_vars = (size_t)gomp_data[0];
      uintptr_t per_thread_size = gomp_data[1];
      uintptr_t reduce_data = gomp_data[2];
      uintptr_t end_reduce_data = gomp_data[6];
      for (size_t j = 0; j < num_vars; ++j) {
        uintptr_t *entry = gomp_data + 7 + 3 * j;
        if (entry[0] == address) {
          uintptr_t offset = entry[1];
          mapped_address =
              (void *)(reduce_data + tid * per_thread_size + offset);
          if (i < cntorig)
            propagated_address = (void *)entry[0];
          break;
        }
      }
      if (mapped_address)
        break;
      // Check if address is within privatized copies range
      if (!mapped_address && address >= reduce_data &&
          address < end_reduce_data) {
        uintptr_t offset = (address - reduce_data) % per_thread_size;
        mapped_address = (void *)(reduce_data + tid * per_thread_size + offset);
        if (i < cntorig) {
          // Recover the original variable address by matching the offset
          for (size_t j = 0; j < num_vars; ++j) {
            uintptr_t *entry = gomp_data + 7 + 3 * j;
            if (entry[1] == offset) {
              propagated_address = (void *)entry[0];
              break;
            }
          }
        }
      }
      if (mapped_address)
        break;
      tg = tg->parent;
    }
    KMP_ASSERT(mapped_address);
    ptrs[i] = mapped_address;
    if (i < cntorig) {
      KMP_ASSERT(propagated_address);
      ptrs[cnt + i] = propagated_address;
    }
  }
}
2080 
// Start a taskgroup and set up GOMP-format task reduction data shared by
// the team. Exactly one thread allocates the common reduction storage,
// guarded by a compare-and-swap that installs (void *)1 as an
// "initialization in progress" sentinel; the other threads spin until the
// sentinel is replaced by a valid pointer.
// is_ws: 1 for worksharing constructs, 0 for parallel (selects which slot of
// t_tg_reduce_data / t_tg_fini_counter is used).
static void __kmp_GOMP_init_reductions(int gtid, uintptr_t *data, int is_ws) {
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  // First start a taskgroup
  __kmpc_taskgroup(NULL, gtid);
  // Then setup reduction data
  void *reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
  if (reduce_data == NULL &&
      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
                                 (void *)1)) {
    // Single thread enters this block to initialize common reduction data
    KMP_DEBUG_ASSERT(reduce_data == NULL);
    __kmp_GOMP_taskgroup_reduction_register(data, NULL, thr->th.th_team_nproc);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[is_ws], 0);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], (void *)data);
  } else {
    // Wait for task reduction initialization
    while ((reduce_data = KMP_ATOMIC_LD_ACQ(
                &team->t.t_tg_reduce_data[is_ws])) == (void *)1) {
      KMP_CPU_PAUSE();
    }
    KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here
  }
  // For worksharing constructs, each thread has its own reduction structure.
  // Have each reduction structure point to same privatized copies of vars.
  // For parallel, each thread points to same reduction structure and privatized
  // copies of vars
  if (is_ws) {
    __kmp_GOMP_taskgroup_reduction_register(
        data, NULL, thr->th.th_team_nproc,
        (uintptr_t *)KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws]));
  }
  // Publish this thread's descriptor on its current taskgroup so later
  // remap/unregister calls can find it.
  kmp_taskgroup_t *tg = thr->th.th_current_task->td_taskgroup;
  tg->gomp_data = data;
}
2116 
// Microtask executed by every thread of a GOMP_parallel_reductions region:
// initializes task reductions (inside a taskgroup), runs the outlined task,
// ends the taskgroup, and has the last thread out reset the team's cached
// reduction data. Returns the team size, which becomes the value returned by
// GOMP_parallel_reductions.
static unsigned
__kmp_GOMP_par_reductions_microtask_wrapper(int *gtid, int *npr,
                                            void (*task)(void *), void *data) {
  kmp_info_t *thr = __kmp_threads[*gtid];
  kmp_team_t *team = thr->th.th_team;
  // data points at the GOMP reduction descriptor pointer
  uintptr_t *reduce_data = *(uintptr_t **)data;
  __kmp_GOMP_init_reductions(*gtid, reduce_data, 0);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  ompt_state_t enclosing_state;

  if (ompt_enabled.enabled) {
    // save enclosing task state; set current state for task
    enclosing_state = thr->th.ompt_thread_info.state;
    thr->th.ompt_thread_info.state = ompt_state_work_parallel;

    // set task frame
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif

  task(data);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    // clear task frame
    ompt_frame->exit_frame = ompt_data_none;

    // restore enclosing state
    thr->th.ompt_thread_info.state = enclosing_state;
  }
#endif
  __kmpc_end_taskgroup(NULL, *gtid);
  // if last thread out, then reset the team's reduce data
  // the GOMP_taskgroup_reduction_unregister() function will deallocate
  // private copies after reduction calculations take place.
  int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[0]);
  if (count == thr->th.th_team_nproc - 1) {
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[0], NULL);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[0], 0);
  }
  return (unsigned)thr->th.th_team_nproc;
}
2162 
// Implements GOMP_parallel_reductions (GOMP_5.0): fork a parallel region
// where every thread runs `task` inside a taskgroup with task reductions.
// Returns the team size (the microtask wrapper's return value).
unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS)(
    void (*task)(void *), void *data, unsigned num_threads,
    unsigned int flags) {
  MKLOC(loc, "GOMP_parallel_reductions");
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("GOMP_parallel_reductions: T#%d\n", gtid));
  // Fork the team; worker threads run the wrapper as the microtask...
  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
                       (microtask_t)__kmp_GOMP_par_reductions_microtask_wrapper,
                       2, task, data);
  // ...while the encountering (primary) thread runs it directly here.
  unsigned retval =
      __kmp_GOMP_par_reductions_microtask_wrapper(&gtid, NULL, task, data);
  KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
  KA_TRACE(20, ("GOMP_parallel_reductions exit: T#%d\n", gtid));
  return retval;
}
2178 
2179 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_START)(
2180     long start, long end, long incr, long sched, long chunk_size, long *istart,
2181     long *iend, uintptr_t *reductions, void **mem) {
2182   int status = 0;
2183   int gtid = __kmp_entry_gtid();
2184   KA_TRACE(20, ("GOMP_loop_start: T#%d, reductions: %p\n", gtid, reductions));
2185   if (reductions)
2186     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2187   if (mem)
2188     KMP_FATAL(GompFeatureNotSupported, "scan");
2189   if (istart == NULL)
2190     return true;
2191   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2192   long monotonic = sched & MONOTONIC_FLAG;
2193   sched &= ~MONOTONIC_FLAG;
2194   if (sched == 0) {
2195     if (monotonic)
2196       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START)(
2197           start, end, incr, istart, iend);
2198     else
2199       status = KMP_EXPAND_NAME(
2200           KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START)(
2201           start, end, incr, istart, iend);
2202   } else if (sched == 1) {
2203     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START)(
2204         start, end, incr, chunk_size, istart, iend);
2205   } else if (sched == 2) {
2206     if (monotonic)
2207       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START)(
2208           start, end, incr, chunk_size, istart, iend);
2209     else
2210       status =
2211           KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START)(
2212               start, end, incr, chunk_size, istart, iend);
2213   } else if (sched == 3) {
2214     if (monotonic)
2215       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START)(
2216           start, end, incr, chunk_size, istart, iend);
2217     else
2218       status =
2219           KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START)(
2220               start, end, incr, chunk_size, istart, iend);
2221   } else if (sched == 4) {
2222     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START)(
2223         start, end, incr, istart, iend);
2224   } else {
2225     KMP_ASSERT(0);
2226   }
2227   return status;
2228 }
2229 
2230 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_START)(
2231     bool up, unsigned long long start, unsigned long long end,
2232     unsigned long long incr, long sched, unsigned long long chunk_size,
2233     unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,
2234     void **mem) {
2235   int status = 0;
2236   int gtid = __kmp_entry_gtid();
2237   KA_TRACE(20,
2238            ("GOMP_loop_ull_start: T#%d, reductions: %p\n", gtid, reductions));
2239   if (reductions)
2240     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2241   if (mem)
2242     KMP_FATAL(GompFeatureNotSupported, "scan");
2243   if (istart == NULL)
2244     return true;
2245   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2246   long monotonic = sched & MONOTONIC_FLAG;
2247   sched &= ~MONOTONIC_FLAG;
2248   if (sched == 0) {
2249     if (monotonic)
2250       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START)(
2251           up, start, end, incr, istart, iend);
2252     else
2253       status = KMP_EXPAND_NAME(
2254           KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START)(
2255           up, start, end, incr, istart, iend);
2256   } else if (sched == 1) {
2257     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START)(
2258         up, start, end, incr, chunk_size, istart, iend);
2259   } else if (sched == 2) {
2260     if (monotonic)
2261       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START)(
2262           up, start, end, incr, chunk_size, istart, iend);
2263     else
2264       status = KMP_EXPAND_NAME(
2265           KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START)(
2266           up, start, end, incr, chunk_size, istart, iend);
2267   } else if (sched == 3) {
2268     if (monotonic)
2269       status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START)(
2270           up, start, end, incr, chunk_size, istart, iend);
2271     else
2272       status =
2273           KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START)(
2274               up, start, end, incr, chunk_size, istart, iend);
2275   } else if (sched == 4) {
2276     status =
2277         KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START)(
2278             up, start, end, incr, istart, iend);
2279   } else {
2280     KMP_ASSERT(0);
2281   }
2282   return status;
2283 }
2284 
2285 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_START)(
2286     unsigned ncounts, long *counts, long sched, long chunk_size, long *istart,
2287     long *iend, uintptr_t *reductions, void **mem) {
2288   int status = 0;
2289   int gtid = __kmp_entry_gtid();
2290   KA_TRACE(20, ("GOMP_loop_doacross_start: T#%d, reductions: %p\n", gtid,
2291                 reductions));
2292   if (reductions)
2293     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2294   if (mem)
2295     KMP_FATAL(GompFeatureNotSupported, "scan");
2296   if (istart == NULL)
2297     return true;
2298   // Ignore any monotonic flag
2299   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2300   sched &= ~MONOTONIC_FLAG;
2301   if (sched == 0) {
2302     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START)(
2303         ncounts, counts, istart, iend);
2304   } else if (sched == 1) {
2305     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START)(
2306         ncounts, counts, chunk_size, istart, iend);
2307   } else if (sched == 2) {
2308     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START)(
2309         ncounts, counts, chunk_size, istart, iend);
2310   } else if (sched == 3) {
2311     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START)(
2312         ncounts, counts, chunk_size, istart, iend);
2313   } else {
2314     KMP_ASSERT(0);
2315   }
2316   return status;
2317 }
2318 
2319 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START)(
2320     unsigned ncounts, unsigned long long *counts, long sched,
2321     unsigned long long chunk_size, unsigned long long *istart,
2322     unsigned long long *iend, uintptr_t *reductions, void **mem) {
2323   int status = 0;
2324   int gtid = __kmp_entry_gtid();
2325   KA_TRACE(20, ("GOMP_loop_ull_doacross_start: T#%d, reductions: %p\n", gtid,
2326                 reductions));
2327   if (reductions)
2328     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2329   if (mem)
2330     KMP_FATAL(GompFeatureNotSupported, "scan");
2331   if (istart == NULL)
2332     return true;
2333   // Ignore any monotonic flag
2334   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2335   sched &= ~MONOTONIC_FLAG;
2336   if (sched == 0) {
2337     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START)(
2338         ncounts, counts, istart, iend);
2339   } else if (sched == 1) {
2340     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START)(
2341         ncounts, counts, chunk_size, istart, iend);
2342   } else if (sched == 2) {
2343     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START)(
2344         ncounts, counts, chunk_size, istart, iend);
2345   } else if (sched == 3) {
2346     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START)(
2347         ncounts, counts, chunk_size, istart, iend);
2348   } else {
2349     KMP_ASSERT(0);
2350   }
2351   return status;
2352 }
2353 
2354 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_START)(
2355     long start, long end, long incr, long sched, long chunk_size, long *istart,
2356     long *iend, uintptr_t *reductions, void **mem) {
2357   int status = 0;
2358   int gtid = __kmp_entry_gtid();
2359   KA_TRACE(20, ("GOMP_loop_ordered_start: T#%d, reductions: %p\n", gtid,
2360                 reductions));
2361   if (reductions)
2362     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2363   if (mem)
2364     KMP_FATAL(GompFeatureNotSupported, "scan");
2365   if (istart == NULL)
2366     return true;
2367   // Ignore any monotonic flag
2368   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2369   sched &= ~MONOTONIC_FLAG;
2370   if (sched == 0) {
2371     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START)(
2372         start, end, incr, istart, iend);
2373   } else if (sched == 1) {
2374     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START)(
2375         start, end, incr, chunk_size, istart, iend);
2376   } else if (sched == 2) {
2377     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START)(
2378         start, end, incr, chunk_size, istart, iend);
2379   } else if (sched == 3) {
2380     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START)(
2381         start, end, incr, chunk_size, istart, iend);
2382   } else {
2383     KMP_ASSERT(0);
2384   }
2385   return status;
2386 }
2387 
2388 bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START)(
2389     bool up, unsigned long long start, unsigned long long end,
2390     unsigned long long incr, long sched, unsigned long long chunk_size,
2391     unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,
2392     void **mem) {
2393   int status = 0;
2394   int gtid = __kmp_entry_gtid();
2395   KA_TRACE(20, ("GOMP_loop_ull_ordered_start: T#%d, reductions: %p\n", gtid,
2396                 reductions));
2397   if (reductions)
2398     __kmp_GOMP_init_reductions(gtid, reductions, 1);
2399   if (mem)
2400     KMP_FATAL(GompFeatureNotSupported, "scan");
2401   if (istart == NULL)
2402     return true;
2403   // Ignore any monotonic flag
2404   const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);
2405   sched &= ~MONOTONIC_FLAG;
2406   if (sched == 0) {
2407     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START)(
2408         up, start, end, incr, istart, iend);
2409   } else if (sched == 1) {
2410     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START)(
2411         up, start, end, incr, chunk_size, istart, iend);
2412   } else if (sched == 2) {
2413     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START)(
2414         up, start, end, incr, chunk_size, istart, iend);
2415   } else if (sched == 3) {
2416     status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START)(
2417         up, start, end, incr, chunk_size, istart, iend);
2418   } else {
2419     KMP_ASSERT(0);
2420   }
2421   return status;
2422 }
2423 
// Implements GOMP_sections2_start (GOMP_5.0): like GOMP_sections_start but
// with optional task reductions; "scan" storage (mem) is not supported.
// Returns the section number to execute (from GOMP_sections_start).
unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS2_START)(
    unsigned count, uintptr_t *reductions, void **mem) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20,
           ("GOMP_sections2_start: T#%d, reductions: %p\n", gtid, reductions));
  if (reductions)
    __kmp_GOMP_init_reductions(gtid, reductions, 1);
  if (mem)
    KMP_FATAL(GompFeatureNotSupported, "scan");
  return KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(count);
}
2435 
// Implements GOMP_workshare_task_reduction_unregister (GOMP_5.0): end the
// taskgroup opened by the worksharing reduction setup; the last thread out
// unregisters (frees) the shared reduction data and clears the team's
// cached pointer/counter. Unless the construct was cancelled, all threads
// then synchronize at a barrier.
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER)(
    bool cancelled) {
  int gtid = __kmp_get_gtid();
  MKLOC(loc, "GOMP_workshare_task_reduction_unregister");
  KA_TRACE(20, ("GOMP_workshare_task_reduction_unregister: T#%d\n", gtid));
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  __kmpc_end_taskgroup(NULL, gtid);
  // If last thread out of workshare, then reset the team's reduce data
  // the GOMP_taskgroup_reduction_unregister() function will deallocate
  // private copies after reduction calculations take place.
  int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[1]);
  if (count == thr->th.th_team_nproc - 1) {
    KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)
    ((uintptr_t *)KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[1]));
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[1], NULL);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[1], 0);
  }
  if (!cancelled) {
    __kmpc_barrier(&loc, gtid);
  }
}
2458 
/* The following sections of code create aliases for the GOMP_* functions, then
   create versioned symbols using the assembler directive .symver. This is only
   pertinent for an ELF .so library. The KMP_VERSION_SYMBOL macro is defined in
   kmp_os.h */

#ifdef KMP_USE_VERSION_SYMBOLS
// Each KMP_VERSION_SYMBOL(name, ver_num, ver_str) call below binds the
// expanded GOMP_* entry point to the named libgomp version node.
// GOMP_1.0 versioned symbols
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10,
                   "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10,
                   "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10,
                   "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10,
                   "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10,
                   "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10,
                   "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0");

// GOMP_2.0 versioned symbols
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20,
                   "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20,
                   "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20,
                   "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20,
                   "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20,
                   "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20,
                   "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20,
                   "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20,
                   "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0");

// GOMP_3.0 versioned symbols
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0");

// GOMP_4.0 versioned symbols
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0");

// GOMP_4.5 versioned symbols
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP, 45, "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP_ULL, 45, "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_POST, 45, "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_WAIT, 45, "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_POST, 45, "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT, 45, "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45,
                   "GOMP_4.5");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45,
                   "GOMP_4.5");

// GOMP_5.0 versioned symbols
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT, 50,
                   "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START, 50,
                   "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT, 50,
                   "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START, 50,
                   "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT,
                   50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START,
                   50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT, 50,
                   "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START, 50,
                   "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, 50,
                   "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME,
                   50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS_REG, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT_DEPEND, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER, 50,
                   "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER, 50,
                   "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_START, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_START, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_START, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_START, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS2_START, 50, "GOMP_5.0");
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER, 50,
                   "GOMP_5.0");
#endif // KMP_USE_VERSION_SYMBOLS
2648 
2649 #ifdef __cplusplus
2650 } // extern "C"
2651 #endif // __cplusplus
2652