/*
 * kmp_taskdeps.cpp
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//#define KMP_SUPPORT_GRAPH_OUTPUT 1

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// TODO: Improve memory allocation? keep a list of pre-allocated structures?
// allocate in blocks? re-use finished list entries?
// TODO: don't use atomic ref counters for stack-allocated nodes.
// TODO: find an alternative to atomic refs for heap-allocated nodes?
// TODO: Finish graph output support
// TODO: kmp_lock_t seems a tad too big (and heavyweight) for this. Check other
// runtime locks
// TODO: Any ITT support needed?

#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static std::atomic<kmp_int32> kmp_node_id_seed = ATOMIC_VAR_INIT(0);
#endif

static void __kmp_init_node(kmp_depnode_t *node) {
  node->dn.successors = NULL;
  node->dn.task = NULL; // will point to the right task
  // once dependences have been processed
  for (int i = 0; i < MAX_MTX_DEPS; ++i)
    node->dn.mtx_locks[i] = NULL;
  node->dn.mtx_num_locks = 0;
  __kmp_init_lock(&node->dn.lock);
  KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1); // init creates the first reference
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  __itt_sync_create(node, "OMP task dep node", NULL, 0);
#endif
}

static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
  KMP_ATOMIC_INC(&node->dn.nrefs);
  return node;
}

enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };

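// Hash table sizes used as the dependence hash grows: when a table becomes
// too crowded (see __kmp_dephash_find) it is rebuilt with the next size in
// this sequence; 'generation' indexes into sizes[] and growth stops once
// MAX_GEN is reached.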
size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
const size_t MAX_GEN = 8;

static inline size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
  // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
  // m_num_sets );
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}

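// Rebuild the dependence hash with the next-generation size and rehash all
// existing entries into the new buckets. Returns the old table unchanged once
// the maximum generation has been reached.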
static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
                                           kmp_dephash_t *current_dephash) {
  kmp_dephash_t *h;

  size_t gen = current_dephash->generation + 1;
  if (gen >= MAX_GEN)
    return current_dephash;
  size_t new_size = sizes[gen];

  size_t size_to_allocate =
      new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
#endif

  h->size = new_size;
  h->nelements = current_dephash->nelements;
  h->buckets = (kmp_dephash_entry **)(h + 1);
  h->generation = gen;
  h->nconflicts = 0;
  h->last_all = current_dephash->last_all;

  // make sure buckets are properly initialized
  for (size_t i = 0; i < new_size; i++) {
    h->buckets[i] = NULL;
  }

  // insert existing elements in the new table
  for (size_t i = 0; i < current_dephash->size; i++) {
    kmp_dephash_entry_t *next, *entry;
    for (entry = current_dephash->buckets[i]; entry; entry = next) {
      next = entry->next_in_bucket;
      // Compute the new hash using the new size, and insert the entry in
      // the new bucket.
      size_t new_bucket = __kmp_dephash_hash(entry->addr, h->size);
      entry->next_in_bucket = h->buckets[new_bucket];
      if (entry->next_in_bucket) {
        h->nconflicts++;
      }
      h->buckets[new_bucket] = entry;
    }
  }

  // Free old hash table
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, current_dephash);
#else
  __kmp_thread_free(thread, current_dephash);
#endif

  return h;
}

static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
                                           kmp_taskdata_t *current_task) {
  kmp_dephash_t *h;

  size_t h_size;

  if (current_task->td_flags.tasktype == TASK_IMPLICIT)
    h_size = KMP_DEPHASH_MASTER_SIZE;
  else
    h_size = KMP_DEPHASH_OTHER_SIZE;

  size_t size = h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
#endif
  h->size = h_size;

  h->generation = 0;
  h->nelements = 0;
  h->nconflicts = 0;
  h->buckets = (kmp_dephash_entry **)(h + 1);
  h->last_all = NULL;

  for (size_t i = 0; i < h_size; i++)
    h->buckets[i] = 0;

  return h;
}

static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
                                             kmp_dephash_t **hash,
                                             kmp_intptr_t addr) {
  kmp_dephash_t *h = *hash;
  if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
    *hash = __kmp_dephash_extend(thread, h);
    h = *hash;
  }
  size_t bucket = __kmp_dephash_hash(addr, h->size);

  kmp_dephash_entry_t *entry;
  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
    if (entry->addr == addr)
      break;

  if (entry == NULL) {
// create entry. This is only done by one thread so no locking required
#if USE_FAST_MEMORY
    entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
        thread, sizeof(kmp_dephash_entry_t));
#else
    entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
        thread, sizeof(kmp_dephash_entry_t));
#endif
    entry->addr = addr;
    if (!h->last_all) // no predecessor task with omp_all_memory dependence
      entry->last_out = NULL;
    else // else link the omp_all_memory depnode to the new entry
      entry->last_out = __kmp_node_ref(h->last_all);
    entry->last_set = NULL;
    entry->prev_set = NULL;
    entry->last_flag = 0;
    entry->mtx_lock = NULL;
    entry->next_in_bucket = h->buckets[bucket];
    h->buckets[bucket] = entry;
    h->nelements++;
    if (entry->next_in_bucket)
      h->nconflicts++;
  }
  return entry;
}

static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
                                          kmp_depnode_list_t *list,
                                          kmp_depnode_t *node) {
  kmp_depnode_list_t *new_head;

#if USE_FAST_MEMORY
  new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
      thread, sizeof(kmp_depnode_list_t));
#else
  new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
      thread, sizeof(kmp_depnode_list_t));
#endif

  new_head->node = __kmp_node_ref(node);
  new_head->next = list;

  return new_head;
}

static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
                                          kmp_depnode_t *sink,
                                          kmp_task_t *sink_task) {
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  // do not use sink->dn.task as that is only filled after the dependences
  // are already processed!
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
               task_source->td_ident->psource, sink->dn.id,
               task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT tracks dependences between task (a=source, b=sink) in which
     task a blocks the execution of b through the ompt_new_dependence_callback
     */
  if (ompt_enabled.ompt_callback_task_dependence) {
    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    ompt_data_t *sink_data;
    if (sink_task)
      sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
    else
      sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;

    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
        &(task_source->ompt_task_info.task_data), sink_data);
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}

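// Link 'node' as a successor of the depnode(s) passed as last argument (a
// list of depnodes below, a single depnode in the overload that follows) and
// return the number of predecessor edges created. dn.task is re-checked under
// the depnode lock: a NULL value indicates the predecessor task has already
// completed and released its dependences, so no edge is needed.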
static inline kmp_int32
__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
                             kmp_task_t *task, kmp_depnode_t *node,
                             kmp_depnode_list_t *plist) {
  if (!plist)
    return 0;
  kmp_int32 npredecessors = 0;
  // link node as successor of list elements
  for (kmp_depnode_list_t *p = plist; p; p = p->next) {
    kmp_depnode_t *dep = p->node;
    if (dep->dn.task) {
      KMP_ACQUIRE_DEPNODE(gtid, dep);
      if (dep->dn.task) {
        __kmp_track_dependence(gtid, dep, node, task);
        dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
        KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                      "%p\n",
                      gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
                      KMP_TASK_TO_TASKDATA(task)));
        npredecessors++;
      }
      KMP_RELEASE_DEPNODE(gtid, dep);
    }
  }
  return npredecessors;
}

static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
                                                     kmp_info_t *thread,
                                                     kmp_task_t *task,
                                                     kmp_depnode_t *source,
                                                     kmp_depnode_t *sink) {
  if (!sink)
    return 0;
  kmp_int32 npredecessors = 0;
  if (sink->dn.task) {
    // synchronously add source to sink's list of successors
    KMP_ACQUIRE_DEPNODE(gtid, sink);
    if (sink->dn.task) {
      __kmp_track_dependence(gtid, sink, source, task);
      sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
      KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                    "%p\n",
                    gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
                    KMP_TASK_TO_TASKDATA(task)));
      npredecessors++;
    }
    KMP_RELEASE_DEPNODE(gtid, sink);
  }
  return npredecessors;
}

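// Process an omp_all_memory dependence: the new task must wait for the
// previous omp_all_memory depnode (if any) and, treating omp_all_memory as an
// OUT dependence on every tracked address, for the depnodes recorded in the
// hash. Unless this is a dependence barrier, 'node' then becomes the new
// last_all and per-entry last_out predecessor for subsequently created tasks.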
static inline kmp_int32
__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
                      bool dep_barrier, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
                "dep_barrier = %d\n",
                gtid, dep_barrier));
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;

  // process previous omp_all_memory node if any
  npredecessors +=
      __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
  __kmp_node_deref(thread, h->last_all);
  if (!dep_barrier) {
    h->last_all = __kmp_node_ref(node);
  } else {
    // if this is a sync point in the serial sequence, then the previous
    // outputs are guaranteed to be completed after the execution of this
    // task so the previous output nodes can be cleared.
    h->last_all = NULL;
  }

  // process all regular dependences
  for (size_t i = 0; i < h->size; i++) {
    kmp_dephash_entry_t *info = h->buckets[i];
    if (!info) // skip empty slots in dephash
      continue;
    for (; info; info = info->next_in_bucket) {
      // for each entry the omp_all_memory works as OUT dependence
      kmp_depnode_t *last_out = info->last_out;
      kmp_depnode_list_t *last_set = info->last_set;
      kmp_depnode_list_t *prev_set = info->prev_set;
      if (last_set) {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        __kmp_depnode_list_free(thread, last_set);
        __kmp_depnode_list_free(thread, prev_set);
        info->last_set = NULL;
        info->prev_set = NULL;
        info->last_flag = 0; // no sets in this dephash entry
      } else {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      __kmp_node_deref(thread, last_out);
      if (!dep_barrier) {
        info->last_out = __kmp_node_ref(node);
      } else {
        info->last_out = NULL;
      }
    }
  }
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
                npredecessors));
  return npredecessors;
}

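// Process the dependences in dep_list against the hash table: for each
// address, link 'node' behind the recorded predecessors, then record 'node'
// as the new last_out (for OUT/INOUT) or append it to last_set (for IN, SET
// and MTX kinds). The 'filter' template parameter is true for the possibly
// aliased dep_list, in which duplicate addresses were voided by setting
// base_addr to 0 in __kmp_check_deps, and false for the noalias list.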
template <bool filter>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
                   bool dep_barrier, kmp_int32 ndeps,
                   kmp_depend_info_t *dep_list, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependences : "
                "dep_barrier = %d\n",
                filter, gtid, ndeps, dep_barrier));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;
  for (kmp_int32 i = 0; i < ndeps; i++) {
    const kmp_depend_info_t *dep = &dep_list[i];

    if (filter && dep->base_addr == 0)
      continue; // skip filtered entries

    kmp_dephash_entry_t *info =
        __kmp_dephash_find(thread, hash, dep->base_addr);
    kmp_depnode_t *last_out = info->last_out;
    kmp_depnode_list_t *last_set = info->last_set;
    kmp_depnode_list_t *prev_set = info->prev_set;

    if (dep->flags.out) { // out or inout --> clean lists if any
      if (last_set) {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        __kmp_depnode_list_free(thread, last_set);
        __kmp_depnode_list_free(thread, prev_set);
        info->last_set = NULL;
        info->prev_set = NULL;
        info->last_flag = 0; // no sets in this dephash entry
      } else {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      __kmp_node_deref(thread, last_out);
      if (!dep_barrier) {
        info->last_out = __kmp_node_ref(node);
      } else {
        // if this is a sync point in the serial sequence, then the previous
        // outputs are guaranteed to be completed after the execution of this
        // task so the previous output nodes can be cleared.
        info->last_out = NULL;
      }
    } else { // either IN or MTX or SET
      if (info->last_flag == 0 || info->last_flag == dep->flag) {
        // last_set either didn't exist or of same dep kind
        // link node as successor of the last_out if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
        // link node as successor of all nodes in the prev_set if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
        if (dep_barrier) {
          // clean last_out and prev_set if any; don't touch last_set
          __kmp_node_deref(thread, last_out);
          info->last_out = NULL;
          __kmp_depnode_list_free(thread, prev_set);
          info->prev_set = NULL;
        }
      } else { // last_set is of different dep kind, make it prev_set
        // link node as successor of all nodes in the last_set
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        // clean last_out if any
        __kmp_node_deref(thread, last_out);
        info->last_out = NULL;
        // clean prev_set if any
        __kmp_depnode_list_free(thread, prev_set);
        if (!dep_barrier) {
          // move last_set to prev_set, new last_set will be allocated
          info->prev_set = last_set;
        } else {
          info->prev_set = NULL;
          info->last_flag = 0;
        }
        info->last_set = NULL;
      }
      // for dep_barrier last_flag value should remain:
      // 0 if last_set is empty, unchanged otherwise
      if (!dep_barrier) {
        info->last_flag = dep->flag; // store dep kind of the last_set
        info->last_set = __kmp_add_node(thread, info->last_set, node);
      }
      // check if we are processing MTX dependency
      if (dep->flag == KMP_DEP_MTX) {
        if (info->mtx_lock == NULL) {
          info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
          __kmp_init_lock(info->mtx_lock);
        }
        KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
        kmp_int32 m;
        // Save lock in node's array
        for (m = 0; m < MAX_MTX_DEPS; ++m) {
          // sort pointers in decreasing order to avoid potential livelock
          if (node->dn.mtx_locks[m] < info->mtx_lock) {
            KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
            for (int n = node->dn.mtx_num_locks; n > m; --n) {
              // shift right all lesser non-NULL pointers
              KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
              node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
            }
            node->dn.mtx_locks[m] = info->mtx_lock;
            break;
          }
        }
        KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
        node->dn.mtx_num_locks++;
      }
    }
  }
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
                gtid, npredecessors));
  return npredecessors;
}

#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)

// returns true if the task has any outstanding dependence
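// With dep_barrier == DEP_BARRIER (used by __kmpc_omp_wait_deps) the node only
// consumes the existing dependences and is not recorded in the hash as a
// predecessor for tasks created later.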
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
                             kmp_task_t *task, kmp_dephash_t **hash,
                             bool dep_barrier, kmp_int32 ndeps,
                             kmp_depend_info_t *dep_list,
                             kmp_int32 ndeps_noalias,
                             kmp_depend_info_t *noalias_dep_list) {
  int i, n_mtxs = 0, dep_all = 0;
#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
#endif
  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependences for task %p : %d "
                "possibly aliased dependences, %d non-aliased dependences : "
                "dep_barrier=%d .\n",
                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));

  // Filter deps in dep_list
  // TODO: Different algorithm for large dep_list ( > 10 ? )
  for (i = 0; i < ndeps; i++) {
    if (dep_list[i].base_addr != 0 &&
        dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX) {
      KMP_DEBUG_ASSERT(
          dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
          dep_list[i].flag == KMP_DEP_INOUT ||
          dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
      for (int j = i + 1; j < ndeps; j++) {
        if (dep_list[i].base_addr == dep_list[j].base_addr) {
          if (dep_list[i].flag != dep_list[j].flag) {
            // two different dependences on same address work identical to OUT
            dep_list[i].flag = KMP_DEP_OUT;
          }
          dep_list[j].base_addr = 0; // Mark j element as void
        }
      }
      if (dep_list[i].flag == KMP_DEP_MTX) {
        // limit number of mtx deps to MAX_MTX_DEPS per node
        if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
          ++n_mtxs;
        } else {
          dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
        }
      }
    } else if (dep_list[i].flag == KMP_DEP_ALL ||
               dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX) {
      // omp_all_memory dependence can be marked by compiler by either
      // (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1).
      // omp_all_memory overrides all other dependences if any
      dep_all = 1;
      break;
    }
  }

  // doesn't need to be atomic as no other thread is going to be accessing this
  // node just yet.
  // npredecessors is set -1 to ensure that none of the releasing tasks queues
  // this task before we have finished processing all the dependences
  node->dn.npredecessors = -1;

  // used to pack all npredecessors additions into a single atomic operation at
  // the end
  int npredecessors;

  if (!dep_all) { // regular dependences
    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
                                             ndeps, dep_list, task);
    npredecessors += __kmp_process_deps<false>(
        gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
  } else { // omp_all_memory dependence
    npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
  }

  node->dn.task = task;
  KMP_MB();

  // Account for our initial fake value
  npredecessors++;

  // Update predecessors and obtain current value to check if there are still
  // any outstanding dependences (some tasks may have finished while we
  // processed the dependences)
  npredecessors =
      node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;

  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
                gtid, npredecessors, taskdata));

  // beyond this point the task could be queued (and executed) by a releasing
  // task...
  return npredecessors > 0 ? true : false;
}

/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
task''
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued

Schedule a non-thread-switchable task with dependences for execution
*/
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list) {

  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }

    new_taskdata->ompt_task_info.frame.enter_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }

#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(ompt_deps != NULL);

    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    /* For OMPD we might want to delay the free until end of this function */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */

  bool serial = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  kmp_task_team_t *task_team = thread->th.th_task_team;
  serial = serial &&
           !(task_team && (task_team->tt.tt_found_proxy_tasks ||
                           task_team->tt.tt_hidden_helper_task_encountered));

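  // Dependences only need to be tracked if tasks may actually be deferred,
  // i.e. the region is not serialized, or proxy / hidden helper tasks may
  // still run asynchronously.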
  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
    /* if no dependences have been tracked yet, create the dependence hash */
    if (current_task->td_dephash == NULL)
      current_task->td_dephash = __kmp_dephash_create(thread, current_task);

#if USE_FAST_MEMORY
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
#else
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
#endif

    __kmp_init_node(node);
    new_taskdata->td_depnode = node;

    if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
                    "dependences: "
                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                    gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
      }
#endif
      return TASK_CURRENT_NOT_QUEUED;
    }
  } else {
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "
                  "for task (serialized) loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata));
  }

  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
                "dependences : "
                "loc=%p task=%p, transferring to __kmp_omp_task\n",
                gtid, loc_ref, new_taskdata));

  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return ret;
}

#if OMPT_SUPPORT
void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
                                ompt_data_t *taskwait_task_data) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        taskwait_task_data, ompt_taskwait_complete, NULL);
  }
  current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
  *taskwait_task_data = ompt_data_none;
}
#endif /* OMPT_SUPPORT */

/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

Blocks the current task until all specified dependences have been fulfilled.
*/
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                          kmp_depend_info_t *noalias_dep_list) {
  KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref));

  if (ndeps == 0 && ndeps_noalias == 0) {
    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependences to "
                  "wait upon : loc=%p\n",
                  gtid, loc_ref));
    return;
  }
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

#if OMPT_SUPPORT
  // this function represents a taskwait construct with depend clause
  // We signal 4 events:
  //  - creation of the taskwait task
  //  - dependences of the taskwait task
  //  - schedule and finish of the taskwait task
  ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
  KMP_ASSERT(taskwait_task_data->ptr == NULL);
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame), taskwait_task_data,
          ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }
  }

#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(ompt_deps != NULL);

    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        taskwait_task_data, ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    /* For OMPD we might want to delay the free until end of this function */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
    ompt_deps = NULL;
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */

  // We can return immediately as:
  // - dependences are not computed in serial teams (except with proxy tasks)
  // - if the dephash is not yet created it means we have nothing to wait for
  bool ignore = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  ignore =
      ignore && thread->th.th_task_team != NULL &&
      thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE &&
      thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE;
  ignore = ignore || current_task->td_dephash == NULL;

  if (ignore) {
    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    return;
  }

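  // Use a dummy, stack-allocated depnode to collect the outstanding
  // dependences; DEP_BARRIER makes __kmp_check_deps consume them without
  // registering this node as a predecessor for tasks created later.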
  kmp_depnode_t node = {0};
  __kmp_init_node(&node);

  if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
                        DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                        noalias_dep_list)) {
    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    return;
  }

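  // Rather than blocking, keep executing other ready tasks until all
  // predecessors of the dummy node have completed.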
  int thread_finished = FALSE;
  kmp_flag_32<false, false> flag(
      (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
  while (node.dn.npredecessors > 0) {
    flag.execute_tasks(thread, gtid, FALSE,
                       &thread_finished USE_ITT_BUILD_ARG(NULL),
                       __kmp_task_stealing_constraint);
  }

#if OMPT_SUPPORT
  __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
  KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
                gtid, loc_ref));
}