15e8470afSJim Cownie /*
25e8470afSJim Cownie * kmp_taskdeps.cpp
35e8470afSJim Cownie */
45e8470afSJim Cownie
55e8470afSJim Cownie //===----------------------------------------------------------------------===//
65e8470afSJim Cownie //
757b08b09SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
857b08b09SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
957b08b09SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
105e8470afSJim Cownie //
115e8470afSJim Cownie //===----------------------------------------------------------------------===//
125e8470afSJim Cownie
135e8470afSJim Cownie //#define KMP_SUPPORT_GRAPH_OUTPUT 1
145e8470afSJim Cownie
155e8470afSJim Cownie #include "kmp.h"
165e8470afSJim Cownie #include "kmp_io.h"
174cc4bb4cSJim Cownie #include "kmp_wait_release.h"
18cf27e31bSJonathan Peyton #include "kmp_taskdeps.h"
1982e94a59SJoachim Protze #if OMPT_SUPPORT
2082e94a59SJoachim Protze #include "ompt-specific.h"
2182e94a59SJoachim Protze #endif
225e8470afSJim Cownie
233041982dSJonathan Peyton // TODO: Improve memory allocation? keep a list of pre-allocated structures?
243041982dSJonathan Peyton // allocate in blocks? re-use list finished list entries?
255e8470afSJim Cownie // TODO: don't use atomic ref counters for stack-allocated nodes.
265e8470afSJim Cownie // TODO: find an alternate to atomic refs for heap-allocated nodes?
275e8470afSJim Cownie // TODO: Finish graph output support
283041982dSJonathan Peyton // TODO: kmp_lock_t seems a tad to big (and heavy weight) for this. Check other
293041982dSJonathan Peyton // runtime locks
305e8470afSJim Cownie // TODO: Any ITT support needed?
315e8470afSJim Cownie
325e8470afSJim Cownie #ifdef KMP_SUPPORT_GRAPH_OUTPUT
// monotonically increasing id source; each new depnode gets a unique id for
// the graph dump (see __kmp_init_node / __kmp_track_dependence)
3337e2ef54SJonathan Peyton static std::atomic<kmp_int32> kmp_node_id_seed = ATOMIC_VAR_INIT(0);
345e8470afSJim Cownie #endif
355e8470afSJim Cownie
__kmp_init_node(kmp_depnode_t * node)363041982dSJonathan Peyton static void __kmp_init_node(kmp_depnode_t *node) {
375e8470afSJim Cownie node->dn.successors = NULL;
3842016791SKazuaki Ishizaki node->dn.task = NULL; // will point to the right task
39c3344345SAndrey Churbanov // once dependences have been processed
40c3344345SAndrey Churbanov for (int i = 0; i < MAX_MTX_DEPS; ++i)
41c3344345SAndrey Churbanov node->dn.mtx_locks[i] = NULL;
42c3344345SAndrey Churbanov node->dn.mtx_num_locks = 0;
435e8470afSJim Cownie __kmp_init_lock(&node->dn.lock);
44c3344345SAndrey Churbanov KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1); // init creates the first reference
455e8470afSJim Cownie #ifdef KMP_SUPPORT_GRAPH_OUTPUT
4637e2ef54SJonathan Peyton node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
475e8470afSJim Cownie #endif
48*52d0ef3cSAndreyChurbanov #if USE_ITT_BUILD && USE_ITT_NOTIFY
49*52d0ef3cSAndreyChurbanov __itt_sync_create(node, "OMP task dep node", NULL, 0);
50*52d0ef3cSAndreyChurbanov #endif
515e8470afSJim Cownie }
525e8470afSJim Cownie
__kmp_node_ref(kmp_depnode_t * node)533041982dSJonathan Peyton static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
5437e2ef54SJonathan Peyton KMP_ATOMIC_INC(&node->dn.nrefs);
555e8470afSJim Cownie return node;
565e8470afSJim Cownie }
575e8470afSJim Cownie
// initial table sizes: implicit (master) tasks get the bigger table, see
// __kmp_dephash_create
583041982dSJonathan Peyton enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
595e8470afSJim Cownie
// successive table sizes used when the hash is extended; the table's
// 'generation' field indexes into this array (see __kmp_dephash_extend)
60a1639b9bSAndrey Churbanov size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
61a1639b9bSAndrey Churbanov const size_t MAX_GEN = 8;
62a1639b9bSAndrey Churbanov
__kmp_dephash_hash(kmp_intptr_t addr,size_t hsize)636b316febSTerry Wilmarth static inline size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
643041982dSJonathan Peyton // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
653041982dSJonathan Peyton // m_num_sets );
667d45451aSJonathan Peyton return ((addr >> 6) ^ (addr >> 2)) % hsize;
675e8470afSJim Cownie }
685e8470afSJim Cownie
__kmp_dephash_extend(kmp_info_t * thread,kmp_dephash_t * current_dephash)69a1639b9bSAndrey Churbanov static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
70a1639b9bSAndrey Churbanov kmp_dephash_t *current_dephash) {
71a1639b9bSAndrey Churbanov kmp_dephash_t *h;
72a1639b9bSAndrey Churbanov
73a1639b9bSAndrey Churbanov size_t gen = current_dephash->generation + 1;
74a1639b9bSAndrey Churbanov if (gen >= MAX_GEN)
75a1639b9bSAndrey Churbanov return current_dephash;
76a1639b9bSAndrey Churbanov size_t new_size = sizes[gen];
77a1639b9bSAndrey Churbanov
786b316febSTerry Wilmarth size_t size_to_allocate =
79a1639b9bSAndrey Churbanov new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
80a1639b9bSAndrey Churbanov
81a1639b9bSAndrey Churbanov #if USE_FAST_MEMORY
82a1639b9bSAndrey Churbanov h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
83a1639b9bSAndrey Churbanov #else
84a1639b9bSAndrey Churbanov h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
85a1639b9bSAndrey Churbanov #endif
86a1639b9bSAndrey Churbanov
87a1639b9bSAndrey Churbanov h->size = new_size;
88a1639b9bSAndrey Churbanov h->nelements = current_dephash->nelements;
89a1639b9bSAndrey Churbanov h->buckets = (kmp_dephash_entry **)(h + 1);
90a1639b9bSAndrey Churbanov h->generation = gen;
91be29d928SAndreyChurbanov h->nconflicts = 0;
92d40108e0SAndreyChurbanov h->last_all = current_dephash->last_all;
93edbcc17bSJoseph Schuchart
94edbcc17bSJoseph Schuchart // make sure buckets are properly initialized
95edbcc17bSJoseph Schuchart for (size_t i = 0; i < new_size; i++) {
96edbcc17bSJoseph Schuchart h->buckets[i] = NULL;
97edbcc17bSJoseph Schuchart }
98edbcc17bSJoseph Schuchart
99a1639b9bSAndrey Churbanov // insert existing elements in the new table
100a1639b9bSAndrey Churbanov for (size_t i = 0; i < current_dephash->size; i++) {
101be29d928SAndreyChurbanov kmp_dephash_entry_t *next, *entry;
102be29d928SAndreyChurbanov for (entry = current_dephash->buckets[i]; entry; entry = next) {
103a1639b9bSAndrey Churbanov next = entry->next_in_bucket;
104a1639b9bSAndrey Churbanov // Compute the new hash using the new size, and insert the entry in
105a1639b9bSAndrey Churbanov // the new bucket.
1066b316febSTerry Wilmarth size_t new_bucket = __kmp_dephash_hash(entry->addr, h->size);
107be29d928SAndreyChurbanov entry->next_in_bucket = h->buckets[new_bucket];
108a1639b9bSAndrey Churbanov if (entry->next_in_bucket) {
109a1639b9bSAndrey Churbanov h->nconflicts++;
110a1639b9bSAndrey Churbanov }
111a1639b9bSAndrey Churbanov h->buckets[new_bucket] = entry;
112a1639b9bSAndrey Churbanov }
113a1639b9bSAndrey Churbanov }
114a1639b9bSAndrey Churbanov
115a1639b9bSAndrey Churbanov // Free old hash table
116a1639b9bSAndrey Churbanov #if USE_FAST_MEMORY
117a1639b9bSAndrey Churbanov __kmp_fast_free(thread, current_dephash);
118a1639b9bSAndrey Churbanov #else
119a1639b9bSAndrey Churbanov __kmp_thread_free(thread, current_dephash);
120a1639b9bSAndrey Churbanov #endif
121a1639b9bSAndrey Churbanov
122a1639b9bSAndrey Churbanov return h;
123a1639b9bSAndrey Churbanov }
124a1639b9bSAndrey Churbanov
__kmp_dephash_create(kmp_info_t * thread,kmp_taskdata_t * current_task)1253041982dSJonathan Peyton static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
1263041982dSJonathan Peyton kmp_taskdata_t *current_task) {
1275e8470afSJim Cownie kmp_dephash_t *h;
1285e8470afSJim Cownie
1297d45451aSJonathan Peyton size_t h_size;
1307d45451aSJonathan Peyton
1317d45451aSJonathan Peyton if (current_task->td_flags.tasktype == TASK_IMPLICIT)
1327d45451aSJonathan Peyton h_size = KMP_DEPHASH_MASTER_SIZE;
1337d45451aSJonathan Peyton else
1347d45451aSJonathan Peyton h_size = KMP_DEPHASH_OTHER_SIZE;
1357d45451aSJonathan Peyton
1366b316febSTerry Wilmarth size_t size = h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
1375e8470afSJim Cownie
1385e8470afSJim Cownie #if USE_FAST_MEMORY
1395e8470afSJim Cownie h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
1405e8470afSJim Cownie #else
1415e8470afSJim Cownie h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
1425e8470afSJim Cownie #endif
1437d45451aSJonathan Peyton h->size = h_size;
1445e8470afSJim Cownie
145a1639b9bSAndrey Churbanov h->generation = 0;
1465e8470afSJim Cownie h->nelements = 0;
1477d45451aSJonathan Peyton h->nconflicts = 0;
1485e8470afSJim Cownie h->buckets = (kmp_dephash_entry **)(h + 1);
149d40108e0SAndreyChurbanov h->last_all = NULL;
1505e8470afSJim Cownie
1517d45451aSJonathan Peyton for (size_t i = 0; i < h_size; i++)
1525e8470afSJim Cownie h->buckets[i] = 0;
1535e8470afSJim Cownie
1545e8470afSJim Cownie return h;
1555e8470afSJim Cownie }
1565e8470afSJim Cownie
__kmp_dephash_find(kmp_info_t * thread,kmp_dephash_t ** hash,kmp_intptr_t addr)157309b00a4SShilei Tian static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
158309b00a4SShilei Tian kmp_dephash_t **hash,
159309b00a4SShilei Tian kmp_intptr_t addr) {
160a1639b9bSAndrey Churbanov kmp_dephash_t *h = *hash;
161309b00a4SShilei Tian if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
162a1639b9bSAndrey Churbanov *hash = __kmp_dephash_extend(thread, h);
163a1639b9bSAndrey Churbanov h = *hash;
164a1639b9bSAndrey Churbanov }
1656b316febSTerry Wilmarth size_t bucket = __kmp_dephash_hash(addr, h->size);
1665e8470afSJim Cownie
1675e8470afSJim Cownie kmp_dephash_entry_t *entry;
1685e8470afSJim Cownie for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
1693041982dSJonathan Peyton if (entry->addr == addr)
1703041982dSJonathan Peyton break;
1715e8470afSJim Cownie
1725e8470afSJim Cownie if (entry == NULL) {
1735e8470afSJim Cownie // create entry. This is only done by one thread so no locking required
1745e8470afSJim Cownie #if USE_FAST_MEMORY
1753041982dSJonathan Peyton entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
1763041982dSJonathan Peyton thread, sizeof(kmp_dephash_entry_t));
1775e8470afSJim Cownie #else
1783041982dSJonathan Peyton entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
1793041982dSJonathan Peyton thread, sizeof(kmp_dephash_entry_t));
1805e8470afSJim Cownie #endif
1815e8470afSJim Cownie entry->addr = addr;
182d40108e0SAndreyChurbanov if (!h->last_all) // no predecessor task with omp_all_memory dependence
1835e8470afSJim Cownie entry->last_out = NULL;
184d40108e0SAndreyChurbanov else // else link the omp_all_memory depnode to the new entry
185d40108e0SAndreyChurbanov entry->last_out = __kmp_node_ref(h->last_all);
186610fea65SAndreyChurbanov entry->last_set = NULL;
187610fea65SAndreyChurbanov entry->prev_set = NULL;
188610fea65SAndreyChurbanov entry->last_flag = 0;
189c3344345SAndrey Churbanov entry->mtx_lock = NULL;
1905e8470afSJim Cownie entry->next_in_bucket = h->buckets[bucket];
1915e8470afSJim Cownie h->buckets[bucket] = entry;
1925e8470afSJim Cownie h->nelements++;
1933041982dSJonathan Peyton if (entry->next_in_bucket)
1943041982dSJonathan Peyton h->nconflicts++;
1955e8470afSJim Cownie }
1965e8470afSJim Cownie return entry;
1975e8470afSJim Cownie }
1985e8470afSJim Cownie
__kmp_add_node(kmp_info_t * thread,kmp_depnode_list_t * list,kmp_depnode_t * node)1993041982dSJonathan Peyton static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
2003041982dSJonathan Peyton kmp_depnode_list_t *list,
2013041982dSJonathan Peyton kmp_depnode_t *node) {
2025e8470afSJim Cownie kmp_depnode_list_t *new_head;
2035e8470afSJim Cownie
2045e8470afSJim Cownie #if USE_FAST_MEMORY
2053041982dSJonathan Peyton new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
2063041982dSJonathan Peyton thread, sizeof(kmp_depnode_list_t));
2075e8470afSJim Cownie #else
2083041982dSJonathan Peyton new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
2093041982dSJonathan Peyton thread, sizeof(kmp_depnode_list_t));
2105e8470afSJim Cownie #endif
2115e8470afSJim Cownie
2125e8470afSJim Cownie new_head->node = __kmp_node_ref(node);
2135e8470afSJim Cownie new_head->next = list;
2145e8470afSJim Cownie
2155e8470afSJim Cownie return new_head;
2165e8470afSJim Cownie }
2175e8470afSJim Cownie
__kmp_track_dependence(kmp_int32 gtid,kmp_depnode_t * source,kmp_depnode_t * sink,kmp_task_t * sink_task)21847cb8a0fSJoachim Protze static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
2193041982dSJonathan Peyton kmp_depnode_t *sink,
2203041982dSJonathan Peyton kmp_task_t *sink_task) {
2215e8470afSJim Cownie #ifdef KMP_SUPPORT_GRAPH_OUTPUT
2225e8470afSJim Cownie kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
223cf1ddae7SAndreyChurbanov // do not use sink->dn.task as that is only filled after the dependences
22420236611SJonas Hahnfeld // are already processed!
22520236611SJonas Hahnfeld kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
2265e8470afSJim Cownie
2273041982dSJonathan Peyton __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
2283041982dSJonathan Peyton task_source->td_ident->psource, sink->dn.id,
2293041982dSJonathan Peyton task_sink->td_ident->psource);
2305e8470afSJim Cownie #endif
23182e94a59SJoachim Protze #if OMPT_SUPPORT && OMPT_OPTIONAL
23282e94a59SJoachim Protze /* OMPT tracks dependences between task (a=source, b=sink) in which
23382e94a59SJoachim Protze task a blocks the execution of b through the ompt_new_dependence_callback
23482e94a59SJoachim Protze */
23582e94a59SJoachim Protze if (ompt_enabled.ompt_callback_task_dependence) {
23639b68624SJonas Hahnfeld kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
23747cb8a0fSJoachim Protze ompt_data_t *sink_data;
23847cb8a0fSJoachim Protze if (sink_task)
23947cb8a0fSJoachim Protze sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
24047cb8a0fSJoachim Protze else
24147cb8a0fSJoachim Protze sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;
24239b68624SJonas Hahnfeld
24382e94a59SJoachim Protze ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
24447cb8a0fSJoachim Protze &(task_source->ompt_task_info.task_data), sink_data);
24539b68624SJonas Hahnfeld }
24682e94a59SJoachim Protze #endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
2475e8470afSJim Cownie }
2485e8470afSJim Cownie
249c3344345SAndrey Churbanov static inline kmp_int32
__kmp_depnode_link_successor(kmp_int32 gtid,kmp_info_t * thread,kmp_task_t * task,kmp_depnode_t * node,kmp_depnode_list_t * plist)250c3344345SAndrey Churbanov __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
251c3344345SAndrey Churbanov kmp_task_t *task, kmp_depnode_t *node,
252c3344345SAndrey Churbanov kmp_depnode_list_t *plist) {
253c3344345SAndrey Churbanov if (!plist)
254c3344345SAndrey Churbanov return 0;
255c3344345SAndrey Churbanov kmp_int32 npredecessors = 0;
256c3344345SAndrey Churbanov // link node as successor of list elements
257c3344345SAndrey Churbanov for (kmp_depnode_list_t *p = plist; p; p = p->next) {
258c3344345SAndrey Churbanov kmp_depnode_t *dep = p->node;
259c3344345SAndrey Churbanov if (dep->dn.task) {
260c3344345SAndrey Churbanov KMP_ACQUIRE_DEPNODE(gtid, dep);
261c3344345SAndrey Churbanov if (dep->dn.task) {
26247cb8a0fSJoachim Protze __kmp_track_dependence(gtid, dep, node, task);
263c3344345SAndrey Churbanov dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
264c3344345SAndrey Churbanov KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
265c3344345SAndrey Churbanov "%p\n",
266c3344345SAndrey Churbanov gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
267c3344345SAndrey Churbanov KMP_TASK_TO_TASKDATA(task)));
268c3344345SAndrey Churbanov npredecessors++;
269c3344345SAndrey Churbanov }
270c3344345SAndrey Churbanov KMP_RELEASE_DEPNODE(gtid, dep);
271c3344345SAndrey Churbanov }
272c3344345SAndrey Churbanov }
273c3344345SAndrey Churbanov return npredecessors;
274c3344345SAndrey Churbanov }
275c3344345SAndrey Churbanov
__kmp_depnode_link_successor(kmp_int32 gtid,kmp_info_t * thread,kmp_task_t * task,kmp_depnode_t * source,kmp_depnode_t * sink)276c3344345SAndrey Churbanov static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
277c3344345SAndrey Churbanov kmp_info_t *thread,
278c3344345SAndrey Churbanov kmp_task_t *task,
279c3344345SAndrey Churbanov kmp_depnode_t *source,
280c3344345SAndrey Churbanov kmp_depnode_t *sink) {
281c3344345SAndrey Churbanov if (!sink)
282c3344345SAndrey Churbanov return 0;
283c3344345SAndrey Churbanov kmp_int32 npredecessors = 0;
284c3344345SAndrey Churbanov if (sink->dn.task) {
285c3344345SAndrey Churbanov // synchronously add source to sink' list of successors
286c3344345SAndrey Churbanov KMP_ACQUIRE_DEPNODE(gtid, sink);
287c3344345SAndrey Churbanov if (sink->dn.task) {
28847cb8a0fSJoachim Protze __kmp_track_dependence(gtid, sink, source, task);
289c3344345SAndrey Churbanov sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
290c3344345SAndrey Churbanov KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
291c3344345SAndrey Churbanov "%p\n",
292c3344345SAndrey Churbanov gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
293c3344345SAndrey Churbanov KMP_TASK_TO_TASKDATA(task)));
294c3344345SAndrey Churbanov npredecessors++;
295c3344345SAndrey Churbanov }
296c3344345SAndrey Churbanov KMP_RELEASE_DEPNODE(gtid, sink);
297c3344345SAndrey Churbanov }
298c3344345SAndrey Churbanov return npredecessors;
299c3344345SAndrey Churbanov }
300c3344345SAndrey Churbanov
301d40108e0SAndreyChurbanov static inline kmp_int32
__kmp_process_dep_all(kmp_int32 gtid,kmp_depnode_t * node,kmp_dephash_t * h,bool dep_barrier,kmp_task_t * task)302d40108e0SAndreyChurbanov __kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
303d40108e0SAndreyChurbanov bool dep_barrier, kmp_task_t *task) {
304d40108e0SAndreyChurbanov KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
305d40108e0SAndreyChurbanov "dep_barrier = %d\n",
306d40108e0SAndreyChurbanov gtid, dep_barrier));
307d40108e0SAndreyChurbanov kmp_info_t *thread = __kmp_threads[gtid];
308d40108e0SAndreyChurbanov kmp_int32 npredecessors = 0;
309d40108e0SAndreyChurbanov
310d40108e0SAndreyChurbanov // process previous omp_all_memory node if any
311d40108e0SAndreyChurbanov npredecessors +=
312d40108e0SAndreyChurbanov __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
313d40108e0SAndreyChurbanov __kmp_node_deref(thread, h->last_all);
314d40108e0SAndreyChurbanov if (!dep_barrier) {
315d40108e0SAndreyChurbanov h->last_all = __kmp_node_ref(node);
316d40108e0SAndreyChurbanov } else {
317d40108e0SAndreyChurbanov // if this is a sync point in the serial sequence, then the previous
318d40108e0SAndreyChurbanov // outputs are guaranteed to be completed after the execution of this
319d40108e0SAndreyChurbanov // task so the previous output nodes can be cleared.
320d40108e0SAndreyChurbanov h->last_all = NULL;
321d40108e0SAndreyChurbanov }
322d40108e0SAndreyChurbanov
323d40108e0SAndreyChurbanov // process all regular dependences
324d40108e0SAndreyChurbanov for (size_t i = 0; i < h->size; i++) {
325d40108e0SAndreyChurbanov kmp_dephash_entry_t *info = h->buckets[i];
326d40108e0SAndreyChurbanov if (!info) // skip empty slots in dephash
327d40108e0SAndreyChurbanov continue;
328d40108e0SAndreyChurbanov for (; info; info = info->next_in_bucket) {
329d40108e0SAndreyChurbanov // for each entry the omp_all_memory works as OUT dependence
330d40108e0SAndreyChurbanov kmp_depnode_t *last_out = info->last_out;
331d40108e0SAndreyChurbanov kmp_depnode_list_t *last_set = info->last_set;
332d40108e0SAndreyChurbanov kmp_depnode_list_t *prev_set = info->prev_set;
333d40108e0SAndreyChurbanov if (last_set) {
334d40108e0SAndreyChurbanov npredecessors +=
335d40108e0SAndreyChurbanov __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
336d40108e0SAndreyChurbanov __kmp_depnode_list_free(thread, last_set);
337d40108e0SAndreyChurbanov __kmp_depnode_list_free(thread, prev_set);
338d40108e0SAndreyChurbanov info->last_set = NULL;
339d40108e0SAndreyChurbanov info->prev_set = NULL;
340d40108e0SAndreyChurbanov info->last_flag = 0; // no sets in this dephash entry
341d40108e0SAndreyChurbanov } else {
342d40108e0SAndreyChurbanov npredecessors +=
343d40108e0SAndreyChurbanov __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
344d40108e0SAndreyChurbanov }
345d40108e0SAndreyChurbanov __kmp_node_deref(thread, last_out);
346d40108e0SAndreyChurbanov if (!dep_barrier) {
347d40108e0SAndreyChurbanov info->last_out = __kmp_node_ref(node);
348d40108e0SAndreyChurbanov } else {
349d40108e0SAndreyChurbanov info->last_out = NULL;
350d40108e0SAndreyChurbanov }
351d40108e0SAndreyChurbanov }
352d40108e0SAndreyChurbanov }
353d40108e0SAndreyChurbanov KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
354d40108e0SAndreyChurbanov npredecessors));
355d40108e0SAndreyChurbanov return npredecessors;
356d40108e0SAndreyChurbanov }
357d40108e0SAndreyChurbanov
// __kmp_process_deps: walk one dependence list of a task, link the task's
// depnode as a successor of every unfinished predecessor recorded in the
// hash, and update the per-address state (last_out / last_set / prev_set).
// Returns the number of predecessors this task must wait on from this list.
// Template parameter 'filter': true for the possibly-aliased dep_list, whose
// duplicate entries were voided (base_addr == 0) by __kmp_check_deps and are
// skipped here; false for the noalias list.  dep_barrier == true implements
// a taskwait-with-dependences sync point: existing links are consumed and
// cleared rather than replaced by this node.
3585e8470afSJim Cownie template <bool filter>
3595e8470afSJim Cownie static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid,kmp_depnode_t * node,kmp_dephash_t ** hash,bool dep_barrier,kmp_int32 ndeps,kmp_depend_info_t * dep_list,kmp_task_t * task)360a1639b9bSAndrey Churbanov __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
3613041982dSJonathan Peyton bool dep_barrier, kmp_int32 ndeps,
3623041982dSJonathan Peyton kmp_depend_info_t *dep_list, kmp_task_t *task) {
363cf1ddae7SAndreyChurbanov KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependences : "
3643041982dSJonathan Peyton "dep_barrier = %d\n",
3653041982dSJonathan Peyton filter, gtid, ndeps, dep_barrier));
3664cc4bb4cSJim Cownie
3675e8470afSJim Cownie kmp_info_t *thread = __kmp_threads[gtid];
3685e8470afSJim Cownie kmp_int32 npredecessors = 0;
3695e8470afSJim Cownie for (kmp_int32 i = 0; i < ndeps; i++) {
3705e8470afSJim Cownie const kmp_depend_info_t *dep = &dep_list[i];
3715e8470afSJim Cownie
3723041982dSJonathan Peyton if (filter && dep->base_addr == 0)
3733041982dSJonathan Peyton continue; // skip filtered entries
3745e8470afSJim Cownie
// find (or create) the hash entry for this address; note this may extend
// the hash and update *hash
3753041982dSJonathan Peyton kmp_dephash_entry_t *info =
3763041982dSJonathan Peyton __kmp_dephash_find(thread, hash, dep->base_addr);
// snapshot of the entry's current dependence state for this address
3775e8470afSJim Cownie kmp_depnode_t *last_out = info->last_out;
378610fea65SAndreyChurbanov kmp_depnode_list_t *last_set = info->last_set;
379610fea65SAndreyChurbanov kmp_depnode_list_t *prev_set = info->prev_set;
3805e8470afSJim Cownie
381610fea65SAndreyChurbanov if (dep->flags.out) { // out or inout --> clean lists if any
382610fea65SAndreyChurbanov if (last_set) {
383c3344345SAndrey Churbanov npredecessors +=
384610fea65SAndreyChurbanov __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
385610fea65SAndreyChurbanov __kmp_depnode_list_free(thread, last_set);
386610fea65SAndreyChurbanov __kmp_depnode_list_free(thread, prev_set);
387610fea65SAndreyChurbanov info->last_set = NULL;
388610fea65SAndreyChurbanov info->prev_set = NULL;
389610fea65SAndreyChurbanov info->last_flag = 0; // no sets in this dephash entry
390c3344345SAndrey Churbanov } else {
391c3344345SAndrey Churbanov npredecessors +=
392c3344345SAndrey Churbanov __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
3935e8470afSJim Cownie }
394c3344345SAndrey Churbanov __kmp_node_deref(thread, last_out);
395610fea65SAndreyChurbanov if (!dep_barrier) {
396610fea65SAndreyChurbanov info->last_out = __kmp_node_ref(node);
397610fea65SAndreyChurbanov } else {
3983041982dSJonathan Peyton // if this is a sync point in the serial sequence, then the previous
399c3344345SAndrey Churbanov // outputs are guaranteed to be completed after the execution of this
400c3344345SAndrey Churbanov // task so the previous output nodes can be cleared.
4015e8470afSJim Cownie info->last_out = NULL;
402c3344345SAndrey Churbanov }
403610fea65SAndreyChurbanov } else { // either IN or MTX or SET
404610fea65SAndreyChurbanov if (info->last_flag == 0 || info->last_flag == dep->flag) {
405610fea65SAndreyChurbanov // last_set either didn't exist or of same dep kind
406c3344345SAndrey Churbanov // link node as successor of the last_out if any
407c3344345SAndrey Churbanov npredecessors +=
408c3344345SAndrey Churbanov __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
409610fea65SAndreyChurbanov // link node as successor of all nodes in the prev_set if any
410c3344345SAndrey Churbanov npredecessors +=
411610fea65SAndreyChurbanov __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
4128e29b4b3SAndreyChurbanov if (dep_barrier) {
4138e29b4b3SAndreyChurbanov // clean last_out and prev_set if any; don't touch last_set
4148e29b4b3SAndreyChurbanov __kmp_node_deref(thread, last_out);
4158e29b4b3SAndreyChurbanov info->last_out = NULL;
4168e29b4b3SAndreyChurbanov __kmp_depnode_list_free(thread, prev_set);
4178e29b4b3SAndreyChurbanov info->prev_set = NULL;
4188e29b4b3SAndreyChurbanov }
419610fea65SAndreyChurbanov } else { // last_set is of different dep kind, make it prev_set
420610fea65SAndreyChurbanov // link node as successor of all nodes in the last_set
421610fea65SAndreyChurbanov npredecessors +=
422610fea65SAndreyChurbanov __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
423610fea65SAndreyChurbanov // clean last_out if any
424c3344345SAndrey Churbanov __kmp_node_deref(thread, last_out);
425c3344345SAndrey Churbanov info->last_out = NULL;
426610fea65SAndreyChurbanov // clean prev_set if any
427610fea65SAndreyChurbanov __kmp_depnode_list_free(thread, prev_set);
4288e29b4b3SAndreyChurbanov if (!dep_barrier) {
429610fea65SAndreyChurbanov // move last_set to prev_set, new last_set will be allocated
430610fea65SAndreyChurbanov info->prev_set = last_set;
4318e29b4b3SAndreyChurbanov } else {
4328e29b4b3SAndreyChurbanov info->prev_set = NULL;
4338e29b4b3SAndreyChurbanov info->last_flag = 0;
4348e29b4b3SAndreyChurbanov }
435610fea65SAndreyChurbanov info->last_set = NULL;
436c3344345SAndrey Churbanov }
4378e29b4b3SAndreyChurbanov // for dep_barrier last_flag value should remain:
4388e29b4b3SAndreyChurbanov // 0 if last_set is empty, unchanged otherwise
4398e29b4b3SAndreyChurbanov if (!dep_barrier) {
440610fea65SAndreyChurbanov info->last_flag = dep->flag; // store dep kind of the last_set
441610fea65SAndreyChurbanov info->last_set = __kmp_add_node(thread, info->last_set, node);
4428e29b4b3SAndreyChurbanov }
443610fea65SAndreyChurbanov // check if we are processing MTX dependency
444610fea65SAndreyChurbanov if (dep->flag == KMP_DEP_MTX) {
// each address with a mutexinoutset dep gets one lazily-created lock
445c3344345SAndrey Churbanov if (info->mtx_lock == NULL) {
446c3344345SAndrey Churbanov info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
447c3344345SAndrey Churbanov __kmp_init_lock(info->mtx_lock);
448c3344345SAndrey Churbanov }
449c3344345SAndrey Churbanov KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
450c3344345SAndrey Churbanov kmp_int32 m;
451c3344345SAndrey Churbanov // Save lock in node's array
452c3344345SAndrey Churbanov for (m = 0; m < MAX_MTX_DEPS; ++m) {
453c3344345SAndrey Churbanov // sort pointers in decreasing order to avoid potential livelock
454c3344345SAndrey Churbanov if (node->dn.mtx_locks[m] < info->mtx_lock) {
455610fea65SAndreyChurbanov KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
456c3344345SAndrey Churbanov for (int n = node->dn.mtx_num_locks; n > m; --n) {
457c3344345SAndrey Churbanov // shift right all lesser non-NULL pointers
458c3344345SAndrey Churbanov KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
459c3344345SAndrey Churbanov node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
460c3344345SAndrey Churbanov }
461c3344345SAndrey Churbanov node->dn.mtx_locks[m] = info->mtx_lock;
462c3344345SAndrey Churbanov break;
4635e8470afSJim Cownie }
4645e8470afSJim Cownie }
465c3344345SAndrey Churbanov KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
466c3344345SAndrey Churbanov node->dn.mtx_num_locks++;
467c3344345SAndrey Churbanov }
468c3344345SAndrey Churbanov }
469610fea65SAndreyChurbanov }
4703041982dSJonathan Peyton KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
4713041982dSJonathan Peyton gtid, npredecessors));
4725e8470afSJim Cownie return npredecessors;
4735e8470afSJim Cownie }
4745e8470afSJim Cownie
// readable values for the dep_barrier argument of __kmp_check_deps /
// __kmp_process_deps
4755e8470afSJim Cownie #define NO_DEP_BARRIER (false)
4765e8470afSJim Cownie #define DEP_BARRIER (true)
4775e8470afSJim Cownie
4785e8470afSJim Cownie // returns true if the task has any outstanding dependence
// __kmp_check_deps: register 'task' (whose depnode is 'node') with the
// dependence hash: deduplicate dep_list in place, process both dependence
// lists (or the omp_all_memory path), then atomically publish the predecessor
// count.  Returns true when unfinished predecessors remain (the task must not
// be queued by this thread); false when it can be scheduled immediately.
__kmp_check_deps(kmp_int32 gtid,kmp_depnode_t * node,kmp_task_t * task,kmp_dephash_t ** hash,bool dep_barrier,kmp_int32 ndeps,kmp_depend_info_t * dep_list,kmp_int32 ndeps_noalias,kmp_depend_info_t * noalias_dep_list)4793041982dSJonathan Peyton static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
480a1639b9bSAndrey Churbanov kmp_task_t *task, kmp_dephash_t **hash,
4813041982dSJonathan Peyton bool dep_barrier, kmp_int32 ndeps,
4823041982dSJonathan Peyton kmp_depend_info_t *dep_list,
4833041982dSJonathan Peyton kmp_int32 ndeps_noalias,
4843041982dSJonathan Peyton kmp_depend_info_t *noalias_dep_list) {
485d40108e0SAndreyChurbanov int i, n_mtxs = 0, dep_all = 0;
486d2eb3c73SJonathan Peyton #if KMP_DEBUG
487d2eb3c73SJonathan Peyton kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
488d2eb3c73SJonathan Peyton #endif
489cf1ddae7SAndreyChurbanov KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependences for task %p : %d "
490cf1ddae7SAndreyChurbanov "possibly aliased dependences, %d non-aliased dependences : "
4913041982dSJonathan Peyton "dep_barrier=%d .\n",
4923041982dSJonathan Peyton gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));
4934cc4bb4cSJim Cownie
4945e8470afSJim Cownie // Filter deps in dep_list
4955e8470afSJim Cownie // TODO: Different algorithm for large dep_list ( > 10 ? )
4965e8470afSJim Cownie for (i = 0; i < ndeps; i++) {
49759b877d0SAndreyChurbanov if (dep_list[i].base_addr != 0 &&
49859b877d0SAndreyChurbanov dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX) {
499610fea65SAndreyChurbanov KMP_DEBUG_ASSERT(
500610fea65SAndreyChurbanov dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
501610fea65SAndreyChurbanov dep_list[i].flag == KMP_DEP_INOUT ||
502610fea65SAndreyChurbanov dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
// collapse later duplicates of the same address into entry i
503c3344345SAndrey Churbanov for (int j = i + 1; j < ndeps; j++) {
5045e8470afSJim Cownie if (dep_list[i].base_addr == dep_list[j].base_addr) {
505610fea65SAndreyChurbanov if (dep_list[i].flag != dep_list[j].flag) {
506610fea65SAndreyChurbanov // two different dependences on same address work identical to OUT
507610fea65SAndreyChurbanov dep_list[i].flag = KMP_DEP_OUT;
508610fea65SAndreyChurbanov }
5095e8470afSJim Cownie dep_list[j].base_addr = 0; // Mark j element as void
5105e8470afSJim Cownie }
5115e8470afSJim Cownie }
512610fea65SAndreyChurbanov if (dep_list[i].flag == KMP_DEP_MTX) {
513c3344345SAndrey Churbanov // limit number of mtx deps to MAX_MTX_DEPS per node
514c3344345SAndrey Churbanov if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
515c3344345SAndrey Churbanov ++n_mtxs;
516c3344345SAndrey Churbanov } else {
517610fea65SAndreyChurbanov dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
518c3344345SAndrey Churbanov }
519c3344345SAndrey Churbanov }
520d40108e0SAndreyChurbanov } else if (dep_list[i].flag == KMP_DEP_ALL ||
52159b877d0SAndreyChurbanov dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX) {
522d40108e0SAndreyChurbanov // omp_all_memory dependence can be marked by compiler by either
523d40108e0SAndreyChurbanov // (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1).
524d40108e0SAndreyChurbanov // omp_all_memory overrides all other dependences if any
525d40108e0SAndreyChurbanov dep_all = 1;
526d40108e0SAndreyChurbanov break;
527c3344345SAndrey Churbanov }
528c3344345SAndrey Churbanov }
5295e8470afSJim Cownie
5303041982dSJonathan Peyton // doesn't need to be atomic as no other thread is going to be accessing this
5313041982dSJonathan Peyton // node just yet.
5323041982dSJonathan Peyton // npredecessors is set -1 to ensure that none of the releasing tasks queues
533cf1ddae7SAndreyChurbanov // this task before we have finished processing all the dependences
5344cc4bb4cSJim Cownie node->dn.npredecessors = -1;
5355e8470afSJim Cownie
5363041982dSJonathan Peyton // used to pack all npredecessors additions into a single atomic operation at
5373041982dSJonathan Peyton // the end
5385e8470afSJim Cownie int npredecessors;
5395e8470afSJim Cownie
540d40108e0SAndreyChurbanov if (!dep_all) { // regular dependences
541d40108e0SAndreyChurbanov npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
542d40108e0SAndreyChurbanov ndeps, dep_list, task);
5433041982dSJonathan Peyton npredecessors += __kmp_process_deps<false>(
5443041982dSJonathan Peyton gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
545d40108e0SAndreyChurbanov } else { // omp_all_memory dependence
546d40108e0SAndreyChurbanov npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
547d40108e0SAndreyChurbanov }
5485e8470afSJim Cownie
// publish the task on the node; the barrier keeps this store ordered before
// the npredecessors update below
5495e8470afSJim Cownie node->dn.task = task;
5505e8470afSJim Cownie KMP_MB();
5514cc4bb4cSJim Cownie
5524cc4bb4cSJim Cownie // Account for our initial fake value
5534cc4bb4cSJim Cownie npredecessors++;
5545e8470afSJim Cownie
5553041982dSJonathan Peyton // Update predecessors and obtain current value to check if there are still
55642016791SKazuaki Ishizaki // any outstanding dependences (some tasks may have finished while we
55742016791SKazuaki Ishizaki // processed the dependences)
558c47afcd9SAndrey Churbanov npredecessors =
55937e2ef54SJonathan Peyton node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;
5604cc4bb4cSJim Cownie
5613041982dSJonathan Peyton KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
5623041982dSJonathan Peyton gtid, npredecessors, taskdata));
5635e8470afSJim Cownie
5643041982dSJonathan Peyton // beyond this point the task could be queued (and executed) by a releasing
5653041982dSJonathan Peyton // task...
5665e8470afSJim Cownie return npredecessors > 0 ? true : false;
5675e8470afSJim Cownie }
5685e8470afSJim Cownie
5695e8470afSJim Cownie /*!
5705e8470afSJim Cownie @ingroup TASKING
5715e8470afSJim Cownie @param loc_ref location of the original task directive
5725e8470afSJim Cownie @param gtid Global Thread ID of encountering thread
5733041982dSJonathan Peyton @param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
5743041982dSJonathan Peyton task''
5755e8470afSJim Cownie @param ndeps Number of depend items with possible aliasing
5765e8470afSJim Cownie @param dep_list List of depend items with possible aliasing
5775e8470afSJim Cownie @param ndeps_noalias Number of depend items with no aliasing
5785e8470afSJim Cownie @param noalias_dep_list List of depend items with no aliasing
5795e8470afSJim Cownie
5803041982dSJonathan Peyton @return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
58142016791SKazuaki Ishizaki suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
5825e8470afSJim Cownie
5835e8470afSJim Cownie Schedule a non-thread-switchable task with dependences for execution
5845e8470afSJim Cownie */
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list) {

  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

#if OMPT_SUPPORT
  // Report task creation (and the encountering task's frame) to the tool.
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }

    new_taskdata->ompt_task_info.frame.enter_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }

#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    // Build one flat array: slots [0, ndeps) mirror dep_list, slots
    // [ndeps, ndeps + ndeps_noalias) mirror noalias_dep_list.
    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(ompt_deps != NULL);

    // Translate runtime dependence flags to OMPT dependence types; in+out
    // must be checked before the single-flag cases.
    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    /* For OMPD we might want to delay the free until end of this function */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */

  // Dependences are skipped in a serialized context unless the task team may
  // still run proxy or hidden-helper tasks concurrently.
  bool serial = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  kmp_task_team_t *task_team = thread->th.th_task_team;
  serial = serial &&
           !(task_team && (task_team->tt.tt_found_proxy_tasks ||
                           task_team->tt.tt_hidden_helper_task_encountered));

  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
    /* if no dependences have been tracked yet, create the dependence hash */
    if (current_task->td_dephash == NULL)
      current_task->td_dephash = __kmp_dephash_create(thread, current_task);

    // Heap-allocate the depnode: it must outlive this call (released when the
    // task's dependences are satisfied and processed).
#if USE_FAST_MEMORY
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
#else
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
#endif

    __kmp_init_node(node);
    new_taskdata->td_depnode = node;

    // If there are outstanding predecessors, the task will be queued by the
    // releasing task instead of here.
    if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
                    "dependences: "
                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                    gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
      }
#endif
      return TASK_CURRENT_NOT_QUEUED;
    }
  } else {
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "
                  "for task (serialized) loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata));
  }

  // No blocking dependences: hand the task to the regular task machinery.
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
                "dependences : "
                "loc=%p task=%p, transferring to __kmp_omp_task\n",
                gtid, loc_ref, new_taskdata));

  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return ret;
}
7205e8470afSJim Cownie
72130205865SJoachim Protze #if OMPT_SUPPORT
// Signal schedule/finish of the implicit taskwait task to the tool and reset
// the OMPT state set up by __kmpc_omp_wait_deps. Callers in this file pass
// the per-thread taskwait task data (&thread->th.ompt_thread_info.task_data).
void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
                                ompt_data_t *taskwait_task_data) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        taskwait_task_data, ompt_taskwait_complete, NULL);
  }
  // Pop the encountering task's enter frame and clear the taskwait task data
  // so the slot can be reused by the next taskwait-with-deps on this thread.
  current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
  *taskwait_task_data = ompt_data_none;
}
73130205865SJoachim Protze #endif /* OMPT_SUPPORT */
73247cb8a0fSJoachim Protze
7335e8470afSJim Cownie /*!
7345e8470afSJim Cownie @ingroup TASKING
7355e8470afSJim Cownie @param loc_ref location of the original task directive
7365e8470afSJim Cownie @param gtid Global Thread ID of encountering thread
7375e8470afSJim Cownie @param ndeps Number of depend items with possible aliasing
7385e8470afSJim Cownie @param dep_list List of depend items with possible aliasing
7395e8470afSJim Cownie @param ndeps_noalias Number of depend items with no aliasing
7405e8470afSJim Cownie @param noalias_dep_list List of depend items with no aliasing
7415e8470afSJim Cownie
742cf1ddae7SAndreyChurbanov Blocks the current task until all specified dependences have been fulfilled.
7435e8470afSJim Cownie */
__kmpc_omp_wait_deps(ident_t * loc_ref,kmp_int32 gtid,kmp_int32 ndeps,kmp_depend_info_t * dep_list,kmp_int32 ndeps_noalias,kmp_depend_info_t * noalias_dep_list)7443041982dSJonathan Peyton void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
7453041982dSJonathan Peyton kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
7463041982dSJonathan Peyton kmp_depend_info_t *noalias_dep_list) {
7474cc4bb4cSJim Cownie KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref));
7484cc4bb4cSJim Cownie
7494cc4bb4cSJim Cownie if (ndeps == 0 && ndeps_noalias == 0) {
750cf1ddae7SAndreyChurbanov KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependences to "
7513041982dSJonathan Peyton "wait upon : loc=%p\n",
7523041982dSJonathan Peyton gtid, loc_ref));
7534cc4bb4cSJim Cownie return;
7544cc4bb4cSJim Cownie }
755787eb0c6SAndreyChurbanov __kmp_assert_valid_gtid(gtid);
7565e8470afSJim Cownie kmp_info_t *thread = __kmp_threads[gtid];
7575e8470afSJim Cownie kmp_taskdata_t *current_task = thread->th.th_current_task;
7585e8470afSJim Cownie
75947cb8a0fSJoachim Protze #if OMPT_SUPPORT
76047cb8a0fSJoachim Protze // this function represents a taskwait construct with depend clause
76147cb8a0fSJoachim Protze // We signal 4 events:
76247cb8a0fSJoachim Protze // - creation of the taskwait task
76347cb8a0fSJoachim Protze // - dependences of the taskwait task
76447cb8a0fSJoachim Protze // - schedule and finish of the taskwait task
76547cb8a0fSJoachim Protze ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
76647cb8a0fSJoachim Protze KMP_ASSERT(taskwait_task_data->ptr == NULL);
76747cb8a0fSJoachim Protze if (ompt_enabled.enabled) {
76847cb8a0fSJoachim Protze if (!current_task->ompt_task_info.frame.enter_frame.ptr)
76947cb8a0fSJoachim Protze current_task->ompt_task_info.frame.enter_frame.ptr =
77047cb8a0fSJoachim Protze OMPT_GET_FRAME_ADDRESS(0);
77147cb8a0fSJoachim Protze if (ompt_enabled.ompt_callback_task_create) {
77247cb8a0fSJoachim Protze ompt_callbacks.ompt_callback(ompt_callback_task_create)(
773480cbed3SHansang Bae &(current_task->ompt_task_info.task_data),
774480cbed3SHansang Bae &(current_task->ompt_task_info.frame), taskwait_task_data,
7757ba4e96eSHansang Bae ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1,
7766104b304SJoachim Protze OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
77747cb8a0fSJoachim Protze }
77847cb8a0fSJoachim Protze }
77947cb8a0fSJoachim Protze
78047cb8a0fSJoachim Protze #if OMPT_OPTIONAL
78147cb8a0fSJoachim Protze /* OMPT grab all dependences if requested by the tool */
78247cb8a0fSJoachim Protze if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
78347cb8a0fSJoachim Protze kmp_int32 i;
78447cb8a0fSJoachim Protze
78547cb8a0fSJoachim Protze int ompt_ndeps = ndeps + ndeps_noalias;
78647cb8a0fSJoachim Protze ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
78747cb8a0fSJoachim Protze thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
78847cb8a0fSJoachim Protze
78947cb8a0fSJoachim Protze KMP_ASSERT(ompt_deps != NULL);
79047cb8a0fSJoachim Protze
79147cb8a0fSJoachim Protze for (i = 0; i < ndeps; i++) {
79247cb8a0fSJoachim Protze ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
79347cb8a0fSJoachim Protze if (dep_list[i].flags.in && dep_list[i].flags.out)
79447cb8a0fSJoachim Protze ompt_deps[i].dependence_type = ompt_dependence_type_inout;
79547cb8a0fSJoachim Protze else if (dep_list[i].flags.out)
79647cb8a0fSJoachim Protze ompt_deps[i].dependence_type = ompt_dependence_type_out;
79747cb8a0fSJoachim Protze else if (dep_list[i].flags.in)
79847cb8a0fSJoachim Protze ompt_deps[i].dependence_type = ompt_dependence_type_in;
79947cb8a0fSJoachim Protze else if (dep_list[i].flags.mtx)
80047cb8a0fSJoachim Protze ompt_deps[ndeps + i].dependence_type =
80147cb8a0fSJoachim Protze ompt_dependence_type_mutexinoutset;
802610fea65SAndreyChurbanov else if (dep_list[i].flags.set)
803610fea65SAndreyChurbanov ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
80447cb8a0fSJoachim Protze }
80547cb8a0fSJoachim Protze for (i = 0; i < ndeps_noalias; i++) {
80647cb8a0fSJoachim Protze ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
80747cb8a0fSJoachim Protze if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
80847cb8a0fSJoachim Protze ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
80947cb8a0fSJoachim Protze else if (noalias_dep_list[i].flags.out)
81047cb8a0fSJoachim Protze ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
81147cb8a0fSJoachim Protze else if (noalias_dep_list[i].flags.in)
81247cb8a0fSJoachim Protze ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
81347cb8a0fSJoachim Protze else if (noalias_dep_list[i].flags.mtx)
81447cb8a0fSJoachim Protze ompt_deps[ndeps + i].dependence_type =
81547cb8a0fSJoachim Protze ompt_dependence_type_mutexinoutset;
816610fea65SAndreyChurbanov else if (noalias_dep_list[i].flags.set)
817610fea65SAndreyChurbanov ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
81847cb8a0fSJoachim Protze }
81947cb8a0fSJoachim Protze ompt_callbacks.ompt_callback(ompt_callback_dependences)(
82047cb8a0fSJoachim Protze taskwait_task_data, ompt_deps, ompt_ndeps);
821cf1ddae7SAndreyChurbanov /* We can now free the allocated memory for the dependences */
82247cb8a0fSJoachim Protze /* For OMPD we might want to delay the free until end of this function */
82347cb8a0fSJoachim Protze KMP_OMPT_DEPS_FREE(thread, ompt_deps);
82447cb8a0fSJoachim Protze ompt_deps = NULL;
82547cb8a0fSJoachim Protze }
82647cb8a0fSJoachim Protze #endif /* OMPT_OPTIONAL */
82747cb8a0fSJoachim Protze #endif /* OMPT_SUPPORT */
82847cb8a0fSJoachim Protze
8294cc4bb4cSJim Cownie // We can return immediately as:
8303041982dSJonathan Peyton // - dependences are not computed in serial teams (except with proxy tasks)
8314cc4bb4cSJim Cownie // - if the dephash is not yet created it means we have nothing to wait for
8323041982dSJonathan Peyton bool ignore = current_task->td_flags.team_serial ||
8333041982dSJonathan Peyton current_task->td_flags.tasking_ser ||
8343041982dSJonathan Peyton current_task->td_flags.final;
835458db51cSShilei Tian ignore =
836458db51cSShilei Tian ignore && thread->th.th_task_team != NULL &&
837458db51cSShilei Tian thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE &&
838458db51cSShilei Tian thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE;
839535b6faaSAndrey Churbanov ignore = ignore || current_task->td_dephash == NULL;
840535b6faaSAndrey Churbanov
841535b6faaSAndrey Churbanov if (ignore) {
8423041982dSJonathan Peyton KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
843cf1ddae7SAndreyChurbanov "dependences : loc=%p\n",
8443041982dSJonathan Peyton gtid, loc_ref));
84530205865SJoachim Protze #if OMPT_SUPPORT
84647cb8a0fSJoachim Protze __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
84730205865SJoachim Protze #endif /* OMPT_SUPPORT */
8485e8470afSJim Cownie return;
8494cc4bb4cSJim Cownie }
8505e8470afSJim Cownie
85137e2ef54SJonathan Peyton kmp_depnode_t node = {0};
8525e8470afSJim Cownie __kmp_init_node(&node);
8535e8470afSJim Cownie
854a1639b9bSAndrey Churbanov if (!__kmp_check_deps(gtid, &node, NULL, ¤t_task->td_dephash,
8553041982dSJonathan Peyton DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
8563041982dSJonathan Peyton noalias_dep_list)) {
8573041982dSJonathan Peyton KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
858cf1ddae7SAndreyChurbanov "dependences : loc=%p\n",
8593041982dSJonathan Peyton gtid, loc_ref));
86030205865SJoachim Protze #if OMPT_SUPPORT
86147cb8a0fSJoachim Protze __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
86230205865SJoachim Protze #endif /* OMPT_SUPPORT */
8635e8470afSJim Cownie return;
8644cc4bb4cSJim Cownie }
8655e8470afSJim Cownie
8665e8470afSJim Cownie int thread_finished = FALSE;
867e0665a90STerry Wilmarth kmp_flag_32<false, false> flag(
868e0665a90STerry Wilmarth (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
8695e8470afSJim Cownie while (node.dn.npredecessors > 0) {
87037e2ef54SJonathan Peyton flag.execute_tasks(thread, gtid, FALSE,
87137e2ef54SJonathan Peyton &thread_finished USE_ITT_BUILD_ARG(NULL),
8725e8470afSJim Cownie __kmp_task_stealing_constraint);
8735e8470afSJim Cownie }
8745e8470afSJim Cownie
87530205865SJoachim Protze #if OMPT_SUPPORT
87647cb8a0fSJoachim Protze __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
87730205865SJoachim Protze #endif /* OMPT_SUPPORT */
8783041982dSJonathan Peyton KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
8793041982dSJonathan Peyton gtid, loc_ref));
8805e8470afSJim Cownie }
881