/*
 * kmp_taskdeps.cpp
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

//#define KMP_SUPPORT_GRAPH_OUTPUT 1

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#if OMP_40_ENABLED

// TODO: Improve memory allocation? keep a list of pre-allocated structures?
// allocate in blocks? re-use finished list entries?
// TODO: don't use atomic ref counters for stack-allocated nodes.
// TODO: find an alternative to atomic refs for heap-allocated nodes?
// TODO: Finish graph output support
// TODO: kmp_lock_t seems a tad too big (and heavy weight) for this. Check
// other runtime locks
// TODO: Any ITT support needed?

#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static kmp_int32 kmp_node_id_seed = 0;
#endif

static void __kmp_init_node(kmp_depnode_t *node) {
  node->dn.task = NULL; // set to NULL initially; it will point to the right
                        // task once dependences have been processed
  node->dn.successors = NULL;
  __kmp_init_lock(&node->dn.lock);
  node->dn.nrefs = 1; // init creates the first reference to the node
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
#endif
}

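// Reference counting: __kmp_init_node() creates a node holding a single
// reference (owned by the task that created it). Every place that records the
// node, i.e. a successors list or a dephash entry's last_out slot, takes an
// extra reference through __kmp_node_ref(), and __kmp_node_deref() frees the
// node once the last reference is dropped.
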
static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
  KMP_TEST_THEN_INC32(CCAST(kmp_int32 *, &node->dn.nrefs));
  return node;
}

static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
  if (!node)
    return;

  kmp_int32 n = KMP_TEST_THEN_DEC32(CCAST(kmp_int32 *, &node->dn.nrefs)) - 1;
  if (n == 0) {
    KMP_ASSERT(node->dn.nrefs == 0);
#if USE_FAST_MEMORY
    __kmp_fast_free(thread, node);
#else
    __kmp_thread_free(thread, node);
#endif
  }
}

#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid))
#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid))

static void __kmp_depnode_list_free(kmp_info_t *thread, kmp_depnode_list *list);

enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };

static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
  // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
  // m_num_sets );
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}

static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
                                           kmp_taskdata_t *current_task) {
  kmp_dephash_t *h;

  size_t h_size;

  if (current_task->td_flags.tasktype == TASK_IMPLICIT)
    h_size = KMP_DEPHASH_MASTER_SIZE;
  else
    h_size = KMP_DEPHASH_OTHER_SIZE;

  kmp_int32 size =
      h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
#endif
  h->size = h_size;

#ifdef KMP_DEBUG
  h->nelements = 0;
  h->nconflicts = 0;
#endif
  h->buckets = (kmp_dephash_entry **)(h + 1);

  for (size_t i = 0; i < h_size; i++)
    h->buckets[i] = 0;

  return h;
}

void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h) {
  for (size_t i = 0; i < h->size; i++) {
    if (h->buckets[i]) {
      kmp_dephash_entry_t *next;
      for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
        next = entry->next_in_bucket;
        __kmp_depnode_list_free(thread, entry->last_ins);
        __kmp_node_deref(thread, entry->last_out);
#if USE_FAST_MEMORY
        __kmp_fast_free(thread, entry);
#else
        __kmp_thread_free(thread, entry);
#endif
      }
      h->buckets[i] = 0;
    }
  }
}

void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
  __kmp_dephash_free_entries(thread, h);
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, h);
#else
  __kmp_thread_free(thread, h);
#endif
}

static kmp_dephash_entry *
__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
  kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);

  kmp_dephash_entry_t *entry;
  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
    if (entry->addr == addr)
      break;

  if (entry == NULL) {
    // create entry. This is only done by one thread so no locking required
#if USE_FAST_MEMORY
    entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
        thread, sizeof(kmp_dephash_entry_t));
#else
    entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
        thread, sizeof(kmp_dephash_entry_t));
#endif
    entry->addr = addr;
    entry->last_out = NULL;
    entry->last_ins = NULL;
    entry->next_in_bucket = h->buckets[bucket];
    h->buckets[bucket] = entry;
#ifdef KMP_DEBUG
    h->nelements++;
    if (entry->next_in_bucket)
      h->nconflicts++;
#endif
  }
  return entry;
}

static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
                                          kmp_depnode_list_t *list,
                                          kmp_depnode_t *node) {
  kmp_depnode_list_t *new_head;

#if USE_FAST_MEMORY
  new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
      thread, sizeof(kmp_depnode_list_t));
#else
  new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
      thread, sizeof(kmp_depnode_list_t));
#endif

  new_head->node = __kmp_node_ref(node);
  new_head->next = list;

  return new_head;
}

static void __kmp_depnode_list_free(kmp_info_t *thread,
                                    kmp_depnode_list *list) {
  kmp_depnode_list *next;

  for (; list; list = next) {
    next = list->next;

    __kmp_node_deref(thread, list->node);
#if USE_FAST_MEMORY
    __kmp_fast_free(thread, list);
#else
    __kmp_thread_free(thread, list);
#endif
  }
}

static inline void __kmp_track_dependence(kmp_depnode_t *source,
                                          kmp_depnode_t *sink,
                                          kmp_task_t *sink_task) {
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  // do not use sink->dn.task as that is only filled after the dependencies
  // are already processed!
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
               task_source->td_ident->psource, sink->dn.id,
               task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT tracks dependences between tasks (a=source, b=sink) in which
     task a blocks the execution of b through the ompt_new_dependence_callback
     */
  if (ompt_enabled.ompt_callback_task_dependence) {
    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
        &(task_source->ompt_task_info.task_data),
        &(task_sink->ompt_task_info.task_data));
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}

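// Walks one list of dependences for a new task and wires its depnode into the
// graph: an out/inout dependence must wait on every node recorded in last_ins
// (that list is then cleared), otherwise the dependence waits on the current
// last_out for the address. Afterwards, out/inout dependences become the new
// last_out while plain in dependences are appended to last_ins. Predecessors
// are counted locally and returned so the caller can fold all lists into a
// single atomic update of the node's npredecessors.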
template <bool filter>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
                   bool dep_barrier, kmp_int32 ndeps,
                   kmp_depend_info_t *dep_list, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : "
                "dep_barrier = %d\n",
                filter, gtid, ndeps, dep_barrier));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;
  for (kmp_int32 i = 0; i < ndeps; i++) {
    const kmp_depend_info_t *dep = &dep_list[i];

    KMP_DEBUG_ASSERT(dep->flags.in);

    if (filter && dep->base_addr == 0)
      continue; // skip filtered entries

    kmp_dephash_entry_t *info =
        __kmp_dephash_find(thread, hash, dep->base_addr);
    kmp_depnode_t *last_out = info->last_out;

    if (dep->flags.out && info->last_ins) {
      for (kmp_depnode_list_t *p = info->last_ins; p; p = p->next) {
        kmp_depnode_t *indep = p->node;
        if (indep->dn.task) {
          KMP_ACQUIRE_DEPNODE(gtid, indep);
          if (indep->dn.task) {
            __kmp_track_dependence(indep, node, task);
            indep->dn.successors =
                __kmp_add_node(thread, indep->dn.successors, node);
            KA_TRACE(40, ("__kmp_process_deps<%d>: T#%d adding dependence from "
                          "%p to %p\n",
                          filter, gtid, KMP_TASK_TO_TASKDATA(indep->dn.task),
                          KMP_TASK_TO_TASKDATA(task)));
            npredecessors++;
          }
          KMP_RELEASE_DEPNODE(gtid, indep);
        }
      }

      __kmp_depnode_list_free(thread, info->last_ins);
      info->last_ins = NULL;

    } else if (last_out && last_out->dn.task) {
      KMP_ACQUIRE_DEPNODE(gtid, last_out);
      if (last_out->dn.task) {
        __kmp_track_dependence(last_out, node, task);
        last_out->dn.successors =
            __kmp_add_node(thread, last_out->dn.successors, node);
        KA_TRACE(
            40,
            ("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
             filter, gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task),
             KMP_TASK_TO_TASKDATA(task)));

        npredecessors++;
      }
      KMP_RELEASE_DEPNODE(gtid, last_out);
    }

    if (dep_barrier) {
      // if this is a sync point in the serial sequence, then the previous
      // outputs are guaranteed to have completed by the time this task
      // executes, so the previous output nodes can be cleared.
      __kmp_node_deref(thread, last_out);
      info->last_out = NULL;
    } else {
      if (dep->flags.out) {
        __kmp_node_deref(thread, last_out);
        info->last_out = __kmp_node_ref(node);
      } else
        info->last_ins = __kmp_add_node(thread, info->last_ins, node);
    }
  }

  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
                gtid, npredecessors));

  return npredecessors;
}

#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)

// returns true if the task has any outstanding dependence
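// npredecessors is primed with -1 (a fake predecessor) so that releasing
// tasks that finish while the dependences are still being processed cannot
// drop the count to zero and queue this task prematurely; the fake value is
// removed again by the single atomic add performed at the end.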
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
                             kmp_task_t *task, kmp_dephash_t *hash,
                             bool dep_barrier, kmp_int32 ndeps,
                             kmp_depend_info_t *dep_list,
                             kmp_int32 ndeps_noalias,
                             kmp_depend_info_t *noalias_dep_list) {
  int i;

#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
#endif
  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d "
                "possibly aliased dependencies, %d non-aliased dependencies : "
                "dep_barrier=%d .\n",
                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));

  // Filter deps in dep_list
  // TODO: Different algorithm for large dep_list ( > 10 ? )
  for (i = 0; i < ndeps; i++) {
    if (dep_list[i].base_addr != 0)
      for (int j = i + 1; j < ndeps; j++)
        if (dep_list[i].base_addr == dep_list[j].base_addr) {
          dep_list[i].flags.in |= dep_list[j].flags.in;
          dep_list[i].flags.out |= dep_list[j].flags.out;
          dep_list[j].base_addr = 0; // Mark j element as void
        }
  }

  // doesn't need to be atomic as no other thread is going to be accessing this
  // node just yet.
  // npredecessors is set to -1 to ensure that none of the releasing tasks
  // queues this task before we have finished processing all the dependencies
  node->dn.npredecessors = -1;

  // used to pack all npredecessors additions into a single atomic operation at
  // the end
  int npredecessors;

  npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
                                           dep_list, task);
  npredecessors += __kmp_process_deps<false>(
      gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);

  node->dn.task = task;
  KMP_MB();

  // Account for our initial fake value
  npredecessors++;

  // Update predecessors and obtain current value to check if there are still
  // any outstanding dependences (some tasks may have finished while we
  // processed the dependences)
  npredecessors =
      KMP_TEST_THEN_ADD32(CCAST(kmp_int32 *, &node->dn.npredecessors),
                          npredecessors) +
      npredecessors;

  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
                gtid, npredecessors, taskdata));

  // beyond this point the task could be queued (and executed) by a releasing
  // task...
  return npredecessors > 0;
}

void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_depnode_t *node = task->td_depnode;

  if (task->td_dephash) {
    KA_TRACE(
        40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n",
             gtid, task));
    __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
  }

  if (!node)
    return;

  KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n",
                gtid, task));

  KMP_ACQUIRE_DEPNODE(gtid, node);
  node->dn.task =
      NULL; // mark this task as finished, so no new dependencies are generated
  KMP_RELEASE_DEPNODE(gtid, node);

  kmp_depnode_list_t *next;
  for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) {
    kmp_depnode_t *successor = p->node;
    kmp_int32 npredecessors =
        KMP_TEST_THEN_DEC32(CCAST(kmp_int32 *, &successor->dn.npredecessors)) -
        1;
    // successor task can be NULL for wait_depends or because deps are still
    // being processed
    if (npredecessors == 0) {
      KMP_MB();
      if (successor->dn.task) {
        KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled "
                      "for execution.\n",
                      gtid, successor->dn.task, task));
        __kmp_omp_task(gtid, successor->dn.task, false);
      }
    }

    next = p->next;
    __kmp_node_deref(thread, p->node);
#if USE_FAST_MEMORY
    __kmp_fast_free(thread, p);
#else
    __kmp_thread_free(thread, p);
#endif
  }

  __kmp_node_deref(thread, node);

  KA_TRACE(
      20,
      ("__kmp_release_deps: T#%d all successors of %p notified of completion\n",
       gtid, task));
}

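// Illustrative note (not part of the original sources): for a construct such
// as
//
//   #pragma omp task depend(in : x) depend(out : y)
//   { ... }
//
// the compiler typically emits a call to __kmpc_omp_task_alloc() followed by
// __kmpc_omp_task_with_deps() below, with dep_list entries describing &x as
// an "in" and &y as an "out" dependence.
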
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
task''
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued

Schedule a non-thread-switchable task with dependences for execution
*/
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list) {

  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    if (!current_task->ompt_task_info.frame.enter_frame)
      current_task->ompt_task_info.frame.enter_frame =
          OMPT_GET_FRAME_ADDRESS(1);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_data_t task_data = ompt_data_none;
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          current_task ? &(current_task->ompt_task_info.task_data) : &task_data,
          current_task ? &(current_task->ompt_task_info.frame) : NULL,
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
          OMPT_LOAD_RETURN_ADDRESS(gtid));
    }

    new_taskdata->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(0);
  }

#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 &&
      ompt_enabled.ompt_callback_task_dependences) {
    kmp_int32 i;

    new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias;
    new_taskdata->ompt_task_info.deps =
        (ompt_task_dependence_t *)KMP_OMPT_DEPS_ALLOC(
            thread, (ndeps + ndeps_noalias) * sizeof(ompt_task_dependence_t));

    KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL);

    for (i = 0; i < ndeps; i++) {
      new_taskdata->ompt_task_info.deps[i].variable_addr =
          (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[i].dependence_flags =
            ompt_task_dependence_type_inout;
      else if (dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[i].dependence_flags =
            ompt_task_dependence_type_out;
      else if (dep_list[i].flags.in)
        new_taskdata->ompt_task_info.deps[i].dependence_flags =
            ompt_task_dependence_type_in;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      new_taskdata->ompt_task_info.deps[ndeps + i].variable_addr =
          (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
            ompt_task_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
            ompt_task_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
            ompt_task_dependence_type_in;
    }
    ompt_callbacks.ompt_callback(ompt_callback_task_dependences)(
        &(new_taskdata->ompt_task_info.task_data),
        new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps);
    /* We can now free the allocated memory for the dependencies */
    /* For OMPD we might want to delay the free until task_end */
    KMP_OMPT_DEPS_FREE(thread, new_taskdata->ompt_task_info.deps);
    new_taskdata->ompt_task_info.deps = NULL;
    new_taskdata->ompt_task_info.ndeps = 0;
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */

  bool serial = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
#if OMP_45_ENABLED
  kmp_task_team_t *task_team = thread->th.th_task_team;
  serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks);
#endif

  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
    /* if no dependencies have been tracked yet, create the dependence hash */
    if (current_task->td_dephash == NULL)
      current_task->td_dephash = __kmp_dephash_create(thread, current_task);

#if USE_FAST_MEMORY
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
#else
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
#endif

    __kmp_init_node(node);
    new_taskdata->td_depnode = node;

    if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash,
                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
                    "dependencies: "
                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                    gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        current_task->ompt_task_info.frame.enter_frame = NULL;
      }
#endif
      return TASK_CURRENT_NOT_QUEUED;
    }
  } else {
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies "
                  "for task (serialized) "
                  "loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata));
  }

  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
                "dependencies : "
                "loc=%p task=%p, transferring to __kmpc_omp_task\n",
                gtid, loc_ref, new_taskdata));

  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    current_task->ompt_task_info.frame.enter_frame = NULL;
  }
#endif
  return ret;
}

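// Illustrative note (not part of the original sources): this entry point is
// typically emitted for constructs that must wait on dependences without
// deferring a task, e.g. an undeferred task such as
//
//   #pragma omp task if (0) depend(inout : x)
//   { ... }
//
// where the compiler calls __kmpc_omp_wait_deps() before executing the task
// body in place.
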
("__kmpc_omp_task_with_deps(exit): T#%d task had blocking " 575 "dependencies: " 576 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", 577 gtid, loc_ref, new_taskdata)); 578 #if OMPT_SUPPORT 579 if (ompt_enabled.enabled) { 580 current_task->ompt_task_info.frame.enter_frame = NULL; 581 } 582 #endif 583 return TASK_CURRENT_NOT_QUEUED; 584 } 585 } else { 586 KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies " 587 "for task (serialized)" 588 "loc=%p task=%p\n", 589 gtid, loc_ref, new_taskdata)); 590 } 591 592 KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking " 593 "dependencies : " 594 "loc=%p task=%p, transferring to __kmpc_omp_task\n", 595 gtid, loc_ref, new_taskdata)); 596 597 kmp_int32 ret = __kmp_omp_task(gtid, new_task, true); 598 #if OMPT_SUPPORT 599 if (ompt_enabled.enabled) { 600 current_task->ompt_task_info.frame.enter_frame = NULL; 601 } 602 #endif 603 return ret; 604 } 605 606 /*! 607 @ingroup TASKING 608 @param loc_ref location of the original task directive 609 @param gtid Global Thread ID of encountering thread 610 @param ndeps Number of depend items with possible aliasing 611 @param dep_list List of depend items with possible aliasing 612 @param ndeps_noalias Number of depend items with no aliasing 613 @param noalias_dep_list List of depend items with no aliasing 614 615 Blocks the current task until all specifies dependencies have been fulfilled. 616 */ 617 void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, 618 kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 619 kmp_depend_info_t *noalias_dep_list) { 620 KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref)); 621 622 if (ndeps == 0 && ndeps_noalias == 0) { 623 KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to " 624 "wait upon : loc=%p\n", 625 gtid, loc_ref)); 626 return; 627 } 628 629 kmp_info_t *thread = __kmp_threads[gtid]; 630 kmp_taskdata_t *current_task = thread->th.th_current_task; 631 632 // We can return immediately as: 633 // - dependences are not computed in serial teams (except with proxy tasks) 634 // - if the dephash is not yet created it means we have nothing to wait for 635 bool ignore = current_task->td_flags.team_serial || 636 current_task->td_flags.tasking_ser || 637 current_task->td_flags.final; 638 #if OMP_45_ENABLED 639 ignore = ignore && thread->th.th_task_team != NULL && 640 thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE; 641 #endif 642 ignore = ignore || current_task->td_dephash == NULL; 643 644 if (ignore) { 645 KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking " 646 "dependencies : loc=%p\n", 647 gtid, loc_ref)); 648 return; 649 } 650 651 kmp_depnode_t node; 652 __kmp_init_node(&node); 653 654 if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash, 655 DEP_BARRIER, ndeps, dep_list, ndeps_noalias, 656 noalias_dep_list)) { 657 KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking " 658 "dependencies : loc=%p\n", 659 gtid, loc_ref)); 660 return; 661 } 662 663 int thread_finished = FALSE; 664 kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U); 665 while (node.dn.npredecessors > 0) { 666 flag.execute_tasks(thread, gtid, FALSE, &thread_finished, 667 #if USE_ITT_BUILD 668 NULL, 669 #endif 670 __kmp_task_stealing_constraint); 671 } 672 673 KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", 674 gtid, loc_ref)); 675 } 676 677 #endif /* OMP_40_ENABLED */ 678