/*
 * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#include "tsan_annotations.h"

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* forward declaration */
static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );

#ifdef OMP_45_ENABLED
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
#endif

#ifdef BUILD_TIED_TASK_STACK

//---------------------------------------------------------------------------
//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
//  from top to bottom
//
//  gtid: global thread identifier for thread containing stack
//  thread_data: thread data for task team thread containing stack
//  threshold: value above which the trace statement triggers
//  location: string identifying call site of this function (for trace)

static void
__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_taskdata_t **stack_top = task_stack -> ts_top;
    kmp_int32 entries = task_stack -> ts_entries;
    kmp_taskdata_t *tied_task;

    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
                         "first_block = %p, stack_top = %p \n",
                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );

    KMP_DEBUG_ASSERT( stack_top != NULL );
    KMP_DEBUG_ASSERT( entries > 0 );

    while ( entries != 0 )
    {
        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
        // fix up ts_top if we need to pop from previous block
        if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
        {
            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;

            stack_block = stack_block -> sb_prev;
            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
        }

        // finish bookkeeping
        stack_top--;
        entries--;

        tied_task = * stack_top;

        KMP_DEBUG_ASSERT( tied_task != NULL );
        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );

        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
                             "stack_top=%p, tied_task=%p\n",
                             location, gtid, entries, stack_top, tied_task ) );
    }
    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );

    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
                         location, gtid ) );
}

//---------------------------------------------------------------------------
//  __kmp_init_task_stack: initialize the task stack for the first time
//  after a thread_data structure is created.
//  It should not be necessary to do this again (assuming the stack works).
//
//  gtid: global thread identifier of calling thread
//  thread_data: thread data for task team thread containing stack

static void
__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *first_block;

    // set up the first block of the stack
    first_block = & task_stack -> ts_first_block;
    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

    // initialize the stack to be empty
    task_stack  -> ts_entries = TASK_STACK_EMPTY;
    first_block -> sb_next = NULL;
    first_block -> sb_prev = NULL;
}

//---------------------------------------------------------------------------
//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
//  gtid: global thread identifier for calling thread
//  thread_data: thread info for thread containing stack

static void
__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
{
    kmp_info_t *thread = __kmp_threads[ gtid ];  // thread info needed to free extra stack blocks
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;

    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
    // free from the second block of the stack
    while ( stack_block != NULL ) {
        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;

        stack_block -> sb_next = NULL;
        stack_block -> sb_prev = NULL;
        if (stack_block != & task_stack -> ts_first_block) {
            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
        }
        stack_block = next_block;
    }
    // initialize the stack to be empty
    task_stack -> ts_entries = 0;
    task_stack -> ts_top = NULL;
}

//---------------------------------------------------------------------------
//  __kmp_push_task_stack: Push the tied task onto the task stack.
//     Grow the stack if necessary by allocating another block.
//
//  gtid: global thread identifier for calling thread
//  thread: thread info for thread containing stack
//  tied_task: the task to push on the stack

static void
__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
                                       tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;

    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
        return;  // Don't push anything on stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );

    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
                  gtid, thread, tied_task ) );
    // Store entry
    * (task_stack -> ts_top) = tied_task;

    // Do bookkeeping for next push
    task_stack -> ts_top++;
    task_stack -> ts_entries++;

    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        // Find beginning of this task block
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);

        // Check if we already have a block
        if ( stack_block -> sb_next != NULL )
        {   // reset ts_top to beginning of next block
            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
        }
        else
        {   // Alloc new block and link it up
            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
                __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));

            task_stack -> ts_top   = & new_block -> sb_block[0];
            stack_block -> sb_next = new_block;
            new_block   -> sb_prev = stack_block;
            new_block   -> sb_next = NULL;

            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
                          gtid, tied_task, new_block ) );
        }
    }
    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
}

//---------------------------------------------------------------------------
//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
//     the task, just check to make sure it matches the ending task passed in.
//
//  gtid: global thread identifier for the calling thread
//  thread: thread info structure containing stack
//  tied_task: the task popped off the stack
//  ending_task: the task that is ending (should match popped task)

static void
__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
{
    // GEH - need to consider what to do if tt_threads_data not allocated yet
    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
    kmp_taskdata_t *tied_task;

    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
        return;  // Don't pop anything from stack if team or team tasks are serialized
    }

    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );

    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );

    // fix up ts_top if we need to pop from previous block
    if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
    {
        kmp_stack_block_t *stack_block =
            (kmp_stack_block_t *) (task_stack -> ts_top) ;

        stack_block = stack_block -> sb_prev;
        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    task_stack -> ts_top--;
    task_stack -> ts_entries--;

    tied_task = * (task_stack -> ts_top );

    KMP_DEBUG_ASSERT( tied_task != NULL );
    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly

    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
    return;
}
#endif /* BUILD_TIED_TASK_STACK */

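// A note on the block-chained bookkeeping above: the suspended-tied-task stack
// is a linked list of fixed-size blocks, and (assuming, as the masks suggest,
// that TASK_STACK_BLOCK_SIZE is a power of two with
// TASK_STACK_INDEX_MASK == TASK_STACK_BLOCK_SIZE - 1) the index of an entry
// within its block is simply
//
//     entry_index = ts_entries & TASK_STACK_INDEX_MASK;
//
// so ( ts_entries & TASK_STACK_INDEX_MASK ) == 0 detects exactly the pushes
// that fill a block (advance ts_top to the next sb_block, allocating it if
// needed) and the pops that drain one (step ts_top back to the previous block).
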
//---------------------------------------------------
//  __kmp_push_task: Add a task to the thread's deque

static kmp_int32
__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
{
    kmp_info_t *        thread = __kmp_threads[ gtid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
    kmp_thread_data_t * thread_data;

    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    // The first check avoids building task_team thread data if serialized
    if ( taskdata->td_flags.task_serial ) {
        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    if ( !KMP_TASKING_ENABLED(task_team) ) {
        __kmp_enable_tasking( task_team, thread );
    }
    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );

    // Find tasking deque specific to encountering thread
    thread_data = & task_team -> tt.tt_threads_data[ tid ];

    // No lock needed since only owner can allocate
    if (thread_data -> td.td_deque == NULL ) {
        __kmp_alloc_task_deque( thread, thread_data );
    }

    // Check if deque is full
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }

    // Lock the deque for the task push operation
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

#if OMP_45_ENABLED
    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
    {
        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
                       gtid, taskdata ) );
        return TASK_NOT_PUSHED;
    }
#else
    // Must have room since no thread can add tasks but calling thread
    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );
#endif

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);  // Adjust task count

    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                  "task=%p ntasks=%d head=%u tail=%u\n",
                  gtid, taskdata, thread_data->td.td_deque_ntasks,
                  thread_data->td.td_deque_head, thread_data->td.td_deque_tail) );

    __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );

    return TASK_SUCCESSFULLY_PUSHED;
}

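// A minimal sketch of the index wrap used above, assuming (as TASK_DEQUE_MASK
// implies) that the deque capacity is a power of two and the mask is size - 1.
// The numbers are illustrative only:
//
//     kmp_uint32 size = 256;          // hypothetical TASK_DEQUE_SIZE value
//     kmp_uint32 mask = size - 1;     // corresponding TASK_DEQUE_MASK value
//     kmp_uint32 tail = 255;
//     tail = ( tail + 1 ) & mask;     // wraps to 0 instead of reaching 256
//
// The capacity check is done once before taking the deque lock and, when proxy
// tasks can arrive from threads outside the team (OMP_45_ENABLED), repeated
// under the lock, since the owner is then no longer the only pusher.
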
//-----------------------------------------------------------------------------------------
// __kmp_pop_current_task_from_thread: set up current task of the given thread when team ends
// this_thr: thread structure to set current_task in.

void
__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
{
    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );

    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;

    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
                   "curtask_parent=%p\n",
                   0, this_thr, this_thr -> th.th_current_task,
                   this_thr -> th.th_current_task -> td_parent ) );
}

//---------------------------------------------------------------------------------------
// __kmp_push_current_task_to_thread: set up current task in the given thread for a new team
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up

void
__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
{
    // The current task of this thread is the parent of the just-created implicit tasks of the new team
    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );

    KMP_DEBUG_ASSERT (this_thr != NULL);

    if( tid == 0 ) {
        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
        }
    } else {
        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
    }

    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
                    "parent_task=%p\n",
                    tid, this_thr, this_thr->th.th_current_task,
                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
}

//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending

static void
__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
                  gtid, taskdata, current_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // mark currently executing task as suspended
    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
    current_task -> td_flags.executing = 0;

    // Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_push_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    // mark starting task as executing and as current task
    thread -> th.th_current_task = taskdata;

    KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
    taskdata -> td_flags.started = 1;
    taskdata -> td_flags.executing = 1;
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // GEH TODO: shouldn't we pass some sort of location identifier here?
    // APT: yes, we will pass location here.
    // need to store current thread state (in a thread or taskdata structure)
    // before setting work_state, otherwise wrong state is set after end of task

    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
                  gtid, taskdata ) );

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
            parent ? &(parent->ompt_task_info.frame) : NULL,
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.function);
    }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
    /* OMPT emit all dependences if requested by the tool */
    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
    {
        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
            taskdata->ompt_task_info.task_id,
            taskdata->ompt_task_info.deps,
            taskdata->ompt_task_info.ndeps
        );
        /* We can now free the allocated memory for the dependencies */
        KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
        taskdata->ompt_task_info.deps = NULL;
        taskdata->ompt_task_info.ndeps = 0;
    }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */

    return;
}

//----------------------------------------------------------------------
// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.

void
__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, taskdata, current_task ) );

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to increment counter so that the task structure is not freed prematurely
        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
        KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
                       gtid, counter, taskdata ) );
    }

    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, taskdata ) );

    return;
}

#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );

    __kmp_task_start( gtid, task, current_task );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}
#endif // TASK_UNUSED

//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
                  gtid, taskdata) );

    // Check to make sure all flags and counters have the correct values
    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );

    taskdata->td_flags.freed = 1;
    ANNOTATE_HAPPENS_BEFORE(taskdata);
    // deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
    __kmp_fast_free( thread, taskdata );
#else /* ! USE_FAST_MEMORY */
    __kmp_thread_free( thread, taskdata );
#endif

    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
                  gtid, taskdata) );
}

//-------------------------------------------------------------------------------------
// __kmp_free_task_and_ancestors: free the current task and ancestors without children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller

static void
__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
{
#if OMP_45_ENABLED
    // Proxy tasks must always be allowed to free their parents
    // because they can be run in background even in serial mode.
    kmp_int32 team_serial = ( taskdata->td_flags.team_serial ||
                              taskdata->td_flags.tasking_ser ) && !taskdata->td_flags.proxy;
#else
    kmp_int32 team_serial = taskdata->td_flags.team_serial ||
                            taskdata->td_flags.tasking_ser;
#endif
    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    kmp_int32 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );

    // Now, go up the ancestor tree to see if any ancestors can now be freed.
    while ( children == 0 )
    {
        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;

        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                      "and freeing itself\n", gtid, taskdata) );

        // --- Deallocate my ancestor task ---
        __kmp_free_task( gtid, taskdata, thread );

        taskdata = parent_taskdata;

        // Stop checking ancestors at implicit task
        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
        if ( team_serial || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
            return;

        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
    }

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
                  "not freeing it yet\n", gtid, taskdata, children) );
}

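// A short note on the "- 1" idiom above: KMP_TEST_THEN_DEC32 is a fetch-and-
// decrement, i.e. it returns the value the counter held *before* the decrement,
// so subtracting one recovers the post-decrement value without a second read.
// With illustrative numbers:
//
//     // td_allocated_child_tasks == 3 (this task plus two allocated children)
//     kmp_int32 children = KMP_TEST_THEN_DEC32( &count ) - 1;  // fetch returns 3, children == 2
//
// A task is actually freed only when this count reaches zero, and the walk up
// the ancestor chain stops as soon as a nonzero count is seen.
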
//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed.  (may be NULL if task is serialized)

static void
__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_info_t * thread = __kmp_threads[ gtid ];
    kmp_task_team_t * task_team = thread->th.th_task_team;  // might be NULL for serial teams...
    kmp_int32 children = 0;

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
        kmp_taskdata_t *parent = taskdata->td_parent;
        ompt_callbacks.ompt_callback(ompt_event_task_end)(
            taskdata->ompt_task_info.task_id);
    }
#endif

    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );

    // Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
    if ( taskdata -> td_flags.tiedness == TASK_TIED )
    {
        __kmp_pop_task_stack( gtid, thread, taskdata );
    }
#endif /* BUILD_TIED_TASK_STACK */

    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
        // untied task needs to check the counter so that the task structure is not freed prematurely
        kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
        KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
                       gtid, counter, taskdata ) );
        if ( counter > 0 ) {
            // untied task is not done, to be continued possibly by other thread, do not free it now
            if (resumed_task == NULL) {
                KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
                resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
            }
            thread->th.th_current_task = resumed_task;  // restore current_task
            resumed_task->td_flags.executing = 1;  // resume previous task
            KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
                          gtid, taskdata, resumed_task) );
            return;
        }
    }

    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    taskdata -> td_flags.complete = 1;  // mark the task as completed
    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    // Only need to keep track of count if team parallel and tasking not serialized
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
        // Predecrement simulated by "- 1" calculation
        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
        KMP_DEBUG_ASSERT( children >= 0 );
#if OMP_40_ENABLED
        if ( taskdata->td_taskgroup )
            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
#if OMP_45_ENABLED
    }
    // if we found proxy tasks there could exist a dependency chain
    // with the proxy task as origin
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) || (task_team && task_team->tt.tt_found_proxy_tasks) ) {
#endif
        __kmp_release_deps(gtid,taskdata);
#endif
    }

    // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
    // Otherwise, if a task is executed immediately from the release_deps code
    // the flag will be reset to 1 again by this same function
    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
    taskdata -> td_flags.executing = 0;  // suspend the finishing task

    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
                  gtid, taskdata, children) );

#if OMP_40_ENABLED
    /* If the task's destructor thunk flag has been set, we need to invoke the
       destructor thunk that has been generated by the compiler.
       The code is placed here, since at this point other tasks might have been released
       hence overlapping the destructor invocations with some other work in the
       released tasks.  The OpenMP spec is not specific on when the destructors are
       invoked, so we should be free to choose.
    */
    if (taskdata->td_flags.destructors_thunk) {
        kmp_routine_entry_t destr_thunk = task->data1.destructors;
        KMP_ASSERT(destr_thunk);
        destr_thunk(gtid, task);
    }
#endif // OMP_40_ENABLED

    // bookkeeping for resuming task:
    // GEH - note tasking_ser => task_serial
    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
                       taskdata->td_flags.task_serial);
    if ( taskdata->td_flags.task_serial )
    {
        if (resumed_task == NULL) {
            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
        }
        else
#if OMP_45_ENABLED
        if ( !(task_team && task_team->tt.tt_found_proxy_tasks) )
#endif
        {
            // verify resumed task passed in points to parent
            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
        }
    }
    else {
        KMP_DEBUG_ASSERT( resumed_task != NULL );  // verify that resumed task is passed as argument
    }

    // Free this task and then ancestor tasks if they have no children.
    // Restore th_current_task first as suggested by John:
    // johnmc: if an asynchronous inquiry peers into the runtime system
    // it doesn't see the freed task as the current task.
    thread->th.th_current_task = resumed_task;
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

    // TODO: GEH - make sure root team implicit task is initialized properly.
    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
    resumed_task->td_flags.executing = 1;  // resume previous task

    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
                  gtid, taskdata, resumed_task) );

    return;
}

//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.

void
__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    return;
}

#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
    return;
}
#endif // TASK_UNUSED

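// The *_if0 entry points above form the bracket used for an undeferred task.
// A hypothetical lowering of
//
//     #pragma omp task if(0)
//     { work(); }
//
// might look roughly like this (names are illustrative, not the exact output
// of any particular compiler):
//
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, /* flags: tied */ 1,
//                                            sizeof(kmp_task_t), 0, &task_entry );
//     __kmpc_omp_task_begin_if0( &loc, gtid, t );
//     task_entry( gtid, t );                          // run the task body inline
//     __kmpc_omp_task_complete_if0( &loc, gtid, t );
//
// i.e. the task is allocated and bookkept normally but executed immediately on
// the encountering thread instead of being pushed to a deque.
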
#if OMPT_SUPPORT
//----------------------------------------------------------------------------------------------------
// __kmp_task_init_ompt:
//   Initialize OMPT fields maintained by a task.  This will only be called after
//   ompt_tool, so we already know whether ompt is enabled or not.

static inline void
__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
{
    if (ompt_enabled) {
        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
        task->ompt_task_info.function = function;
        task->ompt_task_info.frame.exit_runtime_frame = NULL;
        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
        task->ompt_task_info.ndeps = 0;
        task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
    }
}
#endif

//----------------------------------------------------------------------------------------------------
// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
//
// loc_ref:  reference to source location of parallel region
// this_thr:  thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
// TODO: Get better loc_ref.  Value passed in may be NULL

void
__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
{
    kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];

    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );

    task->td_task_id = KMP_GEN_TASK_ID();
    task->td_team    = team;
//    task->td_parent  = NULL;  // fix for CQ230101 (broken parent task info in debugger)
    task->td_ident   = loc_ref;
    task->td_taskwait_ident   = NULL;
    task->td_taskwait_counter = 0;
    task->td_taskwait_thread  = 0;

    task->td_flags.tiedness = TASK_TIED;
    task->td_flags.tasktype = TASK_IMPLICIT;
#if OMP_45_ENABLED
    task->td_flags.proxy = TASK_FULL;
#endif

    // All implicit tasks are executed immediately, not deferred
    task->td_flags.task_serial = 1;
    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    task->td_flags.started   = 1;
    task->td_flags.executing = 1;
    task->td_flags.complete  = 0;
    task->td_flags.freed     = 0;

#if OMP_40_ENABLED
    task->td_depnode = NULL;
#endif

    if (set_curr_task) {  // only do this initialization the first time a thread is created
        task->td_incomplete_child_tasks = 0;
        task->td_allocated_child_tasks  = 0;  // Not used because do not need to deallocate implicit task
#if OMP_40_ENABLED
        task->td_taskgroup = NULL;            // An implicit task does not have taskgroup
        task->td_dephash = NULL;
#endif
        __kmp_push_current_task_to_thread( this_thr, team, tid );
    } else {
        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks  == 0);
    }

#if OMPT_SUPPORT
    __kmp_task_init_ompt(task, tid, NULL);
#endif

    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
                  tid, team, task ) );
}

//-----------------------------------------------------------------------------
// __kmp_finish_implicit_task: Release resources associated to implicit tasks
// at the end of parallel regions.  Some resources are kept for reuse in the
// next parallel region.
//
// thread: thread data structure corresponding to implicit task

void
__kmp_finish_implicit_task(kmp_info_t *thread)
{
    kmp_taskdata_t *task = thread->th.th_current_task;
    if (task->td_dephash)
        __kmp_dephash_free_entries(thread, task->td_dephash);
}

//-----------------------------------------------------------------------------
// __kmp_free_implicit_task: Release resources associated to implicit tasks
// when these tasks are destroyed.
//
// thread: thread data structure corresponding to implicit task

void
__kmp_free_implicit_task(kmp_info_t *thread)
{
    kmp_taskdata_t *task = thread->th.th_current_task;
    if (task->td_dephash)
        __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
}

// Round up a size to a multiple of the power of two specified by val
// Used to insert padding between structures co-allocated using a single malloc() call
static size_t
__kmp_round_up_to_val( size_t size, size_t val ) {
    if ( size & ( val - 1 ) ) {
        size &= ~ ( val - 1 );
        if ( size <= KMP_SIZE_T_MAX - val ) {
            size += val;    // Round up if there is no overflow.
        }; // if
    }; // if
    return size;
} // __kmp_round_up_to_val

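// A worked example of the rounding above (the call sites in this file pass
// sizeof(void *) as val; the numbers here are illustrative):
//
//     __kmp_round_up_to_val( 24, 8 ) == 24    // already a multiple, unchanged
//     __kmp_round_up_to_val( 27, 8 ) == 32    // 27 & 7 != 0, so 27 -> 24 -> 32
//
// val must be a power of two for the (val - 1) mask arithmetic to be valid.
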
//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).

kmp_task_t *
__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                  kmp_routine_entry_t task_entry )
{
    kmp_task_t *task;
    kmp_taskdata_t *taskdata;
    kmp_info_t *thread = __kmp_threads[ gtid ];
    kmp_team_t *team = thread->th.th_team;
    kmp_taskdata_t *parent_task = thread->th.th_current_task;
    size_t shareds_offset;

    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                  sizeof_shareds, task_entry) );

    if ( parent_task->td_flags.final ) {
        if (flags->merged_if0) {
        }
        flags->final = 1;
    }

#if OMP_45_ENABLED
    if ( flags->proxy == TASK_PROXY ) {
        flags->tiedness = TASK_UNTIED;
        flags->merged_if0 = 1;

        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
        if ( (thread->th.th_task_team) == NULL ) {
            /* This should only happen if the team is serialized
                setup a task team and propagate it to the thread
            */
            KMP_DEBUG_ASSERT(team->t.t_serialized);
            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
        }
        kmp_task_team_t * task_team = thread->th.th_task_team;

        /* tasking must be enabled now as the task might not be pushed */
        if ( !KMP_TASKING_ENABLED( task_team ) ) {
            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
            __kmp_enable_tasking( task_team, thread );
            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
            // No lock needed since only owner can allocate
            if (thread_data -> td.td_deque == NULL ) {
                __kmp_alloc_task_deque( thread, thread_data );
            }
        }

        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
            TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
    }
#endif

    // Calculate shared structure offset including padding after kmp_task_t struct
    // to align pointers in shared struct
    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));

    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
                  gtid, shareds_offset) );
    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
                  gtid, sizeof_shareds) );

    // Avoid double allocation here by combining shareds with taskdata
#if USE_FAST_MEMORY
    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
#else /* ! USE_FAST_MEMORY */
    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
#endif /* USE_FAST_MEMORY */
    ANNOTATE_HAPPENS_AFTER(taskdata);

    task = KMP_TASKDATA_TO_TASK(taskdata);

    // Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
    if (sizeof_shareds > 0) {
        // Avoid double allocation here by combining shareds with taskdata
        task->shareds = & ((char *) taskdata)[ shareds_offset ];
        // Make sure shareds struct is aligned to pointer size
        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
    } else {
        task->shareds = NULL;
    }
    task->routine = task_entry;
    task->part_id = 0;      // AC: Always start with 0 part id

    taskdata->td_task_id = KMP_GEN_TASK_ID();
    taskdata->td_team = team;
    taskdata->td_alloc_thread = thread;
    taskdata->td_parent = parent_task;
    taskdata->td_level = parent_task->td_level + 1;  // increment nesting level
    taskdata->td_untied_count = 0;
    taskdata->td_ident = loc_ref;
    taskdata->td_taskwait_ident   = NULL;
    taskdata->td_taskwait_counter = 0;
    taskdata->td_taskwait_thread  = 0;
    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
#if OMP_45_ENABLED
    // avoid copying icvs for proxy tasks
    if ( flags->proxy == TASK_FULL )
#endif
        copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );

    taskdata->td_flags.tiedness    = flags->tiedness;
    taskdata->td_flags.final       = flags->final;
    taskdata->td_flags.merged_if0  = flags->merged_if0;
#if OMP_40_ENABLED
    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_45_ENABLED
    taskdata->td_flags.proxy       = flags->proxy;
    taskdata->td_task_team         = thread->th.th_task_team;
    taskdata->td_size_alloc        = shareds_offset + sizeof_shareds;
#endif
    taskdata->td_flags.tasktype    = TASK_EXPLICIT;

    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );

    // GEH - TODO: fix this to copy parent task's value of team_serial flag
    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;

    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
    //       immediately.
    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
        || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );

    taskdata->td_flags.started     = 0;
    taskdata->td_flags.executing   = 0;
    taskdata->td_flags.complete    = 0;
    taskdata->td_flags.freed       = 0;

    taskdata->td_flags.native      = flags->native;

    taskdata->td_incomplete_child_tasks = 0;
    taskdata->td_allocated_child_tasks  = 1;  // start at one because counts current task and children
#if OMP_40_ENABLED
    taskdata->td_taskgroup = parent_task->td_taskgroup;  // task inherits the taskgroup from the parent task
    taskdata->td_dephash = NULL;
    taskdata->td_depnode = NULL;
#endif

    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
#if OMP_45_ENABLED
    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#else
    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
#endif
    {
        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
#if OMP_40_ENABLED
        if ( parent_task->td_taskgroup )
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
#endif
        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
        }
    }

    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                  gtid, taskdata, taskdata->td_parent) );
    ANNOTATE_HAPPENS_BEFORE(task);

#if OMPT_SUPPORT
    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
#endif

    return task;
}

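// The single allocation above produces the following layout (offsets are
// illustrative; the real values depend on the sizeof_kmp_task_t and
// sizeof_shareds passed in by the compiler):
//
//     taskdata                  task = KMP_TASKDATA_TO_TASK(taskdata)
//     |                         |
//     v                         v
//     [ kmp_taskdata_t ][ kmp_task_t + privates ][ pad to sizeof(void*) ][ shareds ]
//                                                                         ^
//                                task->shareds = (char *)taskdata + shareds_offset
//
// so the kmp_task_t handed to the compiler-generated entry point can always be
// mapped back to its bookkeeping record with KMP_TASK_TO_TASKDATA(task).
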
kmp_task_t *
__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                       kmp_routine_entry_t task_entry )
{
    kmp_task_t *retval;
    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;

    input_flags->native = FALSE;
    // __kmp_task_alloc() sets up all other runtime flags

#if OMP_45_ENABLED
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  input_flags->proxy ? "proxy" : "",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#else
    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
#endif

    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry );

    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );

    return retval;
}

//-----------------------------------------------------------
//  __kmp_invoke_task: invoke the specified task
//
//  gtid: global thread ID of caller
//  task: the task to invoke
//  current_task: the task to resume after task invocation

static void
__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
    kmp_uint64 cur_time;
#if OMP_40_ENABLED
    int discard = 0 /* false */;
#endif
    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
                  gtid, taskdata, current_task) );
    KMP_DEBUG_ASSERT(task);
#if OMP_45_ENABLED
    if ( taskdata->td_flags.proxy == TASK_PROXY &&
         taskdata->td_flags.complete == 1)
    {
        // This is a proxy task that was already completed but it needs to run
        // its bottom-half finish
        KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
                      gtid, taskdata) );

        __kmp_bottom_half_finish_proxy(gtid,task);

        KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n",
                      gtid, taskdata, current_task) );

        return;
    }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if(__kmp_forkjoin_frames_mode == 3) {
        // Get the current time stamp to measure task execution time to correct barrier imbalance time
        cur_time = __itt_get_timestamp();
    }
#endif

#if OMP_45_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY ) {
#endif
        ANNOTATE_HAPPENS_AFTER(task);
        __kmp_task_start( gtid, task, current_task );
#if OMP_45_ENABLED
    }
#endif

#if OMPT_SUPPORT
    ompt_thread_info_t oldInfo;
    kmp_info_t * thread;
    if (ompt_enabled) {
        // Store the threads states and restore them after the task
        thread = __kmp_threads[ gtid ];
        oldInfo = thread->th.ompt_thread_info;
        thread->th.ompt_thread_info.wait_id = 0;
        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
    }
#endif

#if OMP_40_ENABLED
    // TODO: cancel tasks if the parallel region has also been cancelled
    // TODO: check if this sequence can be hoisted above __kmp_task_start
    // if cancellation has been enabled for this run ...
    if (__kmp_omp_cancellation) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        kmp_team_t * this_team = this_thr->th.th_team;
        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
            KMP_COUNT_BLOCK(TASK_cancelled);
            // this task belongs to a task group and we need to cancel it
            discard = 1 /* true */;
        }
    }

    //
    // Invoke the task routine and pass in relevant data.
    // Thunks generated by gcc take a different argument list.
    //
    if (!discard) {
#if KMP_STATS_ENABLED
        KMP_COUNT_BLOCK(TASK_executed);
        switch(KMP_GET_THREAD_STATE()) {
            case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
            case PLAIN_BARRIER:     KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
            case TASKYIELD:         KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
            case TASKWAIT:          KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
            case TASKGROUP:         KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
            default:                KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
        }
#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're about to run this task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                current_task->ompt_task_info.task_id,
                taskdata->ompt_task_info.task_id);
        }
#endif

#ifdef KMP_GOMP_COMPAT
        if (taskdata->td_flags.native) {
            ((void (*)(void *))(*(task->routine)))(task->shareds);
        }
        else
#endif /* KMP_GOMP_COMPAT */
        {
            (*(task->routine))(gtid, task);
        }
        KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
        /* let OMPT know that we're returning to the callee task */
        if (ompt_enabled &&
            ompt_callbacks.ompt_callback(ompt_event_task_switch))
        {
            ompt_callbacks.ompt_callback(ompt_event_task_switch)(
                taskdata->ompt_task_info.task_id,
                current_task->ompt_task_info.task_id);
        }
#endif

#if OMP_40_ENABLED
    }
#endif // OMP_40_ENABLED

#if OMPT_SUPPORT
    if (ompt_enabled) {
        thread->th.ompt_thread_info = oldInfo;
        taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
    }
#endif

#if OMP_45_ENABLED
    // Proxy tasks are not handled by the runtime
    if ( taskdata->td_flags.proxy != TASK_PROXY ) {
#endif
        ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
        __kmp_task_finish( gtid, task, current_task );
#if OMP_45_ENABLED
    }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - correct arrive time after the task finished
    if(__kmp_forkjoin_frames_mode == 3) {
        kmp_info_t *this_thr = __kmp_threads [ gtid ];
        if(this_thr->th.th_bar_arrive_time) {
            this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
        }
    }
#endif
    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
                  gtid, taskdata, current_task) );
    return;
}

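// The two call forms above reflect the two task-entry conventions the runtime
// supports; a sketch of the difference (signatures are illustrative):
//
//     // GOMP-compatible thunk, used when td_flags.native is set:
//     //   takes only the shareds pointer
//     void gomp_style_thunk( void *shareds );
//
//     // default convention (kmp_routine_entry_t):
//     //   takes the global thread id and the task thunk itself
//     kmp_int32 kmp_style_entry( kmp_int32 gtid, void *task );
//
// which is why the native case re-casts task->routine and passes task->shareds,
// while the default case calls (*task->routine)(gtid, task).
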
//-----------------------------------------------------------------------
// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
//
// loc_ref: location of original task pragma (ignored)
// gtid: Global Thread ID of encountering thread
// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
// Returns:
//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.

kmp_int32
__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

    KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    /* Should we execute the new task or queue it?  For now, let's just always try to
       queue it.  If the queue fills up, then we'll execute it.  */

    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )  // if cannot defer
    {                                                            // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        new_taskdata->td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

    KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
                  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
                  new_taskdata ) );

    ANNOTATE_HAPPENS_BEFORE(new_task);
    return TASK_CURRENT_NOT_QUEUED;
}

//---------------------------------------------------------------------
// __kmp_omp_task: Schedule a non-thread-switchable task for execution
// gtid: Global Thread ID of encountering thread
// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
// returns:
//
//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
kmp_int32
__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
{
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPT_SUPPORT
    if (ompt_enabled) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
            __builtin_frame_address(1);
    }
#endif

    /* Should we execute the new task or queue it?  For now, let's just always try to
       queue it.  If the queue fills up, then we'll execute it.  */
#if OMP_45_ENABLED
    if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )  // if cannot defer
#else
    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )  // if cannot defer
#endif
    {                                                            // Execute this task immediately
        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
        if ( serialize_immediate )
            new_taskdata -> td_flags.task_serial = 1;
        __kmp_invoke_task( gtid, new_task, current_task );
    }

#if OMPT_SUPPORT
    if (ompt_enabled) {
        new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
    }
#endif

    ANNOTATE_HAPPENS_BEFORE(new_task);
    return TASK_CURRENT_NOT_QUEUED;
}

//---------------------------------------------------------------------
// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
// the parent thread only!
// loc_ref: location of original task pragma (ignored)
// gtid: Global Thread ID of encountering thread
// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
// returns:
//
//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.

kmp_int32
__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
{
    kmp_int32 res;
    KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG
    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
    KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );

    res = __kmp_omp_task(gtid,new_task,true);

    KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata ) );
    return res;
}

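// For reference, a hypothetical lowering of a plain deferrable task such as
//
//     #pragma omp task
//     { work(&x); }
//
// might look roughly like the following.  The outlined entry point and the
// shareds struct are illustrative names, not the exact code any particular
// compiler emits:
//
//     struct task_shareds { int *x; };
//     extern kmp_int32 task_entry( kmp_int32 gtid, void *task );   // outlined body
//
//     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, /* flags: tied */ 1,
//                                            sizeof(kmp_task_t),
//                                            sizeof(struct task_shareds),
//                                            (kmp_routine_entry_t)&task_entry );
//     ((struct task_shareds *)t->shareds)->x = &x;
//     __kmpc_omp_task( &loc, gtid, t );   // deque push, or immediate inline execution
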
1427 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. 1428 1429 kmp_int32 1430 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task) 1431 { 1432 kmp_int32 res; 1433 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); 1434 1435 #if KMP_DEBUG 1436 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); 1437 #endif 1438 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", 1439 gtid, loc_ref, new_taskdata ) ); 1440 1441 res = __kmp_omp_task(gtid,new_task,true); 1442 1443 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", 1444 gtid, loc_ref, new_taskdata ) ); 1445 return res; 1446 } 1447 1448 //------------------------------------------------------------------------------------- 1449 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete 1450 1451 kmp_int32 1452 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid ) 1453 { 1454 kmp_taskdata_t * taskdata; 1455 kmp_info_t * thread; 1456 int thread_finished = FALSE; 1457 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT); 1458 1459 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) ); 1460 1461 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 1462 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait? 1463 1464 thread = __kmp_threads[ gtid ]; 1465 taskdata = thread -> th.th_current_task; 1466 1467 #if OMPT_SUPPORT && OMPT_TRACE 1468 ompt_task_id_t my_task_id; 1469 ompt_parallel_id_t my_parallel_id; 1470 1471 if (ompt_enabled) { 1472 kmp_team_t *team = thread->th.th_team; 1473 my_task_id = taskdata->ompt_task_info.task_id; 1474 my_parallel_id = team->t.ompt_team_info.parallel_id; 1475 1476 taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1); 1477 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) { 1478 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)( 1479 my_parallel_id, my_task_id); 1480 } 1481 } 1482 #endif 1483 1484 // Debugger: The taskwait is active. Store location and thread encountered the taskwait. 1485 #if USE_ITT_BUILD 1486 // Note: These values are used by ITT events as well. 1487 #endif /* USE_ITT_BUILD */ 1488 taskdata->td_taskwait_counter += 1; 1489 taskdata->td_taskwait_ident = loc_ref; 1490 taskdata->td_taskwait_thread = gtid + 1; 1491 1492 #if USE_ITT_BUILD 1493 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); 1494 if ( itt_sync_obj != NULL ) 1495 __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); 1496 #endif /* USE_ITT_BUILD */ 1497 1498 bool must_wait = ! taskdata->td_flags.team_serial && ! taskdata->td_flags.final; 1499 1500 #if OMP_45_ENABLED 1501 must_wait = must_wait || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks); 1502 #endif 1503 if (must_wait) 1504 { 1505 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U); 1506 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) { 1507 flag.execute_tasks(thread, gtid, FALSE, &thread_finished 1508 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); 1509 } 1510 } 1511 #if USE_ITT_BUILD 1512 if ( itt_sync_obj != NULL ) 1513 __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); 1514 #endif /* USE_ITT_BUILD */ 1515 1516 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait? 1517 // Debugger: The taskwait is completed. Location remains, but thread is negated. 
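        // (Illustrative note: td_taskwait_thread was set to gtid + 1 above -- e.g.
        //  T#5 is recorded as 6 -- so the negation below yields a nonzero value
        //  that still identifies the thread while marking the taskwait as done
        //  for the debugger.)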
1518 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread; 1519 1520 #if OMPT_SUPPORT && OMPT_TRACE 1521 if (ompt_enabled) { 1522 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) { 1523 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)( 1524 my_parallel_id, my_task_id); 1525 } 1526 taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; 1527 } 1528 #endif 1529 ANNOTATE_HAPPENS_AFTER(taskdata); 1530 } 1531 1532 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " 1533 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) ); 1534 1535 return TASK_CURRENT_NOT_QUEUED; 1536 } 1537 1538 1539 //------------------------------------------------- 1540 // __kmpc_omp_taskyield: switch to a different task 1541 1542 kmp_int32 1543 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ) 1544 { 1545 kmp_taskdata_t * taskdata; 1546 kmp_info_t * thread; 1547 int thread_finished = FALSE; 1548 1549 KMP_COUNT_BLOCK(OMP_TASKYIELD); 1550 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD); 1551 1552 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n", 1553 gtid, loc_ref, end_part) ); 1554 1555 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) { 1556 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait? 1557 1558 thread = __kmp_threads[ gtid ]; 1559 taskdata = thread -> th.th_current_task; 1560 // Should we model this as a task wait or not? 1561 // Debugger: The taskwait is active. Store location and thread encountered the taskwait. 1562 #if USE_ITT_BUILD 1563 // Note: These values are used by ITT events as well. 1564 #endif /* USE_ITT_BUILD */ 1565 taskdata->td_taskwait_counter += 1; 1566 taskdata->td_taskwait_ident = loc_ref; 1567 taskdata->td_taskwait_thread = gtid + 1; 1568 1569 #if USE_ITT_BUILD 1570 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); 1571 if ( itt_sync_obj != NULL ) 1572 __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); 1573 #endif /* USE_ITT_BUILD */ 1574 if ( ! taskdata->td_flags.team_serial ) { 1575 kmp_task_team_t * task_team = thread->th.th_task_team; 1576 if (task_team != NULL) { 1577 if (KMP_TASKING_ENABLED(task_team)) { 1578 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished 1579 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); 1580 } 1581 } 1582 } 1583 #if USE_ITT_BUILD 1584 if ( itt_sync_obj != NULL ) 1585 __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); 1586 #endif /* USE_ITT_BUILD */ 1587 1588 // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait? 1589 // Debugger: The taskwait is completed. Location remains, but thread is negated. 
1590 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread; 1591 } 1592 1593 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, " 1594 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) ); 1595 1596 return TASK_CURRENT_NOT_QUEUED; 1597 } 1598 1599 1600 #if OMP_40_ENABLED 1601 //------------------------------------------------------------------------------------- 1602 // __kmpc_taskgroup: Start a new taskgroup 1603 1604 void 1605 __kmpc_taskgroup( ident_t* loc, int gtid ) 1606 { 1607 kmp_info_t * thread = __kmp_threads[ gtid ]; 1608 kmp_taskdata_t * taskdata = thread->th.th_current_task; 1609 kmp_taskgroup_t * tg_new = 1610 (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) ); 1611 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) ); 1612 tg_new->count = 0; 1613 tg_new->cancel_request = cancel_noreq; 1614 tg_new->parent = taskdata->td_taskgroup; 1615 taskdata->td_taskgroup = tg_new; 1616 } 1617 1618 1619 //------------------------------------------------------------------------------------- 1620 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task 1621 // and its descendants are complete 1622 1623 void 1624 __kmpc_end_taskgroup( ident_t* loc, int gtid ) 1625 { 1626 kmp_info_t * thread = __kmp_threads[ gtid ]; 1627 kmp_taskdata_t * taskdata = thread->th.th_current_task; 1628 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup; 1629 int thread_finished = FALSE; 1630 1631 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) ); 1632 KMP_DEBUG_ASSERT( taskgroup != NULL ); 1633 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP); 1634 1635 if ( __kmp_tasking_mode != tskm_immediate_exec ) { 1636 #if USE_ITT_BUILD 1637 // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them 1638 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); 1639 if ( itt_sync_obj != NULL ) 1640 __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); 1641 #endif /* USE_ITT_BUILD */ 1642 1643 #if OMP_45_ENABLED 1644 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) ) 1645 #else 1646 if ( ! 
taskdata->td_flags.team_serial ) 1647 #endif 1648 { 1649 kmp_flag_32 flag(&(taskgroup->count), 0U); 1650 while ( TCR_4(taskgroup->count) != 0 ) { 1651 flag.execute_tasks(thread, gtid, FALSE, &thread_finished 1652 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); 1653 } 1654 } 1655 1656 #if USE_ITT_BUILD 1657 if ( itt_sync_obj != NULL ) 1658 __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); 1659 #endif /* USE_ITT_BUILD */ 1660 } 1661 KMP_DEBUG_ASSERT( taskgroup->count == 0 ); 1662 1663 // Restore parent taskgroup for the current task 1664 taskdata->td_taskgroup = taskgroup->parent; 1665 __kmp_thread_free( thread, taskgroup ); 1666 1667 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) ); 1668 ANNOTATE_HAPPENS_AFTER(taskdata); 1669 } 1670 #endif 1671 1672 1673 //------------------------------------------------------ 1674 // __kmp_remove_my_task: remove a task from my own deque 1675 1676 static kmp_task_t * 1677 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team, 1678 kmp_int32 is_constrained ) 1679 { 1680 kmp_task_t * task; 1681 kmp_taskdata_t * taskdata; 1682 kmp_thread_data_t *thread_data; 1683 kmp_uint32 tail; 1684 1685 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); 1686 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition 1687 1688 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; 1689 1690 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n", 1691 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, 1692 thread_data->td.td_deque_tail) ); 1693 1694 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) { 1695 KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", 1696 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, 1697 thread_data->td.td_deque_tail) ); 1698 return NULL; 1699 } 1700 1701 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock ); 1702 1703 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) { 1704 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); 1705 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", 1706 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, 1707 thread_data->td.td_deque_tail) ); 1708 return NULL; 1709 } 1710 1711 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td); // Wrap index. 1712 taskdata = thread_data -> td.td_deque[ tail ]; 1713 1714 if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) { 1715 // we need to check if the candidate obeys task scheduling constraint: 1716 // only child of current task can be scheduled 1717 kmp_taskdata_t * current = thread->th.th_current_task; 1718 kmp_int32 level = current->td_level; 1719 kmp_taskdata_t * parent = taskdata->td_parent; 1720 while ( parent != current && parent->td_level > level ) { 1721 parent = parent->td_parent; // check generation up to the level of the current task 1722 KMP_DEBUG_ASSERT(parent != NULL); 1723 } 1724 if ( parent != current ) { 1725 // If the tail task is not a child, then no other child can appear in the deque. 
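            // (Illustrative example of the walk above: with the current task at
            //  td_level == 2, a candidate spawned by a sibling climbs its td_parent
            //  chain and reaches an ancestor with td_level <= 2 that is not the
            //  current task, so it is rejected here and the deque is left untouched.)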
1726 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); 1727 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", 1728 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, 1729 thread_data->td.td_deque_tail) ); 1730 return NULL; 1731 } 1732 } 1733 1734 thread_data -> td.td_deque_tail = tail; 1735 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1); 1736 1737 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock ); 1738 1739 KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n", 1740 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, 1741 thread_data->td.td_deque_tail) ); 1742 1743 task = KMP_TASKDATA_TO_TASK( taskdata ); 1744 return task; 1745 } 1746 1747 1748 //----------------------------------------------------------- 1749 // __kmp_steal_task: remove a task from another thread's deque 1750 // Assume that calling thread has already checked existence of 1751 // task_team thread_data before calling this routine. 1752 1753 static kmp_task_t * 1754 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team, 1755 volatile kmp_uint32 *unfinished_threads, int *thread_finished, 1756 kmp_int32 is_constrained ) 1757 { 1758 kmp_task_t * task; 1759 kmp_taskdata_t * taskdata; 1760 kmp_thread_data_t *victim_td, *threads_data; 1761 kmp_int32 victim_tid; 1762 1763 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); 1764 1765 threads_data = task_team -> tt.tt_threads_data; 1766 KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition 1767 1768 victim_tid = victim->th.th_info.ds.ds_tid; 1769 victim_td = & threads_data[ victim_tid ]; 1770 1771 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d " 1772 "head=%u tail=%u\n", 1773 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, 1774 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); 1775 1776 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition 1777 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen? 1778 { 1779 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p " 1780 "ntasks=%d head=%u tail=%u\n", 1781 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, 1782 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); 1783 return NULL; 1784 } 1785 1786 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock ); 1787 1788 // Check again after we acquire the lock 1789 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || 1790 (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen? 
1791 { 1792 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); 1793 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p " 1794 "ntasks=%d head=%u tail=%u\n", 1795 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, 1796 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); 1797 return NULL; 1798 } 1799 1800 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL ); 1801 1802 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head]; 1803 if ( is_constrained ) { 1804 // we need to check if the candidate obeys task scheduling constraint: 1805 // only descendant of current task can be scheduled 1806 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task; 1807 kmp_int32 level = current->td_level; 1808 kmp_taskdata_t * parent = taskdata->td_parent; 1809 while ( parent != current && parent->td_level > level ) { 1810 parent = parent->td_parent; // check generation up to the level of the current task 1811 KMP_DEBUG_ASSERT(parent != NULL); 1812 } 1813 if ( parent != current ) { 1814 // If the head task is not a descendant of the current task then do not 1815 // steal it. No other task in victim's deque can be a descendant of the 1816 // current task. 1817 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); 1818 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p " 1819 "ntasks=%d head=%u tail=%u\n", 1820 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ), 1821 task_team, victim_td->td.td_deque_ntasks, 1822 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); 1823 return NULL; 1824 } 1825 } 1826 // Bump head pointer and Wrap. 1827 victim_td->td.td_deque_head = (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td); 1828 if (*thread_finished) { 1829 // We need to un-mark this victim as a finished victim. This must be done before 1830 // releasing the lock, or else other threads (starting with the master victim) 1831 // might be prematurely released from the barrier!!! 1832 kmp_uint32 count; 1833 1834 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads ); 1835 1836 KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n", 1837 gtid, count + 1, task_team) ); 1838 1839 *thread_finished = FALSE; 1840 } 1841 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1); 1842 1843 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); 1844 1845 KMP_COUNT_BLOCK(TASK_stolen); 1846 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p " 1847 "ntasks=%d head=%u tail=%u\n", 1848 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team, 1849 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head, 1850 victim_td->td.td_deque_tail) ); 1851 1852 task = KMP_TASKDATA_TO_TASK( taskdata ); 1853 return task; 1854 } 1855 1856 1857 //----------------------------------------------------------------------------- 1858 // __kmp_execute_tasks_template: Choose and execute tasks until either the condition 1859 // is statisfied (return true) or there are none left (return false). 1860 // final_spin is TRUE if this is the spin at the release barrier. 1861 // thread_finished indicates whether the thread is finished executing all 1862 // the tasks it has on its deque, and is at the release barrier. 1863 // spinner is the location on which to spin. 1864 // spinner == NULL means only execute a single task and return. 
1865 // checker is the value to check to terminate the spin. 1866 template <class C> 1867 static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin, 1868 int *thread_finished 1869 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) 1870 { 1871 kmp_task_team_t * task_team = thread->th.th_task_team; 1872 kmp_thread_data_t * threads_data; 1873 kmp_task_t * task; 1874 kmp_info_t * other_thread; 1875 kmp_taskdata_t * current_task = thread -> th.th_current_task; 1876 volatile kmp_uint32 * unfinished_threads; 1877 kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid; 1878 1879 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); 1880 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] ); 1881 1882 if (task_team == NULL) return FALSE; 1883 1884 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n", 1885 gtid, final_spin, *thread_finished) ); 1886 1887 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data); 1888 KMP_DEBUG_ASSERT( threads_data != NULL ); 1889 1890 nthreads = task_team -> tt.tt_nproc; 1891 unfinished_threads = &(task_team -> tt.tt_unfinished_threads); 1892 #if OMP_45_ENABLED 1893 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks); 1894 #else 1895 KMP_DEBUG_ASSERT( nthreads > 1 ); 1896 #endif 1897 KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 ); 1898 1899 while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs 1900 while (1) { // Inner loop to find a task and execute it 1901 task = NULL; 1902 if (use_own_tasks) { // check on own queue first 1903 task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained ); 1904 } 1905 if ((task == NULL) && (nthreads > 1)) { // Steal a task 1906 int asleep = 1; 1907 use_own_tasks = 0; 1908 // Try to steal from the last place I stole from successfully. 1909 if (victim == -2) { // haven't stolen anything yet 1910 victim = threads_data[tid].td.td_deque_last_stolen; 1911 if (victim != -1) // if we have a last stolen from victim, get the thread 1912 other_thread = threads_data[victim].td.td_thr; 1913 } 1914 if (victim != -1) { // found last victim 1915 asleep = 0; 1916 } 1917 else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread 1918 do { // Find a different thread to steal work from. 1919 // Pick a random thread. Initial plan was to cycle through all the threads, and only return if 1920 // we tried to steal from every thread, and failed. Arch says that's not such a great idea. 1921 victim = __kmp_get_random(thread) % (nthreads - 1); 1922 if (victim >= tid) { 1923 ++victim; // Adjusts random distribution to exclude self 1924 } 1925 // Found a potential victim 1926 other_thread = threads_data[victim].td.td_thr; 1927 // There is a slight chance that __kmp_enable_tasking() did not wake up all threads 1928 // waiting at the barrier. If victim is sleeping, then wake it up. Since we were going to 1929 // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway, 1930 // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks 1931 // do not sleep at the separate tasking barrier, so this isn't a problem. 
                    asleep = 0;
                    if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
                         (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
                         (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
                        asleep = 1;
                        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
                        // A sleeping thread should not have any tasks on its queue. There is a slight
                        // possibility that it resumes, steals a task from another thread, which spawns more
                        // tasks, all in the time that it takes this thread to check => don't write an assertion
                        // that the victim's queue is empty. Try stealing from a different thread.
                    }
                } while (asleep);
            }

            if (!asleep) {
                // We have a victim to try to steal from
                task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
            }
            if (task != NULL) { // set last stolen to victim
                if (threads_data[tid].td.td_deque_last_stolen != victim) {
                    threads_data[tid].td.td_deque_last_stolen = victim;
                    // The pre-refactored code did not try more than one successful new victim,
                    // unless the last one generated more local tasks; new_victim keeps track of this
                    new_victim = 1;
                }
            }
            else { // No tasks found; unset last_stolen
                KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
                victim = -2; // no successful victim found
            }
        }

        if (task == NULL) // break out of tasking loop
            break;

        // Found a task; execute it
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
            if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
                itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
            }
            __kmp_itt_task_starting( itt_sync_obj );
        }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        __kmp_invoke_task( gtid, task, current_task );
#if USE_ITT_BUILD
        if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
#endif /* USE_ITT_BUILD */
        // If this thread is only partway through the barrier and the condition is met, then return now,
        // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
        // in the barrier, waiting to be released, we know that the termination condition will not be
        // satisfied, so don't waste any cycles checking it.
        if (flag == NULL || (!final_spin && flag->done_check())) {
            KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
            return TRUE;
        }
        if (thread->th.th_task_team == NULL) {
            break;
        }
        KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
        // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
        if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
            KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
            use_own_tasks = 1;
            new_victim = 0;
        }
    }

    // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
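    // (Illustrative note: the decrement of tt_unfinished_threads below is done at
    //  most once per thread per barrier -- *thread_finished remembers that it
    //  happened -- and __kmp_steal_task() re-increments the counter if a thread
    //  that already decremented it manages to steal more work, so the counter
    //  tracks how many threads might still produce or execute tasks.)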
2001 #if OMP_45_ENABLED 2002 // The work queue may be empty but there might be proxy tasks still executing 2003 if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0) 2004 #else 2005 if (final_spin) 2006 #endif 2007 { 2008 // First, decrement the #unfinished threads, if that has not already been done. This decrement 2009 // might be to the spin location, and result in the termination condition being satisfied. 2010 if (! *thread_finished) { 2011 kmp_uint32 count; 2012 2013 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1; 2014 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n", 2015 gtid, count, task_team) ); 2016 *thread_finished = TRUE; 2017 } 2018 2019 // It is now unsafe to reference thread->th.th_team !!! 2020 // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through 2021 // the barrier, where it might reset each thread's th.th_team field for the next parallel region. 2022 // If we can steal more work, we know that this has not happened yet. 2023 if (flag != NULL && flag->done_check()) { 2024 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) ); 2025 return TRUE; 2026 } 2027 } 2028 2029 // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out 2030 if (thread->th.th_task_team == NULL) { 2031 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) ); 2032 return FALSE; 2033 } 2034 2035 #if OMP_45_ENABLED 2036 // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute 2037 // tasks from own queue 2038 if (nthreads == 1) 2039 use_own_tasks = 1; 2040 else 2041 #endif 2042 { 2043 KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) ); 2044 return FALSE; 2045 } 2046 } 2047 } 2048 2049 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, 2050 int *thread_finished 2051 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) 2052 { 2053 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished 2054 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); 2055 } 2056 2057 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, 2058 int *thread_finished 2059 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) 2060 { 2061 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished 2062 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); 2063 } 2064 2065 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, 2066 int *thread_finished 2067 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) 2068 { 2069 return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished 2070 USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); 2071 } 2072 2073 2074 2075 //----------------------------------------------------------------------------- 2076 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the 2077 // next barrier so they can assist in executing enqueued tasks. 2078 // First thread in allocates the task team atomically. 
2079 2080 static void 2081 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr ) 2082 { 2083 kmp_thread_data_t *threads_data; 2084 int nthreads, i, is_init_thread; 2085 2086 KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n", 2087 __kmp_gtid_from_thread( this_thr ) ) ); 2088 2089 KMP_DEBUG_ASSERT(task_team != NULL); 2090 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL); 2091 2092 nthreads = task_team->tt.tt_nproc; 2093 KMP_DEBUG_ASSERT(nthreads > 0); 2094 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc); 2095 2096 // Allocate or increase the size of threads_data if necessary 2097 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team ); 2098 2099 if (!is_init_thread) { 2100 // Some other thread already set up the array. 2101 KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n", 2102 __kmp_gtid_from_thread( this_thr ) ) ); 2103 return; 2104 } 2105 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data); 2106 KMP_DEBUG_ASSERT( threads_data != NULL ); 2107 2108 if ( ( __kmp_tasking_mode == tskm_task_teams ) && 2109 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) ) 2110 { 2111 // Release any threads sleeping at the barrier, so that they can steal 2112 // tasks and execute them. In extra barrier mode, tasks do not sleep 2113 // at the separate tasking barrier, so this isn't a problem. 2114 for (i = 0; i < nthreads; i++) { 2115 volatile void *sleep_loc; 2116 kmp_info_t *thread = threads_data[i].td.td_thr; 2117 2118 if (i == this_thr->th.th_info.ds.ds_tid) { 2119 continue; 2120 } 2121 // Since we haven't locked the thread's suspend mutex lock at this 2122 // point, there is a small window where a thread might be putting 2123 // itself to sleep, but hasn't set the th_sleep_loc field yet. 2124 // To work around this, __kmp_execute_tasks_template() periodically checks 2125 // see if other threads are sleeping (using the same random 2126 // mechanism that is used for task stealing) and awakens them if 2127 // they are. 2128 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL ) 2129 { 2130 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n", 2131 __kmp_gtid_from_thread( this_thr ), 2132 __kmp_gtid_from_thread( thread ) ) ); 2133 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); 2134 } 2135 else { 2136 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n", 2137 __kmp_gtid_from_thread( this_thr ), 2138 __kmp_gtid_from_thread( thread ) ) ); 2139 } 2140 } 2141 } 2142 2143 KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n", 2144 __kmp_gtid_from_thread( this_thr ) ) ); 2145 } 2146 2147 2148 /* ------------------------------------------------------------------------ */ 2149 /* // TODO: Check the comment consistency 2150 * Utility routines for "task teams". A task team (kmp_task_t) is kind of 2151 * like a shadow of the kmp_team_t data struct, with a different lifetime. 2152 * After a child * thread checks into a barrier and calls __kmp_release() from 2153 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no 2154 * longer assume that the kmp_team_t structure is intact (at any moment, the 2155 * master thread may exit the barrier code and free the team data structure, 2156 * and return the threads to the thread pool). 
 *
 * This does not work with the tasking code, as the thread is still
 * expected to participate in the execution of any tasks that may have been
 * spawned by a member of the team, and the thread still needs access to
 * each thread in the team, so that it can steal work from them.
 *
 * Enter the existence of the kmp_task_team_t struct. It employs a reference
 * counting mechanism, and is allocated by the master thread before calling
 * __kmp_<barrier_kind>_release, and then is released by the last thread to
 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
 * of the kmp_task_team_t structs for consecutive barriers can overlap
 * (and will, unless the master thread is the last thread to exit the barrier
 * release phase, which is not typical).
 *
 * The existence of such a struct is useful outside the context of tasking,
 * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
 * so that any performance differences show up when comparing the 2.5 vs. 3.0
 * libraries.
 *
 * We currently use the existence of the threads array as an indicator that
 * tasks were spawned since the last barrier. If the structure is to be
 * useful outside the context of tasking, then this will have to change, but
 * not setting the field minimizes the performance impact of tasking on
 * barriers, when no explicit tasks were spawned (pushed, actually).
 */


static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
// Lock for task team data structures
static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );


//------------------------------------------------------------------------------
// __kmp_alloc_task_deque:
// Allocates a task deque for a particular thread, and initializes the necessary
// data structures relating to the deque. This only happens once per thread
// per task team since task teams are recycled.
// No lock is needed during allocation since each thread allocates its own
// deque.

static void
__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
{
    __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );

    // Initialize last stolen task field to "none"
    thread_data -> td.td_deque_last_stolen = -1;

    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );

    KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
                    __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
    // Allocate space for task deque, and zero the deque
    // Cannot use __kmp_thread_calloc() because threads not around for
    // kmp_reap_task_team( ).
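    // (Illustrative note: deque indices wrap with a bitwise AND against
    //  TASK_DEQUE_MASK(), e.g.
    //      tail = ( tail + 1 ) & TASK_DEQUE_MASK( thread_data->td );
    //  which assumes the capacity is a power of two; INITIAL_TASK_DEQUE_SIZE
    //  starts it that way and __kmp_realloc_task_deque() only ever doubles it.)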
    thread_data -> td.td_deque = (kmp_taskdata_t **)
        __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
    thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}

//------------------------------------------------------------------------------
// __kmp_realloc_task_deque:
// Re-allocates a task deque for a particular thread, copies the content from the old deque
// and adjusts the necessary data structures relating to the deque.
// This operation must be done with the deque_lock held.

static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
{
    kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
    kmp_int32 new_size = 2 * size;

    KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
                    __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );

    kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));

    int i,j;
    for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
        new_deque[j] = thread_data->td.td_deque[i];

    __kmp_free(thread_data->td.td_deque);

    thread_data -> td.td_deque_head = 0;
    thread_data -> td.td_deque_tail = size;
    thread_data -> td.td_deque = new_deque;
    thread_data -> td.td_deque_size = new_size;
}

//------------------------------------------------------------------------------
// __kmp_free_task_deque:
// Deallocates a task deque for a particular thread.
// Happens at library deallocation so there is no need to reset all thread data fields.

static void
__kmp_free_task_deque( kmp_thread_data_t *thread_data )
{
    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );

    if ( thread_data -> td.td_deque != NULL ) {
        TCW_4(thread_data -> td.td_deque_ntasks, 0);
        __kmp_free( thread_data -> td.td_deque );
        thread_data -> td.td_deque = NULL;
    }
    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );

#ifdef BUILD_TIED_TASK_STACK
    // GEH: Figure out what to do here for td_susp_tied_tasks
    if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
        __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
    }
#endif // BUILD_TIED_TASK_STACK
}


//------------------------------------------------------------------------------
// __kmp_realloc_task_threads_data:
// Allocates a threads_data array for a task team, either by allocating an initial
// array or enlarging an existing array. Only the first thread to get the lock
// allocates or enlarges the array and re-initializes the array elements.
// That thread returns "TRUE", the rest return "FALSE".
// Assumes that the new array size is given by task_team -> tt.tt_nproc.
// The current size is given by task_team -> tt.tt_max_threads.

static int
__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
{
    kmp_thread_data_t ** threads_data_p;
    kmp_int32 nthreads, maxthreads;
    int is_init_thread = FALSE;

    if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
        // Already reallocated and initialized.
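        // (Illustrative note: this unlocked read is the fast path of a double-checked
        //  pattern; tt_found_tasks is only flipped to TRUE further below, under
        //  tt_threads_lock and after a KMP_MB(), once threads_data is fully set up,
        //  so observing TRUE here means the array is safe to use without the lock.)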
2292 return FALSE; 2293 } 2294 2295 threads_data_p = & task_team -> tt.tt_threads_data; 2296 nthreads = task_team -> tt.tt_nproc; 2297 maxthreads = task_team -> tt.tt_max_threads; 2298 2299 // All threads must lock when they encounter the first task of the implicit task 2300 // region to make sure threads_data fields are (re)initialized before used. 2301 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock ); 2302 2303 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) { 2304 // first thread to enable tasking 2305 kmp_team_t *team = thread -> th.th_team; 2306 int i; 2307 2308 is_init_thread = TRUE; 2309 if ( maxthreads < nthreads ) { 2310 2311 if ( *threads_data_p != NULL ) { 2312 kmp_thread_data_t *old_data = *threads_data_p; 2313 kmp_thread_data_t *new_data = NULL; 2314 2315 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating " 2316 "threads data for task_team %p, new_size = %d, old_size = %d\n", 2317 __kmp_gtid_from_thread( thread ), task_team, 2318 nthreads, maxthreads ) ); 2319 // Reallocate threads_data to have more elements than current array 2320 // Cannot use __kmp_thread_realloc() because threads not around for 2321 // kmp_reap_task_team( ). Note all new array entries are initialized 2322 // to zero by __kmp_allocate(). 2323 new_data = (kmp_thread_data_t *) 2324 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) ); 2325 // copy old data to new data 2326 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t), 2327 (void *) old_data, 2328 maxthreads * sizeof(kmp_taskdata_t *) ); 2329 2330 #ifdef BUILD_TIED_TASK_STACK 2331 // GEH: Figure out if this is the right thing to do 2332 for (i = maxthreads; i < nthreads; i++) { 2333 kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; 2334 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data ); 2335 } 2336 #endif // BUILD_TIED_TASK_STACK 2337 // Install the new data and free the old data 2338 (*threads_data_p) = new_data; 2339 __kmp_free( old_data ); 2340 } 2341 else { 2342 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating " 2343 "threads data for task_team %p, size = %d\n", 2344 __kmp_gtid_from_thread( thread ), task_team, nthreads ) ); 2345 // Make the initial allocate for threads_data array, and zero entries 2346 // Cannot use __kmp_thread_calloc() because threads not around for 2347 // kmp_reap_task_team( ). 2348 ANNOTATE_IGNORE_WRITES_BEGIN(); 2349 *threads_data_p = (kmp_thread_data_t *) 2350 __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) ); 2351 ANNOTATE_IGNORE_WRITES_END(); 2352 #ifdef BUILD_TIED_TASK_STACK 2353 // GEH: Figure out if this is the right thing to do 2354 for (i = 0; i < nthreads; i++) { 2355 kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; 2356 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data ); 2357 } 2358 #endif // BUILD_TIED_TASK_STACK 2359 } 2360 task_team -> tt.tt_max_threads = nthreads; 2361 } 2362 else { 2363 // If array has (more than) enough elements, go ahead and use it 2364 KMP_DEBUG_ASSERT( *threads_data_p != NULL ); 2365 } 2366 2367 // initialize threads_data pointers back to thread_info structures 2368 for (i = 0; i < nthreads; i++) { 2369 kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; 2370 thread_data -> td.td_thr = team -> t.t_threads[i]; 2371 2372 if ( thread_data -> td.td_deque_last_stolen >= nthreads) { 2373 // The last stolen field survives across teams / barrier, and the number 2374 // of threads may have changed. It's possible (likely?) 
that a new 2375 // parallel region will exhibit the same behavior as the previous region. 2376 thread_data -> td.td_deque_last_stolen = -1; 2377 } 2378 } 2379 2380 KMP_MB(); 2381 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE); 2382 } 2383 2384 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock ); 2385 return is_init_thread; 2386 } 2387 2388 2389 //------------------------------------------------------------------------------ 2390 // __kmp_free_task_threads_data: 2391 // Deallocates a threads_data array for a task team, including any attached 2392 // tasking deques. Only occurs at library shutdown. 2393 2394 static void 2395 __kmp_free_task_threads_data( kmp_task_team_t *task_team ) 2396 { 2397 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock ); 2398 if ( task_team -> tt.tt_threads_data != NULL ) { 2399 int i; 2400 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) { 2401 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] ); 2402 } 2403 __kmp_free( task_team -> tt.tt_threads_data ); 2404 task_team -> tt.tt_threads_data = NULL; 2405 } 2406 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock ); 2407 } 2408 2409 2410 //------------------------------------------------------------------------------ 2411 // __kmp_allocate_task_team: 2412 // Allocates a task team associated with a specific team, taking it from 2413 // the global task team free list if possible. Also initializes data structures. 2414 2415 static kmp_task_team_t * 2416 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team ) 2417 { 2418 kmp_task_team_t *task_team = NULL; 2419 int nthreads; 2420 2421 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n", 2422 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) ); 2423 2424 if (TCR_PTR(__kmp_free_task_teams) != NULL) { 2425 // Take a task team from the task team pool 2426 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock ); 2427 if (__kmp_free_task_teams != NULL) { 2428 task_team = __kmp_free_task_teams; 2429 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next); 2430 task_team -> tt.tt_next = NULL; 2431 } 2432 __kmp_release_bootstrap_lock( &__kmp_task_team_lock ); 2433 } 2434 2435 if (task_team == NULL) { 2436 KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating " 2437 "task team for team %p\n", 2438 __kmp_gtid_from_thread( thread ), team ) ); 2439 // Allocate a new task team if one is not available. 2440 // Cannot use __kmp_thread_malloc() because threads not around for 2441 // kmp_reap_task_team( ). 2442 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) ); 2443 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock ); 2444 //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory 2445 //task_team -> tt.tt_max_threads = 0; 2446 //task_team -> tt.tt_next = NULL; 2447 } 2448 2449 TCW_4(task_team -> tt.tt_found_tasks, FALSE); 2450 #if OMP_45_ENABLED 2451 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE); 2452 #endif 2453 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc; 2454 2455 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads ); 2456 TCW_4( task_team -> tt.tt_active, TRUE ); 2457 2458 KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n", 2459 (thread ? 
__kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) ); 2460 return task_team; 2461 } 2462 2463 2464 //------------------------------------------------------------------------------ 2465 // __kmp_free_task_team: 2466 // Frees the task team associated with a specific thread, and adds it 2467 // to the global task team free list. 2468 2469 void 2470 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ) 2471 { 2472 KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n", 2473 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) ); 2474 2475 // Put task team back on free list 2476 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock ); 2477 2478 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL ); 2479 task_team -> tt.tt_next = __kmp_free_task_teams; 2480 TCW_PTR(__kmp_free_task_teams, task_team); 2481 2482 __kmp_release_bootstrap_lock( & __kmp_task_team_lock ); 2483 } 2484 2485 2486 //------------------------------------------------------------------------------ 2487 // __kmp_reap_task_teams: 2488 // Free all the task teams on the task team free list. 2489 // Should only be done during library shutdown. 2490 // Cannot do anything that needs a thread structure or gtid since they are already gone. 2491 2492 void 2493 __kmp_reap_task_teams( void ) 2494 { 2495 kmp_task_team_t *task_team; 2496 2497 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) { 2498 // Free all task_teams on the free list 2499 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock ); 2500 while ( ( task_team = __kmp_free_task_teams ) != NULL ) { 2501 __kmp_free_task_teams = task_team -> tt.tt_next; 2502 task_team -> tt.tt_next = NULL; 2503 2504 // Free threads_data if necessary 2505 if ( task_team -> tt.tt_threads_data != NULL ) { 2506 __kmp_free_task_threads_data( task_team ); 2507 } 2508 __kmp_free( task_team ); 2509 } 2510 __kmp_release_bootstrap_lock( &__kmp_task_team_lock ); 2511 } 2512 } 2513 2514 //------------------------------------------------------------------------------ 2515 // __kmp_wait_to_unref_task_teams: 2516 // Some threads could still be in the fork barrier release code, possibly 2517 // trying to steal tasks. Wait for each thread to unreference its task team. 2518 // 2519 void 2520 __kmp_wait_to_unref_task_teams(void) 2521 { 2522 kmp_info_t *thread; 2523 kmp_uint32 spins; 2524 int done; 2525 2526 KMP_INIT_YIELD( spins ); 2527 2528 for (;;) { 2529 done = TRUE; 2530 2531 // TODO: GEH - this may be is wrong because some sync would be necessary 2532 // in case threads are added to the pool during the traversal. 2533 // Need to verify that lock for thread pool is held when calling 2534 // this routine. 2535 for (thread = (kmp_info_t *)__kmp_thread_pool; 2536 thread != NULL; 2537 thread = thread->th.th_next_pool) 2538 { 2539 #if KMP_OS_WINDOWS 2540 DWORD exit_val; 2541 #endif 2542 if ( TCR_PTR(thread->th.th_task_team) == NULL ) { 2543 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n", 2544 __kmp_gtid_from_thread( thread ) ) ); 2545 continue; 2546 } 2547 #if KMP_OS_WINDOWS 2548 // TODO: GEH - add this check for Linux* OS / OS X* as well? 
2549 if (!__kmp_is_thread_alive(thread, &exit_val)) { 2550 thread->th.th_task_team = NULL; 2551 continue; 2552 } 2553 #endif 2554 2555 done = FALSE; // Because th_task_team pointer is not NULL for this thread 2556 2557 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n", 2558 __kmp_gtid_from_thread( thread ) ) ); 2559 2560 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) { 2561 volatile void *sleep_loc; 2562 // If the thread is sleeping, awaken it. 2563 if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) { 2564 KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n", 2565 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) ); 2566 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); 2567 } 2568 } 2569 } 2570 if (done) { 2571 break; 2572 } 2573 2574 // If we are oversubscribed, 2575 // or have waited a bit (and library mode is throughput), yield. 2576 // Pause is in the following code. 2577 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); 2578 KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput 2579 } 2580 } 2581 2582 2583 //------------------------------------------------------------------------------ 2584 // __kmp_task_team_setup: Create a task_team for the current team, but use 2585 // an already created, unused one if it already exists. 2586 void 2587 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always ) 2588 { 2589 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); 2590 2591 // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next. 2592 // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use. 2593 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) { 2594 team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team ); 2595 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n", 2596 __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state], 2597 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state)); 2598 } 2599 2600 // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is 2601 // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the 2602 // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely 2603 // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for 2604 // serialized teams. 2605 if (team->t.t_nproc > 1) { 2606 int other_team = 1 - this_thr->th.th_task_state; 2607 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well 2608 team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team ); 2609 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n", 2610 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], 2611 ((team != NULL) ? 
team->t.t_id : -1), other_team )); 2612 } 2613 else { // Leave the old task team struct in place for the upcoming region; adjust as needed 2614 kmp_task_team_t *task_team = team->t.t_task_team[other_team]; 2615 if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) { 2616 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc); 2617 TCW_4(task_team->tt.tt_found_tasks, FALSE); 2618 #if OMP_45_ENABLED 2619 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); 2620 #endif 2621 TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc ); 2622 TCW_4(task_team->tt.tt_active, TRUE ); 2623 } 2624 // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary 2625 KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n", 2626 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], 2627 ((team != NULL) ? team->t.t_id : -1), other_team )); 2628 } 2629 } 2630 } 2631 2632 2633 //------------------------------------------------------------------------------ 2634 // __kmp_task_team_sync: Propagation of task team data from team to threads 2635 // which happens just after the release phase of a team barrier. This may be 2636 // called by any thread, but only for teams with # threads > 1. 2637 2638 void 2639 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team ) 2640 { 2641 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); 2642 2643 // Toggle the th_task_state field, to switch which task_team this thread refers to 2644 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state; 2645 // It is now safe to propagate the task team pointer from the team struct to the current thread. 2646 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]); 2647 KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n", 2648 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team, 2649 ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state)); 2650 } 2651 2652 2653 //-------------------------------------------------------------------------------------------- 2654 // __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather 2655 // phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created. 2656 // wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0 2657 // optionally as the last argument. When wait is zero, master thread does not wait for 2658 // unfinished_threads to reach 0. 2659 void 2660 __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team 2661 USE_ITT_BUILD_ARG(void * itt_sync_obj) 2662 , int wait) 2663 { 2664 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state]; 2665 2666 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); 2667 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team ); 2668 2669 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) { 2670 if (wait) { 2671 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n", 2672 __kmp_gtid_from_thread(this_thr), task_team)); 2673 // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait 2674 // here for tasks to complete. To avoid memory contention, only master thread checks termination condition. 
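            // (Illustrative note: tt_unfinished_threads was initialized to the team
            //  size when the task team was set up, and each worker decrements it in
            //  __kmp_execute_tasks_template() once its task sources are exhausted
            //  (re-incrementing it if it steals more work), so waiting for 0 here
            //  means no worker can still be executing or generating tasks for this
            //  task team.)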
2675 kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U); 2676 flag.wait(this_thr, TRUE 2677 USE_ITT_BUILD_ARG(itt_sync_obj)); 2678 } 2679 // Deactivate the old task team, so that the worker threads will stop referencing it while spinning. 2680 KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: " 2681 "setting active to false, setting local and team's pointer to NULL\n", 2682 __kmp_gtid_from_thread(this_thr), task_team)); 2683 #if OMP_45_ENABLED 2684 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE ); 2685 TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE ); 2686 #else 2687 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 ); 2688 #endif 2689 TCW_SYNC_4( task_team->tt.tt_active, FALSE ); 2690 KMP_MB(); 2691 2692 TCW_PTR(this_thr->th.th_task_team, NULL); 2693 } 2694 } 2695 2696 2697 //------------------------------------------------------------------------------ 2698 // __kmp_tasking_barrier: 2699 // This routine may only called when __kmp_tasking_mode == tskm_extra_barrier. 2700 // Internal function to execute all tasks prior to a regular barrier or a 2701 // join barrier. It is a full barrier itself, which unfortunately turns 2702 // regular barriers into double barriers and join barriers into 1 1/2 2703 // barriers. 2704 void 2705 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ) 2706 { 2707 volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads; 2708 int flag = FALSE; 2709 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier ); 2710 2711 #if USE_ITT_BUILD 2712 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL ); 2713 #endif /* USE_ITT_BUILD */ 2714 kmp_flag_32 spin_flag(spin, 0U); 2715 while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag 2716 USE_ITT_BUILD_ARG(NULL), 0 ) ) { 2717 #if USE_ITT_BUILD 2718 // TODO: What about itt_sync_obj?? 2719 KMP_FSYNC_SPIN_PREPARE( spin ); 2720 #endif /* USE_ITT_BUILD */ 2721 2722 if( TCR_4(__kmp_global.g.g_done) ) { 2723 if( __kmp_global.g.g_abort ) 2724 __kmp_abort_thread( ); 2725 break; 2726 } 2727 KMP_YIELD( TRUE ); // GH: We always yield here 2728 } 2729 #if USE_ITT_BUILD 2730 KMP_FSYNC_SPIN_ACQUIRED( (void*) spin ); 2731 #endif /* USE_ITT_BUILD */ 2732 } 2733 2734 2735 #if OMP_45_ENABLED 2736 2737 /* __kmp_give_task puts a task into a given thread queue if: 2738 - the queue for that thread was created 2739 - there's space in that queue 2740 2741 Because of this, __kmp_push_task needs to check if there's space after getting the lock 2742 */ 2743 static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass ) 2744 { 2745 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); 2746 kmp_task_team_t * task_team = taskdata->td_task_team; 2747 2748 KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) ); 2749 2750 // If task_team is NULL something went really bad... 
2751 KMP_DEBUG_ASSERT( task_team != NULL );
2752
2753 bool result = false;
2754 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
2755
2756 if (thread_data -> td.td_deque == NULL ) {
2757 // There's no queue in this thread, go find another one
2758 // We're guaranteed that at least one thread has a queue
2759 KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
2760 return result;
2761 }
2762
2763 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2764 {
2765 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2766
2767 // if this deque is bigger than the pass ratio, give a chance to another thread
2768 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result;
2769
2770 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2771 __kmp_realloc_task_deque(thread,thread_data);
2772
2773 } else {
2774
2775 __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
2776
2777 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
2778 {
2779 KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
2780
2781 // if this deque is bigger than the pass ratio, give a chance to another thread
2782 if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
2783 goto release_and_exit;
2784
2785 __kmp_realloc_task_deque(thread,thread_data);
2786 }
2787 }
2788
2789 // lock is held here, and there is space in the deque
2790
2791 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
2792 // Wrap index.
2793 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
2794 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
2795
2796 result = true;
2797 KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
2798
2799 release_and_exit:
2800 __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
2801
2802 return result;
2803 }
2804
2805
2806 /* The finish of a proxy task is divided into two pieces:
2807 - the top half is the one that can be done from a thread outside the team
2808 - the bottom half must be run from a thread within the team
2809
2810 In order to run the bottom half, the task gets queued back into one of the threads of the team.
2811 Once the td_incomplete_child_tasks counter of the parent is decremented, the threads can leave the barriers.
2812 So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided into two parts:
2813 - things that can be run before queuing the bottom half
2814 - things that must be run after queuing the bottom half
2815
2816 This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this,
2817 we use the td_incomplete_child_tasks counter of the proxy task to synchronize the top and bottom halves.
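   A minimal timeline sketch of the intended ordering (illustrative only, matching the
   routines defined below for the out-of-team case):

     __kmp_first_top_half_finish_proxy:   complete = 1, taskgroup count--,
                                          proxy td_incomplete_child_tasks++  (imaginary child)
     __kmp_give_task:                     bottom half queued to a thread of the team
     __kmp_second_top_half_finish_proxy:  parent td_incomplete_child_tasks--,
                                          proxy td_incomplete_child_tasks--  (imaginary child removed)
     __kmp_bottom_half_finish_proxy:      spins until the proxy's td_incomplete_child_tasks
                                          reaches 0, then releases dependences and frees the task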
2818 */
2819
2820 static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2821 {
2822 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
2823 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2824 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
2825 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
2826
2827 taskdata -> td_flags.complete = 1; // mark the task as completed
2828
2829 if ( taskdata->td_taskgroup )
2830 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
2831
2832 // Create an imaginary child for this task so the bottom half cannot release the task before we have completed the second top half
2833 TCI_4(taskdata->td_incomplete_child_tasks);
2834 }
2835
2836 static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
2837 {
2838 kmp_int32 children = 0;
2839
2840 // Predecrement simulated by "- 1" calculation
2841 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
2842 KMP_DEBUG_ASSERT( children >= 0 );
2843
2844 // Remove the imaginary child
2845 TCD_4(taskdata->td_incomplete_child_tasks);
2846 }
2847
2848 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
2849 {
2850 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2851 kmp_info_t * thread = __kmp_threads[ gtid ];
2852
2853 KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
2854 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
2855
2856 // We need to wait to make sure the top half is finished
2857 // Spinning here should be ok as this should happen quickly
2858 while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
2859
2860 __kmp_release_deps(gtid,taskdata);
2861 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
2862 }
2863
2864 /*!
2865 @ingroup TASKING
2866 @param gtid Global Thread ID of encountering thread
2867 @param ptask Task whose execution is completed
2868
2869 Execute the completion of a proxy task from a thread that is part of the team. Run the top and bottom halves directly.
2870 */
2871 void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
2872 {
2873 KMP_DEBUG_ASSERT( ptask != NULL );
2874 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2875 KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
2876
2877 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2878
2879 __kmp_first_top_half_finish_proxy(taskdata);
2880 __kmp_second_top_half_finish_proxy(taskdata);
2881 __kmp_bottom_half_finish_proxy(gtid,ptask);
2882
2883 KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
2884 }
2885
2886 /*!
2887 @ingroup TASKING
2888 @param ptask Task whose execution is completed
2889
2890 Execute the completion of a proxy task from a thread that may not belong to the team.
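For example (illustrative sketch only; the callback and its registration are hypothetical,
not part of this runtime), an offloading plugin that attached a proxy task to an
asynchronous operation might complete it from its own service thread:

@code
    static void async_op_done( void *data )
    {
        kmp_task_t *ptask = (kmp_task_t *)data;
        __kmpc_proxy_task_completed_ooo( ptask );  // safe from a non-team thread
    }
@endcode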
2891 */
2892 void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
2893 {
2894 KMP_DEBUG_ASSERT( ptask != NULL );
2895 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
2896
2897 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
2898
2899 KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
2900
2901 __kmp_first_top_half_finish_proxy(taskdata);
2902
2903 // Enqueue the task so that the bottom half of its completion runs on a thread within the corresponding team
2904 kmp_team_t * team = taskdata->td_team;
2905 kmp_int32 nthreads = team->t.t_nproc;
2906 kmp_info_t *thread;
2907
2908 // This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
2909 kmp_int32 start_k = 0;
2910 kmp_int32 pass = 1;
2911 kmp_int32 k = start_k;
2912
2913 do {
2914 // For now we're just linearly trying to find a thread
2915 thread = team->t.t_threads[k];
2916 k = (k+1) % nthreads;
2917
2918 // we did a full pass through all the threads
2919 if ( k == start_k ) pass = pass << 1;
2920
2921 } while ( !__kmp_give_task( thread, k, ptask, pass ) );
2922
2923 __kmp_second_top_half_finish_proxy(taskdata);
2924
2925 KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
2926 }
2927
2928 //---------------------------------------------------------------------------------
2929 // __kmp_task_dup_alloc: Allocate the taskdata and make a copy of the source task for taskloop
2930 //
2931 // thread: allocating thread
2932 // task_src: pointer to source task to be duplicated
2933 // returns: a pointer to the allocated kmp_task_t structure (task).
2934 kmp_task_t *
2935 __kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
2936 {
2937 kmp_task_t *task;
2938 kmp_taskdata_t *taskdata;
2939 kmp_taskdata_t *taskdata_src;
2940 kmp_taskdata_t *parent_task = thread->th.th_current_task;
2941 size_t shareds_offset;
2942 size_t task_size;
2943
2944 KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
2945 taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
2946 KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be a proxy task
2947 KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
2948 task_size = taskdata_src->td_size_alloc;
2949
2950 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
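// [Illustrative layout note, not part of the original source] The pattern task was
// allocated as a single block of td_size_alloc bytes, roughly laid out as
//
//     [ kmp_taskdata_t | kmp_task_t + private data | shareds ]
//       ^taskdata_src    ^task_src                   ^task_src->shareds
//
// so one KMP_MEMCPY of task_size bytes duplicates everything, and only the pointers
// embedded in the copy (e.g. task->shareds, re-based via shareds_offset below) need
// to be fixed up afterwards.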
2951 KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
2952 #if USE_FAST_MEMORY
2953 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
2954 #else
2955 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
2956 #endif /* USE_FAST_MEMORY */
2957 KMP_MEMCPY(taskdata, taskdata_src, task_size);
2958
2959 task = KMP_TASKDATA_TO_TASK(taskdata);
2960
2961 // Initialize new task (only specific fields not affected by memcpy)
2962 taskdata->td_task_id = KMP_GEN_TASK_ID();
2963 if( task->shareds != NULL ) { // need to set up the shareds pointer
2964 shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
2965 task->shareds = &((char*)taskdata)[shareds_offset];
2966 KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
2967 }
2968 taskdata->td_alloc_thread = thread;
2969 taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
2970
2971 // Only need to keep track of child task counts if team parallel and tasking not serialized
2972 if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
2973 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
2974 if ( parent_task->td_taskgroup )
2975 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
2976 // Only need to keep track of allocated child tasks for explicit tasks since implicit ones are not deallocated
2977 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
2978 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
2979 }
2980
2981 KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
2982 thread, taskdata, taskdata->td_parent) );
2983 #if OMPT_SUPPORT
2984 __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
2985 #endif
2986 return task;
2987 }
2988
2989 // Routine optionally generated by the compiler for setting the lastprivate flag
2990 // and calling needed constructors for private/firstprivate objects
2991 // (used to form taskloop tasks from pattern task)
2992 typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
2993
2994 //---------------------------------------------------------------------------------
2995 // __kmp_taskloop_linear: Start tasks of the taskloop linearly
2996 //
2997 // loc Source location information
2998 // gtid Global thread ID
2999 // task Task with whole loop iteration range
3000 // lb Pointer to loop lower bound
3001 // ub Pointer to loop upper bound
3002 // st Loop stride
3003 // sched Schedule specified 0/1/2 for none/grainsize/num_tasks
3004 // grainsize Schedule value if specified
3005 // task_dup Tasks duplication routine
3006 void
3007 __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
3008 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3009 int sched, kmp_uint64 grainsize, void *task_dup )
3010 {
3011 KMP_COUNT_BLOCK(OMP_TASKLOOP);
3012 KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
3013 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
3014 kmp_uint64 tc;
3015 kmp_uint64 lower = *lb; // compiler provides global bounds here
3016 kmp_uint64 upper = *ub;
3017 kmp_uint64 i, num_tasks = 0, extras = 0;
3018 kmp_info_t *thread = __kmp_threads[gtid];
3019 kmp_taskdata_t *current_task = thread->th.th_current_task;
3020 kmp_task_t *next_task;
3021 kmp_int32 lastpriv = 0;
3022 size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
3023 size_t
upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
3024
3025 // compute trip count
3026 if ( st == 1 ) { // most common case
3027 tc = upper - lower + 1;
3028 } else if ( st < 0 ) {
3029 tc = (lower - upper) / (-st) + 1;
3030 } else { // st > 0
3031 tc = (upper - lower) / st + 1;
3032 }
3033 if(tc == 0) {
3034 KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
3035 // free the pattern task and exit
3036 __kmp_task_start( gtid, task, current_task );
3037 // do not execute anything for zero-trip loop
3038 __kmp_task_finish( gtid, task, current_task );
3039 return;
3040 }
3041
3042 // compute num_tasks/grainsize based on the input provided (see the illustrative sketch after __kmpc_taskloop below)
3043 switch( sched ) {
3044 case 0: // no schedule clause specified, we can choose the default
3045 // let's try to schedule (team_size*10) tasks; no break: falls through and is treated as num_tasks below
3046 grainsize = thread->th.th_team_nproc * 10;
3047 case 2: // num_tasks provided
3048 if( grainsize > tc ) {
3049 num_tasks = tc; // too big num_tasks requested, adjust values
3050 grainsize = 1;
3051 extras = 0;
3052 } else {
3053 num_tasks = grainsize;
3054 grainsize = tc / num_tasks;
3055 extras = tc % num_tasks;
3056 }
3057 break;
3058 case 1: // grainsize provided
3059 if( grainsize > tc ) {
3060 num_tasks = 1; // too big grainsize requested, adjust values
3061 grainsize = tc;
3062 extras = 0;
3063 } else {
3064 num_tasks = tc / grainsize;
3065 grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
3066 extras = tc % num_tasks;
3067 }
3068 break;
3069 default:
3070 KMP_ASSERT2(0, "unknown scheduling of taskloop");
3071 }
3072 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
3073 KMP_DEBUG_ASSERT(num_tasks > extras);
3074 KMP_DEBUG_ASSERT(num_tasks > 0);
3075 KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize %lld, extras %lld\n",
3076 gtid, num_tasks, grainsize, extras));
3077
3078 // Main loop, launch num_tasks tasks, assign grainsize iterations to each task
3079 for( i = 0; i < num_tasks; ++i ) {
3080 kmp_uint64 chunk_minus_1;
3081 if( extras == 0 ) {
3082 chunk_minus_1 = grainsize - 1;
3083 } else {
3084 chunk_minus_1 = grainsize;
3085 --extras; // the first extras tasks get a bigger chunk (grainsize+1)
3086 }
3087 upper = lower + st * chunk_minus_1;
3088 if( i == num_tasks - 1 ) {
3089 // schedule the last task, set lastprivate flag
3090 lastpriv = 1;
3091 #if KMP_DEBUG
3092 if( st == 1 )
3093 KMP_DEBUG_ASSERT(upper == *ub);
3094 else if( st > 0 )
3095 KMP_DEBUG_ASSERT(upper+st > *ub);
3096 else
3097 KMP_DEBUG_ASSERT(upper+st < *ub);
3098 #endif
3099 }
3100 next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
3101 *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
3102 *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
3103 if( ptask_dup != NULL )
3104 ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct firstprivates, etc.
3105 KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper %lld (offsets %p %p)\n",
3106 gtid, next_task, lower, upper, lower_offset, upper_offset));
3107 __kmp_omp_task(gtid, next_task, true); // schedule new task
3108 lower = upper + st; // adjust lower bound for the next iteration
3109 }
3110 // free the pattern task and exit
3111 __kmp_task_start( gtid, task, current_task );
3112 // do not execute the pattern task, just do bookkeeping
3113 __kmp_task_finish( gtid, task, current_task );
3114 }
3115
3116 /*!
3117 @ingroup TASKING 3118 @param loc Source location information 3119 @param gtid Global thread ID 3120 @param task Task structure 3121 @param if_val Value of the if clause 3122 @param lb Pointer to loop lower bound 3123 @param ub Pointer to loop upper bound 3124 @param st Loop stride 3125 @param nogroup Flag, 1 if nogroup clause specified, 0 otherwise 3126 @param sched Schedule specified 0/1/2 for none/grainsize/num_tasks 3127 @param grainsize Schedule value if specified 3128 @param task_dup Tasks duplication routine 3129 3130 Execute the taskloop construct. 3131 */ 3132 void 3133 __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, 3134 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, 3135 int nogroup, int sched, kmp_uint64 grainsize, void *task_dup ) 3136 { 3137 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); 3138 KMP_DEBUG_ASSERT( task != NULL ); 3139 3140 KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n", 3141 gtid, taskdata, *lb, *ub, st, grainsize, sched)); 3142 3143 // check if clause value first 3144 if( if_val == 0 ) { // if(0) specified, mark task as serial 3145 taskdata->td_flags.task_serial = 1; 3146 taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied 3147 } 3148 if( nogroup == 0 ) { 3149 __kmpc_taskgroup( loc, gtid ); 3150 } 3151 3152 if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) { 3153 __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup ); 3154 } 3155 3156 if( nogroup == 0 ) { 3157 __kmpc_end_taskgroup( loc, gtid ); 3158 } 3159 KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid)); 3160 } 3161 3162 #endif 3163
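// ---------------------------------------------------------------------------------
// Illustrative, build-disabled sketch (not part of the runtime): the chunking rule
// used by __kmp_taskloop_linear can be reproduced in isolation. Given the trip count
// tc and the num_tasks/grainsize/extras values computed from the sched clause, the
// first `extras` tasks receive grainsize+1 iterations and the remaining tasks receive
// grainsize, so every iteration is assigned exactly once
// (tc == num_tasks * grainsize + extras). The helper name below is hypothetical.
#if 0
#include <assert.h>

static void
taskloop_chunk_sketch( unsigned long long tc, unsigned long long num_tasks,
                       unsigned long long grainsize, unsigned long long extras )
{
    unsigned long long assigned = 0;
    assert( tc == num_tasks * grainsize + extras );
    for ( unsigned long long i = 0; i < num_tasks; ++i ) {
        // mirror the loop in __kmp_taskloop_linear: the first `extras` tasks get one
        // extra iteration, the rest get exactly `grainsize`
        unsigned long long chunk = ( extras == 0 ) ? grainsize : grainsize + 1;
        if ( extras != 0 )
            --extras;
        assigned += chunk;
    }
    assert( assigned == tc ); // every iteration accounted for, none duplicated
}
#endif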