/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "oneapi/tbb/detail/_config.h"
#include "oneapi/tbb/tbb_allocator.h"
#include "oneapi/tbb/task_group.h"
#include "governor.h"
#include "thread_data.h"
#include "scheduler_common.h"
#include "itt_notify.h"
#include "task_dispatcher.h"

#include <type_traits>

namespace tbb {
namespace detail {
namespace r1 {

//------------------------------------------------------------------------
// tbb_exception_ptr
//------------------------------------------------------------------------
tbb_exception_ptr* tbb_exception_ptr::allocate() noexcept {
    tbb_exception_ptr* eptr = (tbb_exception_ptr*)allocate_memory(sizeof(tbb_exception_ptr));
    return eptr ? new (eptr) tbb_exception_ptr(std::current_exception()) : nullptr;
}

void tbb_exception_ptr::destroy() noexcept {
    this->~tbb_exception_ptr();
    deallocate_memory(this);
}

void tbb_exception_ptr::throw_self() {
    if (governor::rethrow_exception_broken()) fix_broken_rethrow();
    std::rethrow_exception(my_ptr);
}

//------------------------------------------------------------------------
// task_group_context
//------------------------------------------------------------------------

void task_group_context_impl::destroy(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);

    auto ctx_lifetime_state = ctx.my_lifetime_state.load(std::memory_order_relaxed);
    __TBB_ASSERT(ctx_lifetime_state != d1::task_group_context::lifetime_state::locked, nullptr);

    if (ctx_lifetime_state == d1::task_group_context::lifetime_state::bound) {
        // The owner can be destroyed at any moment. Access the associated data with caution.
        thread_data* owner = ctx.my_owner.load(std::memory_order_relaxed);
        if (governor::is_thread_data_set(owner)) {
            thread_data::context_list_state& cls = owner->my_context_list_state;
            // We are the owner, so cls is valid.
            // Local update of the context list.
            std::uintptr_t local_count_snapshot = cls.epoch.load(std::memory_order_relaxed);
            // The sequentially-consistent store prevents the load of the nonlocal update flag
            // from being hoisted before the store to the local update flag.
            cls.local_update = 1;
            if (cls.nonlocal_update.load(std::memory_order_relaxed)) {
                spin_mutex::scoped_lock lock(cls.mutex);
                ctx.my_node.remove_relaxed();
                cls.local_update.store(0, std::memory_order_relaxed);
            } else {
                ctx.my_node.remove_relaxed();
                // The release fence is necessary so that the update of our neighbors in
                // the context list is committed before a possible concurrent destroyer
                // proceeds after the local update flag is reset by the following store.
                cls.local_update.store(0, std::memory_order_release);
                if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) {
                    // Another thread was propagating a cancellation request while we removed
                    // ourselves from the list. We must ensure that it is not accessing us
                    // when this destructor finishes. We will be able to acquire the lock
                    // below only after the other thread finishes with us.
                    spin_mutex::scoped_lock lock(cls.mutex);
                }
            }
        } else {
            d1::task_group_context::lifetime_state expected = d1::task_group_context::lifetime_state::bound;
            if (
#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910
                !((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong(
                    (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)expected,
                    (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked)
#else
                !ctx.my_lifetime_state.compare_exchange_strong(expected, d1::task_group_context::lifetime_state::locked)
#endif
            ) {
                __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::detached, nullptr);
                // The "owner" local variable can be a dangling pointer here. Do not access it.
                owner = nullptr;
                spin_wait_until_eq(ctx.my_owner, nullptr);
                // It is unsafe to remove the node because its neighbors might be already destroyed.
                // TODO: reconsider the logic.
                // ctx.my_node.remove_relaxed();
            } else {
                __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::bound, nullptr);
                __TBB_ASSERT(ctx.my_owner.load(std::memory_order_relaxed) != nullptr, nullptr);
                thread_data::context_list_state& cls = owner->my_context_list_state;
                __TBB_ASSERT(is_alive(cls.nonlocal_update.load(std::memory_order_relaxed)), "The owner should be alive.");

                ++cls.nonlocal_update;
                ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::dying, std::memory_order_release);
                spin_wait_until_eq(cls.local_update, 0u);
                {
                    spin_mutex::scoped_lock lock(cls.mutex);
                    ctx.my_node.remove_relaxed();
                }
                --cls.nonlocal_update;
            }
        }
    }

    if (ctx_lifetime_state == d1::task_group_context::lifetime_state::detached) {
        spin_wait_until_eq(ctx.my_owner, nullptr);
    }

    d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env);
#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER
    suppress_unused_warning(ctl);
#endif
    ctl->~cpu_ctl_env();

    if (ctx.my_exception)
        ctx.my_exception->destroy();
    ITT_STACK_DESTROY(ctx.my_itt_caller);

    poison_pointer(ctx.my_parent);
    poison_pointer(ctx.my_owner);
    poison_pointer(ctx.my_node.next);
    poison_pointer(ctx.my_node.prev);
    poison_pointer(ctx.my_exception);
    poison_pointer(ctx.my_itt_caller);
}

void task_group_context_impl::initialize(d1::task_group_context& ctx) {
    ITT_TASK_GROUP(&ctx, ctx.my_name, nullptr);

    ctx.my_cpu_ctl_env = 0;
    ctx.my_cancellation_requested = 0;
    ctx.my_state.store(0, std::memory_order_relaxed);
    // The context starts in the "created" state; it becomes "bound" (or "isolated") at its first use.
    ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::created, std::memory_order_relaxed);
    ctx.my_parent = nullptr;
    ctx.my_owner = nullptr;
    ctx.my_node.next.store(nullptr, std::memory_order_relaxed);
    ctx.my_node.prev.store(nullptr, std::memory_order_relaxed);
    ctx.my_exception = nullptr;
    ctx.my_itt_caller = nullptr;

    static_assert(sizeof(d1::cpu_ctl_env) <= sizeof(ctx.my_cpu_ctl_env), "FPU settings storage does not fit to uint64_t");
    d1::cpu_ctl_env* ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env;
    if (ctx.my_traits.fp_settings)
        ctl->get_env();
}

void task_group_context_impl::register_with(d1::task_group_context& ctx, thread_data* td) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    __TBB_ASSERT(td, NULL);
    ctx.my_owner.store(td, std::memory_order_relaxed);
    thread_data::context_list_state& cls = td->my_context_list_state;
    // The state propagation logic assumes that new contexts are bound to the head of the list.
    ctx.my_node.prev.store(&cls.head, std::memory_order_relaxed);
    // Notify threads that may be concurrently destroying contexts registered
    // in this scheduler's list that a local list update is underway.
    // Prevent the load of the global propagation epoch counter from being hoisted before
    // the speculative stores above, as well as the load of the nonlocal update flag from
    // being hoisted before the store to the local update flag.
    cls.local_update = 1;
    // Finalize the local context list update.
    if (cls.nonlocal_update.load(std::memory_order_relaxed)) {
        spin_mutex::scoped_lock lock(cls.mutex);
        d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed);
        head_next->prev.store(&ctx.my_node, std::memory_order_relaxed);
        ctx.my_node.next.store(head_next, std::memory_order_relaxed);
        cls.local_update.store(0, std::memory_order_relaxed);
        cls.head.next.store(&ctx.my_node, std::memory_order_relaxed);
    } else {
        d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed);
        head_next->prev.store(&ctx.my_node, std::memory_order_relaxed);
        ctx.my_node.next.store(head_next, std::memory_order_relaxed);
        cls.local_update.store(0, std::memory_order_release);
        // The thread-local list of contexts allows concurrent traversal by another thread
        // while a state change is being propagated. To ensure visibility of ctx.my_node's members
        // to the concurrently traversing thread, the list's head is updated by means
        // of a store-with-release.
        cls.head.next.store(&ctx.my_node, std::memory_order_release);
    }
}

void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_data* td) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) == d1::task_group_context::lifetime_state::locked, "The context can be bound only under the lock.");
    __TBB_ASSERT(!ctx.my_parent, "Parent is set before initial binding");

    ctx.my_parent = td->my_task_dispatcher->m_execute_data_ext.context;
    __TBB_ASSERT(ctx.my_parent, NULL);

    // Inherit FPU settings only if the context has not captured FPU settings yet.
    if (!ctx.my_traits.fp_settings)
        copy_fp_settings(ctx, *ctx.my_parent);

    // The condition below prevents unnecessary thrashing of the parent context's cache line.
    if (ctx.my_parent->my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) {
        ctx.my_parent->my_state.store(d1::task_group_context::may_have_children, std::memory_order_relaxed); // full fence is below
    }
    if (ctx.my_parent->my_parent) {
        // Even if this context were made accessible for state change propagation
        // (by placing store_with_release(td->my_context_list_state.head.next, &ctx.my_node)
        // above), it still could be missed if state propagation from a grand-ancestor
        // was underway concurrently with binding.
        // Speculative propagation from the parent, together with epoch counters that detect
        // the possibility of such a race, makes it possible to avoid taking locks when
        // there is no contention.

        // An acquire fence is necessary to prevent reordering of subsequent speculative
        // loads of the parent state data out of the scope where the epoch counters comparison
        // can reliably validate them.
        uintptr_t local_count_snapshot = ctx.my_parent->my_owner.load(std::memory_order_relaxed)->my_context_list_state.epoch.load(std::memory_order_acquire);
        // Speculative propagation of the parent's state. The speculation will be
        // validated by the epoch counters check further on.
        ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
        register_with(ctx, td); // Issues full fence

        // If no state propagation was detected by the following condition, the above
        // full fence guarantees that the parent had the correct state during the speculative
        // propagation before the fence. Otherwise the propagation from the parent is
        // repeated under the lock.
        if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) {
            // Another thread may be propagating a state change right now. So resort to the lock.
            context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
            ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
        }
    } else {
        register_with(ctx, td); // Issues full fence
        // As we do not have grand-ancestors, concurrent state propagation (if any)
        // may originate only from the parent context, and thus it is safe to directly
        // copy the state from it.
        ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
    }

    ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::bound, std::memory_order_release);
}

void task_group_context_impl::bind_to(d1::task_group_context& ctx, thread_data* td) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    d1::task_group_context::lifetime_state state = ctx.my_lifetime_state.load(std::memory_order_acquire);
    if (state <= d1::task_group_context::lifetime_state::locked) {
        if (state == d1::task_group_context::lifetime_state::created &&
#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910
            ((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong(
                (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)state,
                (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked)
#else
            ctx.my_lifetime_state.compare_exchange_strong(state, d1::task_group_context::lifetime_state::locked)
#endif
        ) {
            // If we are in the outermost task dispatch loop of an external thread, then
            // there is nothing to bind this context to, and we skip the binding part
            // treating the context as isolated.
            __TBB_ASSERT(td->my_task_dispatcher->m_execute_data_ext.context != nullptr, nullptr);
            if (td->my_task_dispatcher->m_execute_data_ext.context == td->my_arena->my_default_ctx || !ctx.my_traits.bound) {
                if (!ctx.my_traits.fp_settings) {
                    copy_fp_settings(ctx, *td->my_arena->my_default_ctx);
                }
                ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::isolated, std::memory_order_release);
            } else {
                bind_to_impl(ctx, td);
            }
            ITT_STACK_CREATE(ctx.my_itt_caller);
        }
        spin_wait_while_eq(ctx.my_lifetime_state, d1::task_group_context::lifetime_state::locked);
    }
    __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::created, NULL);
    __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::locked, NULL);
}

template <typename T>
void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state) {
        // Nothing to do, whether descending from "src" or not, so no need to scan.
        // Hopefully this happens often thanks to earlier invocations.
        // This optimization is enabled by LIFO order in the context lists:
        // - new contexts are bound to the beginning of lists;
        // - descendants are newer than ancestors;
        // - earlier invocations are therefore likely to "paint" long chains.
    } else if (&ctx == &src) {
        // This clause is disjunct from the traversal below, which skips src entirely.
        // Note that src.*mptr_state is not necessarily still equal to new_state (another thread may have changed it again).
        // Such interference is probably not frequent enough to aim for optimisation by writing new_state again (to make the other thread back down).
        // Letting the other thread prevail may also be fairer.
    } else {
        // Check whether src is an ancestor of ctx; if it is, propagate the new state
        // to the whole chain from ctx up to (but excluding) src.
        for (d1::task_group_context* ancestor = ctx.my_parent; ancestor != NULL; ancestor = ancestor->my_parent) {
            if (ancestor == &src) {
                for (d1::task_group_context* c = &ctx; c != ancestor; c = c->my_parent)
                    (c->*mptr_state).store(new_state, std::memory_order_relaxed);
                break;
            }
        }
    }
}

bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    __TBB_ASSERT(ctx.my_cancellation_requested.load(std::memory_order_relaxed) <= 1, "The cancellation state can be either 0 or 1");
    if (ctx.my_cancellation_requested.load(std::memory_order_relaxed) || ctx.my_cancellation_requested.exchange(1)) {
        // This task group and any descendants have already been canceled.
        // (A newly added descendant would inherit its parent's ctx.my_cancellation_requested,
        // not missing out on any cancellation still being propagated, and a context cannot be uncanceled.)
        return false;
    }
    governor::get_thread_data()->my_arena->my_market->propagate_task_group_state(&d1::task_group_context::my_cancellation_requested, ctx, uint32_t(1));
    return true;
}

bool task_group_context_impl::is_group_execution_cancelled(const d1::task_group_context& ctx) {
    return ctx.my_cancellation_requested.load(std::memory_order_relaxed) != 0;
}

// IMPORTANT: It is assumed that this method is not used concurrently!
void task_group_context_impl::reset(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    //! TODO: Add assertion that this context does not have children
    // No fences are necessary since this context can be accessed from another thread
    // only after stealing happened (which means necessary fences were used).
    if (ctx.my_exception) {
        ctx.my_exception->destroy();
        ctx.my_exception = NULL;
    }
    ctx.my_cancellation_requested = 0;
}

// IMPORTANT: It is assumed that this method is not used concurrently!
void task_group_context_impl::capture_fp_settings(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    //! TODO: Add assertion that this context does not have children
    // No fences are necessary since this context can be accessed from another thread
    // only after stealing happened (which means necessary fences were used).
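    // The FPU settings object lives in the storage embedded in the context (ctx.my_cpu_ctl_env).
    // On the first capture it is constructed there via placement new; on subsequent captures
    // the already constructed object is reused and refreshed by get_env().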
    d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env);
    if (!ctx.my_traits.fp_settings) {
        ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env;
        ctx.my_traits.fp_settings = true;
    }
    ctl->get_env();
}

void task_group_context_impl::copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    __TBB_ASSERT(!ctx.my_traits.fp_settings, "The context already has FPU settings.");
    __TBB_ASSERT(src.my_traits.fp_settings, "The source context does not have FPU settings.");

    const d1::cpu_ctl_env* src_ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&src.my_cpu_ctl_env);
    new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env(*src_ctl);
    ctx.my_traits.fp_settings = true;
}

template <typename T>
void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
    spin_mutex::scoped_lock lock(my_context_list_state.mutex);
    // The acquire fence is necessary to ensure that the subsequent node->next load
    // returns the correct value in case the node was just inserted by another thread.
    // The fence also ensures visibility of the correct ctx.my_parent value.
    d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_acquire);
    while (node != &my_context_list_state.head) {
        d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node);
        if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state)
            task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state);
        node = node->next.load(std::memory_order_relaxed);
    }
    // Sync up the local propagation epoch with the global one. The release fence prevents
    // reordering of a possible store to *mptr_state after the sync point.
    my_context_list_state.epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release);
}

template <typename T>
bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
    if (src.my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children)
        return true;
    // The whole propagation algorithm is performed under the lock in order to ensure correctness
    // in case of concurrent state changes at different levels of the context tree.
    // See the comment at the bottom of this file.
    context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
    if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state)
        // Another thread has concurrently changed the state. Back down.
        return false;
    // Advance the global state propagation epoch.
    ++the_context_state_propagation_epoch;
    // Propagate to all workers and external threads and sync up their local epochs with the global one.
    unsigned num_workers = my_first_unused_worker_idx;
    for (unsigned i = 0; i < num_workers; ++i) {
        thread_data* td = my_workers[i];
        // If the worker is only about to be registered, skip it.
        if (td)
            td->propagate_task_group_state(mptr_state, src, new_state);
    }
    // Propagate to all external threads.
    // The whole propagation sequence is locked, thus no contention is expected.
    for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); it++)
        it->propagate_task_group_state(mptr_state, src, new_state);
    return true;
}

/*
    Comments:

    1. The premise of the cancellation support implementation is that cancellations are
       not part of the hot path of program execution. Therefore any change made to reduce
       the overhead of the cancellation control flow should be done only in ways that do
       not increase the overhead of normal execution.

       In general, contexts are used by all threads, and their descendants are created in
       different threads as well. In order to minimize the impact of the cross-thread tree
       maintenance (first of all because of the synchronization), the tree of contexts
       is split into pieces, each of which is handled by a single thread. Such a piece
       is represented as a list of contexts whose members are the contexts that were
       bound to their parents in the given thread.

       The context tree maintenance and cancellation propagation algorithms are designed
       in such a manner that cross-thread access to a context list takes place only
       when a cancellation signal is sent (by the user or when an exception happens), and
       synchronization is necessary only then. Thus the normal execution flow (without
       exceptions and cancellation) remains free from any synchronization done on
       behalf of exception handling and cancellation support.

    2. Consider parallel cancellations at different levels of the context tree:

        Ctx1 <- Cancelled by Thread1            |- Thread2 started processing
         |                                      |
        Ctx2                                    |- Thread1 started processing
         |                                   T1 |- Thread2 finishes and syncs up local counters
        Ctx3 <- Cancelled by Thread2            |
         |                                      |- Ctx5 is bound to Ctx2
        Ctx4                                    |
                                             T2 |- Thread1 reaches Ctx2

       The thread propagating each cancellation increments the global counter. However, the
       thread propagating the cancellation from the outermost context (Thread1) may be the
       last to finish. This means that the local counters may be synchronized (by Thread2,
       at time T1) before Thread1 propagates the cancellation into Ctx2 (at time T2). If a
       new context (Ctx5) is created and bound to Ctx2 between T1 and T2, checking only its
       parent (Ctx2) may result in the cancellation request being lost.

       This issue is solved by doing the whole propagation under the lock.

       If we need more concurrency while processing parallel cancellations, we could try
       the following modification of the propagation algorithm:

           advance the global counter and remember it
           for each thread:
               scan the thread's list of contexts
           for each thread:
               sync up its local counter only if the global counter has not been changed

       However this version of the algorithm requires more analysis and verification.
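
    3. Illustrative sketch of the lock-free binding path (this only restates the logic
       already implemented in bind_to_impl() above; it is not a separate specification):

           snapshot = owner's context list epoch              // acquire load
           copy parent's cancellation state speculatively     // relaxed stores/loads
           register_with(ctx, td)                             // issues a full fence
           if (snapshot != the_context_state_propagation_epoch)
               re-copy the parent's state under the_context_state_propagation_mutex

       The full fence issued by register_with() orders the speculative copy with respect
       to a concurrent propagation, and the epoch comparison detects whether a propagation
       could have been missed, in which case the copy is repeated under the lock.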
*/

void __TBB_EXPORTED_FUNC initialize(d1::task_group_context& ctx) {
    task_group_context_impl::initialize(ctx);
}
void __TBB_EXPORTED_FUNC destroy(d1::task_group_context& ctx) {
    task_group_context_impl::destroy(ctx);
}
void __TBB_EXPORTED_FUNC reset(d1::task_group_context& ctx) {
    task_group_context_impl::reset(ctx);
}
bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context& ctx) {
    return task_group_context_impl::cancel_group_execution(ctx);
}
bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context& ctx) {
    return task_group_context_impl::is_group_execution_cancelled(ctx);
}
void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context& ctx) {
    task_group_context_impl::capture_fp_settings(ctx);
}

} // namespace r1
} // namespace detail
} // namespace tbb
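
/*
    Usage sketch (illustrative only, not part of this translation unit): the exported entry
    points above back the public oneTBB API. For example, cancelling a user-level
    task_group_context eventually routes to r1::cancel_group_execution() defined here:

        #include <oneapi/tbb/parallel_for.h>
        #include <oneapi/tbb/task_group.h>

        tbb::task_group_context ctx;
        // ... launch work bound to ctx, e.g. an overload such as tbb::parallel_for(range, body, ctx) ...
        ctx.cancel_group_execution();   // requests cancellation for ctx and its descendants
*/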