/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "oneapi/tbb/detail/_config.h"
#include "oneapi/tbb/tbb_allocator.h"
#include "oneapi/tbb/task_group.h"
#include "governor.h"
#include "thread_data.h"
#include "scheduler_common.h"
#include "itt_notify.h"
#include "task_dispatcher.h"

#include <type_traits>

namespace tbb {
namespace detail {
namespace r1 {

//------------------------------------------------------------------------
// tbb_exception_ptr
//------------------------------------------------------------------------
tbb_exception_ptr* tbb_exception_ptr::allocate() noexcept {
    tbb_exception_ptr* eptr = (tbb_exception_ptr*)allocate_memory(sizeof(tbb_exception_ptr));
    return eptr ? new (eptr) tbb_exception_ptr(std::current_exception()) : nullptr;
}

void tbb_exception_ptr::destroy() noexcept {
    this->~tbb_exception_ptr();
    deallocate_memory(this);
}

void tbb_exception_ptr::throw_self() {
    if (governor::rethrow_exception_broken()) fix_broken_rethrow();
    std::rethrow_exception(my_ptr);
}

//------------------------------------------------------------------------
// task_group_context
//------------------------------------------------------------------------

void task_group_context_impl::destroy(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);

    auto ctx_lifetime_state = ctx.my_lifetime_state.load(std::memory_order_relaxed);
    __TBB_ASSERT(ctx_lifetime_state != d1::task_group_context::lifetime_state::locked, nullptr);

    if (ctx_lifetime_state == d1::task_group_context::lifetime_state::bound) {
        // The owner can be destroyed at any moment. Access the associated data with caution.
        thread_data* owner = ctx.my_owner.load(std::memory_order_relaxed);
        if (governor::is_thread_data_set(owner)) {
            thread_data::context_list_state& cls = owner->my_context_list_state;
            // We are the owner, so cls is valid.
            // Local update of the context list
            std::uintptr_t local_count_snapshot = cls.epoch.load(std::memory_order_acquire);
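            // Descriptive note (added commentary, not normative): the removal below uses a
            // Dekker-style handshake between the owner and a possible non-owner destroyer.
            // Each side raises its own flag (cls.local_update here, cls.nonlocal_update in the
            // non-owner path further down) before inspecting the other's, so at least one of
            // the two observes the conflict and synchronizes via cls.mutex instead of
            // proceeding lock-free.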
            // The sequentially-consistent store prevents the load of the nonlocal update flag
            // from being hoisted before the store to the local update flag.
            cls.local_update = 1;
            if (cls.nonlocal_update.load(std::memory_order_acquire)) {
                spin_mutex::scoped_lock lock(cls.mutex);
                ctx.my_node.remove_relaxed();
                cls.local_update.store(0, std::memory_order_relaxed);
            } else {
                ctx.my_node.remove_relaxed();
                // The release fence is necessary so that the update of our neighbors in
                // the context list is committed before a possible concurrent destroyer
                // proceeds once the local update flag is reset by the following store.
                cls.local_update.store(0, std::memory_order_release);
                if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) {
                    // Another thread was propagating a cancellation request while we removed
                    // ourselves from the list. We must ensure that it is not accessing us
                    // when this destructor finishes. We'll be able to acquire the lock
                    // below only after the other thread finishes with us.
                    spin_mutex::scoped_lock lock(cls.mutex);
                } else {
                    // TODO: simplify exception propagation mechanism
                    std::atomic_thread_fence(std::memory_order_release);
                }
            }
        } else {
            d1::task_group_context::lifetime_state expected = d1::task_group_context::lifetime_state::bound;
            if (
#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910
                !((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong(
                    (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)expected,
                    (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked)
#else
                !ctx.my_lifetime_state.compare_exchange_strong(expected, d1::task_group_context::lifetime_state::locked)
#endif
            ) {
                __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::detached, nullptr);
                // The "owner" local variable can be a dangling pointer here. Do not access it.
                owner = nullptr;
                spin_wait_until_eq(ctx.my_owner, nullptr);
                // It is unsafe to remove the node because its neighbors might be already destroyed.
                // TODO: reconsider the logic.
                // ctx.my_node.remove_relaxed();
            } else {
                __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::bound, nullptr);
                __TBB_ASSERT(ctx.my_owner.load(std::memory_order_relaxed) != nullptr, nullptr);
                thread_data::context_list_state& cls = owner->my_context_list_state;
                __TBB_ASSERT(is_alive(cls.nonlocal_update.load(std::memory_order_relaxed)), "The owner should be alive.");

                ++cls.nonlocal_update;
                ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::dying, std::memory_order_release);
                spin_wait_until_eq(cls.local_update, 0u);
                {
                    spin_mutex::scoped_lock lock(cls.mutex);
                    ctx.my_node.remove_relaxed();
                }
                --cls.nonlocal_update;
            }
        }
    }

    if (ctx_lifetime_state == d1::task_group_context::lifetime_state::detached) {
        spin_wait_until_eq(ctx.my_owner, nullptr);
    }

    d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env);
#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER
    suppress_unused_warning(ctl);
#endif
    ctl->~cpu_ctl_env();

    if (ctx.my_exception)
        ctx.my_exception->destroy();
    ITT_STACK_DESTROY(ctx.my_itt_caller);

    poison_pointer(ctx.my_parent);
    poison_pointer(ctx.my_owner);
    poison_pointer(ctx.my_node.next);
    poison_pointer(ctx.my_node.prev);
    poison_pointer(ctx.my_exception);
    poison_pointer(ctx.my_itt_caller);
}
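
// Added descriptive note (not normative): the lifetime_state transitions performed in this
// translation unit are:
//   created -> locked           (bind_to, via CAS, before the first binding)
//   locked  -> bound            (bind_to_impl, once the parent has been recorded)
//   locked  -> isolated         (bind_to, when the context is not bound to a parent)
//   bound   -> locked -> dying  (destroy, when a non-owner thread destroys the context)
// The detached state is set on the owner thread's shutdown path (not in this file);
// destroy() then only waits for my_owner to become nullptr.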

void task_group_context_impl::initialize(d1::task_group_context& ctx) {
    ITT_TASK_GROUP(&ctx, ctx.my_name, nullptr);

    ctx.my_cpu_ctl_env = 0;
    ctx.my_cancellation_requested = 0;
    ctx.my_state.store(0, std::memory_order_relaxed);
    // The context starts in the "created" state and is promoted to "bound" (or "isolated")
    // on first use.
    ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::created, std::memory_order_relaxed);
    ctx.my_parent = nullptr;
    ctx.my_owner = nullptr;
    ctx.my_node.next.store(nullptr, std::memory_order_relaxed);
    ctx.my_node.prev.store(nullptr, std::memory_order_relaxed);
    ctx.my_exception = nullptr;
    ctx.my_itt_caller = nullptr;

    static_assert(sizeof(d1::cpu_ctl_env) <= sizeof(ctx.my_cpu_ctl_env), "FPU settings storage does not fit into uint64_t");
    d1::cpu_ctl_env* ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env;
    if (ctx.my_traits.fp_settings)
        ctl->get_env();
}

void task_group_context_impl::register_with(d1::task_group_context& ctx, thread_data* td) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    __TBB_ASSERT(td, NULL);
    ctx.my_owner.store(td, std::memory_order_relaxed);
    thread_data::context_list_state& cls = td->my_context_list_state;
    // The state propagation logic assumes new contexts are bound to the head of the list.
    ctx.my_node.prev.store(&cls.head, std::memory_order_relaxed);
    // Notify threads that may be concurrently destroying contexts registered
    // in this scheduler's list that a local list update is underway.
    // Prevent the load of the global propagation epoch counter from being hoisted before
    // the speculative stores above, as well as the load of the nonlocal update flag from
    // being hoisted before the store to the local update flag.
    cls.local_update = 1;
    // Finalize the local context list update.
    if (cls.nonlocal_update.load(std::memory_order_acquire)) {
        spin_mutex::scoped_lock lock(cls.mutex);
        d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed);
        head_next->prev.store(&ctx.my_node, std::memory_order_relaxed);
        ctx.my_node.next.store(head_next, std::memory_order_relaxed);
        cls.local_update.store(0, std::memory_order_relaxed);
        cls.head.next.store(&ctx.my_node, std::memory_order_relaxed);
    } else {
        d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed);
        head_next->prev.store(&ctx.my_node, std::memory_order_relaxed);
        ctx.my_node.next.store(head_next, std::memory_order_relaxed);
        cls.local_update.store(0, std::memory_order_release);
        // The thread-local list of contexts allows concurrent traversal by another thread
        // while propagating a state change. To ensure visibility of ctx.my_node's members
        // to the concurrently traversing thread, the list's head is updated by means
        // of a store-with-release.
        cls.head.next.store(&ctx.my_node, std::memory_order_release);
    }
}
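
// Added descriptive note (not normative): register_with() mirrors the removal protocol used
// in destroy() above. The owner raises cls.local_update before inspecting cls.nonlocal_update,
// and in the uncontended path publishes the new list head with a release store so that a
// thread traversing the list during state propagation observes a fully linked node.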

void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_data* td) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) == d1::task_group_context::lifetime_state::locked, "The context can be bound only under the lock.");
    __TBB_ASSERT(!ctx.my_parent, "Parent is set before initial binding");

    ctx.my_parent = td->my_task_dispatcher->m_execute_data_ext.context;
    __TBB_ASSERT(ctx.my_parent, NULL);

    // Inherit FPU settings only if the context has not captured FPU settings yet.
    if (!ctx.my_traits.fp_settings)
        copy_fp_settings(ctx, *ctx.my_parent);

    // The condition below prevents unnecessary thrashing of the parent context's cache line.
    if (ctx.my_parent->my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) {
        ctx.my_parent->my_state.store(d1::task_group_context::may_have_children, std::memory_order_relaxed); // full fence is below
    }
    if (ctx.my_parent->my_parent) {
        // Even if this context were made accessible for state change propagation
        // (by placing store_with_release(td->my_context_list_state.head.my_next, &ctx.my_node)
        // above), it still could be missed if state propagation from a grand-ancestor
        // was underway concurrently with binding.
        // Speculative propagation from the parent together with epoch counters
        // detecting the possibility of such a race makes it possible to avoid taking
        // locks when there is no contention.

        // The acquire fence is necessary to prevent reordering of subsequent speculative
        // loads of the parent state data out of the scope where the epoch counters comparison
        // can reliably validate it.
        uintptr_t local_count_snapshot = ctx.my_parent->my_owner.load(std::memory_order_relaxed)->my_context_list_state.epoch.load(std::memory_order_acquire);
        // Speculative propagation of the parent's state. The speculation will be
        // validated by the epoch counters check further on.
        ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
        register_with(ctx, td); // Issues full fence

        // If no state propagation was detected by the following condition, the above
        // full fence guarantees that the parent had the correct state during speculative
        // propagation before the fence. Otherwise the propagation from the parent is
        // repeated under the lock.
        if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) {
            // Another thread may be propagating a state change right now. So resort to the lock.
            context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
            ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
        }
    } else {
        register_with(ctx, td); // Issues full fence
        // As we do not have grand-ancestors, concurrent state propagation (if any)
        // may originate only from the parent context, and thus it is safe to directly
        // copy the state from it.
        ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
    }

    ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::bound, std::memory_order_release);
}

void task_group_context_impl::bind_to(d1::task_group_context& ctx, thread_data* td) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    d1::task_group_context::lifetime_state state = ctx.my_lifetime_state.load(std::memory_order_acquire);
    if (state <= d1::task_group_context::lifetime_state::locked) {
        if (state == d1::task_group_context::lifetime_state::created &&
#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910
            ((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong(
                (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)state,
                (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked)
#else
            ctx.my_lifetime_state.compare_exchange_strong(state, d1::task_group_context::lifetime_state::locked)
#endif
        ) {
            // If we are in the outermost task dispatch loop of an external thread, there
            // is nothing to bind this context to; we skip the binding step and treat the
            // context as isolated.
            __TBB_ASSERT(td->my_task_dispatcher->m_execute_data_ext.context != nullptr, nullptr);
            if (td->my_task_dispatcher->m_execute_data_ext.context == td->my_arena->my_default_ctx || !ctx.my_traits.bound) {
                if (!ctx.my_traits.fp_settings) {
                    copy_fp_settings(ctx, *td->my_arena->my_default_ctx);
                }
                ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::isolated, std::memory_order_release);
            } else {
                bind_to_impl(ctx, td);
            }
            ITT_STACK_CREATE(ctx.my_itt_caller);
        }
        spin_wait_while_eq(ctx.my_lifetime_state, d1::task_group_context::lifetime_state::locked);
    }
    __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::created, NULL);
    __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::locked, NULL);
}
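
/*
    Added illustrative note (not part of the original comments): how the ancestor walk in
    task_group_context_impl::propagate_task_group_state() below paints a chain. Assume a
    chain src -> A -> B (src is A's parent, A is B's parent), all registered in some
    thread's context list, and a propagation of my_cancellation_requested = 1 from src.
    When the traversal in thread_data::propagate_task_group_state() reaches B, the outer
    loop walks B's ancestors until it finds src; the inner loop then stores the new value
    into B and A, stopping before src (whose state was already set by the initiator).
    Contexts that are not descendants of src are left untouched because their ancestor
    walk never reaches src.
*/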

template <typename T>
void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state) {
        // Nothing to do, whether descending from "src" or not, so no need to scan.
        // Hopefully this happens often thanks to earlier invocations.
        // This optimization is enabled by LIFO order in the context lists:
        // - new contexts are bound to the beginning of lists;
        // - descendants are newer than ancestors;
        // - earlier invocations are therefore likely to "paint" long chains.
    } else if (&ctx == &src) {
        // This clause is disjoint from the traversal below, which skips src entirely.
        // Note that src.*mptr_state is not necessarily still equal to new_state (another thread may have changed it again).
        // Such interference is probably not frequent enough to justify writing new_state again (to make the other thread back down).
        // Letting the other thread prevail may also be fairer.
    } else {
        for (d1::task_group_context* ancestor = ctx.my_parent; ancestor != NULL; ancestor = ancestor->my_parent) {
            if (ancestor == &src) {
                for (d1::task_group_context* c = &ctx; c != ancestor; c = c->my_parent)
                    (c->*mptr_state).store(new_state, std::memory_order_relaxed);
                break;
            }
        }
    }
}

template <typename T>
void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
    spin_mutex::scoped_lock lock(my_context_list_state.mutex);
    // The acquire fence is necessary to ensure that the subsequent load of node->next
    // returns the correct value if the node was just inserted by another thread.
    // The fence also ensures visibility of the correct ctx.my_parent value.
    d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_acquire);
    while (node != &my_context_list_state.head) {
        d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node);
        if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state)
            task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state);
        node = node->next.load(std::memory_order_relaxed);
    }
    // Sync up the local propagation epoch with the global one. The release fence prevents
    // reordering of a possible store to *mptr_state after the sync point.
    my_context_list_state.epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release);
}
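
// Added descriptive note (not normative): market::propagate_task_group_state() below is the
// entry point for a state change. It takes the single global propagation lock, advances the
// global epoch, and then visits the context list of every worker and every external thread;
// per-thread traversal and the per-context ancestor walk are handled by the two functions above.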

template <typename T>
bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
    if (src.my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children)
        return true;
    // The whole propagation algorithm is performed under the lock in order to ensure correctness
    // in case of concurrent state changes at different levels of the context tree.
    // See note 2 in the comment block at the end of this file.
    context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
    if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state)
        // Another thread has concurrently changed the state. Back down.
        return false;
    // Advance the global state propagation epoch.
    ++the_context_state_propagation_epoch;
    // Propagate to all workers and external threads and sync up their local epochs with the global one.
    unsigned num_workers = my_first_unused_worker_idx;
    for (unsigned i = 0; i < num_workers; ++i) {
        thread_data* td = my_workers[i];
        // If the worker is only about to be registered, skip it.
        if (td)
            td->propagate_task_group_state(mptr_state, src, new_state);
    }
    // Propagate to all external threads.
    // The whole propagation sequence is locked, thus no contention is expected.
    for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); it++)
        it->propagate_task_group_state(mptr_state, src, new_state);
    return true;
}

bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    __TBB_ASSERT(ctx.my_cancellation_requested.load(std::memory_order_relaxed) <= 1, "The cancellation state can be either 0 or 1");
    if (ctx.my_cancellation_requested.load(std::memory_order_relaxed) || ctx.my_cancellation_requested.exchange(1)) {
        // This task group and any descendants have already been canceled.
        // (A newly added descendant would inherit its parent's ctx.my_cancellation_requested,
        // not missing out on any cancellation still being propagated, and a context cannot be uncanceled.)
        return false;
    }
    governor::get_thread_data()->my_arena->my_market->propagate_task_group_state(&d1::task_group_context::my_cancellation_requested, ctx, uint32_t(1));
    return true;
}

bool task_group_context_impl::is_group_execution_cancelled(const d1::task_group_context& ctx) {
    return ctx.my_cancellation_requested.load(std::memory_order_relaxed) != 0;
}

// IMPORTANT: It is assumed that this method is not used concurrently!
void task_group_context_impl::reset(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    //! TODO: Add an assertion that this context does not have children.
    // No fences are necessary since this context can be accessed from another thread
    // only after stealing has happened (which means the necessary fences were used).
    if (ctx.my_exception) {
        ctx.my_exception->destroy();
        ctx.my_exception = NULL;
    }
    ctx.my_cancellation_requested = 0;
}

// IMPORTANT: It is assumed that this method is not used concurrently!
void task_group_context_impl::capture_fp_settings(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    //! TODO: Add an assertion that this context does not have children.
    // No fences are necessary since this context can be accessed from another thread
    // only after stealing has happened (which means the necessary fences were used).
    d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env);
    if (!ctx.my_traits.fp_settings) {
        ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env;
        ctx.my_traits.fp_settings = true;
    }
    ctl->get_env();
}

void task_group_context_impl::copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src) {
    __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
    __TBB_ASSERT(!ctx.my_traits.fp_settings, "The context already has FPU settings.");
    __TBB_ASSERT(src.my_traits.fp_settings, "The source context does not have FPU settings.");

    const d1::cpu_ctl_env* src_ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&src.my_cpu_ctl_env);
    new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env(*src_ctl);
    ctx.my_traits.fp_settings = true;
}
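
/*
    Illustrative usage sketch (added commentary, not part of the library): how the machinery
    in this file is typically exercised from user code. The function name and loop bounds
    below are hypothetical; the context overload of tbb::parallel_for and the
    task_group_context members shown are part of the public oneTBB API.

        #include <oneapi/tbb/blocked_range.h>
        #include <oneapi/tbb/parallel_for.h>
        #include <oneapi/tbb/task_group.h>

        void user_code() {
            tbb::task_group_context root;   // initialized and bound lazily, on first use
            tbb::parallel_for(tbb::blocked_range<int>(0, 1000),
                [&](const tbb::blocked_range<int>& r) {
                    for (int i = r.begin(); i != r.end(); ++i)
                        if (i == 42)
                            root.cancel_group_execution();  // cancel_group_execution() above,
                                                            // then market-wide propagation
                }, root);
            bool cancelled = root.is_group_execution_cancelled();
            (void)cancelled;
        }
*/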

/*
    Comments:

    1. The premise of the cancellation support implementation is that cancellations are
       not part of the hot path of the program execution. Therefore any changes made to
       its implementation in order to reduce the overhead of the cancellation control flow
       must not increase the overhead of normal execution.

       In general, contexts are used by all threads, and their descendants are created in
       different threads as well. To minimize the impact of cross-thread tree maintenance
       (primarily the cost of synchronization), the tree of contexts is split into pieces,
       each of which is handled by a single thread. Each piece is represented as a list of
       contexts whose members were bound to their parents in that thread.

       The context tree maintenance and cancellation propagation algorithms are designed
       so that cross-thread access to a context list takes place only when a cancellation
       signal is sent (by the user or when an exception occurs), and synchronization is
       necessary only then. Thus the normal execution flow (without exceptions or
       cancellation) remains free of any synchronization done on behalf of exception
       handling and cancellation support.

    2. Consider parallel cancellations at different levels of the context tree:

        Ctx1 <- Cancelled by Thread1            |- Thread2 started processing
         |                                      |
        Ctx2                                    |- Thread1 started processing
         |                                   T1 |- Thread2 finishes and syncs up local counters
        Ctx3 <- Cancelled by Thread2            |
         |                                      |- Ctx5 is bound to Ctx2
        Ctx4                                    |
                                             T2 |- Thread1 reaches Ctx2

       The thread propagating each cancellation increments the global counter. However, the
       thread propagating the cancellation from the outermost context (Thread1) may be the
       last to finish, which means that the local counters may be synchronized (by Thread2,
       at time T1) before the cancellation has been propagated into Ctx2 (at time T2). If a
       new context (Ctx5) is created and bound to Ctx2 between T1 and T2, checking only its
       parent (Ctx2) may result in the cancellation request being lost.

       This issue is solved by doing the whole propagation under the lock.

       If we need more concurrency while processing parallel cancellations, we could try
       the following modification of the propagation algorithm:

           advance the global counter and remember it
           for each thread:
               scan the thread's list of contexts
           for each thread:
               sync up its local counter only if the global counter has not changed

       However, this version of the algorithm requires more analysis and verification.
*/

void __TBB_EXPORTED_FUNC initialize(d1::task_group_context& ctx) {
    task_group_context_impl::initialize(ctx);
}
void __TBB_EXPORTED_FUNC destroy(d1::task_group_context& ctx) {
    task_group_context_impl::destroy(ctx);
}
void __TBB_EXPORTED_FUNC reset(d1::task_group_context& ctx) {
    task_group_context_impl::reset(ctx);
}
bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context& ctx) {
    return task_group_context_impl::cancel_group_execution(ctx);
}
bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context& ctx) {
    return task_group_context_impl::is_group_execution_cancelled(ctx);
}
void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context& ctx) {
    task_group_context_impl::capture_fp_settings(ctx);
}

} // namespace r1
} // namespace detail
} // namespace tbb