/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "oneapi/tbb/detail/_config.h"
#include "oneapi/tbb/tbb_allocator.h"
#include "oneapi/tbb/task_group.h"
#include "governor.h"
#include "thread_data.h"
#include "scheduler_common.h"
#include "itt_notify.h"
#include "task_dispatcher.h"

#include <type_traits>

namespace tbb {
namespace detail {
namespace r1 {

//------------------------------------------------------------------------
// tbb_exception_ptr
//------------------------------------------------------------------------
tbb_exception_ptr* tbb_exception_ptr::allocate() noexcept {
    tbb_exception_ptr* eptr = (tbb_exception_ptr*)allocate_memory(sizeof(tbb_exception_ptr));
    return eptr ? new (eptr) tbb_exception_ptr(std::current_exception()) : nullptr;
}

void tbb_exception_ptr::destroy() noexcept {
    this->~tbb_exception_ptr();
    deallocate_memory(this);
}

void tbb_exception_ptr::throw_self() {
    if (governor::rethrow_exception_broken()) fix_broken_rethrow();
    std::rethrow_exception(my_ptr);
}
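
// A minimal lifecycle sketch of the wrapper above (illustrative only;
// risky_work() is a hypothetical user functor and the scheduler plumbing that
// carries the pointer between threads is elided). The exception is captured on
// the thread where it was thrown and rethrown on the thread that waits:
//
//     tbb_exception_ptr* eptr = nullptr;
//     try { risky_work(); }                                  // hypothetical user functor
//     catch (...) { eptr = tbb_exception_ptr::allocate(); }  // snapshots std::current_exception()
//     ...
//     if (eptr) {
//         try { eptr->throw_self(); }                        // rethrows on the waiting thread
//         catch (...) { eptr->destroy(); throw; }            // free the wrapper, pass it on
//     }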

//------------------------------------------------------------------------
// task_group_context
//------------------------------------------------------------------------

void task_group_context_impl::destroy(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr);

    if (ctx.my_context_list != nullptr) {
        __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) == d1::task_group_context::lifetime_state::bound, nullptr);
        // The owner can be destroyed at any moment. Access the associated data with caution.
        ctx.my_context_list->remove(ctx.my_node);
    }
    d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env);
#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER
    suppress_unused_warning(ctl);
#endif
    ctl->~cpu_ctl_env();

    auto exception = ctx.my_exception.load(std::memory_order_relaxed);
    if (exception) {
        exception->destroy();
    }
    ITT_STACK_DESTROY(ctx.my_itt_caller);

    poison_pointer(ctx.my_parent);
    poison_pointer(ctx.my_context_list);
    poison_pointer(ctx.my_node.my_next_node);
    poison_pointer(ctx.my_node.my_prev_node);
    poison_pointer(ctx.my_exception);
    poison_pointer(ctx.my_itt_caller);

    ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::dead, std::memory_order_release);
}

void task_group_context_impl::initialize(d1::task_group_context& ctx) {
    ITT_TASK_GROUP(&ctx, ctx.my_name, nullptr);

    ctx.my_node.my_next_node = &ctx.my_node;
    ctx.my_node.my_prev_node = &ctx.my_node;
    ctx.my_cpu_ctl_env = 0;
    ctx.my_cancellation_requested = 0;
    ctx.my_state.store(0, std::memory_order_relaxed);
    // The context starts in the created state; it is switched to bound (or isolated) at the first use.
    ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::created, std::memory_order_relaxed);
    ctx.my_parent = nullptr;
    ctx.my_context_list = nullptr;
    ctx.my_exception.store(nullptr, std::memory_order_relaxed);
    ctx.my_itt_caller = nullptr;

    static_assert(sizeof(d1::cpu_ctl_env) <= sizeof(ctx.my_cpu_ctl_env), "FPU settings storage does not fit into uint64_t");
    d1::cpu_ctl_env* ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env;
    if (ctx.my_traits.fp_settings)
        ctl->get_env();
}

void task_group_context_impl::register_with(d1::task_group_context& ctx, thread_data* td) {
    __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr);
    __TBB_ASSERT(td, nullptr);
    ctx.my_context_list = td->my_context_list;

    ctx.my_context_list->push_front(ctx.my_node);
}
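
// push_front above keeps each thread's context list in LIFO order: a context is
// always inserted ahead of its (older) ancestors. The propagation code below
// relies on this ordering. A hypothetical shape of such an insertion for a
// circular doubly-linked intrusive list with a sentinel head node (the real
// context_list additionally guards updates with its m_mutex and maintains an
// epoch counter; this is a sketch, not the actual implementation):
//
//     void push_front(node_type& n) {
//         n.my_prev_node = &head;                // link the new node after the sentinel
//         n.my_next_node = head.my_next_node;
//         head.my_next_node->my_prev_node = &n;
//         head.my_next_node = &n;
//     }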

void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_data* td) {
    __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr);
    __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) == d1::task_group_context::lifetime_state::locked, "The context can be bound only under the lock.");
    __TBB_ASSERT(!ctx.my_parent, "Parent is set before initial binding");

    ctx.my_parent = td->my_task_dispatcher->m_execute_data_ext.context;
    __TBB_ASSERT(ctx.my_parent, nullptr);

    // Inherit FPU settings only if the context has not captured FPU settings yet.
    if (!ctx.my_traits.fp_settings)
        copy_fp_settings(ctx, *ctx.my_parent);

    // The condition below prevents unnecessary thrashing of the parent context's cache line.
    if (ctx.my_parent->my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) {
        ctx.my_parent->my_state.store(d1::task_group_context::may_have_children, std::memory_order_relaxed); // full fence is below
    }
    if (ctx.my_parent->my_parent) {
        // Even if this context were made accessible for state change propagation
        // (by placing store_with_release(td->my_context_list_state.head.my_next, &ctx.my_node)
        // above), it still could be missed if state propagation from a grand-ancestor
        // was underway concurrently with binding.
        // Speculative propagation from the parent, together with epoch counters
        // detecting the possibility of such a race, makes it possible to avoid
        // taking locks when there is no contention.

        // Acquire fence is necessary to prevent reordering subsequent speculative
        // loads of parent state data out of the scope where the epoch counters comparison
        // can reliably validate it.
        uintptr_t local_count_snapshot = ctx.my_parent->my_context_list->epoch.load(std::memory_order_acquire);
        // Speculative propagation of the parent's state. The speculation will be
        // validated by the epoch counters check further on.
        ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
        register_with(ctx, td); // Issues full fence

        // If no state propagation was detected by the following condition, the above
        // full fence guarantees that the parent had correct state during speculative
        // propagation before the fence. Otherwise the propagation from the parent is
        // repeated under the lock.
        if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) {
            // Another thread may be propagating state change right now. So resort to the lock.
            context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
            ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
        }
    } else {
        register_with(ctx, td); // Issues full fence
        // As we do not have grand-ancestors, concurrent state propagation (if any)
        // may originate only from the parent context, and thus it is safe to directly
        // copy the state from it.
        ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
    }
}
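
// The speculative binding protocol above, distilled into pseudo-C++ (a sketch
// with simplified names, not the actual code: "state" stands for
// my_cancellation_requested, and the FPU-settings inheritance is elided):
//
//     uintptr_t snapshot = parent->my_context_list->epoch;   // acquire load
//     ctx.state = parent->state;                             // speculative copy, relaxed
//     register_with(ctx, td);                                // full fence publishes ctx
//     if (snapshot != the_context_state_propagation_epoch) { // a propagation may have raced
//         scoped_lock lock(the_context_state_propagation_mutex);
//         ctx.state = parent->state;                         // redo the copy under the lock
//     }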

void task_group_context_impl::bind_to(d1::task_group_context& ctx, thread_data* td) {
    __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr);
    d1::task_group_context::lifetime_state state = ctx.my_lifetime_state.load(std::memory_order_acquire);
    if (state <= d1::task_group_context::lifetime_state::locked) {
        if (state == d1::task_group_context::lifetime_state::created &&
#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910
            ((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong(
                (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)state,
                (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked)
#else
            ctx.my_lifetime_state.compare_exchange_strong(state, d1::task_group_context::lifetime_state::locked)
#endif
        ) {
            // If we are in the outermost task dispatch loop of an external thread, then
            // there is nothing to bind this context to, and we skip the binding part,
            // treating the context as isolated.
            __TBB_ASSERT(td->my_task_dispatcher->m_execute_data_ext.context != nullptr, nullptr);
            d1::task_group_context::lifetime_state release_state{};
            if (td->my_task_dispatcher->m_execute_data_ext.context == td->my_arena->my_default_ctx || !ctx.my_traits.bound) {
                if (!ctx.my_traits.fp_settings) {
                    copy_fp_settings(ctx, *td->my_arena->my_default_ctx);
                }
                release_state = d1::task_group_context::lifetime_state::isolated;
            } else {
                bind_to_impl(ctx, td);
                release_state = d1::task_group_context::lifetime_state::bound;
            }
            ITT_STACK_CREATE(ctx.my_itt_caller);
            ctx.my_lifetime_state.store(release_state, std::memory_order_release);
        }
        spin_wait_while_eq(ctx.my_lifetime_state, d1::task_group_context::lifetime_state::locked);
    }
    __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::created, nullptr);
    __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::locked, nullptr);
}
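
// The lifetime_state transitions driven by bind_to and destroy, as far as this
// translation unit is concerned (a summary of the code above and of destroy();
// the CAS guarantees that exactly one thread performs the created -> locked
// step, while concurrent callers spin-wait until the winner publishes the
// result with the release store):
//
//     created --CAS--> locked --> bound     (bind_to_impl succeeded)
//                             \-> isolated  (outermost dispatch loop or unbound trait)
//     bound / isolated --> dead             (task_group_context_impl::destroy)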

template <typename T>
void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
    __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr);
    /* 1. if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state):
          Nothing to do, whether descending from "src" or not, so no need to scan.
          Hopefully this happens often thanks to earlier invocations.
          This optimization is enabled by the LIFO order in the context lists:
          - new contexts are bound to the beginning of lists;
          - descendants are newer than ancestors;
          - earlier invocations are therefore likely to "paint" long chains.
       2. if (&ctx != &src):
          This check is disjoint from the traversal below, which skips src entirely.
          Note that src.*mptr_state is not necessarily still equal to new_state (another thread may have changed it again).
          Such interference is probably not frequent enough to justify writing new_state again (to make the other thread back down).
          Letting the other thread prevail may also be fairer.
    */
    if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state && &ctx != &src) {
        for (d1::task_group_context* ancestor = ctx.my_parent; ancestor != nullptr; ancestor = ancestor->my_parent) {
            if (ancestor == &src) {
                for (d1::task_group_context* c = &ctx; c != ancestor; c = c->my_parent)
                    (c->*mptr_state).store(new_state, std::memory_order_relaxed);
                break;
            }
        }
    }
}
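
// A worked example of the traversal above: given the parent chain
// ctx -> B -> A -> src -> root, a call with (ctx, src) first walks up from ctx
// until it finds src among the ancestors, then repaints the segment ctx, B, A
// with new_state; src itself is skipped because the caller already updated it.
// If src does not occur on the chain (ctx is not its descendant), nothing is
// written.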

template <typename T>
void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
    mutex::scoped_lock lock(my_context_list->m_mutex);
    // Acquire fence is necessary to ensure that the subsequent node->my_next load
    // returns the correct value in case the node was just inserted in another thread.
    // The fence also ensures visibility of the correct ctx.my_parent value.
    for (context_list::iterator it = my_context_list->begin(); it != my_context_list->end(); ++it) {
        d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, &(*it));
        if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state)
            task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state);
    }
    // Sync up the local propagation epoch with the global one. Release fence prevents
    // reordering of a possible store to *mptr_state after the sync point.
    my_context_list->epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release);
}

template <typename T>
bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
    if (src.my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children)
        return true;
    // The whole propagation algorithm is performed under the lock in order to ensure
    // correctness in case of concurrent state changes at different levels of the
    // context tree. See the comment at the bottom of this file.
    context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
    if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state)
        // Another thread has concurrently changed the state. Back down.
        return false;
    // Advance the global state propagation epoch.
    ++the_context_state_propagation_epoch;
    // Propagate to all workers and external threads and sync up their local epochs with the global one.
    unsigned num_workers = my_first_unused_worker_idx;
    for (unsigned i = 0; i < num_workers; ++i) {
        thread_data* td = my_workers[i];
        // If the worker is only about to be registered, skip it.
        if (td)
            td->propagate_task_group_state(mptr_state, src, new_state);
    }
    // Propagate to all external threads.
    // The whole propagation sequence is locked, thus no contention is expected.
    for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); ++it)
        it->propagate_task_group_state(mptr_state, src, new_state);
    return true;
}

bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr);
    __TBB_ASSERT(ctx.my_cancellation_requested.load(std::memory_order_relaxed) <= 1, "The cancellation state can be either 0 or 1");
    if (ctx.my_cancellation_requested.load(std::memory_order_relaxed) || ctx.my_cancellation_requested.exchange(1)) {
        // This task group and any descendants have already been canceled.
        // (A newly added descendant would inherit its parent's ctx.my_cancellation_requested,
        // not missing out on any cancellation still being propagated, and a context cannot be uncanceled.)
        return false;
    }
    governor::get_thread_data()->my_arena->my_market->propagate_task_group_state(&d1::task_group_context::my_cancellation_requested, ctx, uint32_t(1));
    return true;
}
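
// How cancellation is typically reached from user code (a sketch; the public
// tbb::task_group_context and cancel_group_execution() API is real, while
// found_answer() and N are placeholders; the wiring through governor, arena,
// and market shown above is internal):
//
//     tbb::task_group_context ctx;
//     tbb::parallel_for(0, N, [&](int i) {
//         if (found_answer(i))
//             ctx.cancel_group_execution();   // ends up in the function above
//     }, ctx);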

bool task_group_context_impl::is_group_execution_cancelled(const d1::task_group_context& ctx) {
    return ctx.my_cancellation_requested.load(std::memory_order_relaxed) != 0;
}

// IMPORTANT: It is assumed that this method is not used concurrently!
void task_group_context_impl::reset(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr);
    //! TODO: Add assertion that this context does not have children
    // No fences are necessary since this context can be accessed from another thread
    // only after stealing happened (which means necessary fences were used).

    auto exception = ctx.my_exception.load(std::memory_order_relaxed);
    if (exception) {
        exception->destroy();
        ctx.my_exception.store(nullptr, std::memory_order_relaxed);
    }
    ctx.my_cancellation_requested = 0;
}

// IMPORTANT: It is assumed that this method is not used concurrently!
void task_group_context_impl::capture_fp_settings(d1::task_group_context& ctx) {
    __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr);
    //! TODO: Add assertion that this context does not have children
    // No fences are necessary since this context can be accessed from another thread
    // only after stealing happened (which means necessary fences were used).
    d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env);
    if (!ctx.my_traits.fp_settings) {
        ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env;
        ctx.my_traits.fp_settings = true;
    }
    ctl->get_env();
}

void task_group_context_impl::copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src) {
    __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr);
    __TBB_ASSERT(!ctx.my_traits.fp_settings, "The context already has FPU settings.");
    __TBB_ASSERT(src.my_traits.fp_settings, "The source context does not have FPU settings.");

    const d1::cpu_ctl_env* src_ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&src.my_cpu_ctl_env);
    new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env(*src_ctl);
    ctx.my_traits.fp_settings = true;
}
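
// Intended use of the captured FPU settings (a sketch under the assumption that
// cpu_ctl_env wraps the platform's floating-point control state, with get_env()
// reading the current state and a matching set_env() applying a stored one when
// the dispatcher switches between contexts with different fp_settings):
//
//     d1::cpu_ctl_env guard;
//     guard.get_env();   // remember the caller's floating-point state
//     ctl->set_env();    // run tasks under the context's captured state
//     ...                // task execution
//     guard.set_env();   // restore the caller's state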

/*
    Comments:

    1. The premise of the cancellation support implementation is that cancellations are
       not part of the hot path of the program execution. Therefore all changes intended
       to reduce the overhead of the cancellation control flow should be made only in
       ways that do not increase the overhead of normal execution.

       In general, contexts are used by all threads, and their descendants are created in
       different threads as well. In order to minimize the impact of the cross-thread tree
       maintenance (first of all because of the synchronization), the tree of contexts
       is split into pieces, each of which is handled by a single thread. Such pieces
       are represented as lists of contexts, whose members are contexts that were
       bound to their parents in the given thread.

       The context tree maintenance and cancellation propagation algorithms are designed
       in such a manner that cross-thread access to a context list takes place only
       when a cancellation signal is sent (by the user or when an exception happens), and
       synchronization is necessary only then. Thus the normal execution flow (without
       exceptions and cancellation) remains free from any synchronization done on
       behalf of exception handling and cancellation support.

    2. Consider parallel cancellations at different levels of the context tree:

        Ctx1 <- Cancelled by Thread1            |- Thread2 started processing
         |                                      |
        Ctx2                                    |- Thread1 started processing
         |                                   T1 |- Thread2 finishes and syncs up local counters
        Ctx3 <- Cancelled by Thread2            |
         |                                      |- Ctx5 is bound to Ctx2
        Ctx4                                    |
                                             T2 |- Thread1 reaches Ctx2

       The thread propagating each cancellation increments the global counter. However,
       the thread propagating the cancellation from the outermost context (Thread1) may
       be the last to finish, which means that the local counters may be synchronized
       earlier (by Thread2, at Time1) than the cancellation is propagated into Ctx2
       (at Time2). If a new context (Ctx5) is created and bound to Ctx2 between Time1
       and Time2, checking only its parent (Ctx2) may result in the cancellation request
       being lost.

       This issue is solved by doing the whole propagation under the lock.

       If we need more concurrency while processing parallel cancellations, we could try
       the following modification of the propagation algorithm:

           advance the global counter and remember it
           for each thread:
               scan the thread's list of contexts
           for each thread:
               sync up its local counter only if the global counter has not changed

       However, this version of the algorithm requires more analysis and verification.
*/

void __TBB_EXPORTED_FUNC initialize(d1::task_group_context& ctx) {
    task_group_context_impl::initialize(ctx);
}
void __TBB_EXPORTED_FUNC destroy(d1::task_group_context& ctx) {
    task_group_context_impl::destroy(ctx);
}
void __TBB_EXPORTED_FUNC reset(d1::task_group_context& ctx) {
    task_group_context_impl::reset(ctx);
}
bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context& ctx) {
    return task_group_context_impl::cancel_group_execution(ctx);
}
bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context& ctx) {
    return task_group_context_impl::is_group_execution_cancelled(ctx);
}
void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context& ctx) {
    task_group_context_impl::capture_fp_settings(ctx);
}

} // namespace r1
} // namespace detail
} // namespace tbb