xref: /oneTBB/include/oneapi/tbb/parallel_invoke.h (revision c4a799df)
/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/
1649e08aacStbbdev 
1749e08aacStbbdev #ifndef __TBB_parallel_invoke_H
1849e08aacStbbdev #define __TBB_parallel_invoke_H
1949e08aacStbbdev 
2049e08aacStbbdev #include "detail/_config.h"
2149e08aacStbbdev #include "detail/_namespace_injection.h"
2249e08aacStbbdev #include "detail/_exception.h"
2349e08aacStbbdev #include "detail/_task.h"
2449e08aacStbbdev #include "detail/_template_helpers.h"
2549e08aacStbbdev #include "detail/_small_object_pool.h"
2649e08aacStbbdev 
2749e08aacStbbdev #include "task_group.h"
2849e08aacStbbdev 
2949e08aacStbbdev #include <tuple>
3049e08aacStbbdev #include <atomic>
3149e08aacStbbdev #include <utility>
3249e08aacStbbdev 
3349e08aacStbbdev namespace tbb {
3449e08aacStbbdev namespace detail {
3549e08aacStbbdev namespace d1 {
3649e08aacStbbdev 
3749e08aacStbbdev //! Simple task object, executing user method
3849e08aacStbbdev template<typename Function, typename WaitObject>
3949e08aacStbbdev struct function_invoker : public task {
function_invokerfunction_invoker4049e08aacStbbdev     function_invoker(const Function& function, WaitObject& wait_ctx) :
4149e08aacStbbdev         my_function(function),
4249e08aacStbbdev         parent_wait_ctx(wait_ctx)
4349e08aacStbbdev     {}
4449e08aacStbbdev 
executefunction_invoker4549e08aacStbbdev     task* execute(execution_data& ed) override {
4649e08aacStbbdev         my_function();
4749e08aacStbbdev         parent_wait_ctx.release(ed);
4849e08aacStbbdev         call_itt_task_notify(destroy, this);
4949e08aacStbbdev         return nullptr;
5049e08aacStbbdev     }
5149e08aacStbbdev 
cancelfunction_invoker5249e08aacStbbdev     task* cancel(execution_data& ed) override {
5349e08aacStbbdev         parent_wait_ctx.release(ed);
5449e08aacStbbdev         return nullptr;
5549e08aacStbbdev     }
5649e08aacStbbdev 
5749e08aacStbbdev     const Function& my_function;
5849e08aacStbbdev     WaitObject& parent_wait_ctx;
5949e08aacStbbdev }; // struct function_invoker
6049e08aacStbbdev 
6149e08aacStbbdev //! Task object for managing subroots in trinary task trees.
62*c4a799dfSJhaShweta1 // Endowed with additional synchronization logic (compatible with wait object interfaces) to support
6349e08aacStbbdev // continuation passing execution. This task spawns 2 function_invoker tasks with first and second functors
6449e08aacStbbdev // and then executes first functor by itself. But only the last executed functor must destruct and deallocate
6549e08aacStbbdev // the subroot task.
6649e08aacStbbdev template<typename F1, typename F2, typename F3>
6749e08aacStbbdev struct invoke_subroot_task : public task {
6849e08aacStbbdev     wait_context& root_wait_ctx;
6949e08aacStbbdev     std::atomic<unsigned> ref_count{0};
7049e08aacStbbdev     bool child_spawned = false;
7149e08aacStbbdev 
7249e08aacStbbdev     const F1& self_invoked_functor;
7349e08aacStbbdev     function_invoker<F2, invoke_subroot_task<F1, F2, F3>> f2_invoker;
7449e08aacStbbdev     function_invoker<F3, invoke_subroot_task<F1, F2, F3>> f3_invoker;
7549e08aacStbbdev 
7649e08aacStbbdev     task_group_context& my_execution_context;
7749e08aacStbbdev     small_object_allocator my_allocator;
7849e08aacStbbdev 
invoke_subroot_taskinvoke_subroot_task7949e08aacStbbdev     invoke_subroot_task(const F1& f1, const F2& f2, const F3& f3, wait_context& wait_ctx, task_group_context& context,
8049e08aacStbbdev                  small_object_allocator& alloc) :
8149e08aacStbbdev         root_wait_ctx(wait_ctx),
8249e08aacStbbdev         self_invoked_functor(f1),
8349e08aacStbbdev         f2_invoker(f2, *this),
8449e08aacStbbdev         f3_invoker(f3, *this),
8549e08aacStbbdev         my_execution_context(context),
8649e08aacStbbdev         my_allocator(alloc)
8749e08aacStbbdev     {
8849e08aacStbbdev         root_wait_ctx.reserve();
8949e08aacStbbdev     }
9049e08aacStbbdev 
finalizeinvoke_subroot_task9149e08aacStbbdev     void finalize(const execution_data& ed) {
9249e08aacStbbdev         root_wait_ctx.release();
9349e08aacStbbdev 
9449e08aacStbbdev         my_allocator.delete_object(this, ed);
9549e08aacStbbdev     }
9649e08aacStbbdev 
releaseinvoke_subroot_task9749e08aacStbbdev     void release(const execution_data& ed) {
9849e08aacStbbdev         __TBB_ASSERT(ref_count > 0, nullptr);
9949e08aacStbbdev         call_itt_task_notify(releasing, this);
10049e08aacStbbdev         if( --ref_count == 0 ) {
10149e08aacStbbdev             call_itt_task_notify(acquired, this);
10249e08aacStbbdev             finalize(ed);
10349e08aacStbbdev         }
10449e08aacStbbdev     }
10549e08aacStbbdev 
executeinvoke_subroot_task10649e08aacStbbdev     task* execute(execution_data& ed) override {
10749e08aacStbbdev         ref_count.fetch_add(3, std::memory_order_relaxed);
10849e08aacStbbdev         spawn(f3_invoker, my_execution_context);
10949e08aacStbbdev         spawn(f2_invoker, my_execution_context);
11049e08aacStbbdev         self_invoked_functor();
11149e08aacStbbdev 
11249e08aacStbbdev         release(ed);
11349e08aacStbbdev         return nullptr;
11449e08aacStbbdev     }
11549e08aacStbbdev 
cancelinvoke_subroot_task11649e08aacStbbdev     task* cancel(execution_data& ed) override {
11749e08aacStbbdev         if( ref_count > 0 ) { // detect children spawn
11849e08aacStbbdev             release(ed);
11949e08aacStbbdev         } else {
12049e08aacStbbdev             finalize(ed);
12149e08aacStbbdev         }
12249e08aacStbbdev         return nullptr;
12349e08aacStbbdev     }
12449e08aacStbbdev }; // struct subroot_task
12549e08aacStbbdev 
12649e08aacStbbdev class invoke_root_task {
12749e08aacStbbdev public:
invoke_root_task(wait_context & wc)12849e08aacStbbdev     invoke_root_task(wait_context& wc) : my_wait_context(wc) {}
release(const execution_data &)12949e08aacStbbdev     void release(const execution_data&) {
13049e08aacStbbdev         my_wait_context.release();
13149e08aacStbbdev     }
13249e08aacStbbdev private:
13349e08aacStbbdev     wait_context& my_wait_context;
13449e08aacStbbdev };
13549e08aacStbbdev 
13649e08aacStbbdev template<typename F1>
invoke_recursive_separation(wait_context & root_wait_ctx,task_group_context & context,const F1 & f1)13749e08aacStbbdev void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1) {
13849e08aacStbbdev     root_wait_ctx.reserve(1);
13949e08aacStbbdev     invoke_root_task root(root_wait_ctx);
14049e08aacStbbdev     function_invoker<F1, invoke_root_task> invoker1(f1, root);
14149e08aacStbbdev 
14249e08aacStbbdev     execute_and_wait(invoker1, context, root_wait_ctx, context);
14349e08aacStbbdev }
14449e08aacStbbdev 
14549e08aacStbbdev template<typename F1, typename F2>
invoke_recursive_separation(wait_context & root_wait_ctx,task_group_context & context,const F1 & f1,const F2 & f2)14649e08aacStbbdev void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2) {
14749e08aacStbbdev     root_wait_ctx.reserve(2);
14849e08aacStbbdev     invoke_root_task root(root_wait_ctx);
14949e08aacStbbdev     function_invoker<F1, invoke_root_task> invoker1(f1, root);
15049e08aacStbbdev     function_invoker<F2, invoke_root_task> invoker2(f2, root);
15149e08aacStbbdev 
15249e08aacStbbdev     spawn(invoker1, context);
15349e08aacStbbdev     execute_and_wait(invoker2, context, root_wait_ctx, context);
15449e08aacStbbdev }
15549e08aacStbbdev 
15649e08aacStbbdev template<typename F1, typename F2, typename F3>
invoke_recursive_separation(wait_context & root_wait_ctx,task_group_context & context,const F1 & f1,const F2 & f2,const F3 & f3)15749e08aacStbbdev void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2, const F3& f3) {
15849e08aacStbbdev     root_wait_ctx.reserve(3);
15949e08aacStbbdev     invoke_root_task root(root_wait_ctx);
16049e08aacStbbdev     function_invoker<F1, invoke_root_task> invoker1(f1, root);
16149e08aacStbbdev     function_invoker<F2, invoke_root_task> invoker2(f2, root);
16249e08aacStbbdev     function_invoker<F3, invoke_root_task> invoker3(f3, root);
16349e08aacStbbdev 
16449e08aacStbbdev     //TODO: implement sub root for two tasks (measure performance)
16549e08aacStbbdev     spawn(invoker1, context);
16649e08aacStbbdev     spawn(invoker2, context);
16749e08aacStbbdev     execute_and_wait(invoker3, context, root_wait_ctx, context);
16849e08aacStbbdev }
16949e08aacStbbdev 
17049e08aacStbbdev template<typename F1, typename F2, typename F3, typename... Fs>
invoke_recursive_separation(wait_context & root_wait_ctx,task_group_context & context,const F1 & f1,const F2 & f2,const F3 & f3,const Fs &...fs)17149e08aacStbbdev void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context,
17249e08aacStbbdev                                  const F1& f1, const F2& f2, const F3& f3, const Fs&... fs) {
17349e08aacStbbdev     small_object_allocator alloc{};
17449e08aacStbbdev     auto sub_root = alloc.new_object<invoke_subroot_task<F1, F2, F3>>(f1, f2, f3, root_wait_ctx, context, alloc);
17549e08aacStbbdev     spawn(*sub_root, context);
17649e08aacStbbdev 
17749e08aacStbbdev     invoke_recursive_separation(root_wait_ctx, context, fs...);
17849e08aacStbbdev }
17949e08aacStbbdev 
18049e08aacStbbdev template<typename... Fs>
parallel_invoke_impl(task_group_context & context,const Fs &...fs)18149e08aacStbbdev void parallel_invoke_impl(task_group_context& context, const Fs&... fs) {
18249e08aacStbbdev     static_assert(sizeof...(Fs) >= 2, "Parallel invoke may be called with at least two callable");
18349e08aacStbbdev     wait_context root_wait_ctx{0};
18449e08aacStbbdev 
18549e08aacStbbdev     invoke_recursive_separation(root_wait_ctx, context, fs...);
18649e08aacStbbdev }
18749e08aacStbbdev 
18849e08aacStbbdev template<typename F1, typename... Fs>
parallel_invoke_impl(const F1 & f1,const Fs &...fs)18949e08aacStbbdev void parallel_invoke_impl(const F1& f1, const Fs&... fs) {
19049e08aacStbbdev     static_assert(sizeof...(Fs) >= 1, "Parallel invoke may be called with at least two callable");
19149e08aacStbbdev     task_group_context context(PARALLEL_INVOKE);
19249e08aacStbbdev     wait_context root_wait_ctx{0};
19349e08aacStbbdev 
19449e08aacStbbdev     invoke_recursive_separation(root_wait_ctx, context, fs..., f1);
19549e08aacStbbdev }
19649e08aacStbbdev 
//! Moves the last argument of a pack to the front and calls parallel_invoke_impl.
// A user-provided task_group_context, if any, is the last argument of parallel_invoke; rotating it
// to the front lets overload resolution in parallel_invoke_impl tell the two cases apart.
// The first template argument is a std::tuple accumulating the types already walked over; the
// remaining arguments are the types still to be processed.
template <typename Collected, typename... Remaining>
struct invoke_helper;

// Recursive step: append the next type to the accumulated tuple and continue with the rest.
template <typename... Seen, typename Next, typename... Rest>
struct invoke_helper<std::tuple<Seen...>, Next, Rest...> : invoke_helper<std::tuple<Seen..., Next>, Rest...> {};

// Final step: only the last type is left; Last is either a task_group_context or a callable.
template <typename... Leading, typename Last>
struct invoke_helper<std::tuple<Leading...>, Last> {
    // Takes the arguments in their original order and forwards them with the last one first.
    void operator()(Leading&&... leading, Last&& last) {
        parallel_invoke_impl(std::forward<Last>(last), std::forward<Leading>(leading)...);
    }
};
21049e08aacStbbdev 
21149e08aacStbbdev //! Parallel execution of several function objects
21249e08aacStbbdev // We need to pass parameter pack through forwarding reference,
21349e08aacStbbdev // since this pack may contain task_group_context that must be passed via lvalue non-const reference
21449e08aacStbbdev template<typename... Fs>
21549e08aacStbbdev void parallel_invoke(Fs&&... fs) {
21649e08aacStbbdev     invoke_helper<std::tuple<>, Fs...>()(std::forward<Fs>(fs)...);
21749e08aacStbbdev }
21849e08aacStbbdev 
21949e08aacStbbdev } // namespace d1
22049e08aacStbbdev } // namespace detail
22149e08aacStbbdev 
22249e08aacStbbdev inline namespace v1 {
22349e08aacStbbdev using detail::d1::parallel_invoke;
22449e08aacStbbdev } // namespace v1
22549e08aacStbbdev 
22649e08aacStbbdev } // namespace tbb
22749e08aacStbbdev #endif /* __TBB_parallel_invoke_H */
228