1 /*
2     Copyright (c) 2005-2023 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #ifndef __TBB__flow_graph_node_impl_H
18 #define __TBB__flow_graph_node_impl_H
19 
20 #ifndef __TBB_flow_graph_H
21 #error Do not #include this internal file directly; use public TBB headers instead.
22 #endif
23 
24 #include "_flow_graph_item_buffer_impl.h"
25 
26 template< typename T, typename A >
27 class function_input_queue : public item_buffer<T,A> {
28 public:
empty()29     bool empty() const {
30         return this->buffer_empty();
31     }
32 
front()33     const T& front() const {
34         return this->item_buffer<T, A>::front();
35     }
36 
pop()37     void pop() {
38         this->destroy_front();
39     }
40 
push(T & t)41     bool push( T& t ) {
42         return this->push_back( t );
43     }
44 };
45 
46 //! Input and scheduling for a function node that takes a type Input as input
47 //  The only up-ref is apply_body_impl, which should implement the function
48 //  call and any handling of the result.
//! Input and scheduling for a function node that takes a type Input as input
//  The only up-ref is apply_body_impl, which should implement the function
//  call and any handling of the result.
template< typename Input, typename Policy, typename A, typename ImplType >
class function_input_base : public receiver<Input>, no_assign {
    // Operation codes handled by the aggregator: predecessor add/remove,
    // forwarding of postponed items, a bypassing put, completion of a body
    // invocation, and a concurrency-slot reservation for lightweight puts.
    enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency
    };
    typedef function_input_base<Input, Policy, A, ImplType> class_type;

public:

    //! The input type of this receiver
    typedef Input input_type;
    typedef typename receiver<input_type>::predecessor_type predecessor_type;
    typedef predecessor_cache<input_type, null_mutex > predecessor_cache_type;
    typedef function_input_queue<input_type, A> input_queue_type;
    typedef typename allocator_traits<A>::template rebind_alloc<input_queue_type> allocator_type;
    // queueing and rejecting are mutually exclusive buffering policies.
    static_assert(!has_policy<queueing, Policy>::value || !has_policy<rejecting, Policy>::value, "");

    //! Constructor for function_input_base
    // An input queue is allocated for every non-rejecting policy; a rejecting
    // node refuses items instead of buffering them (my_queue stays null).
    function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority, bool is_no_throw )
        : my_graph_ref(g), my_max_concurrency(max_concurrency)
        , my_concurrency(0), my_priority(a_priority), my_is_no_throw(is_no_throw)
        , my_queue(!has_policy<rejecting, Policy>::value ? new input_queue_type() : nullptr)
        , my_predecessors(this)
        , forwarder_busy(false)
    {
        my_aggregator.initialize_handler(handler_type(this));
    }

    //! Copy constructor: copies configuration only; buffered items,
    //  predecessors, and in-flight state are not carried over.
    function_input_base( const function_input_base& src )
        : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority, src.my_is_no_throw) {}

    //! Destructor
    // The queue is allocated by the constructor for {multi}function_node.
    // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead.
    // This would be an interface-breaking change.
    virtual ~function_input_base() {
        delete my_queue;
        my_queue = nullptr;
    }

    //! Puts an item to the node; returns the task to execute (possibly
    //  nullptr or SUCCESSFULLY_ENQUEUED). The lightweight (execute-in-place)
    //  path is taken only when the body is known not to throw.
    graph_task* try_put_task( const input_type& t) override {
        if ( my_is_no_throw )
            return try_put_task_impl(t, has_policy<lightweight, Policy>());
        else
            return try_put_task_impl(t, std::false_type());
    }

    //! Adds src to the list of cached predecessors.
    bool register_predecessor( predecessor_type &src ) override {
        operation_type op_data(reg_pred);
        op_data.r = &src;
        my_aggregator.execute(&op_data);
        return true;
    }

    //! Removes src from the list of cached predecessors.
    bool remove_predecessor( predecessor_type &src ) override {
        operation_type op_data(rem_pred);
        op_data.r = &src;
        my_aggregator.execute(&op_data);
        return true;
    }

protected:

    //! Resets concurrency bookkeeping, the input queue (if any), and the
    //  predecessor cache; f controls whether edges are cleared as well.
    void reset_function_input_base( reset_flags f) {
        my_concurrency = 0;
        if(my_queue) {
            my_queue->reset();
        }
        reset_receiver(f);
        forwarder_busy = false;
    }

    graph& my_graph_ref;
    const size_t my_max_concurrency;   // 0 means unlimited concurrency
    size_t my_concurrency;             // body invocations currently in flight
    node_priority_t my_priority;
    const bool my_is_no_throw;         // true when the body is noexcept
    input_queue_type *my_queue;        // owned; null for the rejecting policy
    predecessor_cache<input_type, null_mutex > my_predecessors;

    void reset_receiver( reset_flags f) {
        if( f & rf_clear_edges) my_predecessors.clear();
        else
            my_predecessors.reset();
        __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed");
    }

    graph& graph_reference() const override {
        return my_graph_ref;
    }

    //! Called after a body invocation finishes: releases the concurrency slot
    //  and returns a task for the next postponed item, if any.
    graph_task* try_get_postponed_task(const input_type& i) {
        operation_type op_data(i, app_body_bypass);  // tries to pop an item or get_item
        my_aggregator.execute(&op_data);
        return op_data.bypass_t;
    }

private:

    friend class apply_body_task_bypass< class_type, input_type >;
    friend class forward_task_bypass< class_type >;

    //! A record queued to the aggregator; at most one of elem/r is meaningful,
    //  selected by `type`.
    class operation_type : public aggregated_operation< operation_type > {
    public:
        char type;
        union {
            input_type *elem;        // item for tryput_bypass / app_body_bypass
            predecessor_type *r;     // predecessor for reg_pred / rem_pred
        };
        graph_task* bypass_t;        // task produced by the handler, if any
        operation_type(const input_type& e, op_type t) :
            type(char(t)), elem(const_cast<input_type*>(&e)), bypass_t(nullptr) {}
        operation_type(op_type t) : type(char(t)), r(nullptr), bypass_t(nullptr) {}
    };

    bool forwarder_busy;   // true while a forwarder task is pending or running
    typedef aggregating_functor<class_type, operation_type> handler_type;
    friend class aggregating_functor<class_type, operation_type>;
    aggregator< handler_type, operation_type > my_aggregator;

    //! Takes the next postponed input (from the queue, or pulled from a
    //  predecessor when there is no queue) and wraps it in a body task,
    //  occupying a concurrency slot. Called only under the aggregator.
    graph_task* perform_queued_requests() {
        graph_task* new_task = nullptr;
        if(my_queue) {
            if(!my_queue->empty()) {
                ++my_concurrency;
                new_task = create_body_task(my_queue->front());

                my_queue->pop();
            }
        }
        else {
            input_type i;
            if(my_predecessors.get_item(i)) {
                ++my_concurrency;
                new_task = create_body_task(i);
            }
        }
        return new_task;
    }
    //! Aggregator handler: drains the pending operation list serially, so the
    //  state it touches needs no additional locking.
    void handle_operations(operation_type *op_list) {
        operation_type* tmp;
        while (op_list) {
            tmp = op_list;
            op_list = op_list->next;
            switch (tmp->type) {
            case reg_pred:
                my_predecessors.add(*(tmp->r));
                tmp->status.store(SUCCEEDED, std::memory_order_release);
                // A new predecessor may have items to offer; make sure a
                // forwarder task is running.
                if (!forwarder_busy) {
                    forwarder_busy = true;
                    spawn_forward_task();
                }
                break;
            case rem_pred:
                my_predecessors.remove(*(tmp->r));
                tmp->status.store(SUCCEEDED, std::memory_order_release);
                break;
            case app_body_bypass: {
                tmp->bypass_t = nullptr;
                __TBB_ASSERT(my_max_concurrency != 0, nullptr);
                // A body invocation completed: free its slot and, if there is
                // now room, start work on the next postponed item.
                --my_concurrency;
                if(my_concurrency<my_max_concurrency)
                    tmp->bypass_t = perform_queued_requests();
                tmp->status.store(SUCCEEDED, std::memory_order_release);
            }
                break;
            case tryput_bypass: internal_try_put_task(tmp);  break;
            case try_fwd: internal_forward(tmp);  break;
            case occupy_concurrency:
                // Reserve a slot for a lightweight put without creating a task.
                if (my_concurrency < my_max_concurrency) {
                    ++my_concurrency;
                    tmp->status.store(SUCCEEDED, std::memory_order_release);
                } else {
                    tmp->status.store(FAILED, std::memory_order_release);
                }
                break;
            }
        }
    }

    //! Put to the node, but return the task instead of enqueueing it
    void internal_try_put_task(operation_type *op) {
        __TBB_ASSERT(my_max_concurrency != 0, nullptr);
        if (my_concurrency < my_max_concurrency) {
            ++my_concurrency;
            graph_task * new_task = create_body_task(*(op->elem));
            op->bypass_t = new_task;
            op->status.store(SUCCEEDED, std::memory_order_release);
        } else if ( my_queue && my_queue->push(*(op->elem)) ) {
            // No slot available, but the item was buffered for later.
            op->bypass_t = SUCCESSFULLY_ENQUEUED;
            op->status.store(SUCCEEDED, std::memory_order_release);
        } else {
            // Rejecting policy (or the queue refused): caller keeps the item.
            op->bypass_t = nullptr;
            op->status.store(FAILED, std::memory_order_release);
        }
    }

    //! Creates tasks for postponed messages if available and if concurrency allows
    void internal_forward(operation_type *op) {
        op->bypass_t = nullptr;
        if (my_concurrency < my_max_concurrency)
            op->bypass_t = perform_queued_requests();
        if(op->bypass_t)
            op->status.store(SUCCEEDED, std::memory_order_release);
        else {
            // Nothing left to forward; let a future reg_pred respawn us.
            forwarder_busy = false;
            op->status.store(FAILED, std::memory_order_release);
        }
    }

    graph_task* internal_try_put_bypass( const input_type& t ) {
        operation_type op_data(t, tryput_bypass);
        my_aggregator.execute(&op_data);
        if( op_data.status == SUCCEEDED ) {
            return op_data.bypass_t;
        }
        return nullptr;
    }

    //! Lightweight path: run the body inline when a concurrency slot can be
    //  reserved (or concurrency is unlimited); otherwise fall back to the
    //  regular task-producing path.
    graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) {
        if( my_max_concurrency == 0 ) {
            return apply_body_bypass(t);
        } else {
            operation_type check_op(t, occupy_concurrency);
            my_aggregator.execute(&check_op);
            if( check_op.status == SUCCEEDED ) {
                return apply_body_bypass(t);
            }
            return internal_try_put_bypass(t);
        }
    }

    //! Regular path: always produce a task that will run the body.
    graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) {
        if( my_max_concurrency == 0 ) {
            return create_body_task(t);
        } else {
            return internal_try_put_bypass(t);
        }
    }

    //! Applies the body to the provided input
    //  then decides if more work is available
    graph_task* apply_body_bypass( const input_type &i ) {
        return static_cast<ImplType *>(this)->apply_body_impl_bypass(i);
    }

    //! allocates a task to apply a body
    // Returns nullptr when the graph is shutting down. The allocation is
    // paired with reserve_wait() so graph::wait_for_all accounts for the task.
    graph_task* create_body_task( const input_type &input ) {
        if (!is_graph_active(my_graph_ref)) {
            return nullptr;
        }
        // TODO revamp: extract helper for common graph task allocation part
        small_object_allocator allocator{};
        typedef apply_body_task_bypass<class_type, input_type> task_type;
        graph_task* t = allocator.new_object<task_type>( my_graph_ref, allocator, *this, input, my_priority );
        graph_reference().reserve_wait();
        return t;
    }

    //! This is executed by an enqueued task, the "forwarder"
    // Repeatedly asks the aggregator for postponed work, combining the
    // produced body tasks until a try_fwd attempt fails.
    graph_task* forward_task() {
        operation_type op_data(try_fwd);
        graph_task* rval = nullptr;
        do {
            op_data.status = WAIT;
            my_aggregator.execute(&op_data);
            if(op_data.status == SUCCEEDED) {
                graph_task* ttask = op_data.bypass_t;
                __TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, nullptr);
                rval = combine_tasks(my_graph_ref, rval, ttask);
            }
        } while (op_data.status == SUCCEEDED);
        return rval;
    }

    inline graph_task* create_forward_task() {
        if (!is_graph_active(my_graph_ref)) {
            return nullptr;
        }
        small_object_allocator allocator{};
        typedef forward_task_bypass<class_type> task_type;
        graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, my_priority );
        graph_reference().reserve_wait();
        return t;
    }

    //! Spawns a task that calls forward()
    inline void spawn_forward_task() {
        graph_task* tp = create_forward_task();
        if(tp) {
            spawn_in_graph_arena(graph_reference(), *tp);
        }
    }

    node_priority_t priority() const override { return my_priority; }
};  // function_input_base
347 
348 //! Implements methods for a function node that takes a type Input as input and sends
349 //  a type Output to its successors.
//! Implements methods for a function node that takes a type Input as input and sends
//  a type Output to its successors.
template< typename Input, typename Output, typename Policy, typename A>
class function_input : public function_input_base<Input, Policy, A, function_input<Input,Output,Policy,A> > {
public:
    typedef Input input_type;
    typedef Output output_type;
    typedef function_body<input_type, output_type> function_body_type;
    typedef function_input<Input, Output, Policy,A> my_class;
    typedef function_input_base<Input, Policy, A, my_class> base_type;
    typedef function_input_queue<input_type, A> input_queue_type;

    // constructor
    // Two copies of the body are kept: my_body is the working copy; my_init_body
    // preserves the initial state so reset(rf_reset_bodies) can restore it.
    template<typename Body>
    function_input(
        graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority )
      : base_type(g, max_concurrency, a_priority, noexcept(tbb::detail::invoke(body, input_type())))
      , my_body( new function_body_leaf< input_type, output_type, Body>(body) )
      , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) {
    }

    //! Copy constructor; clones the source's *initial* body, not its current state.
    function_input( const function_input& src ) :
        base_type(src),
        my_body( src.my_init_body->clone() ),
        my_init_body(src.my_init_body->clone() ) {
    }
#if __INTEL_COMPILER <= 2021
    // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited
    // class while the parent class has the virtual keyword for the destructor.
    virtual
#endif
    ~function_input() {
        delete my_body;
        delete my_init_body;
    }

    //! Returns a copy of the current body object; the dynamic_cast throws
    //  std::bad_cast if Body does not match the stored body type.
    template< typename Body >
    Body copy_function_object() {
        function_body_type &body_ref = *this->my_body;
        return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
    }

    //! Invokes the body on i, bracketed by tracing hooks, and returns its result.
    output_type apply_body_impl( const input_type& i) {
        // There is an extra copy needed to capture the
        // body execution without the try_put
        fgt_begin_body( my_body );
        output_type v = tbb::detail::invoke(*my_body, i);
        fgt_end_body( my_body );
        return v;
    }

    //TODO: consider moving into the base class
    //! Runs the body, releases the concurrency slot (spawning any postponed
    //  work), then forwards the result to successors and returns the successor
    //  task for bypass.
    graph_task* apply_body_impl_bypass( const input_type &i) {
        output_type v = apply_body_impl(i);
        graph_task* postponed_task = nullptr;
        if( base_type::my_max_concurrency != 0 ) {
            postponed_task = base_type::try_get_postponed_task(i);
            __TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, nullptr);
        }
        if( postponed_task ) {
            // make the task available for other workers since we do not know successors'
            // execution policy
            spawn_in_graph_arena(base_type::graph_reference(), *postponed_task);
        }
        graph_task* successor_task = successors().try_put_task(v);
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (push)
#pragma warning (disable: 4127)  /* suppress conditional expression is constant */
#endif
        if(has_policy<lightweight, Policy>::value) {
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (pop)
#endif
            if(!successor_task) {
                // Return confirmative status since current
                // node's body has been executed anyway
                successor_task = SUCCESSFULLY_ENQUEUED;
            }
        }
        return successor_task;
    }

protected:

    //! Resets base-class state and, on rf_reset_bodies, replaces the working
    //  body with a clone of the initial one.
    void reset_function_input(reset_flags f) {
        base_type::reset_function_input_base(f);
        if(f & rf_reset_bodies) {
            function_body_type *tmp = my_init_body->clone();
            delete my_body;
            my_body = tmp;
        }
    }

    function_body_type *my_body;        // working copy; may accumulate state
    function_body_type *my_init_body;   // pristine copy used for reset/copy
    virtual broadcast_cache<output_type > &successors() = 0;

};  // function_input
447 
448 
449 // helper templates to clear the successor edges of the output ports of an multifunction_node
// Recursive case: clears the successor edges of port N-1, then recurses
// toward port 0 (terminates at the clear_element<1> specialization).
template<int N> struct clear_element {
    template<typename P> static void clear_this(P &p) {
        (void)std::get<N-1>(p).successors().clear();
        clear_element<N-1>::clear_this(p);
    }
#if TBB_USE_ASSERT
    //! True only if ports N-1 .. 0 all have empty successor sets.
    template<typename P> static bool this_empty(P &p) {
        if(std::get<N-1>(p).successors().empty())
            return clear_element<N-1>::this_empty(p);
        return false;
    }
#endif
};
463 
// Base case of the recursion: handles port 0 only.
template<> struct clear_element<1> {
    template<typename P> static void clear_this(P &p) {
        (void)std::get<0>(p).successors().clear();
    }
#if TBB_USE_ASSERT
    template<typename P> static bool this_empty(P &p) {
        return std::get<0>(p).successors().empty();
    }
#endif
};
474 
//! Constructs an OutputTuple by passing the graph to each port's constructor.
//  The tuple argument is unused at runtime; it only drives deduction of Args...
template <typename OutputTuple>
struct init_output_ports {
    template <typename... Args>
    static OutputTuple call(graph& g, const std::tuple<Args...>&) {
        return OutputTuple(Args(g)...);
    }
}; // struct init_output_ports
482 
483 //! Implements methods for a function node that takes a type Input as input
484 //  and has a tuple of output ports specified.
//! Implements methods for a function node that takes a type Input as input
//  and has a tuple of output ports specified.
template< typename Input, typename OutputPortSet, typename Policy, typename A>
class multifunction_input : public function_input_base<Input, Policy, A, multifunction_input<Input,OutputPortSet,Policy,A> > {
public:
    static const int N = std::tuple_size<OutputPortSet>::value;   // number of output ports
    typedef Input input_type;
    typedef OutputPortSet output_ports_type;
    typedef multifunction_body<input_type, output_ports_type> multifunction_body_type;
    typedef multifunction_input<Input, OutputPortSet, Policy, A> my_class;
    typedef function_input_base<Input, Policy, A, my_class> base_type;
    typedef function_input_queue<input_type, A> input_queue_type;

    // constructor
    // As in function_input, my_init_body keeps a pristine clone of the body so
    // reset(rf_reset_bodies) can restore the initial state.
    template<typename Body>
    multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority )
      : base_type(g, max_concurrency, a_priority, noexcept(tbb::detail::invoke(body, input_type(), my_output_ports)))
      , my_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) )
      , my_init_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) )
      , my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)){
    }

    //! Copy constructor; clones the source's initial body and creates fresh
    //  output ports bound to the same graph.
    multifunction_input( const multifunction_input& src ) :
        base_type(src),
        my_body( src.my_init_body->clone() ),
        my_init_body(src.my_init_body->clone() ),
        my_output_ports( init_output_ports<output_ports_type>::call(src.my_graph_ref, my_output_ports) ) {
    }

    ~multifunction_input() {
        delete my_body;
        delete my_init_body;
    }

    //! Returns a copy of the current body object; the dynamic_cast throws
    //  std::bad_cast if Body does not match the stored body type.
    template< typename Body >
    Body copy_function_object() {
        multifunction_body_type &body_ref = *this->my_body;
        return *static_cast<Body*>(dynamic_cast< multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body_ptr());
    }

    // for multifunction nodes we do not have a single successor as such.  So we just tell
    // the task we were successful.
    //TODO: consider moving common parts with implementation in function_input into separate function
    graph_task* apply_body_impl_bypass( const input_type &i ) {
        fgt_begin_body( my_body );
        (*my_body)(i, my_output_ports);
        fgt_end_body( my_body );
        graph_task* ttask = nullptr;
        if(base_type::my_max_concurrency != 0) {
            // Release the concurrency slot and pick up postponed work, if any.
            ttask = base_type::try_get_postponed_task(i);
        }
        return ttask ? ttask : SUCCESSFULLY_ENQUEUED;
    }

    output_ports_type &output_ports(){ return my_output_ports; }

protected:

    //! Resets base state; optionally clears output-port edges and/or restores
    //  the initial body, per the flags in f.
    void reset(reset_flags f) {
        base_type::reset_function_input_base(f);
        if(f & rf_clear_edges)clear_element<N>::clear_this(my_output_ports);
        if(f & rf_reset_bodies) {
            multifunction_body_type* tmp = my_init_body->clone();
            delete my_body;
            my_body = tmp;
        }
        __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "multifunction_node reset failed");
    }

    multifunction_body_type *my_body;        // working copy
    multifunction_body_type *my_init_body;   // pristine copy used for reset/copy
    output_ports_type my_output_ports;

};  // multifunction_input
558 
559 // template to refer to an output port of a multifunction_node
560 template<size_t N, typename MOP>
561 typename std::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) {
562     return std::get<N>(op.output_ports());
563 }
564 
565 inline void check_task_and_spawn(graph& g, graph_task* t) {
566     if (t && t != SUCCESSFULLY_ENQUEUED) {
567         spawn_in_graph_arena(g, *t);
568     }
569 }
570 
571 // helper structs for split_node
// Recursive case: puts element N-1 of the tuple to output port N-1 (spawning
// any resulting task immediately), then recurses toward element 0.
template<int N>
struct emit_element {
    template<typename T, typename P>
    static graph_task* emit_this(graph& g, const T &t, P &p) {
        // TODO: consider to collect all the tasks in task_list and spawn them all at once
        graph_task* last_task = std::get<N-1>(p).try_put_task(std::get<N-1>(t));
        check_task_and_spawn(g, last_task);
        return emit_element<N-1>::emit_this(g,t,p);
    }
};
582 
// Base case: emits element 0 and reports overall success to the caller.
template<>
struct emit_element<1> {
    template<typename T, typename P>
    static graph_task* emit_this(graph& g, const T &t, P &p) {
        graph_task* last_task = std::get<0>(p).try_put_task(std::get<0>(t));
        check_task_and_spawn(g, last_task);
        return SUCCESSFULLY_ENQUEUED;
    }
};
592 
593 //! Implements methods for an executable node that takes continue_msg as input
//! Implements methods for an executable node that takes continue_msg as input
template< typename Output, typename Policy>
class continue_input : public continue_receiver {
public:

    //! The input type of this receiver
    typedef continue_msg input_type;

    //! The output type of this receiver
    typedef Output output_type;
    typedef function_body<input_type, output_type> function_body_type;
    typedef continue_input<output_type, Policy> class_type;

    // Keeps a working body (my_body) and a pristine clone (my_init_body) so
    // reset_receiver(rf_reset_bodies) can restore the initial state.
    template< typename Body >
    continue_input( graph &g, Body& body, node_priority_t a_priority )
        : continue_receiver(/*number_of_predecessors=*/0, a_priority)
        , my_graph_ref(g)
        , my_body( new function_body_leaf< input_type, output_type, Body>(body) )
        , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) )
    { }

    template< typename Body >
    continue_input( graph &g, int number_of_predecessors,
                    Body& body, node_priority_t a_priority )
      : continue_receiver( number_of_predecessors, a_priority )
      , my_graph_ref(g)
      , my_body( new function_body_leaf< input_type, output_type, Body>(body) )
      , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) )
    { }

    //! Copy constructor; clones the source's *initial* body, not its current state.
    continue_input( const continue_input& src ) : continue_receiver(src),
                                                  my_graph_ref(src.my_graph_ref),
                                                  my_body( src.my_init_body->clone() ),
                                                  my_init_body( src.my_init_body->clone() ) {}

    ~continue_input() {
        delete my_body;
        delete my_init_body;
    }

    //! Returns a copy of the current body object; the dynamic_cast throws
    //  std::bad_cast if Body does not match the stored body type.
    template< typename Body >
    Body copy_function_object() {
        function_body_type &body_ref = *my_body;
        return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
    }

    //! Resets the predecessor count; on rf_reset_bodies also restores the
    //  initial body.
    void reset_receiver( reset_flags f) override {
        continue_receiver::reset_receiver(f);
        if(f & rf_reset_bodies) {
            function_body_type *tmp = my_init_body->clone();
            delete my_body;
            my_body = tmp;
        }
    }

protected:

    graph& my_graph_ref;
    function_body_type *my_body;        // working copy
    function_body_type *my_init_body;   // pristine copy used for reset/copy

    virtual broadcast_cache<output_type > &successors() = 0;

    friend class apply_body_task_bypass< class_type, continue_msg >;

    //! Applies the body to the provided input
    graph_task* apply_body_bypass( input_type ) {
        // There is an extra copy needed to capture the
        // body execution without the try_put
        fgt_begin_body( my_body );
        output_type v = (*my_body)( continue_msg() );
        fgt_end_body( my_body );
        return successors().try_put_task( v );
    }

    //! Called when all predecessors have signaled: either runs the body inline
    //  (lightweight policy) or allocates a task that will run it.
    graph_task* execute() override {
        if(!is_graph_active(my_graph_ref)) {
            return nullptr;
        }
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (push)
#pragma warning (disable: 4127)  /* suppress conditional expression is constant */
#endif
        if(has_policy<lightweight, Policy>::value) {
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (pop)
#endif
            return apply_body_bypass( continue_msg() );
        }
        else {
            small_object_allocator allocator{};
            typedef apply_body_task_bypass<class_type, continue_msg> task_type;
            graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority );
            // Pair allocation with reserve_wait() so wait_for_all accounts for it.
            graph_reference().reserve_wait();
            return t;
        }
    }

    graph& graph_reference() const override {
        return my_graph_ref;
    }
};  // continue_input
695 
696 //! Implements methods for both executable and function nodes that puts Output to its successors
//! Implements methods for both executable and function nodes that puts Output to its successors
template< typename Output >
class function_output : public sender<Output> {
public:

    template<int N> friend struct clear_element;
    typedef Output output_type;
    typedef typename sender<output_type>::successor_type successor_type;
    typedef broadcast_cache<output_type> broadcast_cache_type;

    function_output(graph& g) : my_successors(this), my_graph_ref(g) {}
    function_output(const function_output& other) = delete;

    //! Adds a new successor to this node
    // Always reports success: the broadcast cache accepts any successor.
    bool register_successor( successor_type &r ) override {
        successors().register_successor( r );
        return true;
    }

    //! Removes a successor from this node
    bool remove_successor( successor_type &r ) override {
        successors().remove_successor( r );
        return true;
    }

    //! Access to the successor cache used to broadcast outputs.
    broadcast_cache_type &successors() { return my_successors; }

    graph& graph_reference() const { return my_graph_ref; }
protected:
    broadcast_cache_type my_successors;
    graph& my_graph_ref;
};  // function_output
728 
//! One output port of a multifunction_node; adds try_put on top of function_output.
template< typename Output >
class multifunction_output : public function_output<Output> {
public:
    typedef Output output_type;
    typedef function_output<output_type> base_type;
    using base_type::my_successors;

    multifunction_output(graph& g) : base_type(g) {}
    //! Copying binds to the same graph but starts with an empty successor set.
    multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {}

    //! Puts an item to the successors; returns false if no successor accepted it.
    bool try_put(const output_type &i) {
        graph_task *res = try_put_task(i);
        if( !res ) return false;
        if( res != SUCCESSFULLY_ENQUEUED ) {
            // wrapping in task_arena::execute() is not needed since the method is called from
            // inside task::execute()
            spawn_in_graph_arena(graph_reference(), *res);
        }
        return true;
    }

    using base_type::graph_reference;

protected:

    graph_task* try_put_task(const output_type &i) {
        return my_successors.try_put_task(i);
    }

    template <int N> friend struct emit_element;

};  // multifunction_output
761 
762 //composite_node
// Recursion terminator for add_nodes_impl: no nodes left to register.
template<typename CompositeType>
void add_nodes_impl(CompositeType*, bool) {}
765 
//! Registers the first node with the composite node's tracing layer (as an
//  aliased port), then recurses over the remaining nodes; recursion ends at
//  the no-argument overload above.
template< typename CompositeType, typename NodeType1, typename... NodeTypes >
void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) {
    fgt_alias_port(c_node, const_cast<NodeType1 *>(&n1), visible);
    add_nodes_impl(c_node, visible, n...);
}
773 
774 #endif // __TBB__flow_graph_node_impl_H
775