// xref: /oneTBB/include/oneapi/tbb/parallel_for.h (revision a088cfa0)
/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/
1649e08aacStbbdev 
1749e08aacStbbdev #ifndef __TBB_parallel_for_H
1849e08aacStbbdev #define __TBB_parallel_for_H
1949e08aacStbbdev 
2049e08aacStbbdev #include "detail/_config.h"
2149e08aacStbbdev #include "detail/_namespace_injection.h"
2249e08aacStbbdev #include "detail/_exception.h"
2349e08aacStbbdev #include "detail/_task.h"
2449e08aacStbbdev #include "detail/_small_object_pool.h"
2549e08aacStbbdev #include "profiling.h"
2649e08aacStbbdev 
2749e08aacStbbdev #include "partitioner.h"
2849e08aacStbbdev #include "blocked_range.h"
2949e08aacStbbdev #include "task_group.h"
3049e08aacStbbdev 
3149e08aacStbbdev #include <cstddef>
3249e08aacStbbdev #include <new>
3349e08aacStbbdev 
3449e08aacStbbdev namespace tbb {
3549e08aacStbbdev namespace detail {
#if __TBB_CPP20_CONCEPTS_PRESENT
inline namespace d0 {

//! Constraint on the Body argument of the range-based parallel_for overloads.
/** Holds when Body is copy constructible and a const Body lvalue is invocable with a Range&. */
template <typename Body, typename Range>
concept parallel_for_body =
    std::copy_constructible<Body> &&
    std::invocable<const std::remove_reference_t<Body>&, Range&>;

//! Constraint on the Index argument of the index-based parallel_for overloads.
/** Index must be constructible from int and copyable. For two const Index lvalues a and b:
    - a < b must have a type accepted by adaptive_same_as<bool>;
    - a - b must be convertible to std::size_t;
    - a + (b - a) must be convertible to Index. */
template <typename Index>
concept parallel_for_index =
    std::constructible_from<Index, int> &&
    std::copyable<Index> &&
    requires( const std::remove_reference_t<Index>& a, const std::remove_reference_t<Index>& b ) {
        { a < b } -> adaptive_same_as<bool>;
        { a - b } -> std::convertible_to<std::size_t>;
        { a + (b - a) } -> std::convertible_to<Index>;
    };

//! Constraint on the Function argument of the index-based parallel_for overloads.
/** Holds when a const Function lvalue is invocable with an Index. */
template <typename Function, typename Index>
concept parallel_for_function =
    std::invocable<const std::remove_reference_t<Function>&, Index>;

} // namespace d0
#endif // __TBB_CPP20_CONCEPTS_PRESENT
5649e08aacStbbdev namespace d1 {
5749e08aacStbbdev 
5849e08aacStbbdev //! Task type used in parallel_for
5949e08aacStbbdev /** @ingroup algorithms */
6049e08aacStbbdev template<typename Range, typename Body, typename Partitioner>
6149e08aacStbbdev struct start_for : public task {
6249e08aacStbbdev     Range my_range;
6349e08aacStbbdev     const Body my_body;
6449e08aacStbbdev     node* my_parent;
6549e08aacStbbdev 
6649e08aacStbbdev     typename Partitioner::task_partition_type my_partition;
6749e08aacStbbdev     small_object_allocator my_allocator;
6849e08aacStbbdev 
6949e08aacStbbdev     task* execute(execution_data&) override;
7049e08aacStbbdev     task* cancel(execution_data&) override;
7149e08aacStbbdev     void finalize(const execution_data&);
7249e08aacStbbdev 
7349e08aacStbbdev     //! Constructor for root task.
start_forstart_for7449e08aacStbbdev     start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
7549e08aacStbbdev         my_range(range),
7649e08aacStbbdev         my_body(body),
77f2af7473Skboyarinov         my_parent(nullptr),
7849e08aacStbbdev         my_partition(partitioner),
7949e08aacStbbdev         my_allocator(alloc) {}
8049e08aacStbbdev     //! Splitting constructor used to generate children.
8149e08aacStbbdev     /** parent_ becomes left child.  Newly constructed object is right child. */
start_forstart_for8249e08aacStbbdev     start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
8349e08aacStbbdev         my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
8449e08aacStbbdev         my_body(parent_.my_body),
85f2af7473Skboyarinov         my_parent(nullptr),
8649e08aacStbbdev         my_partition(parent_.my_partition, split_obj),
8749e08aacStbbdev         my_allocator(alloc) {}
8849e08aacStbbdev     //! Construct right child from the given range as response to the demand.
8949e08aacStbbdev     /** parent_ remains left child.  Newly constructed object is right child. */
start_forstart_for9049e08aacStbbdev     start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
9149e08aacStbbdev         my_range(r),
9249e08aacStbbdev         my_body(parent_.my_body),
93f2af7473Skboyarinov         my_parent(nullptr),
9449e08aacStbbdev         my_partition(parent_.my_partition, split()),
9549e08aacStbbdev         my_allocator(alloc)
9649e08aacStbbdev     {
9749e08aacStbbdev         my_partition.align_depth( d );
9849e08aacStbbdev     }
runstart_for9949e08aacStbbdev     static void run(const Range& range, const Body& body, Partitioner& partitioner) {
10049e08aacStbbdev         task_group_context context(PARALLEL_FOR);
10149e08aacStbbdev         run(range, body, partitioner, context);
10249e08aacStbbdev     }
10349e08aacStbbdev 
runstart_for10449e08aacStbbdev     static void run(const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context) {
10549e08aacStbbdev         if ( !range.empty() ) {
10649e08aacStbbdev             small_object_allocator alloc{};
10749e08aacStbbdev             start_for& for_task = *alloc.new_object<start_for>(range, body, partitioner, alloc);
108b15aabb3Stbbdev 
109b15aabb3Stbbdev             // defer creation of the wait node until task allocation succeeds
110b15aabb3Stbbdev             wait_node wn;
11149e08aacStbbdev             for_task.my_parent = &wn;
11249e08aacStbbdev             execute_and_wait(for_task, context, wn.m_wait, context);
11349e08aacStbbdev         }
11449e08aacStbbdev     }
11549e08aacStbbdev     //! Run body for range, serves as callback for partitioner
run_bodystart_for11649e08aacStbbdev     void run_body( Range &r ) {
117*a088cfa0SKonstantin Boyarinov         tbb::detail::invoke(my_body, r);
11849e08aacStbbdev     }
11949e08aacStbbdev 
12049e08aacStbbdev     //! spawn right task, serves as callback for partitioner
offer_workstart_for12149e08aacStbbdev     void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
12249e08aacStbbdev        offer_work_impl(ed, *this, split_obj);
12349e08aacStbbdev     }
12449e08aacStbbdev 
12549e08aacStbbdev     //! spawn right task, serves as callback for partitioner
offer_workstart_for12649e08aacStbbdev     void offer_work(const Range& r, depth_t d, execution_data& ed) {
12749e08aacStbbdev         offer_work_impl(ed, *this, r, d);
12849e08aacStbbdev     }
12949e08aacStbbdev 
13049e08aacStbbdev private:
13149e08aacStbbdev     template <typename... Args>
offer_work_implstart_for13249e08aacStbbdev     void offer_work_impl(execution_data& ed, Args&&... constructor_args) {
13349e08aacStbbdev         // New right child
13449e08aacStbbdev         small_object_allocator alloc{};
13549e08aacStbbdev         start_for& right_child = *alloc.new_object<start_for>(ed, std::forward<Args>(constructor_args)..., alloc);
13649e08aacStbbdev 
13749e08aacStbbdev         // New root node as a continuation and ref count. Left and right child attach to the new parent.
13849e08aacStbbdev         right_child.my_parent = my_parent = alloc.new_object<tree_node>(ed, my_parent, 2, alloc);
13949e08aacStbbdev         // Spawn the right sibling
14049e08aacStbbdev         right_child.spawn_self(ed);
14149e08aacStbbdev     }
14249e08aacStbbdev 
spawn_selfstart_for14349e08aacStbbdev     void spawn_self(execution_data& ed) {
14449e08aacStbbdev         my_partition.spawn_task(*this, *context(ed));
14549e08aacStbbdev     }
14649e08aacStbbdev };
14749e08aacStbbdev 
14849e08aacStbbdev //! fold the tree and deallocate the task
14949e08aacStbbdev template<typename Range, typename Body, typename Partitioner>
finalize(const execution_data & ed)15049e08aacStbbdev void start_for<Range, Body, Partitioner>::finalize(const execution_data& ed) {
15149e08aacStbbdev     // Get the current parent and allocator an object destruction
15249e08aacStbbdev     node* parent = my_parent;
15349e08aacStbbdev     auto allocator = my_allocator;
15449e08aacStbbdev     // Task execution finished - destroy it
15549e08aacStbbdev     this->~start_for();
15649e08aacStbbdev     // Unwind the tree decrementing the parent`s reference count
15749e08aacStbbdev 
15849e08aacStbbdev     fold_tree<tree_node>(parent, ed);
15949e08aacStbbdev     allocator.deallocate(this, ed);
16049e08aacStbbdev 
16149e08aacStbbdev }
16249e08aacStbbdev 
16349e08aacStbbdev //! execute task for parallel_for
16449e08aacStbbdev template<typename Range, typename Body, typename Partitioner>
execute(execution_data & ed)16549e08aacStbbdev task* start_for<Range, Body, Partitioner>::execute(execution_data& ed) {
16649e08aacStbbdev     if (!is_same_affinity(ed)) {
16749e08aacStbbdev         my_partition.note_affinity(execution_slot(ed));
16849e08aacStbbdev     }
16949e08aacStbbdev     my_partition.check_being_stolen(*this, ed);
17049e08aacStbbdev     my_partition.execute(*this, my_range, ed);
17149e08aacStbbdev     finalize(ed);
17249e08aacStbbdev     return nullptr;
17349e08aacStbbdev }
17449e08aacStbbdev 
17549e08aacStbbdev //! cancel task for parallel_for
17649e08aacStbbdev template<typename Range, typename Body, typename Partitioner>
cancel(execution_data & ed)17749e08aacStbbdev task* start_for<Range, Body, Partitioner>::cancel(execution_data& ed) {
17849e08aacStbbdev     finalize(ed);
17949e08aacStbbdev     return nullptr;
18049e08aacStbbdev }
18149e08aacStbbdev 
18249e08aacStbbdev //! Calls the function with values from range [begin, end) with a step provided
18349e08aacStbbdev template<typename Function, typename Index>
184478de5b1Stbbdev class parallel_for_body_wrapper : detail::no_assign {
18549e08aacStbbdev     const Function &my_func;
18649e08aacStbbdev     const Index my_begin;
18749e08aacStbbdev     const Index my_step;
18849e08aacStbbdev public:
parallel_for_body_wrapper(const Function & _func,Index & _begin,Index & _step)189478de5b1Stbbdev     parallel_for_body_wrapper( const Function& _func, Index& _begin, Index& _step )
19049e08aacStbbdev         : my_func(_func), my_begin(_begin), my_step(_step) {}
19149e08aacStbbdev 
operator()19249e08aacStbbdev     void operator()( const blocked_range<Index>& r ) const {
19349e08aacStbbdev         // A set of local variables to help the compiler with vectorization of the following loop.
19449e08aacStbbdev         Index b = r.begin();
19549e08aacStbbdev         Index e = r.end();
19649e08aacStbbdev         Index ms = my_step;
19749e08aacStbbdev         Index k = my_begin + b*ms;
19849e08aacStbbdev 
19949e08aacStbbdev #if __INTEL_COMPILER
20049e08aacStbbdev #pragma ivdep
20149e08aacStbbdev #if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
20249e08aacStbbdev #pragma vector always assert
20349e08aacStbbdev #endif
20449e08aacStbbdev #endif
20549e08aacStbbdev         for ( Index i = b; i < e; ++i, k += ms ) {
206*a088cfa0SKonstantin Boyarinov             tbb::detail::invoke(my_func, k);
20749e08aacStbbdev         }
20849e08aacStbbdev     }
20949e08aacStbbdev };
21049e08aacStbbdev 
// Requirements on Range concept are documented in blocked_range.h

/** \page parallel_for_body_req Requirements on parallel_for body
    Class \c Body implementing the concept of parallel_for body must define:
    - \code Body::Body( const Body& ); \endcode                 Copy constructor
    - \code Body::~Body(); \endcode                             Destructor
    - \code void Body::operator()( Range& r ) const; \endcode   Function call operator applying the body to range \c r.
**/

/** \name parallel_for
    See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/
//@{
22349e08aacStbbdev 
22449e08aacStbbdev //! Parallel iteration over range with default partitioner.
22549e08aacStbbdev /** @ingroup algorithms **/
22649e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2274a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
22849e08aacStbbdev void parallel_for( const Range& range, const Body& body ) {
22949e08aacStbbdev     start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
23049e08aacStbbdev }
23149e08aacStbbdev 
23249e08aacStbbdev //! Parallel iteration over range with simple partitioner.
23349e08aacStbbdev /** @ingroup algorithms **/
23449e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2354a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
23649e08aacStbbdev void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
23749e08aacStbbdev     start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner);
23849e08aacStbbdev }
23949e08aacStbbdev 
24049e08aacStbbdev //! Parallel iteration over range with auto_partitioner.
24149e08aacStbbdev /** @ingroup algorithms **/
24249e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2434a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
24449e08aacStbbdev void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
24549e08aacStbbdev     start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner);
24649e08aacStbbdev }
24749e08aacStbbdev 
24849e08aacStbbdev //! Parallel iteration over range with static_partitioner.
24949e08aacStbbdev /** @ingroup algorithms **/
25049e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2514a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
25249e08aacStbbdev void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
25349e08aacStbbdev     start_for<Range,Body,const static_partitioner>::run(range,body,partitioner);
25449e08aacStbbdev }
25549e08aacStbbdev 
25649e08aacStbbdev //! Parallel iteration over range with affinity_partitioner.
25749e08aacStbbdev /** @ingroup algorithms **/
25849e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2594a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
26049e08aacStbbdev void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
26149e08aacStbbdev     start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner);
26249e08aacStbbdev }
26349e08aacStbbdev 
26449e08aacStbbdev //! Parallel iteration over range with default partitioner and user-supplied context.
26549e08aacStbbdev /** @ingroup algorithms **/
26649e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2674a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
26849e08aacStbbdev void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
26949e08aacStbbdev     start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context);
27049e08aacStbbdev }
27149e08aacStbbdev 
27249e08aacStbbdev //! Parallel iteration over range with simple partitioner and user-supplied context.
27349e08aacStbbdev /** @ingroup algorithms **/
27449e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2754a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
27649e08aacStbbdev void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
27749e08aacStbbdev     start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
27849e08aacStbbdev }
27949e08aacStbbdev 
28049e08aacStbbdev //! Parallel iteration over range with auto_partitioner and user-supplied context.
28149e08aacStbbdev /** @ingroup algorithms **/
28249e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2834a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
28449e08aacStbbdev void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
28549e08aacStbbdev     start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
28649e08aacStbbdev }
28749e08aacStbbdev 
28849e08aacStbbdev //! Parallel iteration over range with static_partitioner and user-supplied context.
28949e08aacStbbdev /** @ingroup algorithms **/
29049e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2914a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
29249e08aacStbbdev void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
29349e08aacStbbdev     start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
29449e08aacStbbdev }
29549e08aacStbbdev 
29649e08aacStbbdev //! Parallel iteration over range with affinity_partitioner and user-supplied context.
29749e08aacStbbdev /** @ingroup algorithms **/
29849e08aacStbbdev template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_for_body<Body,Range>)2994a23d002Skboyarinov     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
30049e08aacStbbdev void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
30149e08aacStbbdev     start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
30249e08aacStbbdev }
30349e08aacStbbdev 
30449e08aacStbbdev //! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner
30549e08aacStbbdev template <typename Index, typename Function, typename Partitioner>
parallel_for_impl(Index first,Index last,Index step,const Function & f,Partitioner & partitioner)30649e08aacStbbdev void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
30749e08aacStbbdev     if (step <= 0 )
30849e08aacStbbdev         throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument
3094a23d002Skboyarinov     else if (first < last) {
31049e08aacStbbdev         // Above "else" avoids "potential divide by zero" warning on some platforms
311*a088cfa0SKonstantin Boyarinov         Index end = Index(last - first - 1ul) / step + Index(1);
31249e08aacStbbdev         blocked_range<Index> range(static_cast<Index>(0), end);
313478de5b1Stbbdev         parallel_for_body_wrapper<Function, Index> body(f, first, step);
31449e08aacStbbdev         parallel_for(range, body, partitioner);
31549e08aacStbbdev     }
31649e08aacStbbdev }
31749e08aacStbbdev 
31849e08aacStbbdev //! Parallel iteration over a range of integers with a step provided and default partitioner
31949e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)320478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
32149e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f) {
32249e08aacStbbdev     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
32349e08aacStbbdev }
32449e08aacStbbdev //! Parallel iteration over a range of integers with a step provided and simple partitioner
32549e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)326478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
32749e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
32849e08aacStbbdev     parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
32949e08aacStbbdev }
33049e08aacStbbdev //! Parallel iteration over a range of integers with a step provided and auto partitioner
33149e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)332478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
33349e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
33449e08aacStbbdev     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
33549e08aacStbbdev }
33649e08aacStbbdev //! Parallel iteration over a range of integers with a step provided and static partitioner
33749e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)338478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
33949e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
34049e08aacStbbdev     parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
34149e08aacStbbdev }
34249e08aacStbbdev //! Parallel iteration over a range of integers with a step provided and affinity partitioner
34349e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)344478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
34549e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
34649e08aacStbbdev     parallel_for_impl(first, last, step, f, partitioner);
34749e08aacStbbdev }
34849e08aacStbbdev 
34949e08aacStbbdev //! Parallel iteration over a range of integers with a default step value and default partitioner
35049e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)351478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
35249e08aacStbbdev void parallel_for(Index first, Index last, const Function& f) {
35349e08aacStbbdev     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
35449e08aacStbbdev }
35549e08aacStbbdev //! Parallel iteration over a range of integers with a default step value and simple partitioner
35649e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)357478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
35849e08aacStbbdev void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
35949e08aacStbbdev     parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
36049e08aacStbbdev }
36149e08aacStbbdev //! Parallel iteration over a range of integers with a default step value and auto partitioner
36249e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)363478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
36449e08aacStbbdev void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
36549e08aacStbbdev     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
36649e08aacStbbdev }
36749e08aacStbbdev //! Parallel iteration over a range of integers with a default step value and static partitioner
36849e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)369478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
37049e08aacStbbdev void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
37149e08aacStbbdev     parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
37249e08aacStbbdev }
37349e08aacStbbdev //! Parallel iteration over a range of integers with a default step value and affinity partitioner
37449e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)375478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
37649e08aacStbbdev void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
37749e08aacStbbdev     parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
37849e08aacStbbdev }
37949e08aacStbbdev 
38049e08aacStbbdev //! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner
38149e08aacStbbdev template <typename Index, typename Function, typename Partitioner>
parallel_for_impl(Index first,Index last,Index step,const Function & f,Partitioner & partitioner,task_group_context & context)38249e08aacStbbdev void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) {
38349e08aacStbbdev     if (step <= 0 )
38449e08aacStbbdev         throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument
385478de5b1Stbbdev     else if (first < last) {
38649e08aacStbbdev         // Above "else" avoids "potential divide by zero" warning on some platforms
38749e08aacStbbdev         Index end = (last - first - Index(1)) / step + Index(1);
38849e08aacStbbdev         blocked_range<Index> range(static_cast<Index>(0), end);
389478de5b1Stbbdev         parallel_for_body_wrapper<Function, Index> body(f, first, step);
39049e08aacStbbdev         parallel_for(range, body, partitioner, context);
39149e08aacStbbdev     }
39249e08aacStbbdev }
39349e08aacStbbdev 
39449e08aacStbbdev //! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner
39549e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)396478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
39749e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) {
39849e08aacStbbdev     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
39949e08aacStbbdev }
40049e08aacStbbdev //! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner
40149e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)402478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
40349e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) {
40449e08aacStbbdev     parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
40549e08aacStbbdev }
40649e08aacStbbdev //! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner
40749e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)408478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
40949e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) {
41049e08aacStbbdev     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
41149e08aacStbbdev }
41249e08aacStbbdev //! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner
41349e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)414478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
41549e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) {
41649e08aacStbbdev     parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
41749e08aacStbbdev }
41849e08aacStbbdev //! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner
41949e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)420478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
42149e08aacStbbdev void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) {
42249e08aacStbbdev     parallel_for_impl(first, last, step, f, partitioner, context);
42349e08aacStbbdev }
42449e08aacStbbdev 
42549e08aacStbbdev //! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner
42649e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)427478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
42849e08aacStbbdev void parallel_for(Index first, Index last, const Function& f, task_group_context &context) {
42949e08aacStbbdev     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
43049e08aacStbbdev }
43149e08aacStbbdev //! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner
43249e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)433478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
43449e08aacStbbdev void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) {
43549e08aacStbbdev     parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
43649e08aacStbbdev }
43749e08aacStbbdev //! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner
43849e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)439478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
44049e08aacStbbdev void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) {
44149e08aacStbbdev     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
44249e08aacStbbdev }
44349e08aacStbbdev //! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner
44449e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)445478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
44649e08aacStbbdev void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) {
44749e08aacStbbdev     parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
44849e08aacStbbdev }
44949e08aacStbbdev //! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner
45049e08aacStbbdev template <typename Index, typename Function>
__TBB_requires(parallel_for_index<Index> && parallel_for_function<Function,Index>)451478de5b1Stbbdev     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
45249e08aacStbbdev void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) {
45349e08aacStbbdev     parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
45449e08aacStbbdev }
45549e08aacStbbdev // @}
45649e08aacStbbdev 
45749e08aacStbbdev } // namespace d1
45849e08aacStbbdev } // namespace detail
45949e08aacStbbdev 
46049e08aacStbbdev inline namespace v1 {
46149e08aacStbbdev using detail::d1::parallel_for;
46249e08aacStbbdev // Split types
46349e08aacStbbdev using detail::split;
46449e08aacStbbdev using detail::proportional_split;
46549e08aacStbbdev } // namespace v1
46649e08aacStbbdev 
46749e08aacStbbdev } // namespace tbb
46849e08aacStbbdev 
46949e08aacStbbdev #endif /* __TBB_parallel_for_H */
470