1 /*
2 Copyright (c) 2005-2023 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 #ifndef __TBB_parallel_reduce_H
18 #define __TBB_parallel_reduce_H
19
20 #include <new>
21 #include "detail/_namespace_injection.h"
22 #include "detail/_task.h"
23 #include "detail/_aligned_space.h"
24 #include "detail/_small_object_pool.h"
25 #include "detail/_range_common.h"
26
27 #include "task_group.h" // task_group_context
28 #include "partitioner.h"
29 #include "profiling.h"
30
31 namespace tbb {
32 namespace detail {
33 #if __TBB_CPP20_CONCEPTS_PRESENT
34 inline namespace d0 {
35
36 template <typename Body, typename Range>
37 concept parallel_reduce_body = splittable<Body> &&
requires(Body & body,const Range & range,Body & rhs)38 requires( Body& body, const Range& range, Body& rhs ) {
39 body(range);
40 body.join(rhs);
41 };
42
43 template <typename Function, typename Range, typename Value>
44 concept parallel_reduce_function = std::invocable<const std::remove_reference_t<Function>&,
45 const Range&, const Value&> &&
46 std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Function>&,
47 const Range&, const Value&>,
48 Value>;
49
50 template <typename Combine, typename Value>
51 concept parallel_reduce_combine = std::invocable<const std::remove_reference_t<Combine>&,
52 const Value&, const Value&> &&
53 std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Combine>&,
54 const Value&, const Value&>,
55 Value>;
56
57 } // namespace d0
58 #endif // __TBB_CPP20_CONCEPTS_PRESENT
59 namespace d1 {
60
61 //! Tree node type for parallel_reduce.
62 /** @ingroup algorithms */
63 //TODO: consider folding tree via bypass execution(instead of manual folding)
64 // for better cancellation and critical tasks handling (performance measurements required).
65 template<typename Body>
66 struct reduction_tree_node : public tree_node {
67 tbb::detail::aligned_space<Body> zombie_space;
68 Body& left_body;
69 bool has_right_zombie{false};
70
reduction_tree_nodereduction_tree_node71 reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
72 tree_node{parent, ref_count, alloc},
73 left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */
74 {}
75
joinreduction_tree_node76 void join(task_group_context* context) {
77 if (has_right_zombie && !context->is_group_execution_cancelled())
78 left_body.join(*zombie_space.begin());
79 }
80
~reduction_tree_nodereduction_tree_node81 ~reduction_tree_node() {
82 if( has_right_zombie ) zombie_space.begin()->~Body();
83 }
84 };
85
86 //! Task type used to split the work of parallel_reduce.
87 /** @ingroup algorithms */
88 template<typename Range, typename Body, typename Partitioner>
89 struct start_reduce : public task {
90 Range my_range;
91 Body* my_body;
92 node* my_parent;
93
94 typename Partitioner::task_partition_type my_partition;
95 small_object_allocator my_allocator;
96 bool is_right_child;
97
98 task* execute(execution_data&) override;
99 task* cancel(execution_data&) override;
100 void finalize(const execution_data&);
101
102 using tree_node_type = reduction_tree_node<Body>;
103
104 //! Constructor reduce root task.
start_reducestart_reduce105 start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
106 my_range(range),
107 my_body(&body),
108 my_parent(nullptr),
109 my_partition(partitioner),
110 my_allocator(alloc),
111 is_right_child(false) {}
112 //! Splitting constructor used to generate children.
113 /** parent_ becomes left child. Newly constructed object is right child. */
start_reducestart_reduce114 start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
115 my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
116 my_body(parent_.my_body),
117 my_parent(nullptr),
118 my_partition(parent_.my_partition, split_obj),
119 my_allocator(alloc),
120 is_right_child(true)
121 {
122 parent_.is_right_child = false;
123 }
124 //! Construct right child from the given range as response to the demand.
125 /** parent_ remains left child. Newly constructed object is right child. */
start_reducestart_reduce126 start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
127 my_range(r),
128 my_body(parent_.my_body),
129 my_parent(nullptr),
130 my_partition(parent_.my_partition, split()),
131 my_allocator(alloc),
132 is_right_child(true)
133 {
134 my_partition.align_depth( d );
135 parent_.is_right_child = false;
136 }
runstart_reduce137 static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
138 if ( !range.empty() ) {
139 wait_node wn;
140 small_object_allocator alloc{};
141 auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc);
142 reduce_task->my_parent = &wn;
143 execute_and_wait(*reduce_task, context, wn.m_wait, context);
144 }
145 }
runstart_reduce146 static void run(const Range& range, Body& body, Partitioner& partitioner) {
147 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
148 // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block.
149 task_group_context context(PARALLEL_REDUCE);
150 run(range, body, partitioner, context);
151 }
152 //! Run body for range, serves as callback for partitioner
run_bodystart_reduce153 void run_body( Range &r ) {
154 tbb::detail::invoke(*my_body, r);
155 }
156
157 //! spawn right task, serves as callback for partitioner
offer_workstart_reduce158 void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
159 offer_work_impl(ed, *this, split_obj);
160 }
161 //! spawn right task, serves as callback for partitioner
offer_workstart_reduce162 void offer_work(const Range& r, depth_t d, execution_data& ed) {
163 offer_work_impl(ed, *this, r, d);
164 }
165
166 private:
167 template <typename... Args>
offer_work_implstart_reduce168 void offer_work_impl(execution_data& ed, Args&&... args) {
169 small_object_allocator alloc{};
170 // New right child
171 auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc);
172
173 // New root node as a continuation and ref count. Left and right child attach to the new parent.
174 right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc);
175
176 // Spawn the right sibling
177 right_child->spawn_self(ed);
178 }
179
spawn_selfstart_reduce180 void spawn_self(execution_data& ed) {
181 my_partition.spawn_task(*this, *context(ed));
182 }
183 };
184
185 //! fold the tree and deallocate the task
186 template<typename Range, typename Body, typename Partitioner>
finalize(const execution_data & ed)187 void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
188 // Get the current parent and wait object before an object destruction
189 node* parent = my_parent;
190 auto allocator = my_allocator;
191 // Task execution finished - destroy it
192 this->~start_reduce();
193 // Unwind the tree decrementing the parent`s reference count
194 fold_tree<tree_node_type>(parent, ed);
195 allocator.deallocate(this, ed);
196 }
197
198 //! Execute parallel_reduce task
199 template<typename Range, typename Body, typename Partitioner>
execute(execution_data & ed)200 task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
201 if (!is_same_affinity(ed)) {
202 my_partition.note_affinity(execution_slot(ed));
203 }
204 my_partition.check_being_stolen(*this, ed);
205
206 // The acquire barrier synchronizes the data pointed with my_body if the left
207 // task has already finished.
208 __TBB_ASSERT(my_parent, nullptr);
209 if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) {
210 tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent);
211 my_body = static_cast<Body*>(new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()));
212 parent_ptr->has_right_zombie = true;
213 }
214 __TBB_ASSERT(my_body != nullptr, "Incorrect body value");
215
216 my_partition.execute(*this, my_range, ed);
217
218 finalize(ed);
219 return nullptr;
220 }
221
222 //! Cancel parallel_reduce task
223 template<typename Range, typename Body, typename Partitioner>
cancel(execution_data & ed)224 task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
225 finalize(ed);
226 return nullptr;
227 }
228
229 //! Tree node type for parallel_deterministic_reduce.
230 /** @ingroup algorithms */
231 template<typename Body>
232 struct deterministic_reduction_tree_node : public tree_node {
233 Body right_body;
234 Body& left_body;
235
deterministic_reduction_tree_nodedeterministic_reduction_tree_node236 deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
237 tree_node{parent, ref_count, alloc},
238 right_body{input_left_body, detail::split()},
239 left_body(input_left_body)
240 {}
241
joindeterministic_reduction_tree_node242 void join(task_group_context* context) {
243 if (!context->is_group_execution_cancelled())
244 left_body.join(right_body);
245 }
246 };
247
248 //! Task type used to split the work of parallel_deterministic_reduce.
249 /** @ingroup algorithms */
250 template<typename Range, typename Body, typename Partitioner>
251 struct start_deterministic_reduce : public task {
252 Range my_range;
253 Body& my_body;
254 node* my_parent;
255
256 typename Partitioner::task_partition_type my_partition;
257 small_object_allocator my_allocator;
258
259 task* execute(execution_data&) override;
260 task* cancel(execution_data&) override;
261 void finalize(const execution_data&);
262
263 using tree_node_type = deterministic_reduction_tree_node<Body>;
264
265 //! Constructor deterministic_reduce root task.
start_deterministic_reducestart_deterministic_reduce266 start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) :
267 my_range(range),
268 my_body(body),
269 my_parent(nullptr),
270 my_partition(partitioner),
271 my_allocator(alloc) {}
272 //! Splitting constructor used to generate children.
273 /** parent_ becomes left child. Newly constructed object is right child. */
start_deterministic_reducestart_deterministic_reduce274 start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body,
275 small_object_allocator& alloc ) :
276 my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
277 my_body(body),
278 my_parent(nullptr),
279 my_partition(parent_.my_partition, split_obj),
280 my_allocator(alloc) {}
runstart_deterministic_reduce281 static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
282 if ( !range.empty() ) {
283 wait_node wn;
284 small_object_allocator alloc{};
285 auto deterministic_reduce_task =
286 alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc);
287 deterministic_reduce_task->my_parent = &wn;
288 execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context);
289 }
290 }
runstart_deterministic_reduce291 static void run(const Range& range, Body& body, Partitioner& partitioner) {
292 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
293 // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce
294 // in the try-block.
295 task_group_context context(PARALLEL_REDUCE);
296 run(range, body, partitioner, context);
297 }
298 //! Run body for range, serves as callback for partitioner
run_bodystart_deterministic_reduce299 void run_body( Range &r ) {
300 tbb::detail::invoke(my_body, r);
301 }
302 //! Spawn right task, serves as callback for partitioner
offer_workstart_deterministic_reduce303 void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
304 offer_work_impl(ed, *this, split_obj);
305 }
306 private:
307 template <typename... Args>
offer_work_implstart_deterministic_reduce308 void offer_work_impl(execution_data& ed, Args&&... args) {
309 small_object_allocator alloc{};
310 // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body.
311 auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc);
312
313 // New right child
314 auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc);
315
316 right_child->my_parent = my_parent = new_tree_node;
317
318 // Spawn the right sibling
319 right_child->spawn_self(ed);
320 }
321
spawn_selfstart_deterministic_reduce322 void spawn_self(execution_data& ed) {
323 my_partition.spawn_task(*this, *context(ed));
324 }
325 };
326
327 //! Fold the tree and deallocate the task
328 template<typename Range, typename Body, typename Partitioner>
finalize(const execution_data & ed)329 void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
330 // Get the current parent and wait object before an object destruction
331 node* parent = my_parent;
332
333 auto allocator = my_allocator;
334 // Task execution finished - destroy it
335 this->~start_deterministic_reduce();
336 // Unwind the tree decrementing the parent`s reference count
337 fold_tree<tree_node_type>(parent, ed);
338 allocator.deallocate(this, ed);
339 }
340
341 //! Execute parallel_deterministic_reduce task
342 template<typename Range, typename Body, typename Partitioner>
execute(execution_data & ed)343 task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
344 if (!is_same_affinity(ed)) {
345 my_partition.note_affinity(execution_slot(ed));
346 }
347 my_partition.check_being_stolen(*this, ed);
348
349 my_partition.execute(*this, my_range, ed);
350
351 finalize(ed);
352 return nullptr;
353 }
354
355 //! Cancel parallel_deterministic_reduce task
356 template<typename Range, typename Body, typename Partitioner>
cancel(execution_data & ed)357 task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
358 finalize(ed);
359 return nullptr;
360 }
361
362
363 //! Auxiliary class for parallel_reduce; for internal use only.
364 /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
365 using given \ref parallel_reduce_lambda_req "anonymous function objects".
366 **/
367 /** @ingroup algorithms */
368 template<typename Range, typename Value, typename RealBody, typename Reduction>
369 class lambda_reduce_body {
370 //TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced
371 // (might require some performance measurements)
372
373 const Value& my_identity_element;
374 const RealBody& my_real_body;
375 const Reduction& my_reduction;
376 Value my_value;
377 lambda_reduce_body& operator= ( const lambda_reduce_body& other );
378 public:
lambda_reduce_body(const Value & identity,const RealBody & body,const Reduction & reduction)379 lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
380 : my_identity_element(identity)
381 , my_real_body(body)
382 , my_reduction(reduction)
383 , my_value(identity)
384 { }
385 lambda_reduce_body( const lambda_reduce_body& other ) = default;
lambda_reduce_body(lambda_reduce_body & other,tbb::split)386 lambda_reduce_body( lambda_reduce_body& other, tbb::split )
387 : my_identity_element(other.my_identity_element)
388 , my_real_body(other.my_real_body)
389 , my_reduction(other.my_reduction)
390 , my_value(other.my_identity_element)
391 { }
operator()392 void operator()(Range& range) {
393 my_value = tbb::detail::invoke(my_real_body, range, const_cast<const Value&>(my_value));
394 }
join(lambda_reduce_body & rhs)395 void join( lambda_reduce_body& rhs ) {
396 my_value = tbb::detail::invoke(my_reduction, const_cast<const Value&>(my_value),
397 const_cast<const Value&>(rhs.my_value));
398 }
result()399 Value result() const {
400 return my_value;
401 }
402 };
403
404
405 // Requirements on Range concept are documented in blocked_range.h
406
407 /** \page parallel_reduce_body_req Requirements on parallel_reduce body
408 Class \c Body implementing the concept of parallel_reduce body must define:
409 - \code Body::Body( Body&, split ); \endcode Splitting constructor.
410 Must be able to run concurrently with operator() and method \c join
411 - \code Body::~Body(); \endcode Destructor
412 - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
413 and accumulating the result
414 - \code void Body::join( Body& b ); \endcode Join results.
415 The result in \c b should be merged into the result of \c this
416 **/
417
418 /** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
419 TO BE DOCUMENTED
420 **/
421
422 /** \name parallel_reduce
423 See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
424 //@{
425
426 //! Parallel iteration with reduction and default partitioner.
427 /** @ingroup algorithms **/
428 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)429 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
430 void parallel_reduce( const Range& range, Body& body ) {
431 start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
432 }
433
434 //! Parallel iteration with reduction and simple_partitioner
435 /** @ingroup algorithms **/
436 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)437 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
438 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
439 start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
440 }
441
442 //! Parallel iteration with reduction and auto_partitioner
443 /** @ingroup algorithms **/
444 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)445 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
446 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
447 start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
448 }
449
450 //! Parallel iteration with reduction and static_partitioner
451 /** @ingroup algorithms **/
452 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)453 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
454 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
455 start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
456 }
457
458 //! Parallel iteration with reduction and affinity_partitioner
459 /** @ingroup algorithms **/
460 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)461 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
462 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
463 start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
464 }
465
466 //! Parallel iteration with reduction, default partitioner and user-supplied context.
467 /** @ingroup algorithms **/
468 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)469 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
470 void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
471 start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
472 }
473
474 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
475 /** @ingroup algorithms **/
476 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)477 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
478 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
479 start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
480 }
481
482 //! Parallel iteration with reduction, auto_partitioner and user-supplied context
483 /** @ingroup algorithms **/
484 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)485 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
486 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
487 start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
488 }
489
490 //! Parallel iteration with reduction, static_partitioner and user-supplied context
491 /** @ingroup algorithms **/
492 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)493 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
494 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
495 start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
496 }
497
498 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context
499 /** @ingroup algorithms **/
500 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)501 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
502 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
503 start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
504 }
505 /** parallel_reduce overloads that work with anonymous function objects
506 (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
507
508 //! Parallel iteration with reduction and default partitioner.
509 /** @ingroup algorithms **/
510 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)511 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
512 parallel_reduce_combine<Reduction, Value>)
513 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
514 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
515 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
516 ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
517 return body.result();
518 }
519
520 //! Parallel iteration with reduction and simple_partitioner.
521 /** @ingroup algorithms **/
522 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)523 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
524 parallel_reduce_combine<Reduction, Value>)
525 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
526 const simple_partitioner& partitioner ) {
527 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
528 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
529 ::run(range, body, partitioner );
530 return body.result();
531 }
532
533 //! Parallel iteration with reduction and auto_partitioner
534 /** @ingroup algorithms **/
535 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)536 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
537 parallel_reduce_combine<Reduction, Value>)
538 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
539 const auto_partitioner& partitioner ) {
540 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
541 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
542 ::run( range, body, partitioner );
543 return body.result();
544 }
545
546 //! Parallel iteration with reduction and static_partitioner
547 /** @ingroup algorithms **/
548 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)549 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
550 parallel_reduce_combine<Reduction, Value>)
551 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
552 const static_partitioner& partitioner ) {
553 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
554 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
555 ::run( range, body, partitioner );
556 return body.result();
557 }
558
559 //! Parallel iteration with reduction and affinity_partitioner
560 /** @ingroup algorithms **/
561 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)562 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
563 parallel_reduce_combine<Reduction, Value>)
564 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
565 affinity_partitioner& partitioner ) {
566 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
567 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
568 ::run( range, body, partitioner );
569 return body.result();
570 }
571
572 //! Parallel iteration with reduction, default partitioner and user-supplied context.
573 /** @ingroup algorithms **/
574 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)575 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
576 parallel_reduce_combine<Reduction, Value>)
577 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
578 task_group_context& context ) {
579 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
580 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
581 ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
582 return body.result();
583 }
584
585 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
586 /** @ingroup algorithms **/
587 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)588 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
589 parallel_reduce_combine<Reduction, Value>)
590 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
591 const simple_partitioner& partitioner, task_group_context& context ) {
592 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
593 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
594 ::run( range, body, partitioner, context );
595 return body.result();
596 }
597
598 //! Parallel iteration with reduction, auto_partitioner and user-supplied context
599 /** @ingroup algorithms **/
600 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)601 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
602 parallel_reduce_combine<Reduction, Value>)
603 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
604 const auto_partitioner& partitioner, task_group_context& context ) {
605 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
606 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
607 ::run( range, body, partitioner, context );
608 return body.result();
609 }
610
611 //! Parallel iteration with reduction, static_partitioner and user-supplied context
612 /** @ingroup algorithms **/
613 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)614 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
615 parallel_reduce_combine<Reduction, Value>)
616 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
617 const static_partitioner& partitioner, task_group_context& context ) {
618 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
619 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
620 ::run( range, body, partitioner, context );
621 return body.result();
622 }
623
624 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context
625 /** @ingroup algorithms **/
626 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)627 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
628 parallel_reduce_combine<Reduction, Value>)
629 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
630 affinity_partitioner& partitioner, task_group_context& context ) {
631 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
632 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
633 ::run( range, body, partitioner, context );
634 return body.result();
635 }
636
637 //! Parallel iteration with deterministic reduction and default simple partitioner.
638 /** @ingroup algorithms **/
639 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)640 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
641 void parallel_deterministic_reduce( const Range& range, Body& body ) {
642 start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner());
643 }
644
645 //! Parallel iteration with deterministic reduction and simple partitioner.
646 /** @ingroup algorithms **/
647 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)648 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
649 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
650 start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner);
651 }
652
653 //! Parallel iteration with deterministic reduction and static partitioner.
654 /** @ingroup algorithms **/
655 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)656 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
657 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
658 start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner);
659 }
660
661 //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
662 /** @ingroup algorithms **/
663 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)664 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
665 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
666 start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context );
667 }
668
669 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
670 /** @ingroup algorithms **/
671 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)672 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
673 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
674 start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context);
675 }
676
677 //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
678 /** @ingroup algorithms **/
679 template<typename Range, typename Body>
__TBB_requires(tbb_range<Range> && parallel_reduce_body<Body,Range>)680 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
681 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
682 start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context);
683 }
684
/** parallel_deterministic_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
687
688 //! Parallel iteration with deterministic reduction and default simple partitioner.
689 // TODO: consider making static_partitioner the default
690 /** @ingroup algorithms **/
691 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)692 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
693 parallel_reduce_combine<Reduction, Value>)
694 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
695 return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner());
696 }
697
698 //! Parallel iteration with deterministic reduction and simple partitioner.
699 /** @ingroup algorithms **/
700 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)701 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
702 parallel_reduce_combine<Reduction, Value>)
703 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) {
704 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
705 start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner>
706 ::run(range, body, partitioner);
707 return body.result();
708 }
709
710 //! Parallel iteration with deterministic reduction and static partitioner.
711 /** @ingroup algorithms **/
712 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)713 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
714 parallel_reduce_combine<Reduction, Value>)
715 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) {
716 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
717 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
718 ::run(range, body, partitioner);
719 return body.result();
720 }
721
722 //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
723 /** @ingroup algorithms **/
724 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)725 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
726 parallel_reduce_combine<Reduction, Value>)
727 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
728 task_group_context& context ) {
729 return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context);
730 }
731
732 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
733 /** @ingroup algorithms **/
734 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)735 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
736 parallel_reduce_combine<Reduction, Value>)
737 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
738 const simple_partitioner& partitioner, task_group_context& context ) {
739 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
740 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner>
741 ::run(range, body, partitioner, context);
742 return body.result();
743 }
744
745 //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
746 /** @ingroup algorithms **/
747 template<typename Range, typename Value, typename RealBody, typename Reduction>
__TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody,Range,Value> && parallel_reduce_combine<Reduction,Value>)748 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
749 parallel_reduce_combine<Reduction, Value>)
750 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
751 const static_partitioner& partitioner, task_group_context& context ) {
752 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
753 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
754 ::run(range, body, partitioner, context);
755 return body.result();
756 }
757 //@}
758
759 } // namespace d1
760 } // namespace detail
761
762 inline namespace v1 {
763 using detail::d1::parallel_reduce;
764 using detail::d1::parallel_deterministic_reduce;
765 // Split types
766 using detail::split;
767 using detail::proportional_split;
768 } // namespace v1
769
770 } // namespace tbb
771 #endif /* __TBB_parallel_reduce_H */
772