/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_parallel_scan_H
#define __TBB_parallel_scan_H

#include <functional>

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_exception.h"
#include "detail/_task.h"

#include "profiling.h"
#include "partitioner.h"
#include "blocked_range.h"
#include "task_group.h"

namespace tbb {
namespace detail {
namespace d1 {

//! Used to indicate that the initial scan is being performed.
/** @ingroup algorithms */
struct pre_scan_tag {
    static bool is_final_scan() {return false;}
    operator bool() {return is_final_scan();}
};

//! Used to indicate that the final scan is being performed.
/** @ingroup algorithms */
struct final_scan_tag {
    static bool is_final_scan() {return true;}
    operator bool() {return is_final_scan();}
};
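
// Both tags convert to bool (false for the pre-scan pass, true for the final
// pass), so a body's templated operator() can branch on Tag::is_final_scan()
// or on the tag value itself. A minimal sketch, assuming a hypothetical body
// with members my_sum, my_x, and my_y:
//
//     template<typename Tag>
//     void operator()( const blocked_range<int>& r, Tag tag ) {
//         for( int i = r.begin(); i != r.end(); ++i ) {
//             my_sum += my_x[i];
//             if( tag )               // equivalent to Tag::is_final_scan()
//                 my_y[i] = my_sum;   // output is written only on the final pass
//         }
//     }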

template<typename Range, typename Body>
struct sum_node;

#if __TBB_CPP20_CONCEPTS_PRESENT
} // namespace d1
namespace d0 {

template <typename Body, typename Range>
concept parallel_scan_body = splittable<Body> &&
                             requires( Body& body, const Range& range, Body& other ) {
                                 body(range, tbb::detail::d1::pre_scan_tag{});
                                 body(range, tbb::detail::d1::final_scan_tag{});
                                 body.reverse_join(other);
                                 body.assign(other);
                             };

template <typename Function, typename Range, typename Value>
concept parallel_scan_function = std::invocable<const std::remove_reference_t<Function>&,
                                                const Range&, const Value&, bool> &&
                                 std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Function>&,
                                                                          const Range&, const Value&, bool>,
                                                     Value>;

template <typename Combine, typename Value>
concept parallel_scan_combine = std::invocable<const std::remove_reference_t<Combine>&,
                                               const Value&, const Value&> &&
                                std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Combine>&,
                                                                         const Value&, const Value&>,
                                                    Value>;

} // namespace d0
namespace d1 {
#endif // __TBB_CPP20_CONCEPTS_PRESENT

//! Performs final scan for a leaf
/** @ingroup algorithms */
template<typename Range, typename Body>
struct final_sum : public task {
private:
    using sum_node_type = sum_node<Range, Body>;
    Body m_body;
    aligned_space<Range> m_range;
    //! Where to put result of last subrange, or nullptr if not last subrange.
    Body* m_stuff_last;

    wait_context& m_wait_context;
    sum_node_type* m_parent = nullptr;
public:
    small_object_allocator m_allocator;
    final_sum( Body& body, wait_context& w_o, small_object_allocator& alloc ) :
        m_body(body, split()), m_wait_context(w_o), m_allocator(alloc) {
        poison_pointer(m_stuff_last);
    }

    final_sum( final_sum& sum, small_object_allocator& alloc ) :
        m_body(sum.m_body, split()), m_wait_context(sum.m_wait_context), m_allocator(alloc) {
        poison_pointer(m_stuff_last);
    }

    ~final_sum() {
        m_range.begin()->~Range();
    }
    void finish_construction( sum_node_type* parent, const Range& range, Body* stuff_last ) {
        __TBB_ASSERT( m_parent == nullptr, nullptr );
        m_parent = parent;
        new( m_range.begin() ) Range(range);
        m_stuff_last = stuff_last;
    }
private:
    sum_node_type* release_parent() {
        call_itt_task_notify(releasing, m_parent);
        if (m_parent) {
            auto parent = m_parent;
            m_parent = nullptr;
            if (parent->ref_count.fetch_sub(1) == 1) {
                return parent;
            }
        }
        else
            m_wait_context.release();
        return nullptr;
    }
    sum_node_type* finalize(const execution_data& ed){
        sum_node_type* next_task = release_parent();
        m_allocator.delete_object<final_sum>(this, ed);
        return next_task;
    }

public:
    task* execute(execution_data& ed) override {
        m_body( *m_range.begin(), final_scan_tag() );
        if( m_stuff_last )
            m_stuff_last->assign(m_body);

        return finalize(ed);
    }
    task* cancel(execution_data& ed) override {
        return finalize(ed);
    }
    template<typename Tag>
    void operator()( const Range& r, Tag tag ) {
        m_body( r, tag );
    }
    void reverse_join( final_sum& a ) {
        m_body.reverse_join(a.m_body);
    }
    void reverse_join( Body& body ) {
        m_body.reverse_join(body);
    }
    void assign_to( Body& body ) {
        body.assign(m_body);
    }
    void self_destroy(const execution_data& ed) {
        m_allocator.delete_object<final_sum>(this, ed);
    }
};

//! Split work to be done in the scan.
/** @ingroup algorithms */
template<typename Range, typename Body>
struct sum_node : public task {
private:
    using final_sum_type = final_sum<Range,Body>;
public:
    final_sum_type *m_incoming;
    final_sum_type *m_body;
    Body *m_stuff_last;
private:
    final_sum_type *m_left_sum;
    sum_node *m_left;
    sum_node *m_right;
    bool m_left_is_final;
    Range m_range;
    wait_context& m_wait_context;
    sum_node* m_parent;
    small_object_allocator m_allocator;
public:
    std::atomic<unsigned int> ref_count{0};
    sum_node( const Range range, bool left_is_final_, sum_node* parent, wait_context& w_o, small_object_allocator& alloc ) :
        m_stuff_last(nullptr),
        m_left_sum(nullptr),
        m_left(nullptr),
        m_right(nullptr),
        m_left_is_final(left_is_final_),
        m_range(range),
        m_wait_context(w_o),
        m_parent(parent),
        m_allocator(alloc)
    {
        if( m_parent )
            m_parent->ref_count.fetch_add(1);
        // Poison fields that will be set by second pass.
        poison_pointer(m_body);
        poison_pointer(m_incoming);
    }

    ~sum_node() {
        if (m_parent)
            m_parent->ref_count.fetch_sub(1);
    }
private:
    sum_node* release_parent() {
        call_itt_task_notify(releasing, m_parent);
        if (m_parent) {
            auto parent = m_parent;
            m_parent = nullptr;
            if (parent->ref_count.fetch_sub(1) == 1) {
                return parent;
            }
        }
        else
            m_wait_context.release();
        return nullptr;
    }
    task* create_child( const Range& range, final_sum_type& body, sum_node* child, final_sum_type* incoming, Body* stuff_last ) {
        if( child ) {
            __TBB_ASSERT( is_poisoned(child->m_body) && is_poisoned(child->m_incoming), nullptr );
            child->prepare_for_execution(body, incoming, stuff_last);
            return child;
        } else {
            body.finish_construction(this, range, stuff_last);
            return &body;
        }
    }

    sum_node* finalize(const execution_data& ed) {
        sum_node* next_task = release_parent();
        m_allocator.delete_object<sum_node>(this, ed);
        return next_task;
    }

public:
    void prepare_for_execution(final_sum_type& body, final_sum_type* incoming, Body *stuff_last) {
        this->m_body = &body;
        this->m_incoming = incoming;
        this->m_stuff_last = stuff_last;
    }
    task* execute(execution_data& ed) override {
        if( m_body ) {
            if( m_incoming )
                m_left_sum->reverse_join( *m_incoming );
            task* right_child = this->create_child(Range(m_range,split()), *m_left_sum, m_right, m_left_sum, m_stuff_last);
            task* left_child = m_left_is_final ? nullptr : this->create_child(m_range, *m_body, m_left, m_incoming, nullptr);
            ref_count = (left_child != nullptr) + (right_child != nullptr);
            m_body = nullptr;
            if( left_child ) {
                spawn(*right_child, *ed.context);
                return left_child;
            } else {
                return right_child;
            }
        } else {
            return finalize(ed);
        }
    }
    task* cancel(execution_data& ed) override {
        return finalize(ed);
    }
    void self_destroy(const execution_data& ed) {
        m_allocator.delete_object<sum_node>(this, ed);
    }
    template<typename range,typename body,typename partitioner>
    friend struct start_scan;

    template<typename range,typename body>
    friend struct finish_scan;
};

//! Combine partial results
/** @ingroup algorithms */
template<typename Range, typename Body>
struct finish_scan : public task {
private:
    using sum_node_type = sum_node<Range,Body>;
    using final_sum_type = final_sum<Range,Body>;
    final_sum_type** const m_sum_slot;
    sum_node_type*& m_return_slot;
    small_object_allocator m_allocator;
public:
    std::atomic<final_sum_type*> m_right_zombie;
    sum_node_type& m_result;
    std::atomic<unsigned int> ref_count{2};
    finish_scan* m_parent;
    wait_context& m_wait_context;
    task* execute(execution_data& ed) override {
        __TBB_ASSERT( m_result.ref_count.load() == static_cast<unsigned int>((m_result.m_left!=nullptr)+(m_result.m_right!=nullptr)), nullptr );
        if( m_result.m_left )
            m_result.m_left_is_final = false;
        final_sum_type* right_zombie = m_right_zombie.load(std::memory_order_acquire);
        if( right_zombie && m_sum_slot )
            (*m_sum_slot)->reverse_join(*m_result.m_left_sum);
        __TBB_ASSERT( !m_return_slot, nullptr );
        if( right_zombie || m_result.m_right ) {
            m_return_slot = &m_result;
        } else {
            m_result.self_destroy(ed);
        }
        if( right_zombie && !m_sum_slot && !m_result.m_right ) {
            right_zombie->self_destroy(ed);
            m_right_zombie.store(nullptr, std::memory_order_relaxed);
        }
        return finalize(ed);
    }
    task* cancel(execution_data& ed) override {
        return finalize(ed);
    }
    finish_scan(sum_node_type*& return_slot, final_sum_type** sum, sum_node_type& result_, finish_scan* parent, wait_context& w_o, small_object_allocator& alloc) :
        m_sum_slot(sum),
        m_return_slot(return_slot),
        m_allocator(alloc),
        m_right_zombie(nullptr),
        m_result(result_),
        m_parent(parent),
        m_wait_context(w_o)
    {
        __TBB_ASSERT( !m_return_slot, nullptr );
    }
private:
    finish_scan* release_parent() {
        call_itt_task_notify(releasing, m_parent);
        if (m_parent) {
            auto parent = m_parent;
            m_parent = nullptr;
            if (parent->ref_count.fetch_sub(1) == 1) {
                return parent;
            }
        }
        else
            m_wait_context.release();
        return nullptr;
    }
    finish_scan* finalize(const execution_data& ed) {
        finish_scan* next_task = release_parent();
        m_allocator.delete_object<finish_scan>(this, ed);
        return next_task;
    }
};

//! Initial task to split the work
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
struct start_scan : public task {
private:
    using sum_node_type = sum_node<Range,Body>;
    using final_sum_type = final_sum<Range,Body>;
    using finish_pass1_type = finish_scan<Range,Body>;
    std::reference_wrapper<sum_node_type*> m_return_slot;
    Range m_range;
    std::reference_wrapper<final_sum_type> m_body;
    typename Partitioner::partition_type m_partition;
    /** Non-null if caller is requesting total. */
    final_sum_type** m_sum_slot;
    bool m_is_final;
    bool m_is_right_child;

    finish_pass1_type* m_parent;
    small_object_allocator m_allocator;
    wait_context& m_wait_context;

    finish_pass1_type* release_parent() {
        call_itt_task_notify(releasing, m_parent);
        if (m_parent) {
            auto parent = m_parent;
            m_parent = nullptr;
            if (parent->ref_count.fetch_sub(1) == 1) {
                return parent;
            }
        }
        else
            m_wait_context.release();
        return nullptr;
    }

    finish_pass1_type* finalize( const execution_data& ed ) {
        finish_pass1_type* next_task = release_parent();
        m_allocator.delete_object<start_scan>(this, ed);
        return next_task;
    }

public:
    task* execute( execution_data& ) override;
    task* cancel( execution_data& ed ) override {
        return finalize(ed);
    }
    start_scan( sum_node_type*& return_slot, start_scan& parent, small_object_allocator& alloc ) :
        m_return_slot(return_slot),
        m_range(parent.m_range,split()),
        m_body(parent.m_body),
        m_partition(parent.m_partition,split()),
        m_sum_slot(parent.m_sum_slot),
        m_is_final(parent.m_is_final),
        m_is_right_child(true),
        m_parent(parent.m_parent),
        m_allocator(alloc),
        m_wait_context(parent.m_wait_context)
    {
        __TBB_ASSERT( !m_return_slot, nullptr );
        parent.m_is_right_child = false;
    }

    start_scan( sum_node_type*& return_slot, const Range& range, final_sum_type& body, const Partitioner& partitioner, wait_context& w_o, small_object_allocator& alloc ) :
        m_return_slot(return_slot),
        m_range(range),
        m_body(body),
        m_partition(partitioner),
        m_sum_slot(nullptr),
        m_is_final(true),
        m_is_right_child(false),
        m_parent(nullptr),
        m_allocator(alloc),
        m_wait_context(w_o)
    {
        __TBB_ASSERT( !m_return_slot, nullptr );
    }

    static void run( const Range& range, Body& body, const Partitioner& partitioner ) {
        if( !range.empty() ) {
            task_group_context context(PARALLEL_SCAN);

            using start_pass1_type = start_scan<Range,Body,Partitioner>;
            sum_node_type* root = nullptr;
            wait_context w_ctx{1};
            small_object_allocator alloc{};

            auto& temp_body = *alloc.new_object<final_sum_type>(body, w_ctx, alloc);
            temp_body.reverse_join(body);

            auto& pass1 = *alloc.new_object<start_pass1_type>(/*m_return_slot=*/root, range, temp_body, partitioner, w_ctx, alloc);

            execute_and_wait(pass1, context, w_ctx, context);
            if( root ) {
                root->prepare_for_execution(temp_body, nullptr, &body);
                w_ctx.reserve();
                execute_and_wait(*root, context, w_ctx, context);
            } else {
                temp_body.assign_to(body);
                temp_body.finish_construction(nullptr, range, nullptr);
                alloc.delete_object<final_sum_type>(&temp_body);
            }
        }
    }
};

template<typename Range, typename Body, typename Partitioner>
task* start_scan<Range,Body,Partitioner>::execute( execution_data& ed ) {
    // Inspecting m_parent->result.left_sum would ordinarily be a race condition.
    // But we inspect it only if we are not a stolen task, in which case we
    // know that task assigning to m_parent->result.left_sum has completed.
    __TBB_ASSERT(!m_is_right_child || m_parent, "right child is never an orphan");
    bool treat_as_stolen = m_is_right_child && (is_stolen(ed) || &m_body.get()!=m_parent->m_result.m_left_sum);
    if( treat_as_stolen ) {
        // Invocation is for right child that has been really stolen or needs to be virtually stolen
        small_object_allocator alloc{};
        final_sum_type* right_zombie = alloc.new_object<final_sum_type>(m_body, alloc);
        m_parent->m_right_zombie.store(right_zombie, std::memory_order_release);
        m_body = *right_zombie;
        m_is_final = false;
    }
    task* next_task = nullptr;
    if( (m_is_right_child && !treat_as_stolen) || !m_range.is_divisible() || m_partition.should_execute_range(ed) ) {
        if( m_is_final )
            m_body(m_range, final_scan_tag());
        else if( m_sum_slot )
            m_body(m_range, pre_scan_tag());
        if( m_sum_slot )
            *m_sum_slot = &m_body.get();
        __TBB_ASSERT( !m_return_slot, nullptr );

        next_task = finalize(ed);
    } else {
        small_object_allocator alloc{};
        auto result = alloc.new_object<sum_node_type>(m_range,/*m_left_is_final=*/m_is_final, m_parent? &m_parent->m_result: nullptr, m_wait_context, alloc);

        auto new_parent = alloc.new_object<finish_pass1_type>(m_return_slot, m_sum_slot, *result, m_parent, m_wait_context, alloc);
        m_parent = new_parent;

        // Split off right child
        auto& right_child = *alloc.new_object<start_scan>(/*m_return_slot=*/result->m_right, *this, alloc);

        spawn(right_child, *ed.context);

        m_sum_slot = &result->m_left_sum;
        m_return_slot = result->m_left;

        __TBB_ASSERT( !m_return_slot, nullptr );
        next_task = this;
    }
    return next_task;
}

template<typename Range, typename Value, typename Scan, typename ReverseJoin>
class lambda_scan_body {
    Value m_sum_slot;
    const Value& identity_element;
    const Scan& m_scan;
    const ReverseJoin& m_reverse_join;
public:
    void operator=(const lambda_scan_body&) = delete;
    lambda_scan_body(const lambda_scan_body&) = default;

    lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join )
        : m_sum_slot(identity)
        , identity_element(identity)
        , m_scan(scan)
        , m_reverse_join(rev_join) {}

    lambda_scan_body( lambda_scan_body& b, split )
        : m_sum_slot(b.identity_element)
        , identity_element(b.identity_element)
        , m_scan(b.m_scan)
        , m_reverse_join(b.m_reverse_join) {}

    template<typename Tag>
    void operator()( const Range& r, Tag tag ) {
        m_sum_slot = tbb::detail::invoke(m_scan, r, m_sum_slot, tag);
    }

    void reverse_join( lambda_scan_body& a ) {
        m_sum_slot = tbb::detail::invoke(m_reverse_join, a.m_sum_slot, m_sum_slot);
    }

    void assign( lambda_scan_body& b ) {
        m_sum_slot = b.m_sum_slot;
    }

    Value result() const {
        return m_sum_slot;
    }
};

// Requirements on Range concept are documented in blocked_range.h

/** \page parallel_scan_body_req Requirements on parallel_scan body
    Class \c Body implementing the concept of parallel_scan body must define:
    - \code Body::Body( Body&, split ); \endcode    Splitting constructor.
                                                    Split \c b so that \c this and \c b can accumulate separately
    - \code Body::~Body(); \endcode                 Destructor
    - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode
                                                    Preprocess iterations for range \c r
    - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode
                                                    Do final processing for iterations of range \c r
    - \code void Body::reverse_join( Body& a ); \endcode
                                                    Merge preprocessing state of \c a into \c this, where \c a was
                                                    created earlier from \c b by b's splitting constructor
    - \code void Body::assign( Body& b ); \endcode  Assign state of \c b to \c this
**/
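
/** An illustrative sketch of such a body, not part of the library: it computes
    a running sum of \c x into \c y. The class name \c running_sum_body and its
    members are hypothetical.
    \code
    class running_sum_body {
        const int* x;
        int* y;
        int sum;
    public:
        running_sum_body( const int x_[], int y_[] ) : x(x_), y(y_), sum(0) {}
        running_sum_body( running_sum_body& b, split ) : x(b.x), y(b.y), sum(0) {}
        template<typename Tag>
        void operator()( const blocked_range<std::size_t>& r, Tag ) {
            int temp = sum;
            for( std::size_t i = r.begin(); i != r.end(); ++i ) {
                temp += x[i];
                if( Tag::is_final_scan() )
                    y[i] = temp;   // results are written only on the final pass
            }
            sum = temp;
        }
        void reverse_join( running_sum_body& a ) { sum = a.sum + sum; }
        void assign( running_sum_body& b ) { sum = b.sum; }
    };
    \endcode
**/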

/** \name parallel_scan
    See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/
//@{

//! Parallel prefix with default partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_scan_body<Body, Range>)
void parallel_scan( const Range& range, Body& body ) {
    start_scan<Range, Body, auto_partitioner>::run(range,body,__TBB_DEFAULT_PARTITIONER());
}
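
// A usage sketch for the overload above, assuming the illustrative
// running_sum_body from the requirements page and arrays x and y of length n:
//
//     running_sum_body body(x, y);
//     tbb::parallel_scan( tbb::blocked_range<std::size_t>(0, n), body );
//     // y now holds the running sums of x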

//! Parallel prefix with simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_scan_body<Body, Range>)
void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    start_scan<Range, Body, simple_partitioner>::run(range, body, partitioner);
}

//! Parallel prefix with auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_scan_body<Body, Range>)
void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
    start_scan<Range,Body,auto_partitioner>::run(range, body, partitioner);
}

//! Parallel prefix with default partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename Scan, typename ReverseJoin>
    __TBB_requires(tbb_range<Range> && parallel_scan_function<Scan, Range, Value> &&
                   parallel_scan_combine<ReverseJoin, Value>)
Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
    lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
    parallel_scan(range, body, __TBB_DEFAULT_PARTITIONER());
    return body.result();
}
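
// A usage sketch of the functional form above: an in-place running sum over a
// hypothetical std::vector<int> v, returning the grand total. The Scan callable
// receives the tag as a bool via the tags' operator bool():
//
//     int total = tbb::parallel_scan(
//         tbb::blocked_range<std::size_t>(0, v.size()), 0,
//         [&v]( const tbb::blocked_range<std::size_t>& r, int sum, bool is_final_scan ) {
//             for( std::size_t i = r.begin(); i != r.end(); ++i ) {
//                 sum += v[i];
//                 if( is_final_scan )
//                     v[i] = sum;
//             }
//             return sum;
//         },
//         []( int left, int right ) { return left + right; } );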

//! Parallel prefix with simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename Scan, typename ReverseJoin>
    __TBB_requires(tbb_range<Range> && parallel_scan_function<Scan, Range, Value> &&
                   parallel_scan_combine<ReverseJoin, Value>)
Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join,
                     const simple_partitioner& partitioner ) {
    lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
    parallel_scan(range, body, partitioner);
    return body.result();
}

//! Parallel prefix with auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename Scan, typename ReverseJoin>
    __TBB_requires(tbb_range<Range> && parallel_scan_function<Scan, Range, Value> &&
                   parallel_scan_combine<ReverseJoin, Value>)
Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join,
                     const auto_partitioner& partitioner ) {
    lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
    parallel_scan(range, body, partitioner);
    return body.result();
}
//@}

} // namespace d1
} // namespace detail

inline namespace v1 {
    using detail::d1::parallel_scan;
    using detail::d1::pre_scan_tag;
    using detail::d1::final_scan_tag;
} // namespace v1

} // namespace tbb

#endif /* __TBB_parallel_scan_H */