/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_parallel_reduce_H
#define __TBB_parallel_reduce_H

#include <new>
#include "detail/_namespace_injection.h"
#include "detail/_task.h"
#include "detail/_aligned_space.h"
#include "detail/_small_object_pool.h"
#include "detail/_range_common.h"

#include "task_group.h" // task_group_context
#include "partitioner.h"
#include "profiling.h"

namespace tbb {
namespace detail {
#if __TBB_CPP20_CONCEPTS_PRESENT
inline namespace d0 {

template <typename Body, typename Range>
concept parallel_reduce_body = splittable<Body> &&
                               requires( Body& body, const Range& range, Body& rhs ) {
                                   body(range);
                                   body.join(rhs);
                               };

template <typename Function, typename Range, typename Value>
concept parallel_reduce_function = requires( const std::remove_reference_t<Function>& func,
                                             const Range& range,
                                             const Value& value ) {
    { func(range, value) } -> std::convertible_to<Value>;
};

template <typename Combine, typename Value>
concept parallel_reduce_combine = requires( const std::remove_reference_t<Combine>& combine,
                                            const Value& lhs, const Value& rhs ) {
    { combine(lhs, rhs) } -> std::convertible_to<Value>;
};

} // namespace d0
#endif // __TBB_CPP20_CONCEPTS_PRESENT
namespace d1 {

//! Tree node type for parallel_reduce.
/** @ingroup algorithms */
// TODO: consider folding the tree via bypass execution (instead of manual folding)
//       for better cancellation and critical task handling (performance measurements required).
template<typename Body>
struct reduction_tree_node : public tree_node {
    tbb::detail::aligned_space<Body> zombie_space;
    Body& left_body;
    bool has_right_zombie{false};

    reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
        tree_node{parent, ref_count, alloc},
        left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */
    {}

    void join(task_group_context* context) {
        if (has_right_zombie && !context->is_group_execution_cancelled())
            left_body.join(*zombie_space.begin());
    }

    ~reduction_tree_node() {
        if( has_right_zombie ) zombie_space.begin()->~Body();
    }
};

//! Task type used to split the work of parallel_reduce.
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
struct start_reduce : public task {
    Range my_range;
    Body* my_body;
    node* my_parent;

    typename Partitioner::task_partition_type my_partition;
    small_object_allocator my_allocator;
    bool is_right_child;

    task* execute(execution_data&) override;
    task* cancel(execution_data&) override;
    void finalize(const execution_data&);

    using tree_node_type = reduction_tree_node<Body>;

    //! Constructor for the reduce root task.
    start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
        my_range(range),
        my_body(&body),
        my_parent(nullptr),
        my_partition(partitioner),
        my_allocator(alloc),
        is_right_child(false) {}
    //! Splitting constructor used to generate children.
    /** parent_ becomes left child. Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
        my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
        my_body(parent_.my_body),
        my_parent(nullptr),
        my_partition(parent_.my_partition, split_obj),
        my_allocator(alloc),
        is_right_child(true)
    {
        parent_.is_right_child = false;
    }
    //! Construct right child from the given range in response to the demand.
    /** parent_ remains left child. Newly constructed object is right child. */
    start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
        my_range(r),
        my_body(parent_.my_body),
        my_parent(nullptr),
        my_partition(parent_.my_partition, split()),
        my_allocator(alloc),
        is_right_child(true)
    {
        my_partition.align_depth( d );
        parent_.is_right_child = false;
    }
    static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
        if ( !range.empty() ) {
            wait_node wn;
            small_object_allocator alloc{};
            auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc);
            reduce_task->my_parent = &wn;
            execute_and_wait(*reduce_task, context, wn.m_wait, context);
        }
    }
    static void run(const Range& range, Body& body, Partitioner& partitioner) {
        // The bound context prevents exceptions thrown by the body from affecting nesting or sibling algorithms,
        // and allows users to handle exceptions safely by wrapping parallel_reduce in a try-block.
        task_group_context context(PARALLEL_REDUCE);
        run(range, body, partitioner, context);
    }
    //! Run body for range, serves as callback for partitioner
    void run_body( Range &r ) {
        (*my_body)(r);
    }

    //! Spawn right task, serves as callback for partitioner
    void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
        offer_work_impl(ed, *this, split_obj);
    }
    //! Spawn right task, serves as callback for partitioner
    void offer_work(const Range& r, depth_t d, execution_data& ed) {
        offer_work_impl(ed, *this, r, d);
    }

private:
    template <typename... Args>
    void offer_work_impl(execution_data& ed, Args&&... args) {
        small_object_allocator alloc{};
        // New right child
        auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc);

        // The new root node serves as a continuation and holds the reference count. Left and right child attach to the new parent.
        right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc);

        // Spawn the right sibling
        right_child->spawn_self(ed);
    }

    void spawn_self(execution_data& ed) {
        my_partition.spawn_task(*this, *context(ed));
    }
};

//! Fold the tree and deallocate the task
template<typename Range, typename Body, typename Partitioner>
void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
    // Get the current parent and allocator before the object is destroyed
    node* parent = my_parent;
    auto allocator = my_allocator;
    // Task execution finished - destroy it
    this->~start_reduce();
    // Unwind the tree, decrementing the parent's reference count
    fold_tree<tree_node_type>(parent, ed);
    allocator.deallocate(this, ed);
}

//! Execute parallel_reduce task
template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
    if (!is_same_affinity(ed)) {
        my_partition.note_affinity(execution_slot(ed));
    }
    my_partition.check_being_stolen(*this, ed);

    // The acquire barrier synchronizes the data pointed to by my_body if the left
    // task has already finished.
    __TBB_ASSERT(my_parent, nullptr);
    if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) {
        tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent);
        my_body = static_cast<Body*>(new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()));
        parent_ptr->has_right_zombie = true;
    }
    __TBB_ASSERT(my_body != nullptr, "Incorrect body value");

    my_partition.execute(*this, my_range, ed);

    finalize(ed);
    return nullptr;
}

//! Cancel parallel_reduce task
template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
    finalize(ed);
    return nullptr;
}

//! Tree node type for parallel_deterministic_reduce.
/** @ingroup algorithms */
template<typename Body>
struct deterministic_reduction_tree_node : public tree_node {
    Body right_body;
    Body& left_body;

    deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
        tree_node{parent, ref_count, alloc},
        right_body{input_left_body, detail::split()},
        left_body(input_left_body)
    {}

    void join(task_group_context* context) {
        if (!context->is_group_execution_cancelled())
            left_body.join(right_body);
    }
};

//! Task type used to split the work of parallel_deterministic_reduce.
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
struct start_deterministic_reduce : public task {
    Range my_range;
    Body& my_body;
    node* my_parent;

    typename Partitioner::task_partition_type my_partition;
    small_object_allocator my_allocator;

    task* execute(execution_data&) override;
    task* cancel(execution_data&) override;
    void finalize(const execution_data&);

    using tree_node_type = deterministic_reduction_tree_node<Body>;

    //! Constructor for the deterministic_reduce root task.
    start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) :
        my_range(range),
        my_body(body),
        my_parent(nullptr),
        my_partition(partitioner),
        my_allocator(alloc) {}
    //! Splitting constructor used to generate children.
    /** parent_ becomes left child. Newly constructed object is right child. */
    start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body,
                                small_object_allocator& alloc ) :
        my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
        my_body(body),
        my_parent(nullptr),
        my_partition(parent_.my_partition, split_obj),
        my_allocator(alloc) {}
    static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
        if ( !range.empty() ) {
            wait_node wn;
            small_object_allocator alloc{};
            auto deterministic_reduce_task =
                alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc);
            deterministic_reduce_task->my_parent = &wn;
            execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context);
        }
    }
    static void run(const Range& range, Body& body, Partitioner& partitioner) {
        // The bound context prevents exceptions thrown by the body from affecting nesting or sibling algorithms,
        // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce
        // in a try-block.
        task_group_context context(PARALLEL_REDUCE);
        run(range, body, partitioner, context);
    }
    //! Run body for range, serves as callback for partitioner
    void run_body( Range &r ) {
        my_body( r );
    }
    //! Spawn right task, serves as callback for partitioner
    void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
        offer_work_impl(ed, *this, split_obj);
    }
private:
    template <typename... Args>
    void offer_work_impl(execution_data& ed, Args&&... args) {
        small_object_allocator alloc{};
        // The new root node serves as a continuation and holds the reference count. Left and right child attach to the new parent. Split the body.
        auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc);

        // New right child
        auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc);

        right_child->my_parent = my_parent = new_tree_node;

        // Spawn the right sibling
        right_child->spawn_self(ed);
    }

    void spawn_self(execution_data& ed) {
        my_partition.spawn_task(*this, *context(ed));
    }
};

//! Fold the tree and deallocate the task
template<typename Range, typename Body, typename Partitioner>
void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
    // Get the current parent and allocator before the object is destroyed
    node* parent = my_parent;

    auto allocator = my_allocator;
    // Task execution finished - destroy it
    this->~start_deterministic_reduce();
    // Unwind the tree, decrementing the parent's reference count
    fold_tree<tree_node_type>(parent, ed);
    allocator.deallocate(this, ed);
}

//! Execute parallel_deterministic_reduce task
template<typename Range, typename Body, typename Partitioner>
task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
    if (!is_same_affinity(ed)) {
        my_partition.note_affinity(execution_slot(ed));
    }
    my_partition.check_being_stolen(*this, ed);

    my_partition.execute(*this, my_range, ed);

    finalize(ed);
    return nullptr;
}

//! Cancel parallel_deterministic_reduce task
template<typename Range, typename Body, typename Partitioner>
task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
    finalize(ed);
    return nullptr;
}


//! Auxiliary class for parallel_reduce; for internal use only.
/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
    using the given \ref parallel_reduce_lambda_req "anonymous function objects".
 **/
/** @ingroup algorithms */
template<typename Range, typename Value, typename RealBody, typename Reduction>
class lambda_reduce_body {
    // TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced
    //       (might require some performance measurements)

    const Value& my_identity_element;
    const RealBody& my_real_body;
    const Reduction& my_reduction;
    Value my_value;
    lambda_reduce_body& operator= ( const lambda_reduce_body& other );
public:
    lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
        : my_identity_element(identity)
        , my_real_body(body)
        , my_reduction(reduction)
        , my_value(identity)
    { }
    lambda_reduce_body( const lambda_reduce_body& other ) = default;
    lambda_reduce_body( lambda_reduce_body& other, tbb::split )
        : my_identity_element(other.my_identity_element)
        , my_real_body(other.my_real_body)
        , my_reduction(other.my_reduction)
        , my_value(other.my_identity_element)
    { }
    void operator()(Range& range) {
        my_value = my_real_body(range, const_cast<const Value&>(my_value));
    }
    void join( lambda_reduce_body& rhs ) {
        my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
    }
    Value result() const {
        return my_value;
    }
};


// Requirements on the Range concept are documented in blocked_range.h

/** \page parallel_reduce_body_req Requirements on parallel_reduce body
    Class \c Body implementing the concept of parallel_reduce body must define:
    - \code Body::Body( Body&, split ); \endcode Splitting constructor.
        Must be able to run concurrently with operator() and method \c join
    - \code Body::~Body(); \endcode Destructor
    - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
        and accumulating the result
    - \code void Body::join( Body& b ); \endcode Join results.
        The result in \c b should be merged into the result of \c this
**/

/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    - \code Value RealBody::operator()( const Range& r, const Value& init ) const; \endcode
        Accumulate the result over subrange \c r, starting from the initial value \c init, and return the new value
    - \code Value Reduction::operator()( const Value& x, const Value& y ) const; \endcode
        Combine two partial results and return the combined value
**/

/** \name parallel_reduce
    See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
//@{

//! Parallel iteration with reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body ) {
    start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
}
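
/** Example (illustrative sketch; not part of the library interface).
    A minimal body type satisfying \ref parallel_reduce_body_req "the requirements above", assuming
    tbb::blocked_range is available (oneapi/tbb/blocked_range.h); the names \c SumBody, \c a and \c n
    are hypothetical and used only for illustration:
    \code
    struct SumBody {
        float sum = 0;
        SumBody() = default;
        SumBody( SumBody&, tbb::split ) {}                     // splitting constructor: start a fresh partial sum
        void operator()( const tbb::blocked_range<const float*>& r ) {
            for( const float* p = r.begin(); p != r.end(); ++p )
                sum += *p;                                     // accumulate over the subrange
        }
        void join( SumBody& rhs ) { sum += rhs.sum; }          // merge the right partial sum into this one
    };

    SumBody body;
    tbb::parallel_reduce( tbb::blocked_range<const float*>(a, a + n), body );
    float total = body.sum;
    \endcode
**/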
//! Parallel iteration with reduction and simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
    start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and static_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction and affinity_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
    start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
}

//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
    start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
}

//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}

//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
}
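
/** Example (illustrative sketch; not part of the library interface).
    Running a reduction under a user-supplied context, as accepted by the overloads above. The context
    may be cancelled from other code via \c ctx.cancel_group_execution(), and an exception thrown by
    the body propagates to the caller, so the call may be wrapped in a try-block. \c SumBody refers to
    the hypothetical body type sketched earlier; \c a and \c n are assumed data.
    \code
    tbb::task_group_context ctx;
    SumBody body;
    try {
        tbb::parallel_reduce( tbb::blocked_range<const float*>(a, a + n), body,
                              tbb::auto_partitioner(), ctx );
    } catch( const std::exception& ) {
        // An exception thrown by SumBody::operator() or join() arrives here.
    }
    \endcode
**/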
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
    start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
/** parallel_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/

//! Parallel iteration with reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
        ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
    return body.result();
}

//! Parallel iteration with reduction and simple_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
        ::run(range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
        ::run( range, body, partitioner );
    return body.result();
}
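
/** Example (illustrative sketch; not part of the library interface).
    The functional form above computes a sum without a user-defined body class. The initial value
    \c 0.f serves as the identity element; the first lambda accumulates a subrange starting from
    \c init, and \c std::plus<float>() (from <functional>) combines partial results. \c a and \c n
    are assumed data, and tbb::blocked_range is assumed to be included.
    \code
    float total = tbb::parallel_reduce(
        tbb::blocked_range<const float*>(a, a + n),
        0.f,                                                   // identity element for addition
        []( const tbb::blocked_range<const float*>& r, float init ) {
            for( const float* p = r.begin(); p != r.end(); ++p )
                init += *p;                                    // accumulate over the subrange
            return init;
        },
        std::plus<float>()                                     // combine two partial sums
    );
    \endcode
**/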
//! Parallel iteration with reduction and static_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
        ::run( range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction and affinity_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
        ::run( range, body, partitioner );
    return body.result();
}

//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
        ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
    return body.result();
}

//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const simple_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
        ::run( range, body, partitioner, context );
    return body.result();
}
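
/** Example (illustrative sketch; not part of the library interface).
    The affinity_partitioner overloads take the partitioner by non-const reference because it records
    which threads processed which chunks; reusing the same partitioner object across repeated calls
    over the same data lets later calls replay that placement for better cache locality. \c a, \c n
    and \c steps are assumed data.
    \code
    tbb::affinity_partitioner ap;                              // reused across iterations
    for( int step = 0; step < steps; ++step ) {
        float total = tbb::parallel_reduce(
            tbb::blocked_range<const float*>(a, a + n),
            0.f,
            []( const tbb::blocked_range<const float*>& r, float init ) {
                for( const float* p = r.begin(); p != r.end(); ++p )
                    init += *p;
                return init;
            },
            std::plus<float>(),
            ap );                                              // same partitioner object each time
    }
    \endcode
**/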
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const auto_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
        ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       const static_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
        ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
        ::run( range, body, partitioner, context );
    return body.result();
}

//! Parallel iteration with deterministic reduction and default simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner());
}

//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner);
}
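
/** Example (illustrative sketch; not part of the library interface).
    The deterministic overloads above split the range and join partial results in a fixed order, so a
    floating-point reduction such as the hypothetical \c SumBody sketched earlier performs the same
    rounding (and thus yields the same result) on every run with the same input and grainsize,
    regardless of the number of worker threads. \c a, \c n and \c grainsize are assumed data.
    \code
    SumBody body;
    tbb::parallel_deterministic_reduce(
        tbb::blocked_range<const float*>(a, a + n, grainsize),  // explicit grainsize fixes the split points
        body );                                                 // default simple_partitioner
    float total = body.sum;
    \endcode
**/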
//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
    start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner);
}

//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
    start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context );
}

//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
    start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context);
}

//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
    __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
    start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context);
}

/** parallel_deterministic_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/

//! Parallel iteration with deterministic reduction and default simple partitioner.
// TODO: consider making static_partitioner the default
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner());
}

//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                                     const simple_partitioner& partitioner ) {
    lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner>
        ::run(range, body, partitioner);
    return body.result();
}

//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                                     const static_partitioner& partitioner ) {
    lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
        ::run(range, body, partitioner);
    return body.result();
}

//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                                     task_group_context& context ) {
    return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context);
}

//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                                     const simple_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner>
        ::run(range, body, partitioner, context);
    return body.result();
}

//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
    __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
                   parallel_reduce_combine<Reduction, Value>)
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                                     const static_partitioner& partitioner, task_group_context& context ) {
    lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
    start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
        ::run(range, body, partitioner, context);
    return body.result();
}
//@}

} // namespace d1
} // namespace detail

inline namespace v1 {
using detail::d1::parallel_reduce;
using detail::d1::parallel_deterministic_reduce;
// Split types
using detail::split;
using detail::proportional_split;
} // namespace v1

} // namespace tbb
#endif /* __TBB_parallel_reduce_H */