/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_parallel_reduce_H
#define __TBB_parallel_reduce_H

#include <new>
#include "detail/_namespace_injection.h"
#include "detail/_task.h"
#include "detail/_aligned_space.h"
#include "detail/_small_object_pool.h"
#include "detail/_range_common.h"

#include "task_group.h" // task_group_context
#include "partitioner.h"
#include "profiling.h"

namespace tbb {
namespace detail {
#if __TBB_CPP20_CONCEPTS_PRESENT
inline namespace d0 {

//! Constraint on the imperative-form Body of parallel_reduce.
/** Satisfied when \c Body models \c splittable (declared outside this file) and a non-const
    \c Body can be invoked with a <tt>const Range&</tt> and can \c join another non-const \c Body.
    Return types of both expressions are unconstrained. */
template <typename Body, typename Range>
concept parallel_reduce_body = splittable<Body> &&
                               requires( Body& body, const Range& range, Body& rhs ) {
                                   body(range);
                                   body.join(rhs);
                               };

//! Constraint on the range-processing function object of the functional form.
/** A \c const instance of \c Function (reference stripped) must be callable as
    <tt>func(range, value)</tt> with a <tt>const Range&</tt> and a <tt>const Value&</tt>,
    yielding something convertible to \c Value. */
template <typename Function, typename Range, typename Value>
concept parallel_reduce_function = requires( const std::remove_reference_t<Function>& func,
                                             const Range& range,
                                             const Value& value ) {
    { func(range, value) } -> std::convertible_to<Value>;
};

//! Constraint on the combining function object of the functional form.
/** A \c const instance of \c Combine (reference stripped) must be callable as
    <tt>combine(lhs, rhs)</tt> with two <tt>const Value&</tt> arguments,
    yielding something convertible to \c Value. */
template <typename Combine, typename Value>
concept parallel_reduce_combine = requires( const std::remove_reference_t<Combine>& combine,
                                            const Value& lhs, const Value& rhs ) {
    { combine(lhs, rhs) } -> std::convertible_to<Value>;
};

} // namespace d0
#endif // __TBB_CPP20_CONCEPTS_PRESENT
namespace d1 {

//! Tree node type for parallel_reduce.
61 /** @ingroup algorithms */ 62 //TODO: consider folding tree via bypass execution(instead of manual folding) 63 // for better cancellation and critical tasks handling (performance measurements required). 64 template<typename Body> 65 struct reduction_tree_node : public tree_node { 66 tbb::detail::aligned_space<Body> zombie_space; 67 Body& left_body; 68 bool has_right_zombie{false}; 69 70 reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : 71 tree_node{parent, ref_count, alloc}, 72 left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */ 73 {} 74 75 void join(task_group_context* context) { 76 if (has_right_zombie && !context->is_group_execution_cancelled()) 77 left_body.join(*zombie_space.begin()); 78 } 79 80 ~reduction_tree_node() { 81 if( has_right_zombie ) zombie_space.begin()->~Body(); 82 } 83 }; 84 85 //! Task type used to split the work of parallel_reduce. 86 /** @ingroup algorithms */ 87 template<typename Range, typename Body, typename Partitioner> 88 struct start_reduce : public task { 89 Range my_range; 90 Body* my_body; 91 node* my_parent; 92 93 typename Partitioner::task_partition_type my_partition; 94 small_object_allocator my_allocator; 95 bool is_right_child; 96 97 task* execute(execution_data&) override; 98 task* cancel(execution_data&) override; 99 void finalize(const execution_data&); 100 101 using tree_node_type = reduction_tree_node<Body>; 102 103 //! Constructor reduce root task. 104 start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : 105 my_range(range), 106 my_body(&body), 107 my_partition(partitioner), 108 my_allocator(alloc), 109 is_right_child(false) {} 110 //! Splitting constructor used to generate children. 111 /** parent_ becomes left child. Newly constructed object is right child. 
*/ 112 start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : 113 my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), 114 my_body(parent_.my_body), 115 my_partition(parent_.my_partition, split_obj), 116 my_allocator(alloc), 117 is_right_child(true) 118 { 119 parent_.is_right_child = false; 120 } 121 //! Construct right child from the given range as response to the demand. 122 /** parent_ remains left child. Newly constructed object is right child. */ 123 start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : 124 my_range(r), 125 my_body(parent_.my_body), 126 my_partition(parent_.my_partition, split()), 127 my_allocator(alloc), 128 is_right_child(true) 129 { 130 my_partition.align_depth( d ); 131 parent_.is_right_child = false; 132 } 133 static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { 134 if ( !range.empty() ) { 135 wait_node wn; 136 small_object_allocator alloc{}; 137 auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc); 138 reduce_task->my_parent = &wn; 139 execute_and_wait(*reduce_task, context, wn.m_wait, context); 140 } 141 } 142 static void run(const Range& range, Body& body, Partitioner& partitioner) { 143 // Bound context prevents exceptions from body to affect nesting or sibling algorithms, 144 // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. 145 task_group_context context(PARALLEL_REDUCE); 146 run(range, body, partitioner, context); 147 } 148 //! Run body for range, serves as callback for partitioner 149 void run_body( Range &r ) { 150 (*my_body)(r); 151 } 152 153 //! spawn right task, serves as callback for partitioner 154 void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { 155 offer_work_impl(ed, *this, split_obj); 156 } 157 //! 
spawn right task, serves as callback for partitioner 158 void offer_work(const Range& r, depth_t d, execution_data& ed) { 159 offer_work_impl(ed, *this, r, d); 160 } 161 162 private: 163 template <typename... Args> 164 void offer_work_impl(execution_data& ed, Args&&... args) { 165 small_object_allocator alloc{}; 166 // New right child 167 auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc); 168 169 // New root node as a continuation and ref count. Left and right child attach to the new parent. 170 right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc); 171 172 // Spawn the right sibling 173 right_child->spawn_self(ed); 174 } 175 176 void spawn_self(execution_data& ed) { 177 my_partition.spawn_task(*this, *context(ed)); 178 } 179 }; 180 181 //! fold the tree and deallocate the task 182 template<typename Range, typename Body, typename Partitioner> 183 void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { 184 // Get the current parent and wait object before an object destruction 185 node* parent = my_parent; 186 auto allocator = my_allocator; 187 // Task execution finished - destroy it 188 this->~start_reduce(); 189 // Unwind the tree decrementing the parent`s reference count 190 fold_tree<tree_node_type>(parent, ed); 191 allocator.deallocate(this, ed); 192 } 193 194 //! Execute parallel_reduce task 195 template<typename Range, typename Body, typename Partitioner> 196 task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { 197 if (!is_same_affinity(ed)) { 198 my_partition.note_affinity(execution_slot(ed)); 199 } 200 my_partition.check_being_stolen(*this, ed); 201 202 // The acquire barrier synchronizes the data pointed with my_body if the left 203 // task has already finished. 
204 if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) { 205 tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); 206 my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); 207 parent_ptr->has_right_zombie = true; 208 } 209 __TBB_ASSERT(my_body != nullptr, "Incorrect body value"); 210 211 my_partition.execute(*this, my_range, ed); 212 213 finalize(ed); 214 return nullptr; 215 } 216 217 //! Cancel parallel_reduce task 218 template<typename Range, typename Body, typename Partitioner> 219 task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { 220 finalize(ed); 221 return nullptr; 222 } 223 224 //! Tree node type for parallel_deterministic_reduce. 225 /** @ingroup algorithms */ 226 template<typename Body> 227 struct deterministic_reduction_tree_node : public tree_node { 228 Body right_body; 229 Body& left_body; 230 231 deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : 232 tree_node{parent, ref_count, alloc}, 233 right_body{input_left_body, detail::split()}, 234 left_body(input_left_body) 235 {} 236 237 void join(task_group_context* context) { 238 if (!context->is_group_execution_cancelled()) 239 left_body.join(right_body); 240 } 241 }; 242 243 //! Task type used to split the work of parallel_deterministic_reduce. 244 /** @ingroup algorithms */ 245 template<typename Range, typename Body, typename Partitioner> 246 struct start_deterministic_reduce : public task { 247 Range my_range; 248 Body& my_body; 249 node* my_parent; 250 251 typename Partitioner::task_partition_type my_partition; 252 small_object_allocator my_allocator; 253 254 task* execute(execution_data&) override; 255 task* cancel(execution_data&) override; 256 void finalize(const execution_data&); 257 258 using tree_node_type = deterministic_reduction_tree_node<Body>; 259 260 //! Constructor deterministic_reduce root task. 
261 start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : 262 my_range(range), 263 my_body(body), 264 my_partition(partitioner), 265 my_allocator(alloc) {} 266 //! Splitting constructor used to generate children. 267 /** parent_ becomes left child. Newly constructed object is right child. */ 268 start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, 269 small_object_allocator& alloc ) : 270 my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), 271 my_body(body), 272 my_partition(parent_.my_partition, split_obj), 273 my_allocator(alloc) {} 274 static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { 275 if ( !range.empty() ) { 276 wait_node wn; 277 small_object_allocator alloc{}; 278 auto deterministic_reduce_task = 279 alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc); 280 deterministic_reduce_task->my_parent = &wn; 281 execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context); 282 } 283 } 284 static void run(const Range& range, Body& body, Partitioner& partitioner) { 285 // Bound context prevents exceptions from body to affect nesting or sibling algorithms, 286 // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce 287 // in the try-block. 288 task_group_context context(PARALLEL_REDUCE); 289 run(range, body, partitioner, context); 290 } 291 //! Run body for range, serves as callback for partitioner 292 void run_body( Range &r ) { 293 my_body( r ); 294 } 295 //! Spawn right task, serves as callback for partitioner 296 void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { 297 offer_work_impl(ed, *this, split_obj); 298 } 299 private: 300 template <typename... Args> 301 void offer_work_impl(execution_data& ed, Args&&... 
args) { 302 small_object_allocator alloc{}; 303 // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body. 304 auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc); 305 306 // New right child 307 auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc); 308 309 right_child->my_parent = my_parent = new_tree_node; 310 311 // Spawn the right sibling 312 right_child->spawn_self(ed); 313 } 314 315 void spawn_self(execution_data& ed) { 316 my_partition.spawn_task(*this, *context(ed)); 317 } 318 }; 319 320 //! Fold the tree and deallocate the task 321 template<typename Range, typename Body, typename Partitioner> 322 void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { 323 // Get the current parent and wait object before an object destruction 324 node* parent = my_parent; 325 326 auto allocator = my_allocator; 327 // Task execution finished - destroy it 328 this->~start_deterministic_reduce(); 329 // Unwind the tree decrementing the parent`s reference count 330 fold_tree<tree_node_type>(parent, ed); 331 allocator.deallocate(this, ed); 332 } 333 334 //! Execute parallel_deterministic_reduce task 335 template<typename Range, typename Body, typename Partitioner> 336 task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { 337 if (!is_same_affinity(ed)) { 338 my_partition.note_affinity(execution_slot(ed)); 339 } 340 my_partition.check_being_stolen(*this, ed); 341 342 my_partition.execute(*this, my_range, ed); 343 344 finalize(ed); 345 return NULL; 346 } 347 348 //! Cancel parallel_deterministic_reduce task 349 template<typename Range, typename Body, typename Partitioner> 350 task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { 351 finalize(ed); 352 return NULL; 353 } 354 355 356 //! 
Auxiliary class for parallel_reduce; for internal use only. 357 /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" 358 using given \ref parallel_reduce_lambda_req "anonymous function objects". 359 **/ 360 /** @ingroup algorithms */ 361 template<typename Range, typename Value, typename RealBody, typename Reduction> 362 class lambda_reduce_body { 363 //TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced 364 // (might require some performance measurements) 365 366 const Value& my_identity_element; 367 const RealBody& my_real_body; 368 const Reduction& my_reduction; 369 Value my_value; 370 lambda_reduce_body& operator= ( const lambda_reduce_body& other ); 371 public: 372 lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) 373 : my_identity_element(identity) 374 , my_real_body(body) 375 , my_reduction(reduction) 376 , my_value(identity) 377 { } 378 lambda_reduce_body( const lambda_reduce_body& other ) = default; 379 lambda_reduce_body( lambda_reduce_body& other, tbb::split ) 380 : my_identity_element(other.my_identity_element) 381 , my_real_body(other.my_real_body) 382 , my_reduction(other.my_reduction) 383 , my_value(other.my_identity_element) 384 { } 385 void operator()(Range& range) { 386 my_value = my_real_body(range, const_cast<const Value&>(my_value)); 387 } 388 void join( lambda_reduce_body& rhs ) { 389 my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); 390 } 391 Value result() const { 392 return my_value; 393 } 394 }; 395 396 397 // Requirements on Range concept are documented in blocked_range.h 398 399 /** \page parallel_reduce_body_req Requirements on parallel_reduce body 400 Class \c Body implementing the concept of parallel_reduce body must define: 401 - \code Body::Body( Body&, split ); \endcode Splitting constructor. 
402 Must be able to run concurrently with operator() and method \c join 403 - \code Body::~Body(); \endcode Destructor 404 - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r 405 and accumulating the result 406 - \code void Body::join( Body& b ); \endcode Join results. 407 The result in \c b should be merged into the result of \c this 408 **/ 409 410 /** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions) 411 TO BE DOCUMENTED 412 **/ 413 414 /** \name parallel_reduce 415 See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ 416 //@{ 417 418 //! Parallel iteration with reduction and default partitioner. 419 /** @ingroup algorithms **/ 420 template<typename Range, typename Body> 421 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 422 void parallel_reduce( const Range& range, Body& body ) { 423 start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() ); 424 } 425 426 //! Parallel iteration with reduction and simple_partitioner 427 /** @ingroup algorithms **/ 428 template<typename Range, typename Body> 429 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 430 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { 431 start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner ); 432 } 433 434 //! Parallel iteration with reduction and auto_partitioner 435 /** @ingroup algorithms **/ 436 template<typename Range, typename Body> 437 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 438 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { 439 start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner ); 440 } 441 442 //! 
Parallel iteration with reduction and static_partitioner 443 /** @ingroup algorithms **/ 444 template<typename Range, typename Body> 445 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 446 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { 447 start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner ); 448 } 449 450 //! Parallel iteration with reduction and affinity_partitioner 451 /** @ingroup algorithms **/ 452 template<typename Range, typename Body> 453 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 454 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { 455 start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner ); 456 } 457 458 //! Parallel iteration with reduction, default partitioner and user-supplied context. 459 /** @ingroup algorithms **/ 460 template<typename Range, typename Body> 461 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 462 void parallel_reduce( const Range& range, Body& body, task_group_context& context ) { 463 start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); 464 } 465 466 //! Parallel iteration with reduction, simple partitioner and user-supplied context. 467 /** @ingroup algorithms **/ 468 template<typename Range, typename Body> 469 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 470 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { 471 start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context ); 472 } 473 474 //! 
Parallel iteration with reduction, auto_partitioner and user-supplied context 475 /** @ingroup algorithms **/ 476 template<typename Range, typename Body> 477 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 478 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { 479 start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context ); 480 } 481 482 //! Parallel iteration with reduction, static_partitioner and user-supplied context 483 /** @ingroup algorithms **/ 484 template<typename Range, typename Body> 485 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 486 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { 487 start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context ); 488 } 489 490 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context 491 /** @ingroup algorithms **/ 492 template<typename Range, typename Body> 493 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 494 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { 495 start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context ); 496 } 497 /** parallel_reduce overloads that work with anonymous function objects 498 (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ 499 500 //! Parallel iteration with reduction and default partitioner. 
501 /** @ingroup algorithms **/ 502 template<typename Range, typename Value, typename RealBody, typename Reduction> 503 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 504 parallel_reduce_combine<Reduction, Value>) 505 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { 506 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 507 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> 508 ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); 509 return body.result(); 510 } 511 512 //! Parallel iteration with reduction and simple_partitioner. 513 /** @ingroup algorithms **/ 514 template<typename Range, typename Value, typename RealBody, typename Reduction> 515 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 516 parallel_reduce_combine<Reduction, Value>) 517 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 518 const simple_partitioner& partitioner ) { 519 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 520 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> 521 ::run(range, body, partitioner ); 522 return body.result(); 523 } 524 525 //! 
Parallel iteration with reduction and auto_partitioner 526 /** @ingroup algorithms **/ 527 template<typename Range, typename Value, typename RealBody, typename Reduction> 528 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 529 parallel_reduce_combine<Reduction, Value>) 530 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 531 const auto_partitioner& partitioner ) { 532 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 533 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> 534 ::run( range, body, partitioner ); 535 return body.result(); 536 } 537 538 //! Parallel iteration with reduction and static_partitioner 539 /** @ingroup algorithms **/ 540 template<typename Range, typename Value, typename RealBody, typename Reduction> 541 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 542 parallel_reduce_combine<Reduction, Value>) 543 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 544 const static_partitioner& partitioner ) { 545 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 546 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> 547 ::run( range, body, partitioner ); 548 return body.result(); 549 } 550 551 //! 
Parallel iteration with reduction and affinity_partitioner 552 /** @ingroup algorithms **/ 553 template<typename Range, typename Value, typename RealBody, typename Reduction> 554 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 555 parallel_reduce_combine<Reduction, Value>) 556 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 557 affinity_partitioner& partitioner ) { 558 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 559 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> 560 ::run( range, body, partitioner ); 561 return body.result(); 562 } 563 564 //! Parallel iteration with reduction, default partitioner and user-supplied context. 565 /** @ingroup algorithms **/ 566 template<typename Range, typename Value, typename RealBody, typename Reduction> 567 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 568 parallel_reduce_combine<Reduction, Value>) 569 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 570 task_group_context& context ) { 571 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 572 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> 573 ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); 574 return body.result(); 575 } 576 577 //! Parallel iteration with reduction, simple partitioner and user-supplied context. 
578 /** @ingroup algorithms **/ 579 template<typename Range, typename Value, typename RealBody, typename Reduction> 580 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 581 parallel_reduce_combine<Reduction, Value>) 582 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 583 const simple_partitioner& partitioner, task_group_context& context ) { 584 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 585 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> 586 ::run( range, body, partitioner, context ); 587 return body.result(); 588 } 589 590 //! Parallel iteration with reduction, auto_partitioner and user-supplied context 591 /** @ingroup algorithms **/ 592 template<typename Range, typename Value, typename RealBody, typename Reduction> 593 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 594 parallel_reduce_combine<Reduction, Value>) 595 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 596 const auto_partitioner& partitioner, task_group_context& context ) { 597 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 598 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> 599 ::run( range, body, partitioner, context ); 600 return body.result(); 601 } 602 603 //! 
Parallel iteration with reduction, static_partitioner and user-supplied context 604 /** @ingroup algorithms **/ 605 template<typename Range, typename Value, typename RealBody, typename Reduction> 606 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 607 parallel_reduce_combine<Reduction, Value>) 608 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 609 const static_partitioner& partitioner, task_group_context& context ) { 610 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 611 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> 612 ::run( range, body, partitioner, context ); 613 return body.result(); 614 } 615 616 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context 617 /** @ingroup algorithms **/ 618 template<typename Range, typename Value, typename RealBody, typename Reduction> 619 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 620 parallel_reduce_combine<Reduction, Value>) 621 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 622 affinity_partitioner& partitioner, task_group_context& context ) { 623 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 624 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> 625 ::run( range, body, partitioner, context ); 626 return body.result(); 627 } 628 629 //! Parallel iteration with deterministic reduction and default simple partitioner. 
630 /** @ingroup algorithms **/ 631 template<typename Range, typename Body> 632 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 633 void parallel_deterministic_reduce( const Range& range, Body& body ) { 634 start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner()); 635 } 636 637 //! Parallel iteration with deterministic reduction and simple partitioner. 638 /** @ingroup algorithms **/ 639 template<typename Range, typename Body> 640 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 641 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { 642 start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner); 643 } 644 645 //! Parallel iteration with deterministic reduction and static partitioner. 646 /** @ingroup algorithms **/ 647 template<typename Range, typename Body> 648 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 649 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { 650 start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); 651 } 652 653 //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. 654 /** @ingroup algorithms **/ 655 template<typename Range, typename Body> 656 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 657 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { 658 start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); 659 } 660 661 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. 
662 /** @ingroup algorithms **/ 663 template<typename Range, typename Body> 664 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 665 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { 666 start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); 667 } 668 669 //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. 670 /** @ingroup algorithms **/ 671 template<typename Range, typename Body> 672 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 673 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { 674 start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); 675 } 676 677 /** parallel_reduce overloads that work with anonymous function objects 678 (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ 679 680 //! Parallel iteration with deterministic reduction and default simple partitioner. 681 // TODO: consider making static_partitioner the default 682 /** @ingroup algorithms **/ 683 template<typename Range, typename Value, typename RealBody, typename Reduction> 684 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 685 parallel_reduce_combine<Reduction, Value>) 686 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { 687 return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); 688 } 689 690 //! Parallel iteration with deterministic reduction and simple partitioner. 
691 /** @ingroup algorithms **/ 692 template<typename Range, typename Value, typename RealBody, typename Reduction> 693 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 694 parallel_reduce_combine<Reduction, Value>) 695 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { 696 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 697 start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> 698 ::run(range, body, partitioner); 699 return body.result(); 700 } 701 702 //! Parallel iteration with deterministic reduction and static partitioner. 703 /** @ingroup algorithms **/ 704 template<typename Range, typename Value, typename RealBody, typename Reduction> 705 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 706 parallel_reduce_combine<Reduction, Value>) 707 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { 708 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); 709 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> 710 ::run(range, body, partitioner); 711 return body.result(); 712 } 713 714 //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. 
715 /** @ingroup algorithms **/ 716 template<typename Range, typename Value, typename RealBody, typename Reduction> 717 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 718 parallel_reduce_combine<Reduction, Value>) 719 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 720 task_group_context& context ) { 721 return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); 722 } 723 724 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. 725 /** @ingroup algorithms **/ 726 template<typename Range, typename Value, typename RealBody, typename Reduction> 727 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 728 parallel_reduce_combine<Reduction, Value>) 729 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 730 const simple_partitioner& partitioner, task_group_context& context ) { 731 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); 732 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> 733 ::run(range, body, partitioner, context); 734 return body.result(); 735 } 736 737 //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. 
738 /** @ingroup algorithms **/ 739 template<typename Range, typename Value, typename RealBody, typename Reduction> 740 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 741 parallel_reduce_combine<Reduction, Value>) 742 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 743 const static_partitioner& partitioner, task_group_context& context ) { 744 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); 745 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> 746 ::run(range, body, partitioner, context); 747 return body.result(); 748 } 749 //@} 750 751 } // namespace d1 752 } // namespace detail 753 754 inline namespace v1 { 755 using detail::d1::parallel_reduce; 756 using detail::d1::parallel_deterministic_reduce; 757 // Split types 758 using detail::split; 759 using detail::proportional_split; 760 } // namespace v1 761 762 } // namespace tbb 763 #endif /* __TBB_parallel_reduce_H */ 764