1 /* 2 Copyright (c) 2005-2023 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #ifndef __TBB_parallel_reduce_H 18 #define __TBB_parallel_reduce_H 19 20 #include <new> 21 #include "detail/_namespace_injection.h" 22 #include "detail/_task.h" 23 #include "detail/_aligned_space.h" 24 #include "detail/_small_object_pool.h" 25 #include "detail/_range_common.h" 26 27 #include "task_group.h" // task_group_context 28 #include "partitioner.h" 29 #include "profiling.h" 30 31 namespace tbb { 32 namespace detail { 33 #if __TBB_CPP20_CONCEPTS_PRESENT 34 inline namespace d0 { 35 36 template <typename Body, typename Range> 37 concept parallel_reduce_body = splittable<Body> && 38 requires( Body& body, const Range& range, Body& rhs ) { 39 body(range); 40 body.join(rhs); 41 }; 42 43 template <typename Function, typename Range, typename Value> 44 concept parallel_reduce_function = std::invocable<const std::remove_reference_t<Function>&, 45 const Range&, const Value&> && 46 std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Function>&, 47 const Range&, const Value&>, 48 Value>; 49 50 template <typename Combine, typename Value> 51 concept parallel_reduce_combine = std::invocable<const std::remove_reference_t<Combine>&, 52 const Value&, const Value&> && 53 std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Combine>&, 54 const Value&, const Value&>, 55 Value>; 56 57 } // namespace d0 58 #endif // __TBB_CPP20_CONCEPTS_PRESENT 59 namespace d1 { 60 61 //! Tree node type for parallel_reduce. 62 /** @ingroup algorithms */ 63 //TODO: consider folding tree via bypass execution(instead of manual folding) 64 // for better cancellation and critical tasks handling (performance measurements required). 65 template<typename Body> 66 struct reduction_tree_node : public tree_node { 67 tbb::detail::aligned_space<Body> zombie_space; 68 Body& left_body; 69 bool has_right_zombie{false}; 70 71 reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : 72 tree_node{parent, ref_count, alloc}, 73 left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */ 74 {} 75 76 void join(task_group_context* context) { 77 if (has_right_zombie && !context->is_group_execution_cancelled()) 78 left_body.join(*zombie_space.begin()); 79 } 80 81 ~reduction_tree_node() { 82 if( has_right_zombie ) zombie_space.begin()->~Body(); 83 } 84 }; 85 86 //! Task type used to split the work of parallel_reduce. 87 /** @ingroup algorithms */ 88 template<typename Range, typename Body, typename Partitioner> 89 struct start_reduce : public task { 90 Range my_range; 91 Body* my_body; 92 node* my_parent; 93 94 typename Partitioner::task_partition_type my_partition; 95 small_object_allocator my_allocator; 96 bool is_right_child; 97 98 task* execute(execution_data&) override; 99 task* cancel(execution_data&) override; 100 void finalize(const execution_data&); 101 102 using tree_node_type = reduction_tree_node<Body>; 103 104 //! Constructor reduce root task. 105 start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : 106 my_range(range), 107 my_body(&body), 108 my_parent(nullptr), 109 my_partition(partitioner), 110 my_allocator(alloc), 111 is_right_child(false) {} 112 //! Splitting constructor used to generate children. 113 /** parent_ becomes left child. Newly constructed object is right child. */ 114 start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : 115 my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), 116 my_body(parent_.my_body), 117 my_parent(nullptr), 118 my_partition(parent_.my_partition, split_obj), 119 my_allocator(alloc), 120 is_right_child(true) 121 { 122 parent_.is_right_child = false; 123 } 124 //! Construct right child from the given range as response to the demand. 125 /** parent_ remains left child. Newly constructed object is right child. */ 126 start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : 127 my_range(r), 128 my_body(parent_.my_body), 129 my_parent(nullptr), 130 my_partition(parent_.my_partition, split()), 131 my_allocator(alloc), 132 is_right_child(true) 133 { 134 my_partition.align_depth( d ); 135 parent_.is_right_child = false; 136 } 137 static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { 138 if ( !range.empty() ) { 139 wait_node wn; 140 small_object_allocator alloc{}; 141 auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc); 142 reduce_task->my_parent = &wn; 143 execute_and_wait(*reduce_task, context, wn.m_wait, context); 144 } 145 } 146 static void run(const Range& range, Body& body, Partitioner& partitioner) { 147 // Bound context prevents exceptions from body to affect nesting or sibling algorithms, 148 // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. 149 task_group_context context(PARALLEL_REDUCE); 150 run(range, body, partitioner, context); 151 } 152 //! Run body for range, serves as callback for partitioner 153 void run_body( Range &r ) { 154 tbb::detail::invoke(*my_body, r); 155 } 156 157 //! spawn right task, serves as callback for partitioner 158 void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { 159 offer_work_impl(ed, *this, split_obj); 160 } 161 //! spawn right task, serves as callback for partitioner 162 void offer_work(const Range& r, depth_t d, execution_data& ed) { 163 offer_work_impl(ed, *this, r, d); 164 } 165 166 private: 167 template <typename... Args> 168 void offer_work_impl(execution_data& ed, Args&&... args) { 169 small_object_allocator alloc{}; 170 // New right child 171 auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc); 172 173 // New root node as a continuation and ref count. Left and right child attach to the new parent. 174 right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc); 175 176 // Spawn the right sibling 177 right_child->spawn_self(ed); 178 } 179 180 void spawn_self(execution_data& ed) { 181 my_partition.spawn_task(*this, *context(ed)); 182 } 183 }; 184 185 //! fold the tree and deallocate the task 186 template<typename Range, typename Body, typename Partitioner> 187 void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { 188 // Get the current parent and wait object before an object destruction 189 node* parent = my_parent; 190 auto allocator = my_allocator; 191 // Task execution finished - destroy it 192 this->~start_reduce(); 193 // Unwind the tree decrementing the parent`s reference count 194 fold_tree<tree_node_type>(parent, ed); 195 allocator.deallocate(this, ed); 196 } 197 198 //! Execute parallel_reduce task 199 template<typename Range, typename Body, typename Partitioner> 200 task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { 201 if (!is_same_affinity(ed)) { 202 my_partition.note_affinity(execution_slot(ed)); 203 } 204 my_partition.check_being_stolen(*this, ed); 205 206 // The acquire barrier synchronizes the data pointed with my_body if the left 207 // task has already finished. 208 __TBB_ASSERT(my_parent, nullptr); 209 if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) { 210 tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); 211 my_body = static_cast<Body*>(new( parent_ptr->zombie_space.begin() ) Body(*my_body, split())); 212 parent_ptr->has_right_zombie = true; 213 } 214 __TBB_ASSERT(my_body != nullptr, "Incorrect body value"); 215 216 my_partition.execute(*this, my_range, ed); 217 218 finalize(ed); 219 return nullptr; 220 } 221 222 //! Cancel parallel_reduce task 223 template<typename Range, typename Body, typename Partitioner> 224 task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { 225 finalize(ed); 226 return nullptr; 227 } 228 229 //! Tree node type for parallel_deterministic_reduce. 230 /** @ingroup algorithms */ 231 template<typename Body> 232 struct deterministic_reduction_tree_node : public tree_node { 233 Body right_body; 234 Body& left_body; 235 236 deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : 237 tree_node{parent, ref_count, alloc}, 238 right_body{input_left_body, detail::split()}, 239 left_body(input_left_body) 240 {} 241 242 void join(task_group_context* context) { 243 if (!context->is_group_execution_cancelled()) 244 left_body.join(right_body); 245 } 246 }; 247 248 //! Task type used to split the work of parallel_deterministic_reduce. 249 /** @ingroup algorithms */ 250 template<typename Range, typename Body, typename Partitioner> 251 struct start_deterministic_reduce : public task { 252 Range my_range; 253 Body& my_body; 254 node* my_parent; 255 256 typename Partitioner::task_partition_type my_partition; 257 small_object_allocator my_allocator; 258 259 task* execute(execution_data&) override; 260 task* cancel(execution_data&) override; 261 void finalize(const execution_data&); 262 263 using tree_node_type = deterministic_reduction_tree_node<Body>; 264 265 //! Constructor deterministic_reduce root task. 266 start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : 267 my_range(range), 268 my_body(body), 269 my_parent(nullptr), 270 my_partition(partitioner), 271 my_allocator(alloc) {} 272 //! Splitting constructor used to generate children. 273 /** parent_ becomes left child. Newly constructed object is right child. */ 274 start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, 275 small_object_allocator& alloc ) : 276 my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), 277 my_body(body), 278 my_parent(nullptr), 279 my_partition(parent_.my_partition, split_obj), 280 my_allocator(alloc) {} 281 static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { 282 if ( !range.empty() ) { 283 wait_node wn; 284 small_object_allocator alloc{}; 285 auto deterministic_reduce_task = 286 alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc); 287 deterministic_reduce_task->my_parent = &wn; 288 execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context); 289 } 290 } 291 static void run(const Range& range, Body& body, Partitioner& partitioner) { 292 // Bound context prevents exceptions from body to affect nesting or sibling algorithms, 293 // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce 294 // in the try-block. 295 task_group_context context(PARALLEL_REDUCE); 296 run(range, body, partitioner, context); 297 } 298 //! Run body for range, serves as callback for partitioner 299 void run_body( Range &r ) { 300 tbb::detail::invoke(my_body, r); 301 } 302 //! Spawn right task, serves as callback for partitioner 303 void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { 304 offer_work_impl(ed, *this, split_obj); 305 } 306 private: 307 template <typename... Args> 308 void offer_work_impl(execution_data& ed, Args&&... args) { 309 small_object_allocator alloc{}; 310 // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body. 311 auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc); 312 313 // New right child 314 auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc); 315 316 right_child->my_parent = my_parent = new_tree_node; 317 318 // Spawn the right sibling 319 right_child->spawn_self(ed); 320 } 321 322 void spawn_self(execution_data& ed) { 323 my_partition.spawn_task(*this, *context(ed)); 324 } 325 }; 326 327 //! Fold the tree and deallocate the task 328 template<typename Range, typename Body, typename Partitioner> 329 void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { 330 // Get the current parent and wait object before an object destruction 331 node* parent = my_parent; 332 333 auto allocator = my_allocator; 334 // Task execution finished - destroy it 335 this->~start_deterministic_reduce(); 336 // Unwind the tree decrementing the parent`s reference count 337 fold_tree<tree_node_type>(parent, ed); 338 allocator.deallocate(this, ed); 339 } 340 341 //! Execute parallel_deterministic_reduce task 342 template<typename Range, typename Body, typename Partitioner> 343 task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { 344 if (!is_same_affinity(ed)) { 345 my_partition.note_affinity(execution_slot(ed)); 346 } 347 my_partition.check_being_stolen(*this, ed); 348 349 my_partition.execute(*this, my_range, ed); 350 351 finalize(ed); 352 return nullptr; 353 } 354 355 //! Cancel parallel_deterministic_reduce task 356 template<typename Range, typename Body, typename Partitioner> 357 task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { 358 finalize(ed); 359 return nullptr; 360 } 361 362 363 //! Auxiliary class for parallel_reduce; for internal use only. 364 /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" 365 using given \ref parallel_reduce_lambda_req "anonymous function objects". 366 **/ 367 /** @ingroup algorithms */ 368 template<typename Range, typename Value, typename RealBody, typename Reduction> 369 class lambda_reduce_body { 370 //TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced 371 // (might require some performance measurements) 372 373 const Value& my_identity_element; 374 const RealBody& my_real_body; 375 const Reduction& my_reduction; 376 Value my_value; 377 lambda_reduce_body& operator= ( const lambda_reduce_body& other ); 378 public: 379 lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) 380 : my_identity_element(identity) 381 , my_real_body(body) 382 , my_reduction(reduction) 383 , my_value(identity) 384 { } 385 lambda_reduce_body( const lambda_reduce_body& other ) = default; 386 lambda_reduce_body( lambda_reduce_body& other, tbb::split ) 387 : my_identity_element(other.my_identity_element) 388 , my_real_body(other.my_real_body) 389 , my_reduction(other.my_reduction) 390 , my_value(other.my_identity_element) 391 { } 392 void operator()(Range& range) { 393 my_value = tbb::detail::invoke(my_real_body, range, const_cast<const Value&>(my_value)); 394 } 395 void join( lambda_reduce_body& rhs ) { 396 my_value = tbb::detail::invoke(my_reduction, const_cast<const Value&>(my_value), 397 const_cast<const Value&>(rhs.my_value)); 398 } 399 Value result() const { 400 return my_value; 401 } 402 }; 403 404 405 // Requirements on Range concept are documented in blocked_range.h 406 407 /** \page parallel_reduce_body_req Requirements on parallel_reduce body 408 Class \c Body implementing the concept of parallel_reduce body must define: 409 - \code Body::Body( Body&, split ); \endcode Splitting constructor. 410 Must be able to run concurrently with operator() and method \c join 411 - \code Body::~Body(); \endcode Destructor 412 - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r 413 and accumulating the result 414 - \code void Body::join( Body& b ); \endcode Join results. 415 The result in \c b should be merged into the result of \c this 416 **/ 417 418 /** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions) 419 TO BE DOCUMENTED 420 **/ 421 422 /** \name parallel_reduce 423 See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ 424 //@{ 425 426 //! Parallel iteration with reduction and default partitioner. 427 /** @ingroup algorithms **/ 428 template<typename Range, typename Body> 429 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 430 void parallel_reduce( const Range& range, Body& body ) { 431 start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() ); 432 } 433 434 //! Parallel iteration with reduction and simple_partitioner 435 /** @ingroup algorithms **/ 436 template<typename Range, typename Body> 437 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 438 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { 439 start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner ); 440 } 441 442 //! Parallel iteration with reduction and auto_partitioner 443 /** @ingroup algorithms **/ 444 template<typename Range, typename Body> 445 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 446 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { 447 start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner ); 448 } 449 450 //! Parallel iteration with reduction and static_partitioner 451 /** @ingroup algorithms **/ 452 template<typename Range, typename Body> 453 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 454 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { 455 start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner ); 456 } 457 458 //! Parallel iteration with reduction and affinity_partitioner 459 /** @ingroup algorithms **/ 460 template<typename Range, typename Body> 461 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 462 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { 463 start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner ); 464 } 465 466 //! Parallel iteration with reduction, default partitioner and user-supplied context. 467 /** @ingroup algorithms **/ 468 template<typename Range, typename Body> 469 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 470 void parallel_reduce( const Range& range, Body& body, task_group_context& context ) { 471 start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); 472 } 473 474 //! Parallel iteration with reduction, simple partitioner and user-supplied context. 475 /** @ingroup algorithms **/ 476 template<typename Range, typename Body> 477 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 478 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { 479 start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context ); 480 } 481 482 //! Parallel iteration with reduction, auto_partitioner and user-supplied context 483 /** @ingroup algorithms **/ 484 template<typename Range, typename Body> 485 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 486 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { 487 start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context ); 488 } 489 490 //! Parallel iteration with reduction, static_partitioner and user-supplied context 491 /** @ingroup algorithms **/ 492 template<typename Range, typename Body> 493 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 494 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { 495 start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context ); 496 } 497 498 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context 499 /** @ingroup algorithms **/ 500 template<typename Range, typename Body> 501 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 502 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { 503 start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context ); 504 } 505 /** parallel_reduce overloads that work with anonymous function objects 506 (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ 507 508 //! Parallel iteration with reduction and default partitioner. 509 /** @ingroup algorithms **/ 510 template<typename Range, typename Value, typename RealBody, typename Reduction> 511 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 512 parallel_reduce_combine<Reduction, Value>) 513 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { 514 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 515 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> 516 ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); 517 return body.result(); 518 } 519 520 //! Parallel iteration with reduction and simple_partitioner. 521 /** @ingroup algorithms **/ 522 template<typename Range, typename Value, typename RealBody, typename Reduction> 523 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 524 parallel_reduce_combine<Reduction, Value>) 525 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 526 const simple_partitioner& partitioner ) { 527 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 528 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> 529 ::run(range, body, partitioner ); 530 return body.result(); 531 } 532 533 //! Parallel iteration with reduction and auto_partitioner 534 /** @ingroup algorithms **/ 535 template<typename Range, typename Value, typename RealBody, typename Reduction> 536 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 537 parallel_reduce_combine<Reduction, Value>) 538 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 539 const auto_partitioner& partitioner ) { 540 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 541 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> 542 ::run( range, body, partitioner ); 543 return body.result(); 544 } 545 546 //! Parallel iteration with reduction and static_partitioner 547 /** @ingroup algorithms **/ 548 template<typename Range, typename Value, typename RealBody, typename Reduction> 549 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 550 parallel_reduce_combine<Reduction, Value>) 551 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 552 const static_partitioner& partitioner ) { 553 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 554 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> 555 ::run( range, body, partitioner ); 556 return body.result(); 557 } 558 559 //! Parallel iteration with reduction and affinity_partitioner 560 /** @ingroup algorithms **/ 561 template<typename Range, typename Value, typename RealBody, typename Reduction> 562 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 563 parallel_reduce_combine<Reduction, Value>) 564 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 565 affinity_partitioner& partitioner ) { 566 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 567 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> 568 ::run( range, body, partitioner ); 569 return body.result(); 570 } 571 572 //! Parallel iteration with reduction, default partitioner and user-supplied context. 573 /** @ingroup algorithms **/ 574 template<typename Range, typename Value, typename RealBody, typename Reduction> 575 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 576 parallel_reduce_combine<Reduction, Value>) 577 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 578 task_group_context& context ) { 579 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 580 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> 581 ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); 582 return body.result(); 583 } 584 585 //! Parallel iteration with reduction, simple partitioner and user-supplied context. 586 /** @ingroup algorithms **/ 587 template<typename Range, typename Value, typename RealBody, typename Reduction> 588 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 589 parallel_reduce_combine<Reduction, Value>) 590 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 591 const simple_partitioner& partitioner, task_group_context& context ) { 592 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 593 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> 594 ::run( range, body, partitioner, context ); 595 return body.result(); 596 } 597 598 //! Parallel iteration with reduction, auto_partitioner and user-supplied context 599 /** @ingroup algorithms **/ 600 template<typename Range, typename Value, typename RealBody, typename Reduction> 601 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 602 parallel_reduce_combine<Reduction, Value>) 603 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 604 const auto_partitioner& partitioner, task_group_context& context ) { 605 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 606 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> 607 ::run( range, body, partitioner, context ); 608 return body.result(); 609 } 610 611 //! Parallel iteration with reduction, static_partitioner and user-supplied context 612 /** @ingroup algorithms **/ 613 template<typename Range, typename Value, typename RealBody, typename Reduction> 614 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 615 parallel_reduce_combine<Reduction, Value>) 616 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 617 const static_partitioner& partitioner, task_group_context& context ) { 618 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 619 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> 620 ::run( range, body, partitioner, context ); 621 return body.result(); 622 } 623 624 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context 625 /** @ingroup algorithms **/ 626 template<typename Range, typename Value, typename RealBody, typename Reduction> 627 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 628 parallel_reduce_combine<Reduction, Value>) 629 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 630 affinity_partitioner& partitioner, task_group_context& context ) { 631 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 632 start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> 633 ::run( range, body, partitioner, context ); 634 return body.result(); 635 } 636 637 //! Parallel iteration with deterministic reduction and default simple partitioner. 638 /** @ingroup algorithms **/ 639 template<typename Range, typename Body> 640 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 641 void parallel_deterministic_reduce( const Range& range, Body& body ) { 642 start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner()); 643 } 644 645 //! Parallel iteration with deterministic reduction and simple partitioner. 646 /** @ingroup algorithms **/ 647 template<typename Range, typename Body> 648 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 649 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { 650 start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner); 651 } 652 653 //! Parallel iteration with deterministic reduction and static partitioner. 654 /** @ingroup algorithms **/ 655 template<typename Range, typename Body> 656 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 657 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { 658 start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); 659 } 660 661 //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. 662 /** @ingroup algorithms **/ 663 template<typename Range, typename Body> 664 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 665 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { 666 start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); 667 } 668 669 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. 670 /** @ingroup algorithms **/ 671 template<typename Range, typename Body> 672 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 673 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { 674 start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); 675 } 676 677 //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. 678 /** @ingroup algorithms **/ 679 template<typename Range, typename Body> 680 __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>) 681 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { 682 start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); 683 } 684 685 /** parallel_reduce overloads that work with anonymous function objects 686 (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ 687 688 //! Parallel iteration with deterministic reduction and default simple partitioner. 689 // TODO: consider making static_partitioner the default 690 /** @ingroup algorithms **/ 691 template<typename Range, typename Value, typename RealBody, typename Reduction> 692 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 693 parallel_reduce_combine<Reduction, Value>) 694 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { 695 return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); 696 } 697 698 //! Parallel iteration with deterministic reduction and simple partitioner. 699 /** @ingroup algorithms **/ 700 template<typename Range, typename Value, typename RealBody, typename Reduction> 701 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 702 parallel_reduce_combine<Reduction, Value>) 703 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { 704 lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); 705 start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> 706 ::run(range, body, partitioner); 707 return body.result(); 708 } 709 710 //! Parallel iteration with deterministic reduction and static partitioner. 711 /** @ingroup algorithms **/ 712 template<typename Range, typename Value, typename RealBody, typename Reduction> 713 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 714 parallel_reduce_combine<Reduction, Value>) 715 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { 716 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); 717 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> 718 ::run(range, body, partitioner); 719 return body.result(); 720 } 721 722 //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. 723 /** @ingroup algorithms **/ 724 template<typename Range, typename Value, typename RealBody, typename Reduction> 725 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 726 parallel_reduce_combine<Reduction, Value>) 727 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 728 task_group_context& context ) { 729 return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); 730 } 731 732 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. 733 /** @ingroup algorithms **/ 734 template<typename Range, typename Value, typename RealBody, typename Reduction> 735 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 736 parallel_reduce_combine<Reduction, Value>) 737 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 738 const simple_partitioner& partitioner, task_group_context& context ) { 739 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); 740 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> 741 ::run(range, body, partitioner, context); 742 return body.result(); 743 } 744 745 //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. 746 /** @ingroup algorithms **/ 747 template<typename Range, typename Value, typename RealBody, typename Reduction> 748 __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && 749 parallel_reduce_combine<Reduction, Value>) 750 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, 751 const static_partitioner& partitioner, task_group_context& context ) { 752 lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); 753 start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> 754 ::run(range, body, partitioner, context); 755 return body.result(); 756 } 757 //@} 758 759 } // namespace d1 760 } // namespace detail 761 762 inline namespace v1 { 763 using detail::d1::parallel_reduce; 764 using detail::d1::parallel_deterministic_reduce; 765 // Split types 766 using detail::split; 767 using detail::proportional_split; 768 } // namespace v1 769 770 } // namespace tbb 771 #endif /* __TBB_parallel_reduce_H */ 772