/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/
16 
#ifndef __TBB_parallel_for_each_H
#define __TBB_parallel_for_each_H

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_exception.h"
#include "detail/_task.h"
#include "detail/_aligned_space.h"
#include "detail/_small_object_pool.h"
#include "detail/_utils.h"

#include "parallel_for.h"
#include "task_group.h" // task_group_context

#include <iterator>
#include <type_traits>
namespace tbb {
namespace detail {
#if __TBB_CPP20_CONCEPTS_PRESENT
namespace d1 {
template <typename Item>
class feeder;

} // namespace d1
inline namespace d0 {

//! Constrains the user-supplied body of parallel_for_each: it must be invocable
//! on an item either alone, or together with a feeder for adding extra work.
template <typename Body, typename ItemType, typename FeederItemType>
concept parallel_for_each_body = std::invocable<const std::remove_reference_t<Body>&, ItemType&&> ||
                                 std::invocable<const std::remove_reference_t<Body>&, ItemType&&, tbb::detail::d1::feeder<FeederItemType>&>;

} // namespace d0
#endif // __TBB_CPP20_CONCEPTS_PRESENT
namespace d2 {
template<typename Body, typename Item> class feeder_impl;
} // namespace d2

namespace d1 {
55 //! Class the user supplied algorithm body uses to add new tasks
56 template<typename Item>
57 class feeder {
feeder()58     feeder() {}
59     feeder(const feeder&) = delete;
60     void operator=( const feeder&) = delete;
61 
~feeder()62     virtual ~feeder () {}
63     virtual void internal_add_copy(const Item& item) = 0;
64     virtual void internal_add_move(Item&& item) = 0;
65 
66     template<typename Body_, typename Item_> friend class d2::feeder_impl;
67 public:
68     //! Add a work item to a running parallel_for_each.
add(const Item & item)69     void add(const Item& item) {internal_add_copy(item);}
add(Item && item)70     void add(Item&& item) {internal_add_move(std::move(item));}
71 };
72 
} // namespace d1

namespace d2 {
using namespace tbb::detail::d1;
77 /** Selects one of the two possible forms of function call member operator.
78     @ingroup algorithms **/
79 template<class Body>
80 struct parallel_for_each_operator_selector {
81 public:
82     template<typename ItemArg, typename FeederArg>
83     static auto call(const Body& body, ItemArg&& item, FeederArg*)
84     -> decltype(tbb::detail::invoke(body, std::forward<ItemArg>(item)), void()) {
85         #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
86         // Suppression of Microsoft non-standard extension warnings
87         #pragma warning (push)
88         #pragma warning (disable: 4239)
89         #endif
90 
91         tbb::detail::invoke(body, std::forward<ItemArg>(item));
92 
93         #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
94         #pragma warning (pop)
95         #endif
96     }
97 
98     template<typename ItemArg, typename FeederArg>
99     static auto call(const Body& body, ItemArg&& item, FeederArg* feeder)
100     -> decltype(tbb::detail::invoke(body, std::forward<ItemArg>(item), *feeder), void()) {
101         #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
102         // Suppression of Microsoft non-standard extension warnings
103         #pragma warning (push)
104         #pragma warning (disable: 4239)
105         #endif
106         __TBB_ASSERT(feeder, "Feeder was not created but should be");
107 
108         tbb::detail::invoke(body, std::forward<ItemArg>(item), *feeder);
109 
110         #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
111         #pragma warning (pop)
112         #endif
113     }
114 };
115 
116 template<typename Body, typename Item>
117 struct feeder_item_task: public task {
118     using feeder_type = feeder_impl<Body, Item>;
119 
120     template <typename ItemType>
feeder_item_taskfeeder_item_task121     feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) :
122         item(std::forward<ItemType>(input_item)),
123         my_feeder(feeder),
124         my_allocator(alloc)
125     {}
126 
finalizefeeder_item_task127     void finalize(const execution_data& ed) {
128         my_feeder.my_wait_context.release();
129         my_allocator.delete_object(this, ed);
130     }
131 
132     //! Hack for resolve ambiguity between calls to the body with and without moving the stored copy
133     //! Executing body with moving the copy should have higher priority
134     using first_priority = int;
135     using second_priority = double;
136 
137     template <typename BodyType, typename ItemType, typename FeederType>
138     static auto call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, first_priority)
139     -> decltype(parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder), void())
140     {
141         parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder);
142     }
143 
144     template <typename BodyType, typename ItemType, typename FeederType>
callfeeder_item_task145     static void call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, second_priority) {
146         parallel_for_each_operator_selector<Body>::call(call_body, call_item, &call_feeder);
147     }
148 
executefeeder_item_task149     task* execute(execution_data& ed) override {
150         call(my_feeder.my_body, item, my_feeder, first_priority{});
151         finalize(ed);
152         return nullptr;
153     }
154 
cancelfeeder_item_task155     task* cancel(execution_data& ed) override {
156         finalize(ed);
157         return nullptr;
158     }
159 
160     Item item;
161     feeder_type& my_feeder;
162     small_object_allocator my_allocator;
163 }; // class feeder_item_task
164 
165 /** Implements new task adding procedure.
166     @ingroup algorithms **/
167 template<typename Body, typename Item>
168 class feeder_impl : public feeder<Item> {
169     // Avoiding use of copy constructor in a virtual method if the type does not support it
internal_add_copy_impl(std::true_type,const Item & item)170     void internal_add_copy_impl(std::true_type, const Item& item) {
171         using feeder_task = feeder_item_task<Body, Item>;
172         small_object_allocator alloc;
173         auto task = alloc.new_object<feeder_task>(item, *this, alloc);
174 
175         my_wait_context.reserve();
176         spawn(*task, my_execution_context);
177     }
178 
internal_add_copy_impl(std::false_type,const Item &)179     void internal_add_copy_impl(std::false_type, const Item&) {
180         __TBB_ASSERT(false, "Overloading for r-value reference doesn't work or it's not movable and not copyable object");
181     }
182 
internal_add_copy(const Item & item)183     void internal_add_copy(const Item& item) override {
184         internal_add_copy_impl(typename std::is_copy_constructible<Item>::type(), item);
185     }
186 
internal_add_move(Item && item)187     void internal_add_move(Item&& item) override {
188         using feeder_task = feeder_item_task<Body, Item>;
189         small_object_allocator alloc{};
190         auto task = alloc.new_object<feeder_task>(std::move(item), *this, alloc);
191 
192         my_wait_context.reserve();
193         spawn(*task, my_execution_context);
194     }
195 public:
feeder_impl(const Body & body,wait_context & w_context,task_group_context & context)196     feeder_impl(const Body& body, wait_context& w_context, task_group_context &context)
197       : my_body(body),
198         my_wait_context(w_context)
199       , my_execution_context(context)
200     {}
201 
202     const Body& my_body;
203     wait_context& my_wait_context;
204     task_group_context& my_execution_context;
205 }; // class feeder_impl
206 
207 /** Execute computation under one element of the range
208     @ingroup algorithms **/
209 template<typename Iterator, typename Body, typename Item>
210 struct for_each_iteration_task: public task {
211     using feeder_type = feeder_impl<Body, Item>;
212 
for_each_iteration_taskfor_each_iteration_task213     for_each_iteration_task(Iterator input_item_ptr, const Body& body, feeder_impl<Body, Item>* feeder_ptr, wait_context& wait_context) :
214         item_ptr(input_item_ptr), my_body(body), my_feeder_ptr(feeder_ptr), parent_wait_context(wait_context)
215     {}
216 
finalizefor_each_iteration_task217     void finalize() {
218         parent_wait_context.release();
219     }
220 
executefor_each_iteration_task221     task* execute(execution_data&) override {
222         parallel_for_each_operator_selector<Body>::call(my_body, *item_ptr, my_feeder_ptr);
223         finalize();
224         return nullptr;
225     }
226 
cancelfor_each_iteration_task227     task* cancel(execution_data&) override {
228         finalize();
229         return nullptr;
230     }
231 
232     Iterator item_ptr;
233     const Body& my_body;
234     feeder_impl<Body, Item>* my_feeder_ptr;
235     wait_context& parent_wait_context;
236 }; // class for_each_iteration_task
237 
238 // Helper to get the type of the iterator to the internal sequence of copies
239 // If the element can be passed to the body as an rvalue - this iterator should be move_iterator
240 template <typename Body, typename Item, typename = void>
241 struct input_iteration_task_iterator_helper {
242     // For input iterators we pass const lvalue reference to the body
243     // It is prohibited to take non-constant lvalue references for input iterators
244     using type = const Item*;
245 };
246 
247 template <typename Body, typename Item>
248 struct input_iteration_task_iterator_helper<Body, Item,
249     tbb::detail::void_t<decltype(parallel_for_each_operator_selector<Body>::call(std::declval<const Body&>(),
250                                                                                  std::declval<Item&&>(),
251                                                                                  std::declval<feeder_impl<Body, Item>*>()))>>
252 {
253     using type = std::move_iterator<Item*>;
254 };
255 
256 /** Split one block task to several(max_block_size) iteration tasks for input iterators
257     @ingroup algorithms **/
258 template <typename Body, typename Item>
259 struct input_block_handling_task : public task {
260     static constexpr size_t max_block_size = 4;
261 
262     using feeder_type = feeder_impl<Body, Item>;
263     using iteration_task_iterator_type = typename input_iteration_task_iterator_helper<Body, Item>::type;
264     using iteration_task = for_each_iteration_task<iteration_task_iterator_type, Body, Item>;
265 
266     input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context,
267                               const Body& body, feeder_impl<Body, Item>* feeder_ptr, small_object_allocator& alloc)
268         :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context),
269          my_execution_context(e_context), my_allocator(alloc)
270     {
271         auto item_it = block_iteration_space.begin();
272         for (auto* it = task_pool.begin(); it != task_pool.end(); ++it) {
273             new (it) iteration_task(iteration_task_iterator_type(item_it++), body, feeder_ptr, my_wait_context);
274         }
275     }
276 
277     void finalize(const execution_data& ed) {
278         my_root_wait_context.release();
279         my_allocator.delete_object(this, ed);
280     }
281 
282     task* execute(execution_data& ed) override {
283         __TBB_ASSERT( my_size > 0, "Negative size was passed to task");
284         for (std::size_t counter = 1; counter < my_size; ++counter) {
285             my_wait_context.reserve();
286             spawn(*(task_pool.begin() + counter), my_execution_context);
287         }
288         my_wait_context.reserve();
289         execute_and_wait(*task_pool.begin(), my_execution_context,
290                          my_wait_context,    my_execution_context);
291 
292         // deallocate current task after children execution
293         finalize(ed);
294         return nullptr;
295     }
296 
297     task* cancel(execution_data& ed) override {
298         finalize(ed);
299         return nullptr;
300     }
301 
302     ~input_block_handling_task() {
303         for(std::size_t counter = 0; counter < max_block_size; ++counter) {
304             (task_pool.begin() + counter)->~iteration_task();
305             if (counter < my_size) {
306                 (block_iteration_space.begin() + counter)->~Item();
307             }
308         }
309     }
310 
311     aligned_space<Item, max_block_size> block_iteration_space;
312     aligned_space<iteration_task, max_block_size> task_pool;
313     std::size_t my_size;
314     wait_context my_wait_context;
315     wait_context& my_root_wait_context;
316     task_group_context& my_execution_context;
317     small_object_allocator my_allocator;
318 }; // class input_block_handling_task
319 
320 /** Split one block task to several(max_block_size) iteration tasks for forward iterators
321     @ingroup algorithms **/
322 template <typename Iterator, typename Body, typename Item>
323 struct forward_block_handling_task : public task {
324     static constexpr size_t max_block_size = 4;
325 
326     using iteration_task = for_each_iteration_task<Iterator, Body, Item>;
327 
328     forward_block_handling_task(Iterator first, std::size_t size,
329                                 wait_context& w_context, task_group_context& e_context,
330                                 const Body& body, feeder_impl<Body, Item>* feeder_ptr,
331                                 small_object_allocator& alloc)
332         : my_size(size), my_wait_context(0), my_root_wait_context(w_context),
333           my_execution_context(e_context), my_allocator(alloc)
334     {
335         auto* task_it = task_pool.begin();
336         for (std::size_t i = 0; i < size; i++) {
337             new (task_it++) iteration_task(first, body, feeder_ptr, my_wait_context);
338             ++first;
339         }
340     }
341 
342     void finalize(const execution_data& ed) {
343         my_root_wait_context.release();
344         my_allocator.delete_object(this, ed);
345     }
346 
347     task* execute(execution_data& ed) override {
348         __TBB_ASSERT( my_size > 0, "Negative size was passed to task");
349         for(std::size_t counter = 1; counter < my_size; ++counter) {
350             my_wait_context.reserve();
351             spawn(*(task_pool.begin() + counter), my_execution_context);
352         }
353         my_wait_context.reserve();
354         execute_and_wait(*task_pool.begin(), my_execution_context,
355                          my_wait_context,    my_execution_context);
356 
357         // deallocate current task after children execution
358         finalize(ed);
359         return nullptr;
360     }
361 
362     task* cancel(execution_data& ed) override {
363         finalize(ed);
364         return nullptr;
365     }
366 
367     ~forward_block_handling_task() {
368         for(std::size_t counter = 0; counter < my_size; ++counter) {
369             (task_pool.begin() + counter)->~iteration_task();
370         }
371     }
372 
373     aligned_space<iteration_task, max_block_size> task_pool;
374     std::size_t my_size;
375     wait_context my_wait_context;
376     wait_context& my_root_wait_context;
377     task_group_context& my_execution_context;
378     small_object_allocator my_allocator;
379 }; // class forward_block_handling_task
380 
381 /** Body for parallel_for algorithm.
382   * Allows to redirect operations under random access iterators range to the parallel_for algorithm.
383     @ingroup algorithms **/
384 template <typename Iterator, typename Body, typename Item>
385 class parallel_for_body_wrapper {
386     Iterator my_first;
387     const Body& my_body;
388     feeder_impl<Body, Item>* my_feeder_ptr;
389 public:
390     parallel_for_body_wrapper(Iterator first, const Body& body, feeder_impl<Body, Item>* feeder_ptr)
391         : my_first(first), my_body(body), my_feeder_ptr(feeder_ptr) {}
392 
393     void operator()(tbb::blocked_range<std::size_t> range) const {
394 #if __INTEL_COMPILER
395 #pragma ivdep
396 #endif
397         for (std::size_t count = range.begin(); count != range.end(); count++) {
398             parallel_for_each_operator_selector<Body>::call(my_body, *(my_first + count),
399                                                             my_feeder_ptr);
400         }
401     }
402 }; // class parallel_for_body_wrapper
403 
404 
/** Helper for getting iterators tag including inherited custom tags
    @ingroup algorithms */
template<typename It>
using tag = typename std::iterator_traits<It>::iterator_category;

#if __TBB_CPP20_PRESENT
template <typename It>
struct move_iterator_dispatch_helper {
    using type = It;
};

// Until C++23, std::move_iterator::iterator_concept always defines
// to std::input_iterator_tag and hence std::forward_iterator concept
// always evaluates to false, so std::move_iterator dispatch should be
// made according to the base iterator type.
template <typename It>
struct move_iterator_dispatch_helper<std::move_iterator<It>> {
    using type = It;
};

// Collapse any iterator category to one of the three categories the
// algorithm distinguishes: random access, forward, or input.
template <typename It>
using iterator_tag_dispatch_impl =
    std::conditional_t<std::random_access_iterator<It>,
                       std::random_access_iterator_tag,
                       std::conditional_t<std::forward_iterator<It>,
                                          std::forward_iterator_tag,
                                          std::input_iterator_tag>>;

template <typename It>
using iterator_tag_dispatch =
    iterator_tag_dispatch_impl<typename move_iterator_dispatch_helper<It>::type>;

#else
// Pre-C++20 fallback: classify via iterator_category inheritance, which also
// handles user-defined tags derived from the standard ones.
template<typename It>
using iterator_tag_dispatch = typename
    std::conditional<
        std::is_base_of<std::random_access_iterator_tag, tag<It>>::value,
        std::random_access_iterator_tag,
        typename std::conditional<
            std::is_base_of<std::forward_iterator_tag, tag<It>>::value,
            std::forward_iterator_tag,
            std::input_iterator_tag
        >::type
    >::type;
#endif // __TBB_CPP20_PRESENT
450 
451 template <typename Body, typename Iterator, typename Item>
452 using feeder_is_required = tbb::detail::void_t<decltype(tbb::detail::invoke(std::declval<const Body>(),
453                                                                             std::declval<typename std::iterator_traits<Iterator>::reference>(),
454                                                                             std::declval<feeder<Item>&>()))>;
455 
456 // Creates feeder object only if the body can accept it
457 template <typename Iterator, typename Body, typename Item, typename = void>
458 struct feeder_holder {
459     feeder_holder( wait_context&, task_group_context&, const Body& ) {}
460 
461     feeder_impl<Body, Item>* feeder_ptr() { return nullptr; }
462 }; // class feeder_holder
463 
464 template <typename Iterator, typename Body, typename Item>
465 class feeder_holder<Iterator, Body, Item, feeder_is_required<Body, Iterator, Item>> {
466 public:
467     feeder_holder( wait_context& w_context, task_group_context& context, const Body& body )
468         : my_feeder(body, w_context, context) {}
469 
470     feeder_impl<Body, Item>* feeder_ptr() { return &my_feeder; }
471 private:
472     feeder_impl<Body, Item> my_feeder;
473 }; // class feeder_holder
474 
475 template <typename Iterator, typename Body, typename Item>
476 class for_each_root_task_base : public task {
477 public:
478     for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context)
479         : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context),
480           my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body)
481     {
482         my_wait_context.reserve();
483     }
484 private:
485     task* cancel(execution_data&) override {
486         this->my_wait_context.release();
487         return nullptr;
488     }
489 protected:
490     Iterator my_first;
491     Iterator my_last;
492     wait_context& my_wait_context;
493     task_group_context& my_execution_context;
494     const Body& my_body;
495     feeder_holder<Iterator, Body, Item> my_feeder_holder;
496 }; // class for_each_root_task_base
497 
498 /** parallel_for_each algorithm root task - most generic version
499   * Splits input range to blocks
500     @ingroup algorithms **/
501 template <typename Iterator, typename Body, typename Item, typename IteratorTag = iterator_tag_dispatch<Iterator>>
502 class for_each_root_task : public for_each_root_task_base<Iterator, Body, Item>
503 {
504     using base_type = for_each_root_task_base<Iterator, Body, Item>;
505 public:
506     using base_type::base_type;
507 private:
508     task* execute(execution_data& ed) override {
509         using block_handling_type = input_block_handling_task<Body, Item>;
510 
511         if (this->my_first == this->my_last) {
512             this->my_wait_context.release();
513             return nullptr;
514         }
515 
516         this->my_wait_context.reserve();
517         small_object_allocator alloc{};
518         auto block_handling_task = alloc.new_object<block_handling_type>(ed, this->my_wait_context, this->my_execution_context,
519                                                                          this->my_body, this->my_feeder_holder.feeder_ptr(),
520                                                                          alloc);
521 
522         auto* block_iterator = block_handling_task->block_iteration_space.begin();
523         for (; !(this->my_first == this->my_last) && block_handling_task->my_size < block_handling_type::max_block_size; ++this->my_first) {
524             // Move semantics are automatically used when supported by the iterator
525             new (block_iterator++) Item(*this->my_first);
526             ++block_handling_task->my_size;
527         }
528 
529         // Do not access this after spawn to avoid races
530         spawn(*this, this->my_execution_context);
531         return block_handling_task;
532     }
533 }; // class for_each_root_task - most generic implementation
534 
535 /** parallel_for_each algorithm root task - forward iterator based specialization
536   * Splits input range to blocks
537     @ingroup algorithms **/
538 template <typename Iterator, typename Body, typename Item>
539 class for_each_root_task<Iterator, Body, Item, std::forward_iterator_tag>
540     : public for_each_root_task_base<Iterator, Body, Item>
541 {
542     using base_type = for_each_root_task_base<Iterator, Body, Item>;
543 public:
544     using base_type::base_type;
545 private:
546     task* execute(execution_data& ed) override {
547         using block_handling_type = forward_block_handling_task<Iterator, Body, Item>;
548         if (this->my_first == this->my_last) {
549             this->my_wait_context.release();
550             return nullptr;
551         }
552 
553         std::size_t block_size{0};
554         Iterator first_block_element = this->my_first;
555         for (; !(this->my_first == this->my_last) && block_size < block_handling_type::max_block_size; ++this->my_first) {
556             ++block_size;
557         }
558 
559         this->my_wait_context.reserve();
560         small_object_allocator alloc{};
561         auto block_handling_task = alloc.new_object<block_handling_type>(ed, first_block_element, block_size,
562                                                                          this->my_wait_context, this->my_execution_context,
563                                                                          this->my_body, this->my_feeder_holder.feeder_ptr(), alloc);
564 
565         // Do not access this after spawn to avoid races
566         spawn(*this, this->my_execution_context);
567         return block_handling_task;
568     }
569 }; // class for_each_root_task - forward iterator based specialization
570 
571 /** parallel_for_each algorithm root task - random access iterator based specialization
572   * Splits input range to blocks
573     @ingroup algorithms **/
574 template <typename Iterator, typename Body, typename Item>
575 class for_each_root_task<Iterator, Body, Item, std::random_access_iterator_tag>
576     : public for_each_root_task_base<Iterator, Body, Item>
577 {
578     using base_type = for_each_root_task_base<Iterator, Body, Item>;
579 public:
580     using base_type::base_type;
581 private:
582     task* execute(execution_data&) override {
583         tbb::parallel_for(
584             tbb::blocked_range<std::size_t>(0, std::distance(this->my_first, this->my_last)),
585             parallel_for_body_wrapper<Iterator, Body, Item>(this->my_first, this->my_body, this->my_feeder_holder.feeder_ptr())
586             , this->my_execution_context
587         );
588 
589         this->my_wait_context.release();
590         return nullptr;
591     }
592 }; // class for_each_root_task - random access iterator based specialization
593 
594 /** Helper for getting item type. If item type can be deduced from feeder - got it from feeder,
595     if feeder is generic - got item type from range.
596     @ingroup algorithms */
597 template<typename Body, typename Item, typename FeederArg>
598 auto feeder_argument_parser(void (Body::*)(Item, feeder<FeederArg>&) const) -> FeederArg;
599 
600 template<typename Body, typename>
601 decltype(feeder_argument_parser<Body>(&Body::operator())) get_item_type_impl(int); // for (T, feeder<T>)
602 template<typename Body, typename Item> Item get_item_type_impl(...); // stub
603 
604 template <typename Body, typename Item>
605 using get_item_type = decltype(get_item_type_impl<Body, Item>(0));
606 
#if __TBB_CPP20_CONCEPTS_PRESENT
template <typename Body, typename ItemType>
using feeder_item_type = std::remove_cvref_t<get_item_type<Body, ItemType>>;

// Constraint for the iterator-based parallel_for_each overloads.
template <typename Body, typename Iterator>
concept parallel_for_each_iterator_body =
    parallel_for_each_body<Body, iterator_reference_type<Iterator>, feeder_item_type<Body, iterator_reference_type<Iterator>>>;

// Constraint for the container/range-based parallel_for_each overloads.
template <typename Body, typename Range>
concept parallel_for_each_range_body =
    parallel_for_each_body<Body, range_reference_type<Range>, feeder_item_type<Body, range_reference_type<Range>>>;
#endif
619 
620 /** Implements parallel iteration over a range.
621     @ingroup algorithms */
622 template<typename Iterator, typename Body>
623 void run_parallel_for_each( Iterator first, Iterator last, const Body& body, task_group_context& context)
624 {
625     if (!(first == last)) {
626         using ItemType = get_item_type<Body, typename std::iterator_traits<Iterator>::value_type>;
627         wait_context w_context(0);
628 
629         for_each_root_task<Iterator, Body, ItemType> root_task(first, last, body, w_context, context);
630 
631         execute_and_wait(root_task, context, w_context, context);
632     }
633 }
634 
/** \page parallel_for_each_body_req Requirements on parallel_for_each body
    Class \c Body implementing the concept of parallel_for_each body must define:
    - \code
        B::operator()(
                cv_item_type item,
                feeder<item_type>& feeder
        ) const

        OR

        B::operator()( cv_item_type& item ) const
      \endcode                                               Process item.
                                                             May be invoked concurrently for the same \c this but different \c item.

    - \code item_type( const item_type& ) \endcode
                                                             Copy a work item.
    - \code ~item_type() \endcode                            Destroy a work item
**/
653 
654 /** \name parallel_for_each
655     See also requirements on \ref parallel_for_each_body_req "parallel_for_each Body". **/
656 //@{
657 //! Parallel iteration over a range, with optional addition of more work.
658 /** @ingroup algorithms */
659 template<typename Iterator, typename Body>
660     __TBB_requires(std::input_iterator<Iterator> && parallel_for_each_iterator_body<Body, Iterator>)
661 void parallel_for_each(Iterator first, Iterator last, const Body& body) {
662     task_group_context context(PARALLEL_FOR_EACH);
663     run_parallel_for_each<Iterator, Body>(first, last, body, context);
664 }
665 
666 template<typename Range, typename Body>
667     __TBB_requires(container_based_sequence<Range, std::input_iterator_tag> && parallel_for_each_range_body<Body, Range>)
668 void parallel_for_each(Range& rng, const Body& body) {
669     parallel_for_each(std::begin(rng), std::end(rng), body);
670 }
671 
672 template<typename Range, typename Body>
673     __TBB_requires(container_based_sequence<Range, std::input_iterator_tag> && parallel_for_each_range_body<Body, Range>)
674 void parallel_for_each(const Range& rng, const Body& body) {
675     parallel_for_each(std::begin(rng), std::end(rng), body);
676 }
677 
678 //! Parallel iteration over a range, with optional addition of more work and user-supplied context
679 /** @ingroup algorithms */
680 template<typename Iterator, typename Body>
681     __TBB_requires(std::input_iterator<Iterator> && parallel_for_each_iterator_body<Body, Iterator>)
682 void parallel_for_each(Iterator first, Iterator last, const Body& body, task_group_context& context) {
683     run_parallel_for_each<Iterator, Body>(first, last, body, context);
684 }
685 
686 template<typename Range, typename Body>
687     __TBB_requires(container_based_sequence<Range, std::input_iterator_tag> && parallel_for_each_range_body<Body, Range>)
688 void parallel_for_each(Range& rng, const Body& body, task_group_context& context) {
689     parallel_for_each(std::begin(rng), std::end(rng), body, context);
690 }
691 
692 template<typename Range, typename Body>
693     __TBB_requires(container_based_sequence<Range, std::input_iterator_tag> && parallel_for_each_range_body<Body, Range>)
694 void parallel_for_each(const Range& rng, const Body& body, task_group_context& context) {
695     parallel_for_each(std::begin(rng), std::end(rng), body, context);
696 }
697 
} // namespace d2
} // namespace detail
//! @endcond
//@}

inline namespace v1 {
using detail::d2::parallel_for_each;
using detail::d1::feeder;
} // namespace v1

} // namespace tbb

#endif /* __TBB_parallel_for_each_H */
711