xref: /oneTBB/test/tbb/test_parallel_reduce.cpp (revision 6caecf96)
1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include <atomic>
18 
19 #include "common/parallel_reduce_common.h"
20 #include "common/cpu_usertime.h"
21 #include "common/exception_handling.h"
22 #include "common/concepts_common.h"
23 
24 //! \file test_parallel_reduce.cpp
25 //! \brief Test for [algorithms.parallel_reduce algorithms.parallel_deterministic_reduce] specification
26 
27 using ValueType = uint64_t;
28 
//! Binary functor returning the sum of its two operands; used as the reduction step.
struct Sum {
    template <typename Value>
    Value operator() ( const Value& lhs, const Value& rhs ) const {
        // The result of operator+ is converted back to Value on return.
        return lhs + rhs;
    }
};
35 
36 struct Accumulator {
37     ValueType operator() ( const tbb::blocked_range<ValueType*>& r, ValueType value ) const {
38         for ( ValueType* pv = r.begin(); pv != r.end(); ++pv )
39             value += *pv;
40         return value;
41     }
42 };
43 
44 class ParallelSumTester {
45 public:
46     ParallelSumTester( const ParallelSumTester& ) = default;
47     void operator=( const ParallelSumTester& ) = delete;
48 
49     ParallelSumTester() : m_range(nullptr, nullptr) {
50         m_array = new ValueType[unsigned(count)];
51         for ( ValueType i = 0; i < count; ++i )
52             m_array[i] = i + 1;
53         m_range = tbb::blocked_range<ValueType*>( m_array, m_array + count );
54     }
55     ~ParallelSumTester() { delete[] m_array; }
56 
57     template<typename Partitioner>
58     void CheckParallelReduce() {
59         Partitioner partitioner;
60         ValueType result1 = reduce_invoker<ValueType>( m_range, Accumulator(), Sum(), partitioner );
61         REQUIRE_MESSAGE( result1 == expected, "Wrong parallel summation result" );
62         ValueType result2 = reduce_invoker<ValueType>( m_range,
63             [](const tbb::blocked_range<ValueType*>& r, ValueType value) -> ValueType {
64                 for ( const ValueType* pv = r.begin(); pv != r.end(); ++pv )
65                     value += *pv;
66                 return value;
67             },
68             Sum(),
69             partitioner
70         );
71         REQUIRE_MESSAGE( result2 == expected, "Wrong parallel summation result" );
72     }
73 private:
74     ValueType* m_array;
75     tbb::blocked_range<ValueType*> m_range;
76     static const ValueType count, expected;
77 };
78 
79 const ValueType ParallelSumTester::count = 1000000;
80 const ValueType ParallelSumTester::expected = count * (count + 1) / 2;
81 
82 namespace test_cancellation {
83 
84 struct ReduceToCancel {
85     std::size_t operator()( const tbb::blocked_range<std::size_t>&, std::size_t ) const {
86         ++g_CurExecuted;
87         Cancellator::WaitUntilReady();
88         return 1;
89     }
90 }; // struct ReduceToCancel
91 
92 struct JoinToCancel {
93     std::size_t operator()( std::size_t, std::size_t ) const {
94         ++g_CurExecuted;
95         Cancellator::WaitUntilReady();
96         return 1;
97     }
98 }; // struct Join
99 
100 struct ReduceFunctorToCancel {
101     std::size_t result;
102 
103     ReduceFunctorToCancel() : result(0) {}
104     ReduceFunctorToCancel( ReduceFunctorToCancel&, tbb::split ) : result(0) {}
105 
106     void operator()( const tbb::blocked_range<std::size_t>& br ) {
107         result = ReduceToCancel{}(br, result);
108     }
109 
110     void join( ReduceFunctorToCancel& rhs ) {
111         result = JoinToCancel{}(result, rhs.result);
112     }
113 }; // struct ReduceFunctorToCancel
114 
115 static constexpr std::size_t buffer_test_size = 1024;
116 static constexpr std::size_t maxParallelReduceRunnerMode = 9;
117 
118 template <std::size_t Mode>
119 class ParallelReduceRunner {
120     tbb::task_group_context& my_ctx;
121 
122     static_assert(Mode >= 0 && Mode <= maxParallelReduceRunnerMode, "Incorrect mode for ParallelReduceTask");
123 
124     template <typename... Args>
125     void run_parallel_reduce( Args&&... args ) const {
126         switch(Mode % 5) {
127             case 0 : {
128                 tbb::parallel_reduce(std::forward<Args>(args)..., my_ctx);
129                 break;
130             }
131             case 1 : {
132                 tbb::parallel_reduce(std::forward<Args>(args)..., tbb::simple_partitioner{}, my_ctx);
133                 break;
134             }
135             case 2 : {
136                 tbb::parallel_reduce(std::forward<Args>(args)..., tbb::auto_partitioner{}, my_ctx);
137                 break;
138             }
139             case 3 : {
140                 tbb::parallel_reduce(std::forward<Args>(args)..., tbb::static_partitioner{}, my_ctx);
141                 break;
142             }
143             case 4 : {
144                 tbb::affinity_partitioner aff;
145                 tbb::parallel_reduce(std::forward<Args>(args)..., aff, my_ctx);
146                 break;
147             }
148         }
149     }
150 
151 public:
152     ParallelReduceRunner( tbb::task_group_context& ctx )
153         : my_ctx(ctx) {}
154 
155     void operator()() const {
156         tbb::blocked_range<std::size_t> br(0, buffer_test_size);
157         if (Mode < 5) {
158             ReduceFunctorToCancel functor;
159             run_parallel_reduce(br, functor);
160         } else {
161             run_parallel_reduce(br, std::size_t(0), ReduceToCancel{}, JoinToCancel{});
162         }
163     }
164 }; // class ParallelReduceRunner
165 
166 static constexpr std::size_t maxParallelDeterministicReduceRunnerMode = 5;
167 
168 // TODO: unify with ParallelReduceRunner
169 template <std::size_t Mode>
170 class ParallelDeterministicReduceRunner {
171     tbb::task_group_context& my_ctx;
172 
173     static_assert(Mode >= 0 && Mode <= maxParallelDeterministicReduceRunnerMode, "Incorrect Mode for deterministic_reduce task");
174 
175     template <typename... Args>
176     void run_parallel_deterministic_reduce( Args&&... args ) const {
177         switch(Mode % 3) {
178             case 0 : {
179                 tbb::parallel_deterministic_reduce(std::forward<Args>(args)..., my_ctx);
180                 break;
181             }
182             case 1 : {
183                 tbb::parallel_deterministic_reduce(std::forward<Args>(args)..., tbb::simple_partitioner{}, my_ctx);
184                 break;
185             }
186             case 2 : {
187                 tbb::parallel_deterministic_reduce(std::forward<Args>(args)..., tbb::static_partitioner{}, my_ctx);
188                 break;
189             }
190         }
191     }
192 
193 public:
194     ParallelDeterministicReduceRunner( tbb::task_group_context& ctx )
195         : my_ctx(ctx) {}
196 
197     void operator()() const {
198         tbb::blocked_range<std::size_t> br(0, buffer_test_size);
199         if (Mode < 3) {
200             ReduceFunctorToCancel functor;
201             run_parallel_deterministic_reduce(br, functor);
202         } else {
203             run_parallel_deterministic_reduce(br, std::size_t(0), ReduceToCancel{}, JoinToCancel{});
204         }
205     }
206 }; // class ParallelDeterministicReduceRunner
207 
208 template <std::size_t Mode>
209 void run_parallel_reduce_cancellation_test() {
210     for ( auto concurrency_level : utils::concurrency_range() ) {
211         if (concurrency_level < 2) continue;
212 
213         tbb::global_control gc(tbb::global_control::max_allowed_parallelism, concurrency_level);
214         ResetEhGlobals();
215         RunCancellationTest<ParallelReduceRunner<Mode>, Cancellator>();
216     }
217 }
218 
219 template <std::size_t Mode>
220 void run_parallel_deterministic_reduce_cancellation_test() {
221     for ( auto concurrency_level : utils::concurrency_range() ) {
222         if (concurrency_level < 2) continue;
223 
224         tbb::global_control gc(tbb::global_control::max_allowed_parallelism, concurrency_level);
225         ResetEhGlobals();
226         RunCancellationTest<ParallelDeterministicReduceRunner<Mode>, Cancellator>();
227     }
228 }
229 
230 template <std::size_t Mode>
231 struct ParallelReduceTestRunner {
232     static void run() {
233         run_parallel_reduce_cancellation_test<Mode>();
234         ParallelReduceTestRunner<Mode + 1>::run();
235     }
236 }; // struct ParallelReduceTestRunner
237 
238 template <>
239 struct ParallelReduceTestRunner<maxParallelReduceRunnerMode> {
240     static void run() {
241         run_parallel_reduce_cancellation_test<maxParallelReduceRunnerMode>();
242     }
243 }; // struct ParallelReduceTestRunner<maxParallelReduceRunnerMode>
244 
245 template <std::size_t Mode>
246 struct ParallelDeterministicReduceTestRunner {
247     static void run() {
248         run_parallel_deterministic_reduce_cancellation_test<Mode>();
249         ParallelDeterministicReduceTestRunner<Mode + 1>::run();
250     }
251 }; // struct ParallelDeterministicReduceTestRunner
252 
253 template <>
254 struct ParallelDeterministicReduceTestRunner<maxParallelDeterministicReduceRunnerMode> {
255     static void run() {
256         run_parallel_deterministic_reduce_cancellation_test<maxParallelDeterministicReduceRunnerMode>();
257     }
258 }; // struct ParallelDeterministicReduceTestRunner<maxParallelDeterministicReduceRunnerMode>
259 
260 } // namespace test_cancellation
261 
262 #if __TBB_CPP20_CONCEPTS_PRESENT
263 template <typename... Args>
264 concept can_call_parallel_reduce_basic = requires( Args&&... args ) {
265     tbb::parallel_reduce(std::forward<Args>(args)...);
266 };
267 
268 template <typename... Args>
269 concept can_call_parallel_deterministic_reduce_basic = requires ( Args&&... args ) {
270     tbb::parallel_deterministic_reduce(std::forward<Args>(args)...);
271 };
272 
273 template <typename... Args>
274 concept can_call_preduce_helper = can_call_parallel_reduce_basic<Args...> &&
275                                   can_call_parallel_reduce_basic<Args..., tbb::task_group_context&>;
276 
277 template <typename... Args>
278 concept can_call_pdet_reduce_helper = can_call_parallel_deterministic_reduce_basic<Args...> &&
279                                       can_call_parallel_deterministic_reduce_basic<Args..., tbb::task_group_context&>;
280 
281 template <typename... Args>
282 concept can_call_preduce_with_partitioner = can_call_preduce_helper<Args...> &&
283                                             can_call_preduce_helper<Args..., const tbb::simple_partitioner&> &&
284                                             can_call_preduce_helper<Args..., const tbb::auto_partitioner&> &&
285                                             can_call_preduce_helper<Args..., const tbb::static_partitioner&> &&
286                                             can_call_preduce_helper<Args..., tbb::affinity_partitioner&>;
287 
288 template <typename... Args>
289 concept can_call_pdet_reduce_with_partitioner = can_call_pdet_reduce_helper<Args...> &&
290                                                 can_call_pdet_reduce_helper<Args..., const tbb::simple_partitioner&> &&
291                                                 can_call_pdet_reduce_helper<Args..., const tbb::static_partitioner&>;
292 
293 template <typename Range, typename Body>
294 concept can_call_imperative_preduce = can_call_preduce_with_partitioner<const Range&, Body&>;
295 
296 template <typename Range, typename Body>
297 concept can_call_imperative_pdet_reduce = can_call_pdet_reduce_with_partitioner<const Range&, Body&>;
298 
299 template <typename Range, typename Value, typename RealBody, typename Reduction>
300 concept can_call_functional_preduce = can_call_preduce_with_partitioner<const Range&, const Value&,
301                                                                         const RealBody&, const Reduction&>;
302 
303 template <typename Range, typename Value, typename RealBody, typename Reduction>
304 concept can_call_functional_pdet_reduce = can_call_pdet_reduce_with_partitioner<const Range&, const Value&,
305                                                                                 const RealBody&, const Reduction&>;
306 
307 template <typename Range>
308 using CorrectBody = test_concepts::parallel_reduce_body::Correct<Range>;
309 
310 template <typename Range>
311 using CorrectFunc = test_concepts::parallel_reduce_function::Correct<Range>;
312 
313 using CorrectReduction = test_concepts::parallel_reduce_combine::Correct<int>;
314 using CorrectRange = test_concepts::range::Correct;
315 
316 void test_preduce_range_constraints() {
317     using namespace test_concepts::range;
318     static_assert(can_call_imperative_preduce<Correct, CorrectBody<Correct>>);
319     static_assert(!can_call_imperative_preduce<NonCopyable, CorrectBody<NonCopyable>>);
320     static_assert(!can_call_imperative_preduce<NonDestructible, CorrectBody<NonDestructible>>);
321     static_assert(!can_call_imperative_preduce<NonSplittable, CorrectBody<NonSplittable>>);
322     static_assert(!can_call_imperative_preduce<NoEmpty, CorrectBody<NoEmpty>>);
323     static_assert(!can_call_imperative_preduce<EmptyNonConst, CorrectBody<EmptyNonConst>>);
324     static_assert(!can_call_imperative_preduce<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>);
325     static_assert(!can_call_imperative_preduce<NoIsDivisible, CorrectBody<NoIsDivisible>>);
326     static_assert(!can_call_imperative_preduce<IsDivisibleNonConst, CorrectBody<NoIsDivisible>>);
327     static_assert(!can_call_imperative_preduce<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>);
328 
329     static_assert(can_call_functional_preduce<Correct, int, CorrectFunc<Correct>, CorrectReduction>);
330     static_assert(!can_call_functional_preduce<NonCopyable, int, CorrectFunc<NonCopyable>, CorrectReduction>);
331     static_assert(!can_call_functional_preduce<NonDestructible, int, CorrectFunc<NonDestructible>, CorrectReduction>);
332     static_assert(!can_call_functional_preduce<NonSplittable, int, CorrectFunc<NonSplittable>, CorrectReduction>);
333     static_assert(!can_call_functional_preduce<NoEmpty, int, CorrectFunc<NoEmpty>, CorrectReduction>);
334     static_assert(!can_call_functional_preduce<EmptyNonConst, int, CorrectFunc<EmptyNonConst>, CorrectReduction>);
335     static_assert(!can_call_functional_preduce<WrongReturnEmpty, int, CorrectFunc<WrongReturnEmpty>, CorrectReduction>);
336     static_assert(!can_call_functional_preduce<NoIsDivisible, int, CorrectFunc<NoIsDivisible>, CorrectReduction>);
337     static_assert(!can_call_functional_preduce<IsDivisibleNonConst, int, CorrectFunc<IsDivisibleNonConst>, CorrectReduction>);
338     static_assert(!can_call_functional_preduce<WrongReturnIsDivisible, int, CorrectFunc<WrongReturnIsDivisible>, CorrectReduction>);
339 }
340 
341 void test_preduce_body_constraints() {
342     using namespace test_concepts::parallel_reduce_body;
343     static_assert(can_call_imperative_preduce<CorrectRange, Correct<CorrectRange>>);
344     static_assert(!can_call_imperative_preduce<CorrectRange, NonSplittable<CorrectRange>>);
345     static_assert(!can_call_imperative_preduce<CorrectRange, NonDestructible<CorrectRange>>);
346     static_assert(!can_call_imperative_preduce<CorrectRange, NoOperatorRoundBrackets<CorrectRange>>);
347     static_assert(!can_call_imperative_preduce<CorrectRange, WrongInputOperatorRoundBrackets<CorrectRange>>);
348     static_assert(!can_call_imperative_preduce<CorrectRange, NoJoin<CorrectRange>>);
349     static_assert(!can_call_imperative_preduce<CorrectRange, WrongInputJoin<CorrectRange>>);
350 }
351 
352 void test_preduce_func_constraints() {
353     using namespace test_concepts::parallel_reduce_function;
354     static_assert(can_call_functional_preduce<CorrectRange, int, Correct<CorrectRange>, CorrectReduction>);
355     static_assert(!can_call_functional_preduce<CorrectRange, int, NoOperatorRoundBrackets<CorrectRange>, CorrectReduction>);
356     static_assert(!can_call_functional_preduce<CorrectRange, int, OperatorRoundBracketsNonConst<CorrectRange>, CorrectReduction>);
357     static_assert(!can_call_functional_preduce<CorrectRange, int, WrongFirstInputOperatorRoundBrackets<CorrectRange>, CorrectReduction>);
358     static_assert(!can_call_functional_preduce<CorrectRange, int, WrongSecondInputOperatorRoundBrackets<CorrectRange>, CorrectReduction>);
359     static_assert(!can_call_functional_preduce<CorrectRange, int, WrongReturnOperatorRoundBrackets<CorrectRange>, CorrectReduction>);
360 }
361 
362 void test_preduce_combine_constraints() {
363     using namespace test_concepts::parallel_reduce_combine;
364     static_assert(can_call_functional_preduce<CorrectRange, int, CorrectFunc<CorrectRange>, Correct<int>>);
365     static_assert(!can_call_functional_preduce<CorrectRange, int, CorrectFunc<CorrectRange>, NoOperatorRoundBrackets<int>>);
366     static_assert(!can_call_functional_preduce<CorrectRange, int, CorrectFunc<CorrectRange>, OperatorRoundBracketsNonConst<int>>);
367     static_assert(!can_call_functional_preduce<CorrectRange, int, CorrectFunc<CorrectRange>, WrongFirstInputOperatorRoundBrackets<int>>);
368     static_assert(!can_call_functional_preduce<CorrectRange, int, CorrectFunc<CorrectRange>, WrongSecondInputOperatorRoundBrackets<int>>);
369     static_assert(!can_call_functional_preduce<CorrectRange, int, CorrectFunc<CorrectRange>, WrongReturnOperatorRoundBrackets<int>>);
370 }
371 
372 void test_pdet_reduce_range_constraints() {
373     using namespace test_concepts::range;
374     static_assert(can_call_imperative_pdet_reduce<Correct, CorrectBody<Correct>>);
375     static_assert(!can_call_imperative_pdet_reduce<NonCopyable, CorrectBody<NonCopyable>>);
376     static_assert(!can_call_imperative_pdet_reduce<NonDestructible, CorrectBody<NonDestructible>>);
377     static_assert(!can_call_imperative_pdet_reduce<NonSplittable, CorrectBody<NonSplittable>>);
378     static_assert(!can_call_imperative_pdet_reduce<NoEmpty, CorrectBody<NoEmpty>>);
379     static_assert(!can_call_imperative_pdet_reduce<EmptyNonConst, CorrectBody<EmptyNonConst>>);
380     static_assert(!can_call_imperative_pdet_reduce<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>);
381     static_assert(!can_call_imperative_pdet_reduce<NoIsDivisible, CorrectBody<NoIsDivisible>>);
382     static_assert(!can_call_imperative_pdet_reduce<IsDivisibleNonConst, CorrectBody<NoIsDivisible>>);
383     static_assert(!can_call_imperative_pdet_reduce<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>);
384 
385     static_assert(can_call_functional_pdet_reduce<Correct, int, CorrectFunc<Correct>, CorrectReduction>);
386     static_assert(!can_call_functional_pdet_reduce<NonCopyable, int, CorrectFunc<NonCopyable>, CorrectReduction>);
387     static_assert(!can_call_functional_pdet_reduce<NonDestructible, int, CorrectFunc<NonDestructible>, CorrectReduction>);
388     static_assert(!can_call_functional_pdet_reduce<NonSplittable, int, CorrectFunc<NonSplittable>, CorrectReduction>);
389     static_assert(!can_call_functional_pdet_reduce<NoEmpty, int, CorrectFunc<NoEmpty>, CorrectReduction>);
390     static_assert(!can_call_functional_pdet_reduce<EmptyNonConst, int, CorrectFunc<EmptyNonConst>, CorrectReduction>);
391     static_assert(!can_call_functional_pdet_reduce<WrongReturnEmpty, int, CorrectFunc<WrongReturnEmpty>, CorrectReduction>);
392     static_assert(!can_call_functional_pdet_reduce<NoIsDivisible, int, CorrectFunc<NoIsDivisible>, CorrectReduction>);
393     static_assert(!can_call_functional_pdet_reduce<IsDivisibleNonConst, int, CorrectFunc<IsDivisibleNonConst>, CorrectReduction>);
394     static_assert(!can_call_functional_pdet_reduce<WrongReturnIsDivisible, int, CorrectFunc<WrongReturnIsDivisible>, CorrectReduction>);
395 }
396 
397 void test_pdet_reduce_body_constraints() {
398     using namespace test_concepts::parallel_reduce_body;
399     static_assert(can_call_imperative_pdet_reduce<CorrectRange, Correct<CorrectRange>>);
400     static_assert(!can_call_imperative_pdet_reduce<CorrectRange, NonSplittable<CorrectRange>>);
401     static_assert(!can_call_imperative_pdet_reduce<CorrectRange, NonDestructible<CorrectRange>>);
402     static_assert(!can_call_imperative_pdet_reduce<CorrectRange, NoOperatorRoundBrackets<CorrectRange>>);
403     static_assert(!can_call_imperative_pdet_reduce<CorrectRange, WrongInputOperatorRoundBrackets<CorrectRange>>);
404     static_assert(!can_call_imperative_pdet_reduce<CorrectRange, NoJoin<CorrectRange>>);
405     static_assert(!can_call_imperative_pdet_reduce<CorrectRange, WrongInputJoin<CorrectRange>>);
406 }
407 
408 void test_pdet_reduce_func_constraints() {
409     using namespace test_concepts::parallel_reduce_function;
410     static_assert(can_call_functional_pdet_reduce<CorrectRange, int, Correct<CorrectRange>, CorrectReduction>);
411     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, NoOperatorRoundBrackets<CorrectRange>, CorrectReduction>);
412     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, OperatorRoundBracketsNonConst<CorrectRange>, CorrectReduction>);
413     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, WrongFirstInputOperatorRoundBrackets<CorrectRange>, CorrectReduction>);
414     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, WrongSecondInputOperatorRoundBrackets<CorrectRange>, CorrectReduction>);
415     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, WrongReturnOperatorRoundBrackets<CorrectRange>, CorrectReduction>);
416 }
417 
418 void test_pdet_reduce_combine_constraints() {
419     using namespace test_concepts::parallel_reduce_combine;
420     static_assert(can_call_functional_pdet_reduce<CorrectRange, int, CorrectFunc<CorrectRange>, Correct<int>>);
421     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, CorrectFunc<CorrectRange>, NoOperatorRoundBrackets<int>>);
422     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, CorrectFunc<CorrectRange>, OperatorRoundBracketsNonConst<int>>);
423     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, CorrectFunc<CorrectRange>, WrongFirstInputOperatorRoundBrackets<int>>);
424     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, CorrectFunc<CorrectRange>, WrongSecondInputOperatorRoundBrackets<int>>);
425     static_assert(!can_call_functional_pdet_reduce<CorrectRange, int, CorrectFunc<CorrectRange>, WrongReturnOperatorRoundBrackets<int>>);
426 }
427 #endif // __TBB_CPP20_CONCEPTS_PRESENT
428 
429 //! Test parallel summation correctness
430 //! \brief \ref stress
431 TEST_CASE("Test parallel summation correctness") {
432     ParallelSumTester pst;
433     pst.CheckParallelReduce<utils_default_partitioner>();
434     pst.CheckParallelReduce<tbb::simple_partitioner>();
435     pst.CheckParallelReduce<tbb::auto_partitioner>();
436     pst.CheckParallelReduce<tbb::affinity_partitioner>();
437     pst.CheckParallelReduce<tbb::static_partitioner>();
438 }
439 
//! Number of FooBody split constructions; reset at the start of TestSplitting.
static std::atomic<long> ForkCount{0};
//! Number of currently live FooBody objects; explicitly starts at zero
//! instead of relying on zero-initialization of static storage.
static std::atomic<long> FooBodyCount{0};
442 
443 //! Class with public interface that is exactly minimal requirements for Range concept
444 class MinimalRange {
445     size_t begin, end;
446     friend class FooBody;
447     explicit MinimalRange( size_t i ) : begin(0), end(i) {}
448     template <typename Partitioner_> friend void TestSplitting( std::size_t nthread );
449 public:
450     MinimalRange( MinimalRange& r, tbb::split ) : end(r.end) {
451         begin = r.end = (r.begin+r.end)/2;
452     }
453     bool is_divisible() const {return end-begin>=2;}
454     bool empty() const {return begin==end;}
455 };
456 
457 //! Class with public interface that is exactly minimal requirements for Body of a parallel_reduce
458 class FooBody {
459 private:
460     FooBody( const FooBody& );          // Deny access
461     void operator=( const FooBody& );   // Deny access
462     template <typename Partitioner_> friend void TestSplitting( std::size_t nthread );
463     //! Parent that created this body via split operation.  NULL if original body.
464     FooBody* parent;
465     //! Total number of index values processed by body and its children.
466     size_t sum;
467     //! Number of join operations done so far on this body and its children.
468     long join_count;
469     //! Range that has been processed so far by this body and its children.
470     size_t begin, end;
471     //! True if body has not yet been processed at least once by operator().
472     bool is_new;
473     //! 1 if body was created by split; 0 if original body.
474     int forked;
475     FooBody() {++FooBodyCount;}
476 public:
477     ~FooBody() {
478         forked = 0xDEADBEEF;
479         sum=0xDEADBEEF;
480         join_count=0xDEADBEEF;
481         --FooBodyCount;
482     }
483     FooBody( FooBody& other, tbb::split ) {
484         ++FooBodyCount;
485         ++ForkCount;
486         sum = 0;
487         parent = &other;
488         join_count = 0;
489         is_new = true;
490         forked = 1;
491     }
492 
493     void init() {
494         sum = 0;
495         parent = nullptr;
496         join_count = 0;
497         is_new = true;
498         forked = 0;
499         begin = ~size_t(0);
500         end = ~size_t(0);
501     }
502 
503     void join( FooBody& s ) {
504         REQUIRE( s.forked==1 );
505         REQUIRE( this!=&s );
506         REQUIRE( this==s.parent );
507         REQUIRE( end==s.begin );
508         end = s.end;
509         sum += s.sum;
510         join_count += s.join_count + 1;
511         s.forked = 2;
512     }
513     void operator()( const MinimalRange& r ) {
514         for( size_t k=r.begin; k<r.end; ++k )
515             ++sum;
516         if( is_new ) {
517             is_new = false;
518             begin = r.begin;
519         } else
520             REQUIRE( end==r.begin );
521         end = r.end;
522     }
523 };
524 
525 template<typename Partitioner>
526 void TestSplitting( std::size_t nthread ) {
527     ForkCount = 0;
528     long join_count = 0;
529     Partitioner partitioner;
530     for( size_t i=0; i<=1000; ++i ) {
531         FooBody f;
532         f.init();
533         REQUIRE_MESSAGE( FooBodyCount==1, "Wrong initial BodyCount value" );
534         reduce_invoker(MinimalRange(i), f, partitioner);
535 
536         if (nthread == 1) REQUIRE_MESSAGE(ForkCount==0, "Body was split during 1 thread execution");
537 
538         join_count += f.join_count;
539         REQUIRE_MESSAGE( FooBodyCount==1, "Some copies of FooBody was not removed after reduction");
540         REQUIRE_MESSAGE( f.sum==i, "Incorrect reduction" );
541         REQUIRE_MESSAGE( f.begin==(i==0 ? ~size_t(0) : 0), "Incorrect range borders" );
542         REQUIRE_MESSAGE( f.end==(i==0 ? ~size_t(0) : i), "Incorrect range borders" );
543     }
544 }
545 
546 //! Test splitting range and body during reduction, test that all workers sleep when no work
547 //! \brief \ref resource_usage \ref error_guessing
548 TEST_CASE("Test splitting range and body during reduction, test that all workers sleep when no work") {
549     for ( auto concurrency_level : utils::concurrency_range() ) {
550         tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level);
551 
552         TestSplitting<tbb::simple_partitioner>(concurrency_level);
553         TestSplitting<tbb::static_partitioner>(concurrency_level);
554         TestSplitting<tbb::auto_partitioner>(concurrency_level);
555         TestSplitting<tbb::affinity_partitioner>(concurrency_level);
556         TestSplitting<utils_default_partitioner>(concurrency_level);
557 
558         // Test that all workers sleep when no work
559         TestCPUUserTime(concurrency_level);
560     }
561 }
562 
563 //! Define overloads of parallel_deterministic_reduce that accept "undesired" types of partitioners
564 namespace unsupported {
565     template<typename Range, typename Body>
566     void parallel_deterministic_reduce(const Range&, Body&, const tbb::auto_partitioner&) { }
567     template<typename Range, typename Body>
568     void parallel_deterministic_reduce(const Range&, Body&, tbb::affinity_partitioner&) { }
569 
570     template<typename Range, typename Value, typename RealBody, typename Reduction>
571     Value parallel_deterministic_reduce(const Range& , const Value& identity, const RealBody& , const Reduction& , const tbb::auto_partitioner&) {
572         return identity;
573     }
574     template<typename Range, typename Value, typename RealBody, typename Reduction>
575     Value parallel_deterministic_reduce(const Range& , const Value& identity, const RealBody& , const Reduction& , tbb::affinity_partitioner&) {
576         return identity;
577     }
578 }
579 
580 struct Body {
581     float value;
582     Body() : value(0) {}
583     Body(Body&, tbb::split) { value = 0; }
584     void operator()(const tbb::blocked_range<int>&) {}
585     void join(Body&) {}
586 };
587 
588 //! Check that other types of partitioners are not supported (auto, affinity)
589 //! In the case of "unsupported" API unexpectedly sneaking into namespace tbb,
590 //! this test should result in a compilation error due to overload resolution ambiguity
591 //! \brief \ref negative \ref error_guessing
592 TEST_CASE("Test Unsupported Partitioners") {
593     using namespace tbb;
594     using namespace unsupported;
595     Body body;
596     parallel_deterministic_reduce(blocked_range<int>(0, 10), body, tbb::auto_partitioner());
597 
598     tbb::affinity_partitioner ap;
599     parallel_deterministic_reduce(blocked_range<int>(0, 10), body, ap);
600 
601     parallel_deterministic_reduce(
602         blocked_range<int>(0, 10),
603         0,
604         [](const blocked_range<int>&, int init)->int {
605             return init;
606         },
607         [](int x, int y)->int {
608             return x + y;
609         },
610         tbb::auto_partitioner()
611     );
612     parallel_deterministic_reduce(
613         blocked_range<int>(0, 10),
614         0,
615         [](const blocked_range<int>&, int init)->int {
616             return init;
617         },
618         [](int x, int y)->int {
619             return x + y;
620         },
621         ap
622     );
623 }
624 
625 //! Testing tbb::parallel_reduce with tbb::task_group_context
626 //! \brief \ref interface \ref error_guessing
627 TEST_CASE("cancellation test for tbb::parallel_reduce") {
628     test_cancellation::ParallelReduceTestRunner</*First mode = */0>::run();
629 }
630 
631 //! Testing tbb::parallel_deterministic_reduce with tbb::task_group_context
632 //! \brief \ref interface \ref error_guessing
633 TEST_CASE("cancellation test for tbb::parallel_deterministic_reduce") {
634     test_cancellation::ParallelDeterministicReduceTestRunner</*First mode = */0>::run();
635 }
636 
637 #if __TBB_CPP20_CONCEPTS_PRESENT
638 //! \brief \ref error_guessing
639 TEST_CASE("parallel_reduce constraints") {
640     test_preduce_range_constraints();
641     test_preduce_body_constraints();
642     test_preduce_func_constraints();
643     test_preduce_combine_constraints();
644 }
645 
646 //! \brief \ref error_guessing
647 TEST_CASE("parallel_deterministic_reduce constraints") {
648     test_pdet_reduce_range_constraints();
649     test_pdet_reduce_body_constraints();
650     test_pdet_reduce_func_constraints();
651     test_pdet_reduce_combine_constraints();
652 }
653 #endif
654