xref: /oneTBB/test/tbb/test_parallel_for.cpp (revision 07300f7e)
1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include "common/test.h"
18 
19 #include "tbb/parallel_for.h"
20 
21 #include "common/config.h"
22 #include "common/utils.h"
23 #include "common/utils_concurrency_limit.h"
24 #include "common/utils_report.h"
25 #include "common/vector_types.h"
26 #include "common/cpu_usertime.h"
27 #include "common/spin_barrier.h"
28 #include "common/exception_handling.h"
29 #include "common/concepts_common.h"
30 #include "test_partitioner.h"
31 
32 #include <cstddef>
33 #include <vector>
34 
35 //! \file test_parallel_for.cpp
36 //! \brief Test for [algorithms.parallel_for] specification
37 
38 #if _MSC_VER
39 #pragma warning (push)
40 // Suppress conditional expression is constant
41 #pragma warning (disable: 4127)
42 #if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
43     // Suppress pointless "unreachable code" warning.
44     #pragma warning (disable: 4702)
45 #endif
46 #if defined(_Wp64)
47     // Workaround for overzealous compiler warnings in /Wp64 mode
48     #pragma warning (disable: 4267)
49 #endif
50 #define _SCL_SECURE_NO_WARNINGS
51 #endif //#if _MSC_VER
52 
53 
54 #if (HAVE_m128 || HAVE_m256)
55 template<typename ClassWithVectorType>
56 struct SSE_Functor {
57     ClassWithVectorType* Src, * Dst;
58     SSE_Functor( ClassWithVectorType* src, ClassWithVectorType* dst ) : Src(src), Dst(dst) {}
59 
60     void operator()( tbb::blocked_range<int>& r ) const {
61         for( int i=r.begin(); i!=r.end(); ++i )
62             Dst[i] = Src[i];
63     }
64 };
65 
66 //! Test that parallel_for works with stack-allocated __m128
67 template<typename ClassWithVectorType>
68 void TestVectorTypes() {
69     const int aSize = 300;
70     ClassWithVectorType Array1[aSize], Array2[aSize];
71     for( int i=0; i<aSize; ++i ) {
72         // VC8 does not properly align a temporary value; to work around, use explicit variable
73         ClassWithVectorType foo(i);
74         Array1[i] = foo;
75     }
76     tbb::parallel_for( tbb::blocked_range<int>(0,aSize), SSE_Functor<ClassWithVectorType>(Array1, Array2) );
77     for( int i=0; i<aSize; ++i ) {
78         ClassWithVectorType foo(i);
79         CHECK( Array2[i]==foo ) ;
80     }
81 }
82 #endif /* HAVE_m128 || HAVE_m256 */
83 
84 struct TestSimplePartitionerStabilityFunctor {
85   std::vector<int> & ranges;
86   TestSimplePartitionerStabilityFunctor(std::vector<int> & theRanges):ranges(theRanges){}
87   void operator()(tbb::blocked_range<size_t>& r)const{
88       ranges.at(r.begin()) = 1;
89   }
90 };
91 void TestSimplePartitionerStability(){
92     const std::size_t repeat_count= 10;
93     const std::size_t rangeToSplitSize=1000000;
94     const std::size_t grainsizeStep=rangeToSplitSize/repeat_count;
95     typedef TestSimplePartitionerStabilityFunctor FunctorType;
96 
97     for (std::size_t i=0 , grainsize=grainsizeStep; i<repeat_count;i++, grainsize+=grainsizeStep){
98         std::vector<int> firstSeries(rangeToSplitSize,0);
99         std::vector<int> secondSeries(rangeToSplitSize,0);
100 
101         tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(firstSeries),tbb::simple_partitioner());
102         tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(secondSeries),tbb::simple_partitioner());
103 
104         CHECK_MESSAGE(
105             firstSeries == secondSeries,
106             "Splitting range with tbb::simple_partitioner must be reproducible; i = " << i
107         );
108     }
109 }
110 
111 namespace various_range_implementations {
112 
113 using namespace test_partitioner_utils;
114 using namespace test_partitioner_utils::TestRanges;
115 
116 // Body ensures that initial work distribution is done uniformly through affinity mechanism and not through work stealing
117 class Body {
118     utils::SpinBarrier &m_sb;
119 public:
120     Body(utils::SpinBarrier& sb) : m_sb(sb) { }
121     Body(Body& b, tbb::split) : m_sb(b.m_sb) { }
122 
123     template <typename Range>
124     void operator()(Range& r) const {
125         INFO("Executing range [" << r.begin() << ", " << r.end() << "]");
126         m_sb.wait(); // waiting for all threads
127     }
128 };
129 
130 namespace correctness {
131 
132 /* Testing only correctness (that is parallel_for does not hang) */
133 template <typename RangeType, bool /* feedback */, bool ensure_non_emptiness>
134 void test() {
135     RangeType range( 0, utils::get_platform_max_threads(), NULL, false, ensure_non_emptiness );
136     tbb::affinity_partitioner ap;
137     tbb::parallel_for( range, SimpleBody(), ap );
138 }
139 
140 } // namespace correctness
141 
142 namespace uniform_distribution {
143 
144 /* Body of parallel_for algorithm would hang if non-uniform work distribution happened  */
145 template <typename RangeType, bool feedback, bool ensure_non_emptiness>
146 void test() {
147     static const std::size_t thread_num = utils::get_platform_max_threads();
148     utils::SpinBarrier sb( thread_num );
149     RangeType range(0, thread_num, NULL, feedback, ensure_non_emptiness);
150     const Body sync_body( sb );
151     tbb::affinity_partitioner ap;
152     tbb::parallel_for( range, sync_body, ap );
153     tbb::parallel_for( range, sync_body, tbb::static_partitioner() );
154 }
155 
156 } // namespace uniform_distribution
157 
158 void test() {
159     const bool provide_feedback = false;
160     const bool ensure_non_empty_range = true;
161 
162     // BlockedRange does not take into account feedback and non-emptiness settings but uses the
163     // tbb::blocked_range implementation
164     uniform_distribution::test<BlockedRange, !provide_feedback, !ensure_non_empty_range>();
165     using correctness::test;
166 
167     {
168         test<RoundedDownRange, provide_feedback, ensure_non_empty_range>();
169         test<RoundedDownRange, provide_feedback, !ensure_non_empty_range>();
170     }
171 
172     {
173         test<RoundedUpRange, provide_feedback, ensure_non_empty_range>();
174         test<RoundedUpRange, provide_feedback, !ensure_non_empty_range>();
175     }
176 
177     // Testing that parallel_for algorithm works with such weird ranges
178     correctness::test<Range1_2, /* provide_feedback= */ false, !ensure_non_empty_range>();
179     correctness::test<Range1_999, /* provide_feedback= */ false, !ensure_non_empty_range>();
180     correctness::test<Range999_1, /* provide_feedback= */ false, !ensure_non_empty_range>();
181 
182     // The following ranges do not comply with the proportion suggested by partitioner. Therefore
183     // they have to provide the proportion in which they were actually split back to partitioner and
184     // ensure theirs non-emptiness
185     test<Range1_2, provide_feedback, ensure_non_empty_range>();
186     test<Range1_999, provide_feedback, ensure_non_empty_range>();
187     test<Range999_1, provide_feedback, ensure_non_empty_range>();
188 }
189 
190 } // namespace various_range_implementations
191 
192 namespace test_cancellation {
193 
194 struct FunctorToCancel {
195     static std::atomic<bool> need_to_wait;
196 
197     void operator()( std::size_t ) const {
198         ++g_CurExecuted;
199         if (need_to_wait) {
200             need_to_wait = Cancellator::WaitUntilReady();
201         }
202     }
203 
204     void operator()( const tbb::blocked_range<std::size_t>& ) const {
205         ++g_CurExecuted;
206         Cancellator::WaitUntilReady();
207     }
208 
209     static void reset() { need_to_wait = true; }
210 }; // struct FunctorToCancel
211 
212 std::atomic<bool> FunctorToCancel::need_to_wait(true);
213 
// Number of iterations (or range size) given to every parallel_for call in the cancellation test
static constexpr std::size_t buffer_test_size{1024};
// Highest valid ParallelForRunner mode: three overload kinds times five partitioner choices,
// numbered from zero
static constexpr std::size_t maxParallelForRunnerMode{3 * 5 - 1};
216 
217 template <std::size_t Mode>
218 class ParallelForRunner {
219     tbb::task_group_context& my_ctx;
220     const std::size_t worker_task_step = 1;
221 
222     static_assert(Mode >= 0 && Mode <= maxParallelForRunnerMode, "Incorrect mode for ParallelForRunner");
223 
224     template <typename Partitioner, typename... Args>
225     void run_parallel_for( Args&&... args ) const {
226         Partitioner part;
227         tbb::parallel_for(std::forward<Args>(args)..., part, my_ctx);
228     }
229 
230     template <typename... Args>
231     void run_overload( Args&&... args ) const {
232 
233         switch(Mode % 5) {
234             case 0 : {
235                 tbb::parallel_for(std::forward<Args>(args)..., my_ctx);
236                 break;
237             }
238             case 1 : {
239                 run_parallel_for<tbb::simple_partitioner>(std::forward<Args>(args)...);
240                 break;
241             }
242             case 2 : {
243                 run_parallel_for<tbb::auto_partitioner>(std::forward<Args>(args)...);
244                 break;
245             }
246             case 3 : {
247                 run_parallel_for<tbb::static_partitioner>(std::forward<Args>(args)...);
248                 break;
249             }
250             case 4 : {
251                 run_parallel_for<tbb::affinity_partitioner>(std::forward<Args>(args)...);
252                 break;
253             }
254         }
255     }
256 
257 public:
258     ParallelForRunner( tbb::task_group_context& ctx )
259         : my_ctx(ctx) {}
260 
261     ~ParallelForRunner() { FunctorToCancel::reset(); }
262 
263     void operator()() const {
264         if (Mode < 5) {
265             // Overload with blocked range
266             tbb::blocked_range<std::size_t> br(0, buffer_test_size);
267             run_overload(br, FunctorToCancel{});
268         } else if (Mode < 10) {
269             // Overload with two indexes
270             run_overload(std::size_t(0), buffer_test_size, FunctorToCancel{});
271         } else {
272             // Overload with two indexes and step
273             run_overload(std::size_t(0), buffer_test_size, worker_task_step, FunctorToCancel{});
274         }
275     }
276 }; // class ParallelForRunner
277 
278 template <std::size_t Mode>
279 void run_parallel_for_cancellation_test() {
280     // TODO: enable concurrency_range
281     if (utils::get_platform_max_threads() < 2) {
282         // The test requires at least one worker thread to request cancellation
283         return;
284     }
285     ResetEhGlobals();
286     RunCancellationTest<ParallelForRunner<Mode>, Cancellator>();
287 }
288 
289 template <std::size_t Mode>
290 struct ParallelForTestRunner {
291     static void run() {
292         run_parallel_for_cancellation_test<Mode>();
293         ParallelForTestRunner<Mode + 1>::run();
294     }
295 }; // struct ParallelForTestRunner
296 
297 template <>
298 struct ParallelForTestRunner<maxParallelForRunnerMode> {
299     static void run() {
300         run_parallel_for_cancellation_test<maxParallelForRunnerMode>();
301     }
302 }; // struct ParallelForTestRunner<maxParallelForRunnerMode>
303 
304 } // namespace test_cancellation
305 
306 #if __TBB_CPP20_CONCEPTS_PRESENT
307 template <typename... Args>
308 concept can_call_parallel_for_basic = requires( Args&&... args ) {
309     tbb::parallel_for(std::forward<Args>(args)...);
310 };
311 
312 template <typename... Args>
313 concept can_call_parallel_for_helper = can_call_parallel_for_basic<Args...> &&
314                                        can_call_parallel_for_basic<Args..., tbb::task_group_context&>;
315 
316 template <typename... Args>
317 concept can_call_parallel_for_with_partitioner = can_call_parallel_for_helper<Args...> &&
318                                                  can_call_parallel_for_helper<Args..., const tbb::simple_partitioner&> &&
319                                                  can_call_parallel_for_helper<Args..., const tbb::auto_partitioner&> &&
320                                                  can_call_parallel_for_helper<Args..., const tbb::static_partitioner> &&
321                                                  can_call_parallel_for_helper<Args..., tbb::affinity_partitioner&>;
322 
323 template <typename Range, typename Body>
324 concept can_call_range_pfor = can_call_parallel_for_with_partitioner<const Range&, const Body&>;
325 
326 template <typename Index, typename Function>
327 concept can_call_index_pfor = can_call_parallel_for_with_partitioner<Index, Index, const Function&> &&
328                               can_call_parallel_for_with_partitioner<Index, Index, Index, const Function&>;
329 
330 
331 template <typename Range>
332 using CorrectBody = test_concepts::parallel_for_body::Correct<Range>;
333 template <typename Index>
334 using CorrectFunc = test_concepts::parallel_for_function::Correct<Index>;
335 
336 void test_pfor_range_constraints() {
337     using namespace test_concepts::range;
338 
339     static_assert(can_call_range_pfor<Correct, CorrectBody<Correct>>);
340     static_assert(!can_call_range_pfor<NonCopyable, CorrectBody<NonCopyable>>);
341     static_assert(!can_call_range_pfor<NonSplittable, CorrectBody<NonSplittable>>);
342     static_assert(!can_call_range_pfor<NonDestructible, CorrectBody<NonDestructible>>);
343     static_assert(!can_call_range_pfor<NoEmpty, CorrectBody<NoEmpty>>);
344     static_assert(!can_call_range_pfor<EmptyNonConst, CorrectBody<EmptyNonConst>>);
345     static_assert(!can_call_range_pfor<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>);
346     static_assert(!can_call_range_pfor<NoIsDivisible, CorrectBody<NoIsDivisible>>);
347     static_assert(!can_call_range_pfor<IsDivisibleNonConst, CorrectBody<IsDivisibleNonConst>>);
348     static_assert(!can_call_range_pfor<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>);
349 }
350 
351 void test_pfor_body_constraints() {
352     using namespace test_concepts::parallel_for_body;
353     using CorrectRange = test_concepts::range::Correct;
354 
355     static_assert(can_call_range_pfor<CorrectRange, Correct<CorrectRange>>);
356     static_assert(!can_call_range_pfor<CorrectRange, NonCopyable<CorrectRange>>);
357     static_assert(!can_call_range_pfor<CorrectRange, NonDestructible<CorrectRange>>);
358     static_assert(!can_call_range_pfor<CorrectRange, NoOperatorRoundBrackets<CorrectRange>>);
359     static_assert(!can_call_range_pfor<CorrectRange, OperatorRoundBracketsNonConst<CorrectRange>>);
360     static_assert(!can_call_range_pfor<CorrectRange, WrongInputOperatorRoundBrackets<CorrectRange>>);
361 }
362 
363 void test_pfor_func_constraints() {
364     using namespace test_concepts::parallel_for_function;
365     using CorrectIndex = test_concepts::parallel_for_index::Correct;
366 
367     static_assert(can_call_index_pfor<CorrectIndex, Correct<CorrectIndex>>);
368     static_assert(!can_call_index_pfor<CorrectIndex, NoOperatorRoundBrackets<CorrectIndex>>);
369     static_assert(!can_call_index_pfor<CorrectIndex, OperatorRoundBracketsNonConst<CorrectIndex>>);
370     static_assert(!can_call_index_pfor<CorrectIndex, WrongInputOperatorRoundBrackets<CorrectIndex>>);
371 }
372 
373 void test_pfor_index_constraints() {
374     using namespace test_concepts::parallel_for_index;
375     static_assert(can_call_index_pfor<Correct, CorrectFunc<Correct>>);
376     static_assert(!can_call_index_pfor<NoIntCtor, CorrectFunc<NoIntCtor>>);
377     static_assert(!can_call_index_pfor<NonCopyable, CorrectFunc<NonCopyable>>);
378     static_assert(!can_call_index_pfor<NonCopyAssignable, CorrectFunc<NonCopyAssignable>>);
379     static_assert(!can_call_index_pfor<NonDestructible, CorrectFunc<NonDestructible>>);
380     static_assert(!can_call_index_pfor<NoOperatorLess, CorrectFunc<NoOperatorLess>>);
381     static_assert(!can_call_index_pfor<OperatorLessNonConst, CorrectFunc<OperatorLessNonConst>>);
382     static_assert(!can_call_index_pfor<WrongInputOperatorLess, CorrectFunc<WrongInputOperatorLess>>);
383     static_assert(!can_call_index_pfor<WrongReturnOperatorLess, CorrectFunc<WrongReturnOperatorLess>>);
384     static_assert(!can_call_index_pfor<NoOperatorMinus, CorrectFunc<NoOperatorMinus>>);
385     static_assert(!can_call_index_pfor<OperatorMinusNonConst, CorrectFunc<OperatorMinusNonConst>>);
386     static_assert(!can_call_index_pfor<WrongInputOperatorMinus, CorrectFunc<WrongInputOperatorMinus>>);
387     static_assert(!can_call_index_pfor<WrongReturnOperatorMinus, CorrectFunc<WrongReturnOperatorMinus>>);
388     static_assert(!can_call_index_pfor<NoOperatorPlus, CorrectFunc<NoOperatorPlus>>);
389     static_assert(!can_call_index_pfor<OperatorPlusNonConst, CorrectFunc<OperatorPlusNonConst>>);
390     static_assert(!can_call_index_pfor<WrongInputOperatorPlus, CorrectFunc<WrongInputOperatorPlus>>);
391     static_assert(!can_call_index_pfor<WrongReturnOperatorPlus, CorrectFunc<WrongReturnOperatorPlus>>);
392 }
393 #endif // __TBB_CPP20_CONCEPTS_PRESENT
394 
#if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO
#include "tbb/global_control.h"
//! Testing exceptions
//! \brief \ref requirement
TEST_CASE("Exceptions support") {
    // Repeat the exception test for every positive parallelism level in [MinThread, MaxThread]
    for ( int p = MinThread; p <= MaxThread; ++p ) {
        if ( p <= 0 ) {
            continue;
        }
        // Limits the allowed parallelism to p for the duration of this iteration
        tbb::global_control control(tbb::global_control::max_allowed_parallelism, p);
        TestExceptionsSupport();
    }
}
#endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO */
408 
//! Testing parallel_for over arrays of vector (SSE/AVX) types
//! \brief \ref error_guessing
TEST_CASE("Vector types") {
    // Each variant is compiled only when the corresponding HAVE_* macro is set;
    // with neither set, this test case is empty.
#if HAVE_m128
    TestVectorTypes<ClassWithSSE>();
#endif
#if HAVE_m256
    // The AVX variant additionally requires have_AVX() to report support at run time
    if (have_AVX()) TestVectorTypes<ClassWithAVX>();
#endif
}
419 
420 //! Testing workers going to sleep
421 //! \brief \ref resource_usage
422 TEST_CASE("That all workers sleep when no work") {
423     const std::size_t N = 100000;
424     std::atomic<int> counter{};
425 
426     tbb::parallel_for(std::size_t(0), N, [&](std::size_t) {
427         for (int i = 0; i < 1000; ++i) {
428             ++counter;
429         }
430     }, tbb::simple_partitioner());
431     TestCPUUserTime(utils::get_platform_max_threads());
432 }
433 
//! Testing simple partitioner stability
//! \brief \ref error_guessing
TEST_CASE("Simple partitioner stability") {
    // Runs the same parallel_for twice per grain size and compares the recorded subrange starts
    TestSimplePartitionerStability();
}
439 
//! Testing various range implementations
//! \brief \ref requirement
TEST_CASE("Various range implementations") {
    // Runs the uniform-distribution and correctness checks defined in the
    // various_range_implementations namespace
    various_range_implementations::test();
}
445 
446 //! Testing parallel_for with explicit task_group_context
447 //! \brief \ref interface \ref error_guessing
448 TEST_CASE("Сancellation test for tbb::parallel_for") {
449     test_cancellation::ParallelForTestRunner</*FirstMode = */0>::run();
450 }
451 
452 #if __TBB_CPP20_CONCEPTS_PRESENT
//! Testing that parallel_for overloads accept and reject argument types as constrained
//! \brief \ref error_guessing
TEST_CASE("parallel_for constraints") {
    // The called functions contain only static_assert declarations, so the actual checking
    // happens at compile time.
    test_pfor_range_constraints();
    test_pfor_body_constraints();
    test_pfor_func_constraints();
    test_pfor_index_constraints();
}
460 #endif // __TBB_CPP20_CONCEPTS_PRESENT
461 
462 #if _MSC_VER
463 #pragma warning (pop)
464 #endif
465