xref: /oneTBB/test/tbb/test_parallel_for.cpp (revision 6caecf96)
1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include "common/test.h"
18 
19 #include "tbb/parallel_for.h"
20 
21 #include "common/config.h"
22 #include "common/utils.h"
23 #include "common/utils_concurrency_limit.h"
24 #include "common/utils_report.h"
25 #include "common/vector_types.h"
26 #include "common/cpu_usertime.h"
27 #include "common/spin_barrier.h"
28 #include "common/exception_handling.h"
29 #include "common/concepts_common.h"
30 #include "test_partitioner.h"
31 
32 #include <cstddef>
33 #include <vector>
34 
35 //! \file test_parallel_for.cpp
36 //! \brief Test for [algorithms.parallel_for] specification
37 
38 #if _MSC_VER
39 #pragma warning (push)
40 #if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
41     // Suppress pointless "unreachable code" warning.
42     #pragma warning (disable: 4702)
43 #endif
44 #if defined(_Wp64)
45     // Workaround for overzealous compiler warnings in /Wp64 mode
46     #pragma warning (disable: 4267)
47 #endif
48 #define _SCL_SECURE_NO_WARNINGS
49 #endif //#if _MSC_VER
50 
51 
52 #if (HAVE_m128 || HAVE_m256)
53 template<typename ClassWithVectorType>
54 struct SSE_Functor {
55     ClassWithVectorType* Src, * Dst;
56     SSE_Functor( ClassWithVectorType* src, ClassWithVectorType* dst ) : Src(src), Dst(dst) {}
57 
58     void operator()( tbb::blocked_range<int>& r ) const {
59         for( int i=r.begin(); i!=r.end(); ++i )
60             Dst[i] = Src[i];
61     }
62 };
63 
64 //! Test that parallel_for works with stack-allocated __m128
65 template<typename ClassWithVectorType>
66 void TestVectorTypes() {
67     const int aSize = 300;
68     ClassWithVectorType Array1[aSize], Array2[aSize];
69     for( int i=0; i<aSize; ++i ) {
70         // VC8 does not properly align a temporary value; to work around, use explicit variable
71         ClassWithVectorType foo(i);
72         Array1[i] = foo;
73     }
74     tbb::parallel_for( tbb::blocked_range<int>(0,aSize), SSE_Functor<ClassWithVectorType>(Array1, Array2) );
75     for( int i=0; i<aSize; ++i ) {
76         ClassWithVectorType foo(i);
77         CHECK( Array2[i]==foo ) ;
78     }
79 }
80 #endif /* HAVE_m128 || HAVE_m256 */
81 
82 struct TestSimplePartitionerStabilityFunctor {
83   std::vector<int> & ranges;
84   TestSimplePartitionerStabilityFunctor(std::vector<int> & theRanges):ranges(theRanges){}
85   void operator()(tbb::blocked_range<size_t>& r)const{
86       ranges.at(r.begin()) = 1;
87   }
88 };
89 void TestSimplePartitionerStability(){
90     const std::size_t repeat_count= 10;
91     const std::size_t rangeToSplitSize=1000000;
92     const std::size_t grainsizeStep=rangeToSplitSize/repeat_count;
93     typedef TestSimplePartitionerStabilityFunctor FunctorType;
94 
95     for (std::size_t i=0 , grainsize=grainsizeStep; i<repeat_count;i++, grainsize+=grainsizeStep){
96         std::vector<int> firstSeries(rangeToSplitSize,0);
97         std::vector<int> secondSeries(rangeToSplitSize,0);
98 
99         tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(firstSeries),tbb::simple_partitioner());
100         tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(secondSeries),tbb::simple_partitioner());
101 
102         CHECK_MESSAGE(
103             firstSeries == secondSeries,
104             "Splitting range with tbb::simple_partitioner must be reproducible; i = " << i
105         );
106     }
107 }
108 
109 namespace various_range_implementations {
110 
111 using namespace test_partitioner_utils;
112 using namespace test_partitioner_utils::TestRanges;
113 
114 // Body ensures that initial work distribution is done uniformly through affinity mechanism and not through work stealing
115 class Body {
116     utils::SpinBarrier &m_sb;
117 public:
118     Body(utils::SpinBarrier& sb) : m_sb(sb) { }
119     Body(Body& b, tbb::split) : m_sb(b.m_sb) { }
120 
121     template <typename Range>
122     void operator()(Range& r) const {
123         INFO("Executing range [" << r.begin() << ", " << r.end() << "]");
124         m_sb.wait(); // waiting for all threads
125     }
126 };
127 
128 namespace correctness {
129 
130 /* Testing only correctness (that is parallel_for does not hang) */
131 template <typename RangeType, bool /* feedback */, bool ensure_non_emptiness>
132 void test() {
133     RangeType range( 0, utils::get_platform_max_threads(), NULL, false, ensure_non_emptiness );
134     tbb::affinity_partitioner ap;
135     tbb::parallel_for( range, SimpleBody(), ap );
136 }
137 
138 } // namespace correctness
139 
140 namespace uniform_distribution {
141 
142 /* Body of parallel_for algorithm would hang if non-uniform work distribution happened  */
143 template <typename RangeType, bool feedback, bool ensure_non_emptiness>
144 void test() {
145     static const std::size_t thread_num = utils::get_platform_max_threads();
146     utils::SpinBarrier sb( thread_num );
147     RangeType range(0, thread_num, NULL, feedback, ensure_non_emptiness);
148     const Body sync_body( sb );
149     tbb::affinity_partitioner ap;
150     tbb::parallel_for( range, sync_body, ap );
151     tbb::parallel_for( range, sync_body, tbb::static_partitioner() );
152 }
153 
154 } // namespace uniform_distribution
155 
156 void test() {
157     const bool provide_feedback = false;
158     const bool ensure_non_empty_range = true;
159 
160     // BlockedRange does not take into account feedback and non-emptiness settings but uses the
161     // tbb::blocked_range implementation
162     uniform_distribution::test<BlockedRange, !provide_feedback, !ensure_non_empty_range>();
163     using correctness::test;
164 
165     {
166         test<RoundedDownRange, provide_feedback, ensure_non_empty_range>();
167         test<RoundedDownRange, provide_feedback, !ensure_non_empty_range>();
168     }
169 
170     {
171         test<RoundedUpRange, provide_feedback, ensure_non_empty_range>();
172         test<RoundedUpRange, provide_feedback, !ensure_non_empty_range>();
173     }
174 
175     // Testing that parallel_for algorithm works with such weird ranges
176     correctness::test<Range1_2, /* provide_feedback= */ false, !ensure_non_empty_range>();
177     correctness::test<Range1_999, /* provide_feedback= */ false, !ensure_non_empty_range>();
178     correctness::test<Range999_1, /* provide_feedback= */ false, !ensure_non_empty_range>();
179 
180     // The following ranges do not comply with the proportion suggested by partitioner. Therefore
181     // they have to provide the proportion in which they were actually split back to partitioner and
182     // ensure theirs non-emptiness
183     test<Range1_2, provide_feedback, ensure_non_empty_range>();
184     test<Range1_999, provide_feedback, ensure_non_empty_range>();
185     test<Range999_1, provide_feedback, ensure_non_empty_range>();
186 }
187 
188 } // namespace various_range_implementations
189 
190 namespace test_cancellation {
191 
192 struct FunctorToCancel {
193     static std::atomic<bool> need_to_wait;
194 
195     void operator()( std::size_t ) const {
196         ++g_CurExecuted;
197         if (need_to_wait) {
198             need_to_wait = Cancellator::WaitUntilReady();
199         }
200     }
201 
202     void operator()( const tbb::blocked_range<std::size_t>& ) const {
203         ++g_CurExecuted;
204         Cancellator::WaitUntilReady();
205     }
206 
207     static void reset() { need_to_wait = true; }
208 }; // struct FunctorToCancel
209 
210 std::atomic<bool> FunctorToCancel::need_to_wait(true);
211 
// Size of the iteration space ([0, buffer_test_size)) passed to every parallel_for call
// made by ParallelForRunner.
static constexpr std::size_t buffer_test_size = 1024;
// Largest valid ParallelForRunner mode. Modes 0..14 enumerate three argument forms
// (blocked_range, two indexes, two indexes with step) times five partitioner choices
// (none, simple, auto, static, affinity).
static constexpr std::size_t maxParallelForRunnerMode = 14;
214 
215 template <std::size_t Mode>
216 class ParallelForRunner {
217     tbb::task_group_context& my_ctx;
218     const std::size_t worker_task_step = 1;
219 
220     static_assert(Mode >= 0 && Mode <= maxParallelForRunnerMode, "Incorrect mode for ParallelForRunner");
221 
222     template <typename Partitioner, typename... Args>
223     void run_parallel_for( Args&&... args ) const {
224         Partitioner part;
225         tbb::parallel_for(std::forward<Args>(args)..., part, my_ctx);
226     }
227 
228     template <typename... Args>
229     void run_overload( Args&&... args ) const {
230 
231         switch(Mode % 5) {
232             case 0 : {
233                 tbb::parallel_for(std::forward<Args>(args)..., my_ctx);
234                 break;
235             }
236             case 1 : {
237                 run_parallel_for<tbb::simple_partitioner>(std::forward<Args>(args)...);
238                 break;
239             }
240             case 2 : {
241                 run_parallel_for<tbb::auto_partitioner>(std::forward<Args>(args)...);
242                 break;
243             }
244             case 3 : {
245                 run_parallel_for<tbb::static_partitioner>(std::forward<Args>(args)...);
246                 break;
247             }
248             case 4 : {
249                 run_parallel_for<tbb::affinity_partitioner>(std::forward<Args>(args)...);
250                 break;
251             }
252         }
253     }
254 
255 public:
256     ParallelForRunner( tbb::task_group_context& ctx )
257         : my_ctx(ctx) {}
258 
259     ~ParallelForRunner() { FunctorToCancel::reset(); }
260 
261     void operator()() const {
262         if (Mode < 5) {
263             // Overload with blocked range
264             tbb::blocked_range<std::size_t> br(0, buffer_test_size);
265             run_overload(br, FunctorToCancel{});
266         } else if (Mode < 10) {
267             // Overload with two indexes
268             run_overload(std::size_t(0), buffer_test_size, FunctorToCancel{});
269         } else {
270             // Overload with two indexes and step
271             run_overload(std::size_t(0), buffer_test_size, worker_task_step, FunctorToCancel{});
272         }
273     }
274 }; // class ParallelForRunner
275 
276 template <std::size_t Mode>
277 void run_parallel_for_cancellation_test() {
278     // TODO: enable concurrency_range
279     if (utils::get_platform_max_threads() < 2) {
280         // The test requires at least one worker thread to request cancellation
281         return;
282     }
283     ResetEhGlobals();
284     RunCancellationTest<ParallelForRunner<Mode>, Cancellator>();
285 }
286 
287 template <std::size_t Mode>
288 struct ParallelForTestRunner {
289     static void run() {
290         run_parallel_for_cancellation_test<Mode>();
291         ParallelForTestRunner<Mode + 1>::run();
292     }
293 }; // struct ParallelForTestRunner
294 
295 template <>
296 struct ParallelForTestRunner<maxParallelForRunnerMode> {
297     static void run() {
298         run_parallel_for_cancellation_test<maxParallelForRunnerMode>();
299     }
300 }; // struct ParallelForTestRunner<maxParallelForRunnerMode>
301 
302 } // namespace test_cancellation
303 
304 #if __TBB_CPP20_CONCEPTS_PRESENT
305 template <typename... Args>
306 concept can_call_parallel_for_basic = requires( Args&&... args ) {
307     tbb::parallel_for(std::forward<Args>(args)...);
308 };
309 
310 template <typename... Args>
311 concept can_call_parallel_for_helper = can_call_parallel_for_basic<Args...> &&
312                                        can_call_parallel_for_basic<Args..., tbb::task_group_context&>;
313 
314 template <typename... Args>
315 concept can_call_parallel_for_with_partitioner = can_call_parallel_for_helper<Args...> &&
316                                                  can_call_parallel_for_helper<Args..., const tbb::simple_partitioner&> &&
317                                                  can_call_parallel_for_helper<Args..., const tbb::auto_partitioner&> &&
318                                                  can_call_parallel_for_helper<Args..., const tbb::static_partitioner> &&
319                                                  can_call_parallel_for_helper<Args..., tbb::affinity_partitioner&>;
320 
321 template <typename Range, typename Body>
322 concept can_call_range_pfor = can_call_parallel_for_with_partitioner<const Range&, const Body&>;
323 
324 template <typename Index, typename Function>
325 concept can_call_index_pfor = can_call_parallel_for_with_partitioner<Index, Index, const Function&> &&
326                               can_call_parallel_for_with_partitioner<Index, Index, Index, const Function&>;
327 
328 
// Shorthand for the conforming parallel_for body type from test_concepts, for a given Range.
template <typename Range>
using CorrectBody = test_concepts::parallel_for_body::Correct<Range>;
// Shorthand for the conforming parallel_for function type from test_concepts, for a given Index.
template <typename Index>
using CorrectFunc = test_concepts::parallel_for_function::Correct<Index>;
333 
334 void test_pfor_range_constraints() {
335     using namespace test_concepts::range;
336 
337     static_assert(can_call_range_pfor<Correct, CorrectBody<Correct>>);
338     static_assert(!can_call_range_pfor<NonCopyable, CorrectBody<NonCopyable>>);
339     static_assert(!can_call_range_pfor<NonSplittable, CorrectBody<NonSplittable>>);
340     static_assert(!can_call_range_pfor<NonDestructible, CorrectBody<NonDestructible>>);
341     static_assert(!can_call_range_pfor<NoEmpty, CorrectBody<NoEmpty>>);
342     static_assert(!can_call_range_pfor<EmptyNonConst, CorrectBody<EmptyNonConst>>);
343     static_assert(!can_call_range_pfor<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>);
344     static_assert(!can_call_range_pfor<NoIsDivisible, CorrectBody<NoIsDivisible>>);
345     static_assert(!can_call_range_pfor<IsDivisibleNonConst, CorrectBody<IsDivisibleNonConst>>);
346     static_assert(!can_call_range_pfor<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>);
347 }
348 
349 void test_pfor_body_constraints() {
350     using namespace test_concepts::parallel_for_body;
351     using CorrectRange = test_concepts::range::Correct;
352 
353     static_assert(can_call_range_pfor<CorrectRange, Correct<CorrectRange>>);
354     static_assert(!can_call_range_pfor<CorrectRange, NonCopyable<CorrectRange>>);
355     static_assert(!can_call_range_pfor<CorrectRange, NonDestructible<CorrectRange>>);
356     static_assert(!can_call_range_pfor<CorrectRange, NoOperatorRoundBrackets<CorrectRange>>);
357     static_assert(!can_call_range_pfor<CorrectRange, OperatorRoundBracketsNonConst<CorrectRange>>);
358     static_assert(!can_call_range_pfor<CorrectRange, WrongInputOperatorRoundBrackets<CorrectRange>>);
359 }
360 
361 void test_pfor_func_constraints() {
362     using namespace test_concepts::parallel_for_function;
363     using CorrectIndex = test_concepts::parallel_for_index::Correct;
364 
365     static_assert(can_call_index_pfor<CorrectIndex, Correct<CorrectIndex>>);
366     static_assert(!can_call_index_pfor<CorrectIndex, NoOperatorRoundBrackets<CorrectIndex>>);
367     static_assert(!can_call_index_pfor<CorrectIndex, OperatorRoundBracketsNonConst<CorrectIndex>>);
368     static_assert(!can_call_index_pfor<CorrectIndex, WrongInputOperatorRoundBrackets<CorrectIndex>>);
369 }
370 
371 void test_pfor_index_constraints() {
372     using namespace test_concepts::parallel_for_index;
373     static_assert(can_call_index_pfor<Correct, CorrectFunc<Correct>>);
374     static_assert(!can_call_index_pfor<NoIntCtor, CorrectFunc<NoIntCtor>>);
375     static_assert(!can_call_index_pfor<NonCopyable, CorrectFunc<NonCopyable>>);
376     static_assert(!can_call_index_pfor<NonCopyAssignable, CorrectFunc<NonCopyAssignable>>);
377     static_assert(!can_call_index_pfor<NonDestructible, CorrectFunc<NonDestructible>>);
378     static_assert(!can_call_index_pfor<NoOperatorLess, CorrectFunc<NoOperatorLess>>);
379     static_assert(!can_call_index_pfor<OperatorLessNonConst, CorrectFunc<OperatorLessNonConst>>);
380     static_assert(!can_call_index_pfor<WrongInputOperatorLess, CorrectFunc<WrongInputOperatorLess>>);
381     static_assert(!can_call_index_pfor<WrongReturnOperatorLess, CorrectFunc<WrongReturnOperatorLess>>);
382     static_assert(!can_call_index_pfor<NoOperatorMinus, CorrectFunc<NoOperatorMinus>>);
383     static_assert(!can_call_index_pfor<OperatorMinusNonConst, CorrectFunc<OperatorMinusNonConst>>);
384     static_assert(!can_call_index_pfor<WrongInputOperatorMinus, CorrectFunc<WrongInputOperatorMinus>>);
385     static_assert(!can_call_index_pfor<WrongReturnOperatorMinus, CorrectFunc<WrongReturnOperatorMinus>>);
386     static_assert(!can_call_index_pfor<NoOperatorPlus, CorrectFunc<NoOperatorPlus>>);
387     static_assert(!can_call_index_pfor<OperatorPlusNonConst, CorrectFunc<OperatorPlusNonConst>>);
388     static_assert(!can_call_index_pfor<WrongInputOperatorPlus, CorrectFunc<WrongInputOperatorPlus>>);
389     static_assert(!can_call_index_pfor<WrongReturnOperatorPlus, CorrectFunc<WrongReturnOperatorPlus>>);
390 }
391 #endif // __TBB_CPP20_CONCEPTS_PRESENT
392 
393 #if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO
394 #include "tbb/global_control.h"
395 //! Testing exceptions
396 //! \brief \ref requirement
397 TEST_CASE("Exceptions support") {
398     for ( int p = MinThread; p <= MaxThread; ++p ) {
399         if ( p > 0 ) {
400             tbb::global_control control(tbb::global_control::max_allowed_parallelism, p);
401             TestExceptionsSupport();
402         }
403     }
404 }
#endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO */
406 
//! Testing parallel_for with classes that hold SIMD vector members
//! \brief \ref error_guessing
TEST_CASE("Vector types") {
    // The body is empty when neither HAVE_m128 nor HAVE_m256 is set.
#if HAVE_m128
    TestVectorTypes<ClassWithSSE>();
#endif
#if HAVE_m256
    // The AVX variant runs only when have_AVX() reports support at run time.
    if (have_AVX()) TestVectorTypes<ClassWithAVX>();
#endif
}
417 
418 //! Testing workers going to sleep
419 //! \brief \ref resource_usage
420 TEST_CASE("That all workers sleep when no work") {
421     const std::size_t N = 100000;
422     std::atomic<int> counter{};
423 
424     tbb::parallel_for(std::size_t(0), N, [&](std::size_t) {
425         for (int i = 0; i < 1000; ++i) {
426             ++counter;
427         }
428     }, tbb::simple_partitioner());
429     TestCPUUserTime(utils::get_platform_max_threads());
430 }
431 
//! Testing simple partitioner stability
//! \brief \ref error_guessing
TEST_CASE("Simple partitioner stability") {
    // Splits the same range twice per grain size and requires identical chunk boundaries.
    TestSimplePartitionerStability();
}
437 
//! Testing various range implementations
//! \brief \ref requirement
TEST_CASE("Various range implementations") {
    // Runs parallel_for over the range types provided by test_partitioner_utils::TestRanges.
    various_range_implementations::test();
}
443 
444 //! Testing parallel_for with explicit task_group_context
445 //! \brief \ref interface \ref error_guessing
446 TEST_CASE("Сancellation test for tbb::parallel_for") {
447     test_cancellation::ParallelForTestRunner</*FirstMode = */0>::run();
448 }
449 
450 #if __TBB_CPP20_CONCEPTS_PRESENT
//! Testing parallel_for constraints
//! \brief \ref error_guessing
TEST_CASE("parallel_for constraints") {
    // The called functions contain only static_asserts, so the checks happen at compile time.
    test_pfor_range_constraints();
    test_pfor_body_constraints();
    test_pfor_func_constraints();
    test_pfor_index_constraints();
}
458 #endif // __TBB_CPP20_CONCEPTS_PRESENT
459 
460 #if _MSC_VER
461 #pragma warning (pop)
462 #endif
463