1 /*
2 Copyright (c) 2005-2022 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 #include "common/test.h"
18
19 #include "tbb/parallel_for.h"
20
21 #include "common/config.h"
22 #include "common/utils.h"
23 #include "common/utils_concurrency_limit.h"
24 #include "common/utils_report.h"
25 #include "common/vector_types.h"
26 #include "common/cpu_usertime.h"
27 #include "common/spin_barrier.h"
28 #include "common/exception_handling.h"
29 #include "common/concepts_common.h"
30 #include "test_partitioner.h"
31
32 #include <cstddef>
33 #include <vector>
34
35 //! \file test_parallel_for.cpp
36 //! \brief Test for [algorithms.parallel_for] specification
37
38 #if _MSC_VER
39 #pragma warning (push)
40 // Suppress conditional expression is constant
41 #pragma warning (disable: 4127)
42 #if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
43 // Suppress pointless "unreachable code" warning.
44 #pragma warning (disable: 4702)
45 #endif
46 #if defined(_Wp64)
47 // Workaround for overzealous compiler warnings in /Wp64 mode
48 #pragma warning (disable: 4267)
49 #endif
50 #define _SCL_SECURE_NO_WARNINGS
51 #endif //#if _MSC_VER
52
53
54 #if (HAVE_m128 || HAVE_m256)
55 template<typename ClassWithVectorType>
56 struct SSE_Functor {
57 ClassWithVectorType* Src, * Dst;
SSE_FunctorSSE_Functor58 SSE_Functor( ClassWithVectorType* src, ClassWithVectorType* dst ) : Src(src), Dst(dst) {}
59
operator ()SSE_Functor60 void operator()( tbb::blocked_range<int>& r ) const {
61 for( int i=r.begin(); i!=r.end(); ++i )
62 Dst[i] = Src[i];
63 }
64 };
65
66 //! Test that parallel_for works with stack-allocated __m128
67 template<typename ClassWithVectorType>
TestVectorTypes()68 void TestVectorTypes() {
69 const int aSize = 300;
70 ClassWithVectorType Array1[aSize], Array2[aSize];
71 for( int i=0; i<aSize; ++i ) {
72 // VC8 does not properly align a temporary value; to work around, use explicit variable
73 ClassWithVectorType foo(i);
74 Array1[i] = foo;
75 }
76 tbb::parallel_for( tbb::blocked_range<int>(0,aSize), SSE_Functor<ClassWithVectorType>(Array1, Array2) );
77 for( int i=0; i<aSize; ++i ) {
78 ClassWithVectorType foo(i);
79 CHECK( Array2[i]==foo ) ;
80 }
81 }
82 #endif /* HAVE_m128 || HAVE_m256 */
83
84 struct TestSimplePartitionerStabilityFunctor {
85 std::vector<int> & ranges;
TestSimplePartitionerStabilityFunctorTestSimplePartitionerStabilityFunctor86 TestSimplePartitionerStabilityFunctor(std::vector<int> & theRanges):ranges(theRanges){}
operator ()TestSimplePartitionerStabilityFunctor87 void operator()(tbb::blocked_range<size_t>& r)const{
88 ranges.at(r.begin()) = 1;
89 }
90 };
TestSimplePartitionerStability()91 void TestSimplePartitionerStability(){
92 const std::size_t repeat_count= 10;
93 const std::size_t rangeToSplitSize=1000000;
94 const std::size_t grainsizeStep=rangeToSplitSize/repeat_count;
95 typedef TestSimplePartitionerStabilityFunctor FunctorType;
96
97 for (std::size_t i=0 , grainsize=grainsizeStep; i<repeat_count;i++, grainsize+=grainsizeStep){
98 std::vector<int> firstSeries(rangeToSplitSize,0);
99 std::vector<int> secondSeries(rangeToSplitSize,0);
100
101 tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(firstSeries),tbb::simple_partitioner());
102 tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(secondSeries),tbb::simple_partitioner());
103
104 CHECK_MESSAGE(
105 firstSeries == secondSeries,
106 "Splitting range with tbb::simple_partitioner must be reproducible; i = " << i
107 );
108 }
109 }
110
111 namespace various_range_implementations {
112
113 using namespace test_partitioner_utils;
114 using namespace test_partitioner_utils::TestRanges;
115
116 // Body ensures that initial work distribution is done uniformly through affinity mechanism and not through work stealing
117 class Body {
118 utils::SpinBarrier &m_sb;
119 public:
Body(utils::SpinBarrier & sb)120 Body(utils::SpinBarrier& sb) : m_sb(sb) { }
Body(Body & b,tbb::split)121 Body(Body& b, tbb::split) : m_sb(b.m_sb) { }
122
123 template <typename Range>
operator ()(Range & r) const124 void operator()(Range& r) const {
125 INFO("Executing range [" << r.begin() << ", " << r.end() << "]");
126 m_sb.wait(); // waiting for all threads
127 }
128 };
129
130 namespace correctness {
131
132 /* Testing only correctness (that is parallel_for does not hang) */
133 template <typename RangeType, bool /* feedback */, bool ensure_non_emptiness>
test()134 void test() {
135 RangeType range( 0, utils::get_platform_max_threads(), nullptr, false, ensure_non_emptiness );
136 tbb::affinity_partitioner ap;
137 tbb::parallel_for( range, SimpleBody(), ap );
138 }
139
140 } // namespace correctness
141
142 namespace uniform_distribution {
143
144 /* Body of parallel_for algorithm would hang if non-uniform work distribution happened */
145 template <typename RangeType, bool feedback, bool ensure_non_emptiness>
test()146 void test() {
147 static const std::size_t thread_num = utils::get_platform_max_threads();
148 utils::SpinBarrier sb( thread_num );
149 RangeType range(0, thread_num, nullptr, feedback, ensure_non_emptiness);
150 const Body sync_body( sb );
151 tbb::affinity_partitioner ap;
152 tbb::parallel_for( range, sync_body, ap );
153 tbb::parallel_for( range, sync_body, tbb::static_partitioner() );
154 }
155
156 } // namespace uniform_distribution
157
test()158 void test() {
159 const bool provide_feedback = false;
160 const bool ensure_non_empty_range = true;
161
162 // BlockedRange does not take into account feedback and non-emptiness settings but uses the
163 // tbb::blocked_range implementation
164 uniform_distribution::test<BlockedRange, !provide_feedback, !ensure_non_empty_range>();
165 using correctness::test;
166
167 {
168 test<RoundedDownRange, provide_feedback, ensure_non_empty_range>();
169 test<RoundedDownRange, provide_feedback, !ensure_non_empty_range>();
170 }
171
172 {
173 test<RoundedUpRange, provide_feedback, ensure_non_empty_range>();
174 test<RoundedUpRange, provide_feedback, !ensure_non_empty_range>();
175 }
176
177 // Testing that parallel_for algorithm works with such weird ranges
178 correctness::test<Range1_2, /* provide_feedback= */ false, !ensure_non_empty_range>();
179 correctness::test<Range1_999, /* provide_feedback= */ false, !ensure_non_empty_range>();
180 correctness::test<Range999_1, /* provide_feedback= */ false, !ensure_non_empty_range>();
181
182 // The following ranges do not comply with the proportion suggested by partitioner. Therefore
183 // they have to provide the proportion in which they were actually split back to partitioner and
184 // ensure theirs non-emptiness
185 test<Range1_2, provide_feedback, ensure_non_empty_range>();
186 test<Range1_999, provide_feedback, ensure_non_empty_range>();
187 test<Range999_1, provide_feedback, ensure_non_empty_range>();
188 }
189
190 } // namespace various_range_implementations
191
192 namespace test_cancellation {
193
194 struct FunctorToCancel {
195 static std::atomic<bool> need_to_wait;
196
operator ()test_cancellation::FunctorToCancel197 void operator()( std::size_t ) const {
198 ++g_CurExecuted;
199 if (need_to_wait) {
200 need_to_wait = Cancellator::WaitUntilReady();
201 }
202 }
203
operator ()test_cancellation::FunctorToCancel204 void operator()( const tbb::blocked_range<std::size_t>& ) const {
205 ++g_CurExecuted;
206 Cancellator::WaitUntilReady();
207 }
208
resettest_cancellation::FunctorToCancel209 static void reset() { need_to_wait = true; }
210 }; // struct FunctorToCancel
211
212 std::atomic<bool> FunctorToCancel::need_to_wait(true);
213
214 static constexpr std::size_t buffer_test_size = 1024;
215 static constexpr std::size_t maxParallelForRunnerMode = 14;
216
217 template <std::size_t Mode>
218 class ParallelForRunner {
219 tbb::task_group_context& my_ctx;
220 const std::size_t worker_task_step = 1;
221
222 static_assert(Mode >= 0 && Mode <= maxParallelForRunnerMode, "Incorrect mode for ParallelForRunner");
223
224 template <typename Partitioner, typename... Args>
run_parallel_for(Args &&...args) const225 void run_parallel_for( Args&&... args ) const {
226 Partitioner part;
227 tbb::parallel_for(std::forward<Args>(args)..., part, my_ctx);
228 }
229
230 template <typename... Args>
run_overload(Args &&...args) const231 void run_overload( Args&&... args ) const {
232
233 switch(Mode % 5) {
234 case 0 : {
235 tbb::parallel_for(std::forward<Args>(args)..., my_ctx);
236 break;
237 }
238 case 1 : {
239 run_parallel_for<tbb::simple_partitioner>(std::forward<Args>(args)...);
240 break;
241 }
242 case 2 : {
243 run_parallel_for<tbb::auto_partitioner>(std::forward<Args>(args)...);
244 break;
245 }
246 case 3 : {
247 run_parallel_for<tbb::static_partitioner>(std::forward<Args>(args)...);
248 break;
249 }
250 case 4 : {
251 run_parallel_for<tbb::affinity_partitioner>(std::forward<Args>(args)...);
252 break;
253 }
254 }
255 }
256
257 public:
ParallelForRunner(tbb::task_group_context & ctx)258 ParallelForRunner( tbb::task_group_context& ctx )
259 : my_ctx(ctx) {}
260
~ParallelForRunner()261 ~ParallelForRunner() { FunctorToCancel::reset(); }
262
operator ()() const263 void operator()() const {
264 if (Mode < 5) {
265 // Overload with blocked range
266 tbb::blocked_range<std::size_t> br(0, buffer_test_size);
267 run_overload(br, FunctorToCancel{});
268 } else if (Mode < 10) {
269 // Overload with two indexes
270 run_overload(std::size_t(0), buffer_test_size, FunctorToCancel{});
271 } else {
272 // Overload with two indexes and step
273 run_overload(std::size_t(0), buffer_test_size, worker_task_step, FunctorToCancel{});
274 }
275 }
276 }; // class ParallelForRunner
277
278 template <std::size_t Mode>
run_parallel_for_cancellation_test()279 void run_parallel_for_cancellation_test() {
280 // TODO: enable concurrency_range
281 if (utils::get_platform_max_threads() < 2) {
282 // The test requires at least one worker thread to request cancellation
283 return;
284 }
285 ResetEhGlobals();
286 RunCancellationTest<ParallelForRunner<Mode>, Cancellator>();
287 }
288
289 template <std::size_t Mode>
290 struct ParallelForTestRunner {
runtest_cancellation::ParallelForTestRunner291 static void run() {
292 run_parallel_for_cancellation_test<Mode>();
293 ParallelForTestRunner<Mode + 1>::run();
294 }
295 }; // struct ParallelForTestRunner
296
297 template <>
298 struct ParallelForTestRunner<maxParallelForRunnerMode> {
runtest_cancellation::ParallelForTestRunner299 static void run() {
300 run_parallel_for_cancellation_test<maxParallelForRunnerMode>();
301 }
302 }; // struct ParallelForTestRunner<maxParallelForRunnerMode>
303
304 } // namespace test_cancellation
305
306 #if __TBB_CPP20_CONCEPTS_PRESENT
307 template <typename... Args>
308 concept can_call_parallel_for_basic = requires( Args&&... args ) {
309 tbb::parallel_for(std::forward<Args>(args)...);
310 };
311
312 template <typename... Args>
313 concept can_call_parallel_for_helper = can_call_parallel_for_basic<Args...> &&
314 can_call_parallel_for_basic<Args..., tbb::task_group_context&>;
315
316 template <typename... Args>
317 concept can_call_parallel_for_with_partitioner = can_call_parallel_for_helper<Args...> &&
318 can_call_parallel_for_helper<Args..., const tbb::simple_partitioner&> &&
319 can_call_parallel_for_helper<Args..., const tbb::auto_partitioner&> &&
320 can_call_parallel_for_helper<Args..., const tbb::static_partitioner> &&
321 can_call_parallel_for_helper<Args..., tbb::affinity_partitioner&>;
322
323 template <typename Range, typename Body>
324 concept can_call_range_pfor = can_call_parallel_for_with_partitioner<const Range&, const Body&>;
325
326 template <typename Index, typename Function>
327 concept can_call_index_pfor = can_call_parallel_for_with_partitioner<Index, Index, const Function&> &&
328 can_call_parallel_for_with_partitioner<Index, Index, Index, const Function&>;
329
330
331 template <typename Range>
332 using CorrectBody = test_concepts::parallel_for_body::Correct<Range>;
333 template <typename Index>
334 using CorrectFunc = test_concepts::parallel_for_function::Correct<Index>;
335
test_pfor_range_constraints()336 void test_pfor_range_constraints() {
337 using namespace test_concepts::range;
338
339 static_assert(can_call_range_pfor<Correct, CorrectBody<Correct>>);
340 static_assert(!can_call_range_pfor<NonCopyable, CorrectBody<NonCopyable>>);
341 static_assert(!can_call_range_pfor<NonSplittable, CorrectBody<NonSplittable>>);
342 static_assert(!can_call_range_pfor<NonDestructible, CorrectBody<NonDestructible>>);
343 static_assert(!can_call_range_pfor<NoEmpty, CorrectBody<NoEmpty>>);
344 static_assert(!can_call_range_pfor<EmptyNonConst, CorrectBody<EmptyNonConst>>);
345 static_assert(!can_call_range_pfor<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>);
346 static_assert(!can_call_range_pfor<NoIsDivisible, CorrectBody<NoIsDivisible>>);
347 static_assert(!can_call_range_pfor<IsDivisibleNonConst, CorrectBody<IsDivisibleNonConst>>);
348 static_assert(!can_call_range_pfor<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>);
349 }
350
test_pfor_body_constraints()351 void test_pfor_body_constraints() {
352 using namespace test_concepts::parallel_for_body;
353 using CorrectRange = test_concepts::range::Correct;
354
355 static_assert(can_call_range_pfor<CorrectRange, Correct<CorrectRange>>);
356 static_assert(!can_call_range_pfor<CorrectRange, NonCopyable<CorrectRange>>);
357 static_assert(!can_call_range_pfor<CorrectRange, NonDestructible<CorrectRange>>);
358 static_assert(!can_call_range_pfor<CorrectRange, NoOperatorRoundBrackets<CorrectRange>>);
359 static_assert(!can_call_range_pfor<CorrectRange, OperatorRoundBracketsNonConst<CorrectRange>>);
360 static_assert(!can_call_range_pfor<CorrectRange, WrongInputOperatorRoundBrackets<CorrectRange>>);
361 }
362
test_pfor_func_constraints()363 void test_pfor_func_constraints() {
364 using namespace test_concepts::parallel_for_function;
365 using CorrectIndex = test_concepts::parallel_for_index::Correct;
366
367 static_assert(can_call_index_pfor<CorrectIndex, Correct<CorrectIndex>>);
368 static_assert(!can_call_index_pfor<CorrectIndex, NoOperatorRoundBrackets<CorrectIndex>>);
369 static_assert(!can_call_index_pfor<CorrectIndex, OperatorRoundBracketsNonConst<CorrectIndex>>);
370 static_assert(!can_call_index_pfor<CorrectIndex, WrongInputOperatorRoundBrackets<CorrectIndex>>);
371 }
372
test_pfor_index_constraints()373 void test_pfor_index_constraints() {
374 using namespace test_concepts::parallel_for_index;
375 static_assert(can_call_index_pfor<Correct, CorrectFunc<Correct>>);
376 static_assert(!can_call_index_pfor<NoIntCtor, CorrectFunc<NoIntCtor>>);
377 static_assert(!can_call_index_pfor<NonCopyable, CorrectFunc<NonCopyable>>);
378 static_assert(!can_call_index_pfor<NonCopyAssignable, CorrectFunc<NonCopyAssignable>>);
379 static_assert(!can_call_index_pfor<NonDestructible, CorrectFunc<NonDestructible>>);
380 static_assert(!can_call_index_pfor<NoOperatorLess, CorrectFunc<NoOperatorLess>>);
381 static_assert(!can_call_index_pfor<OperatorLessNonConst, CorrectFunc<OperatorLessNonConst>>);
382 static_assert(!can_call_index_pfor<WrongInputOperatorLess, CorrectFunc<WrongInputOperatorLess>>);
383 static_assert(!can_call_index_pfor<WrongReturnOperatorLess, CorrectFunc<WrongReturnOperatorLess>>);
384 static_assert(!can_call_index_pfor<NoOperatorMinus, CorrectFunc<NoOperatorMinus>>);
385 static_assert(!can_call_index_pfor<OperatorMinusNonConst, CorrectFunc<OperatorMinusNonConst>>);
386 static_assert(!can_call_index_pfor<WrongInputOperatorMinus, CorrectFunc<WrongInputOperatorMinus>>);
387 static_assert(!can_call_index_pfor<WrongReturnOperatorMinus, CorrectFunc<WrongReturnOperatorMinus>>);
388 static_assert(!can_call_index_pfor<NoOperatorPlus, CorrectFunc<NoOperatorPlus>>);
389 static_assert(!can_call_index_pfor<OperatorPlusNonConst, CorrectFunc<OperatorPlusNonConst>>);
390 static_assert(!can_call_index_pfor<WrongInputOperatorPlus, CorrectFunc<WrongInputOperatorPlus>>);
391 static_assert(!can_call_index_pfor<WrongReturnOperatorPlus, CorrectFunc<WrongReturnOperatorPlus>>);
392 }
393 #endif // __TBB_CPP20_CONCEPTS_PRESENT
394
395 #if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO
396 #include "tbb/global_control.h"
397 //! Testing exceptions
398 //! \brief \ref requirement
399 TEST_CASE("Exceptions support") {
400 for ( int p = MinThread; p <= MaxThread; ++p ) {
401 if ( p > 0 ) {
402 tbb::global_control control(tbb::global_control::max_allowed_parallelism, p);
403 TestExceptionsSupport();
404 }
405 }
406 }
#endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO */
408
//! Testing parallel_for with vector (SSE/AVX) types
410 //! \brief \ref error_guessing
411 TEST_CASE("Vector types") {
412 #if HAVE_m128
413 TestVectorTypes<ClassWithSSE>();
414 #endif
415 #if HAVE_m256
416 if (have_AVX()) TestVectorTypes<ClassWithAVX>();
417 #endif
418 }
419
420 //! Testing workers going to sleep
421 //! \brief \ref resource_usage
422 TEST_CASE("That all workers sleep when no work") {
423 const std::size_t N = 100000;
424 std::atomic<int> counter{};
425
__anon95d702fd0102(std::size_t) 426 tbb::parallel_for(std::size_t(0), N, [&](std::size_t) {
427 for (int i = 0; i < 1000; ++i) {
428 ++counter;
429 }
430 }, tbb::simple_partitioner());
431 TestCPUUserTime(utils::get_platform_max_threads());
432 }
433
434 //! Testing simple partitioner stability
435 //! \brief \ref error_guessing
436 TEST_CASE("Simple partitioner stability") {
437 TestSimplePartitionerStability();
438 }
439
440 //! Testing various range implementations
441 //! \brief \ref requirement
442 TEST_CASE("Various range implementations") {
443 various_range_implementations::test();
444 }
445
446 //! Testing parallel_for with explicit task_group_context
447 //! \brief \ref interface \ref error_guessing
448 TEST_CASE("Сancellation test for tbb::parallel_for") {
449 test_cancellation::ParallelForTestRunner</*FirstMode = */0>::run();
450 }
451
452 #if __TBB_CPP20_CONCEPTS_PRESENT
453 //! \brief \ref error_guessing
454 TEST_CASE("parallel_for constraints") {
455 test_pfor_range_constraints();
456 test_pfor_body_constraints();
457 test_pfor_func_constraints();
458 test_pfor_index_constraints();
459 }
460 #endif // __TBB_CPP20_CONCEPTS_PRESENT
461
462 #if _MSC_VER
463 #pragma warning (pop)
464 #endif
465