1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #include "common/test.h" 18 19 #include "tbb/parallel_for.h" 20 21 #include "common/config.h" 22 #include "common/utils.h" 23 #include "common/utils_concurrency_limit.h" 24 #include "common/utils_report.h" 25 #include "common/vector_types.h" 26 #include "common/cpu_usertime.h" 27 #include "common/spin_barrier.h" 28 #include "common/exception_handling.h" 29 #include "common/concepts_common.h" 30 #include "test_partitioner.h" 31 32 #include <cstddef> 33 #include <vector> 34 35 //! \file test_parallel_for.cpp 36 //! \brief Test for [algorithms.parallel_for] specification 37 38 #if _MSC_VER 39 #pragma warning (push) 40 // Suppress conditional expression is constant 41 #pragma warning (disable: 4127) 42 #if __TBB_MSVC_UNREACHABLE_CODE_IGNORED 43 // Suppress pointless "unreachable code" warning. 44 #pragma warning (disable: 4702) 45 #endif 46 #if defined(_Wp64) 47 // Workaround for overzealous compiler warnings in /Wp64 mode 48 #pragma warning (disable: 4267) 49 #endif 50 #define _SCL_SECURE_NO_WARNINGS 51 #endif //#if _MSC_VER 52 53 54 #if (HAVE_m128 || HAVE_m256) 55 template<typename ClassWithVectorType> 56 struct SSE_Functor { 57 ClassWithVectorType* Src, * Dst; 58 SSE_Functor( ClassWithVectorType* src, ClassWithVectorType* dst ) : Src(src), Dst(dst) {} 59 60 void operator()( tbb::blocked_range<int>& r ) const { 61 for( int i=r.begin(); i!=r.end(); ++i ) 62 Dst[i] = Src[i]; 63 } 64 }; 65 66 //! Test that parallel_for works with stack-allocated __m128 67 template<typename ClassWithVectorType> 68 void TestVectorTypes() { 69 const int aSize = 300; 70 ClassWithVectorType Array1[aSize], Array2[aSize]; 71 for( int i=0; i<aSize; ++i ) { 72 // VC8 does not properly align a temporary value; to work around, use explicit variable 73 ClassWithVectorType foo(i); 74 Array1[i] = foo; 75 } 76 tbb::parallel_for( tbb::blocked_range<int>(0,aSize), SSE_Functor<ClassWithVectorType>(Array1, Array2) ); 77 for( int i=0; i<aSize; ++i ) { 78 ClassWithVectorType foo(i); 79 CHECK( Array2[i]==foo ) ; 80 } 81 } 82 #endif /* HAVE_m128 || HAVE_m256 */ 83 84 struct TestSimplePartitionerStabilityFunctor { 85 std::vector<int> & ranges; 86 TestSimplePartitionerStabilityFunctor(std::vector<int> & theRanges):ranges(theRanges){} 87 void operator()(tbb::blocked_range<size_t>& r)const{ 88 ranges.at(r.begin()) = 1; 89 } 90 }; 91 void TestSimplePartitionerStability(){ 92 const std::size_t repeat_count= 10; 93 const std::size_t rangeToSplitSize=1000000; 94 const std::size_t grainsizeStep=rangeToSplitSize/repeat_count; 95 typedef TestSimplePartitionerStabilityFunctor FunctorType; 96 97 for (std::size_t i=0 , grainsize=grainsizeStep; i<repeat_count;i++, grainsize+=grainsizeStep){ 98 std::vector<int> firstSeries(rangeToSplitSize,0); 99 std::vector<int> secondSeries(rangeToSplitSize,0); 100 101 tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(firstSeries),tbb::simple_partitioner()); 102 tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(secondSeries),tbb::simple_partitioner()); 103 104 CHECK_MESSAGE( 105 firstSeries == secondSeries, 106 "Splitting range with tbb::simple_partitioner must be reproducible; i = " << i 107 ); 108 } 109 } 110 111 namespace various_range_implementations { 112 113 using namespace test_partitioner_utils; 114 using namespace test_partitioner_utils::TestRanges; 115 116 // Body ensures that initial work distribution is done uniformly through affinity mechanism and not through work stealing 117 class Body { 118 utils::SpinBarrier &m_sb; 119 public: 120 Body(utils::SpinBarrier& sb) : m_sb(sb) { } 121 Body(Body& b, tbb::split) : m_sb(b.m_sb) { } 122 123 template <typename Range> 124 void operator()(Range& r) const { 125 INFO("Executing range [" << r.begin() << ", " << r.end() << "]"); 126 m_sb.wait(); // waiting for all threads 127 } 128 }; 129 130 namespace correctness { 131 132 /* Testing only correctness (that is parallel_for does not hang) */ 133 template <typename RangeType, bool /* feedback */, bool ensure_non_emptiness> 134 void test() { 135 RangeType range( 0, utils::get_platform_max_threads(), NULL, false, ensure_non_emptiness ); 136 tbb::affinity_partitioner ap; 137 tbb::parallel_for( range, SimpleBody(), ap ); 138 } 139 140 } // namespace correctness 141 142 namespace uniform_distribution { 143 144 /* Body of parallel_for algorithm would hang if non-uniform work distribution happened */ 145 template <typename RangeType, bool feedback, bool ensure_non_emptiness> 146 void test() { 147 static const std::size_t thread_num = utils::get_platform_max_threads(); 148 utils::SpinBarrier sb( thread_num ); 149 RangeType range(0, thread_num, NULL, feedback, ensure_non_emptiness); 150 const Body sync_body( sb ); 151 tbb::affinity_partitioner ap; 152 tbb::parallel_for( range, sync_body, ap ); 153 tbb::parallel_for( range, sync_body, tbb::static_partitioner() ); 154 } 155 156 } // namespace uniform_distribution 157 158 void test() { 159 const bool provide_feedback = false; 160 const bool ensure_non_empty_range = true; 161 162 // BlockedRange does not take into account feedback and non-emptiness settings but uses the 163 // tbb::blocked_range implementation 164 uniform_distribution::test<BlockedRange, !provide_feedback, !ensure_non_empty_range>(); 165 using correctness::test; 166 167 { 168 test<RoundedDownRange, provide_feedback, ensure_non_empty_range>(); 169 test<RoundedDownRange, provide_feedback, !ensure_non_empty_range>(); 170 } 171 172 { 173 test<RoundedUpRange, provide_feedback, ensure_non_empty_range>(); 174 test<RoundedUpRange, provide_feedback, !ensure_non_empty_range>(); 175 } 176 177 // Testing that parallel_for algorithm works with such weird ranges 178 correctness::test<Range1_2, /* provide_feedback= */ false, !ensure_non_empty_range>(); 179 correctness::test<Range1_999, /* provide_feedback= */ false, !ensure_non_empty_range>(); 180 correctness::test<Range999_1, /* provide_feedback= */ false, !ensure_non_empty_range>(); 181 182 // The following ranges do not comply with the proportion suggested by partitioner. Therefore 183 // they have to provide the proportion in which they were actually split back to partitioner and 184 // ensure theirs non-emptiness 185 test<Range1_2, provide_feedback, ensure_non_empty_range>(); 186 test<Range1_999, provide_feedback, ensure_non_empty_range>(); 187 test<Range999_1, provide_feedback, ensure_non_empty_range>(); 188 } 189 190 } // namespace various_range_implementations 191 192 namespace test_cancellation { 193 194 struct FunctorToCancel { 195 static std::atomic<bool> need_to_wait; 196 197 void operator()( std::size_t ) const { 198 ++g_CurExecuted; 199 if (need_to_wait) { 200 need_to_wait = Cancellator::WaitUntilReady(); 201 } 202 } 203 204 void operator()( const tbb::blocked_range<std::size_t>& ) const { 205 ++g_CurExecuted; 206 Cancellator::WaitUntilReady(); 207 } 208 209 static void reset() { need_to_wait = true; } 210 }; // struct FunctorToCancel 211 212 std::atomic<bool> FunctorToCancel::need_to_wait(true); 213 214 static constexpr std::size_t buffer_test_size = 1024; 215 static constexpr std::size_t maxParallelForRunnerMode = 14; 216 217 template <std::size_t Mode> 218 class ParallelForRunner { 219 tbb::task_group_context& my_ctx; 220 const std::size_t worker_task_step = 1; 221 222 static_assert(Mode >= 0 && Mode <= maxParallelForRunnerMode, "Incorrect mode for ParallelForRunner"); 223 224 template <typename Partitioner, typename... Args> 225 void run_parallel_for( Args&&... args ) const { 226 Partitioner part; 227 tbb::parallel_for(std::forward<Args>(args)..., part, my_ctx); 228 } 229 230 template <typename... Args> 231 void run_overload( Args&&... args ) const { 232 233 switch(Mode % 5) { 234 case 0 : { 235 tbb::parallel_for(std::forward<Args>(args)..., my_ctx); 236 break; 237 } 238 case 1 : { 239 run_parallel_for<tbb::simple_partitioner>(std::forward<Args>(args)...); 240 break; 241 } 242 case 2 : { 243 run_parallel_for<tbb::auto_partitioner>(std::forward<Args>(args)...); 244 break; 245 } 246 case 3 : { 247 run_parallel_for<tbb::static_partitioner>(std::forward<Args>(args)...); 248 break; 249 } 250 case 4 : { 251 run_parallel_for<tbb::affinity_partitioner>(std::forward<Args>(args)...); 252 break; 253 } 254 } 255 } 256 257 public: 258 ParallelForRunner( tbb::task_group_context& ctx ) 259 : my_ctx(ctx) {} 260 261 ~ParallelForRunner() { FunctorToCancel::reset(); } 262 263 void operator()() const { 264 if (Mode < 5) { 265 // Overload with blocked range 266 tbb::blocked_range<std::size_t> br(0, buffer_test_size); 267 run_overload(br, FunctorToCancel{}); 268 } else if (Mode < 10) { 269 // Overload with two indexes 270 run_overload(std::size_t(0), buffer_test_size, FunctorToCancel{}); 271 } else { 272 // Overload with two indexes and step 273 run_overload(std::size_t(0), buffer_test_size, worker_task_step, FunctorToCancel{}); 274 } 275 } 276 }; // class ParallelForRunner 277 278 template <std::size_t Mode> 279 void run_parallel_for_cancellation_test() { 280 // TODO: enable concurrency_range 281 if (utils::get_platform_max_threads() < 2) { 282 // The test requires at least one worker thread to request cancellation 283 return; 284 } 285 ResetEhGlobals(); 286 RunCancellationTest<ParallelForRunner<Mode>, Cancellator>(); 287 } 288 289 template <std::size_t Mode> 290 struct ParallelForTestRunner { 291 static void run() { 292 run_parallel_for_cancellation_test<Mode>(); 293 ParallelForTestRunner<Mode + 1>::run(); 294 } 295 }; // struct ParallelForTestRunner 296 297 template <> 298 struct ParallelForTestRunner<maxParallelForRunnerMode> { 299 static void run() { 300 run_parallel_for_cancellation_test<maxParallelForRunnerMode>(); 301 } 302 }; // struct ParallelForTestRunner<maxParallelForRunnerMode> 303 304 } // namespace test_cancellation 305 306 #if __TBB_CPP20_CONCEPTS_PRESENT 307 template <typename... Args> 308 concept can_call_parallel_for_basic = requires( Args&&... args ) { 309 tbb::parallel_for(std::forward<Args>(args)...); 310 }; 311 312 template <typename... Args> 313 concept can_call_parallel_for_helper = can_call_parallel_for_basic<Args...> && 314 can_call_parallel_for_basic<Args..., tbb::task_group_context&>; 315 316 template <typename... Args> 317 concept can_call_parallel_for_with_partitioner = can_call_parallel_for_helper<Args...> && 318 can_call_parallel_for_helper<Args..., const tbb::simple_partitioner&> && 319 can_call_parallel_for_helper<Args..., const tbb::auto_partitioner&> && 320 can_call_parallel_for_helper<Args..., const tbb::static_partitioner> && 321 can_call_parallel_for_helper<Args..., tbb::affinity_partitioner&>; 322 323 template <typename Range, typename Body> 324 concept can_call_range_pfor = can_call_parallel_for_with_partitioner<const Range&, const Body&>; 325 326 template <typename Index, typename Function> 327 concept can_call_index_pfor = can_call_parallel_for_with_partitioner<Index, Index, const Function&> && 328 can_call_parallel_for_with_partitioner<Index, Index, Index, const Function&>; 329 330 331 template <typename Range> 332 using CorrectBody = test_concepts::parallel_for_body::Correct<Range>; 333 template <typename Index> 334 using CorrectFunc = test_concepts::parallel_for_function::Correct<Index>; 335 336 void test_pfor_range_constraints() { 337 using namespace test_concepts::range; 338 339 static_assert(can_call_range_pfor<Correct, CorrectBody<Correct>>); 340 static_assert(!can_call_range_pfor<NonCopyable, CorrectBody<NonCopyable>>); 341 static_assert(!can_call_range_pfor<NonSplittable, CorrectBody<NonSplittable>>); 342 static_assert(!can_call_range_pfor<NonDestructible, CorrectBody<NonDestructible>>); 343 static_assert(!can_call_range_pfor<NoEmpty, CorrectBody<NoEmpty>>); 344 static_assert(!can_call_range_pfor<EmptyNonConst, CorrectBody<EmptyNonConst>>); 345 static_assert(!can_call_range_pfor<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>); 346 static_assert(!can_call_range_pfor<NoIsDivisible, CorrectBody<NoIsDivisible>>); 347 static_assert(!can_call_range_pfor<IsDivisibleNonConst, CorrectBody<IsDivisibleNonConst>>); 348 static_assert(!can_call_range_pfor<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>); 349 } 350 351 void test_pfor_body_constraints() { 352 using namespace test_concepts::parallel_for_body; 353 using CorrectRange = test_concepts::range::Correct; 354 355 static_assert(can_call_range_pfor<CorrectRange, Correct<CorrectRange>>); 356 static_assert(!can_call_range_pfor<CorrectRange, NonCopyable<CorrectRange>>); 357 static_assert(!can_call_range_pfor<CorrectRange, NonDestructible<CorrectRange>>); 358 static_assert(!can_call_range_pfor<CorrectRange, NoOperatorRoundBrackets<CorrectRange>>); 359 static_assert(!can_call_range_pfor<CorrectRange, OperatorRoundBracketsNonConst<CorrectRange>>); 360 static_assert(!can_call_range_pfor<CorrectRange, WrongInputOperatorRoundBrackets<CorrectRange>>); 361 } 362 363 void test_pfor_func_constraints() { 364 using namespace test_concepts::parallel_for_function; 365 using CorrectIndex = test_concepts::parallel_for_index::Correct; 366 367 static_assert(can_call_index_pfor<CorrectIndex, Correct<CorrectIndex>>); 368 static_assert(!can_call_index_pfor<CorrectIndex, NoOperatorRoundBrackets<CorrectIndex>>); 369 static_assert(!can_call_index_pfor<CorrectIndex, OperatorRoundBracketsNonConst<CorrectIndex>>); 370 static_assert(!can_call_index_pfor<CorrectIndex, WrongInputOperatorRoundBrackets<CorrectIndex>>); 371 } 372 373 void test_pfor_index_constraints() { 374 using namespace test_concepts::parallel_for_index; 375 static_assert(can_call_index_pfor<Correct, CorrectFunc<Correct>>); 376 static_assert(!can_call_index_pfor<NoIntCtor, CorrectFunc<NoIntCtor>>); 377 static_assert(!can_call_index_pfor<NonCopyable, CorrectFunc<NonCopyable>>); 378 static_assert(!can_call_index_pfor<NonCopyAssignable, CorrectFunc<NonCopyAssignable>>); 379 static_assert(!can_call_index_pfor<NonDestructible, CorrectFunc<NonDestructible>>); 380 static_assert(!can_call_index_pfor<NoOperatorLess, CorrectFunc<NoOperatorLess>>); 381 static_assert(!can_call_index_pfor<OperatorLessNonConst, CorrectFunc<OperatorLessNonConst>>); 382 static_assert(!can_call_index_pfor<WrongInputOperatorLess, CorrectFunc<WrongInputOperatorLess>>); 383 static_assert(!can_call_index_pfor<WrongReturnOperatorLess, CorrectFunc<WrongReturnOperatorLess>>); 384 static_assert(!can_call_index_pfor<NoOperatorMinus, CorrectFunc<NoOperatorMinus>>); 385 static_assert(!can_call_index_pfor<OperatorMinusNonConst, CorrectFunc<OperatorMinusNonConst>>); 386 static_assert(!can_call_index_pfor<WrongInputOperatorMinus, CorrectFunc<WrongInputOperatorMinus>>); 387 static_assert(!can_call_index_pfor<WrongReturnOperatorMinus, CorrectFunc<WrongReturnOperatorMinus>>); 388 static_assert(!can_call_index_pfor<NoOperatorPlus, CorrectFunc<NoOperatorPlus>>); 389 static_assert(!can_call_index_pfor<OperatorPlusNonConst, CorrectFunc<OperatorPlusNonConst>>); 390 static_assert(!can_call_index_pfor<WrongInputOperatorPlus, CorrectFunc<WrongInputOperatorPlus>>); 391 static_assert(!can_call_index_pfor<WrongReturnOperatorPlus, CorrectFunc<WrongReturnOperatorPlus>>); 392 } 393 #endif // __TBB_CPP20_CONCEPTS_PRESENT 394 395 #if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO 396 #include "tbb/global_control.h" 397 //! Testing exceptions 398 //! \brief \ref requirement 399 TEST_CASE("Exceptions support") { 400 for ( int p = MinThread; p <= MaxThread; ++p ) { 401 if ( p > 0 ) { 402 tbb::global_control control(tbb::global_control::max_allowed_parallelism, p); 403 TestExceptionsSupport(); 404 } 405 } 406 } 407 #endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN */ 408 409 //! Testing cancellation 410 //! \brief \ref error_guessing 411 TEST_CASE("Vector types") { 412 #if HAVE_m128 413 TestVectorTypes<ClassWithSSE>(); 414 #endif 415 #if HAVE_m256 416 if (have_AVX()) TestVectorTypes<ClassWithAVX>(); 417 #endif 418 } 419 420 //! Testing workers going to sleep 421 //! \brief \ref resource_usage 422 TEST_CASE("That all workers sleep when no work") { 423 const std::size_t N = 100000; 424 std::atomic<int> counter{}; 425 426 tbb::parallel_for(std::size_t(0), N, [&](std::size_t) { 427 for (int i = 0; i < 1000; ++i) { 428 ++counter; 429 } 430 }, tbb::simple_partitioner()); 431 TestCPUUserTime(utils::get_platform_max_threads()); 432 } 433 434 //! Testing simple partitioner stability 435 //! \brief \ref error_guessing 436 TEST_CASE("Simple partitioner stability") { 437 TestSimplePartitionerStability(); 438 } 439 440 //! Testing various range implementations 441 //! \brief \ref requirement 442 TEST_CASE("Various range implementations") { 443 various_range_implementations::test(); 444 } 445 446 //! Testing parallel_for with explicit task_group_context 447 //! \brief \ref interface \ref error_guessing 448 TEST_CASE("Сancellation test for tbb::parallel_for") { 449 test_cancellation::ParallelForTestRunner</*FirstMode = */0>::run(); 450 } 451 452 #if __TBB_CPP20_CONCEPTS_PRESENT 453 //! \brief \ref error_guessing 454 TEST_CASE("parallel_for constraints") { 455 test_pfor_range_constraints(); 456 test_pfor_body_constraints(); 457 test_pfor_func_constraints(); 458 test_pfor_index_constraints(); 459 } 460 #endif // __TBB_CPP20_CONCEPTS_PRESENT 461 462 #if _MSC_VER 463 #pragma warning (pop) 464 #endif 465