1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #include "common/test.h" 18 19 #include "tbb/parallel_for.h" 20 21 #include "common/config.h" 22 #include "common/utils.h" 23 #include "common/utils_concurrency_limit.h" 24 #include "common/utils_report.h" 25 #include "common/vector_types.h" 26 #include "common/cpu_usertime.h" 27 #include "common/spin_barrier.h" 28 #include "common/exception_handling.h" 29 #include "common/concepts_common.h" 30 #include "test_partitioner.h" 31 32 #include <cstddef> 33 #include <vector> 34 35 //! \file test_parallel_for.cpp 36 //! \brief Test for [algorithms.parallel_for] specification 37 38 #if _MSC_VER 39 #pragma warning (push) 40 #if __TBB_MSVC_UNREACHABLE_CODE_IGNORED 41 // Suppress pointless "unreachable code" warning. 42 #pragma warning (disable: 4702) 43 #endif 44 #if defined(_Wp64) 45 // Workaround for overzealous compiler warnings in /Wp64 mode 46 #pragma warning (disable: 4267) 47 #endif 48 #define _SCL_SECURE_NO_WARNINGS 49 #endif //#if _MSC_VER 50 51 52 #if (HAVE_m128 || HAVE_m256) 53 template<typename ClassWithVectorType> 54 struct SSE_Functor { 55 ClassWithVectorType* Src, * Dst; 56 SSE_Functor( ClassWithVectorType* src, ClassWithVectorType* dst ) : Src(src), Dst(dst) {} 57 58 void operator()( tbb::blocked_range<int>& r ) const { 59 for( int i=r.begin(); i!=r.end(); ++i ) 60 Dst[i] = Src[i]; 61 } 62 }; 63 64 //! Test that parallel_for works with stack-allocated __m128 65 template<typename ClassWithVectorType> 66 void TestVectorTypes() { 67 const int aSize = 300; 68 ClassWithVectorType Array1[aSize], Array2[aSize]; 69 for( int i=0; i<aSize; ++i ) { 70 // VC8 does not properly align a temporary value; to work around, use explicit variable 71 ClassWithVectorType foo(i); 72 Array1[i] = foo; 73 } 74 tbb::parallel_for( tbb::blocked_range<int>(0,aSize), SSE_Functor<ClassWithVectorType>(Array1, Array2) ); 75 for( int i=0; i<aSize; ++i ) { 76 ClassWithVectorType foo(i); 77 CHECK( Array2[i]==foo ) ; 78 } 79 } 80 #endif /* HAVE_m128 || HAVE_m256 */ 81 82 struct TestSimplePartitionerStabilityFunctor { 83 std::vector<int> & ranges; 84 TestSimplePartitionerStabilityFunctor(std::vector<int> & theRanges):ranges(theRanges){} 85 void operator()(tbb::blocked_range<size_t>& r)const{ 86 ranges.at(r.begin()) = 1; 87 } 88 }; 89 void TestSimplePartitionerStability(){ 90 const std::size_t repeat_count= 10; 91 const std::size_t rangeToSplitSize=1000000; 92 const std::size_t grainsizeStep=rangeToSplitSize/repeat_count; 93 typedef TestSimplePartitionerStabilityFunctor FunctorType; 94 95 for (std::size_t i=0 , grainsize=grainsizeStep; i<repeat_count;i++, grainsize+=grainsizeStep){ 96 std::vector<int> firstSeries(rangeToSplitSize,0); 97 std::vector<int> secondSeries(rangeToSplitSize,0); 98 99 tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(firstSeries),tbb::simple_partitioner()); 100 tbb::parallel_for(tbb::blocked_range<size_t>(0,rangeToSplitSize,grainsize),FunctorType(secondSeries),tbb::simple_partitioner()); 101 102 CHECK_MESSAGE( 103 firstSeries == secondSeries, 104 "Splitting range with tbb::simple_partitioner must be reproducible; i = " << i 105 ); 106 } 107 } 108 109 namespace various_range_implementations { 110 111 using namespace test_partitioner_utils; 112 using namespace test_partitioner_utils::TestRanges; 113 114 // Body ensures that initial work distribution is done uniformly through affinity mechanism and not through work stealing 115 class Body { 116 utils::SpinBarrier &m_sb; 117 public: 118 Body(utils::SpinBarrier& sb) : m_sb(sb) { } 119 Body(Body& b, tbb::split) : m_sb(b.m_sb) { } 120 121 template <typename Range> 122 void operator()(Range& r) const { 123 INFO("Executing range [" << r.begin() << ", " << r.end() << "]"); 124 m_sb.wait(); // waiting for all threads 125 } 126 }; 127 128 namespace correctness { 129 130 /* Testing only correctness (that is parallel_for does not hang) */ 131 template <typename RangeType, bool /* feedback */, bool ensure_non_emptiness> 132 void test() { 133 RangeType range( 0, utils::get_platform_max_threads(), NULL, false, ensure_non_emptiness ); 134 tbb::affinity_partitioner ap; 135 tbb::parallel_for( range, SimpleBody(), ap ); 136 } 137 138 } // namespace correctness 139 140 namespace uniform_distribution { 141 142 /* Body of parallel_for algorithm would hang if non-uniform work distribution happened */ 143 template <typename RangeType, bool feedback, bool ensure_non_emptiness> 144 void test() { 145 static const std::size_t thread_num = utils::get_platform_max_threads(); 146 utils::SpinBarrier sb( thread_num ); 147 RangeType range(0, thread_num, NULL, feedback, ensure_non_emptiness); 148 const Body sync_body( sb ); 149 tbb::affinity_partitioner ap; 150 tbb::parallel_for( range, sync_body, ap ); 151 tbb::parallel_for( range, sync_body, tbb::static_partitioner() ); 152 } 153 154 } // namespace uniform_distribution 155 156 void test() { 157 const bool provide_feedback = false; 158 const bool ensure_non_empty_range = true; 159 160 // BlockedRange does not take into account feedback and non-emptiness settings but uses the 161 // tbb::blocked_range implementation 162 uniform_distribution::test<BlockedRange, !provide_feedback, !ensure_non_empty_range>(); 163 using correctness::test; 164 165 { 166 test<RoundedDownRange, provide_feedback, ensure_non_empty_range>(); 167 test<RoundedDownRange, provide_feedback, !ensure_non_empty_range>(); 168 } 169 170 { 171 test<RoundedUpRange, provide_feedback, ensure_non_empty_range>(); 172 test<RoundedUpRange, provide_feedback, !ensure_non_empty_range>(); 173 } 174 175 // Testing that parallel_for algorithm works with such weird ranges 176 correctness::test<Range1_2, /* provide_feedback= */ false, !ensure_non_empty_range>(); 177 correctness::test<Range1_999, /* provide_feedback= */ false, !ensure_non_empty_range>(); 178 correctness::test<Range999_1, /* provide_feedback= */ false, !ensure_non_empty_range>(); 179 180 // The following ranges do not comply with the proportion suggested by partitioner. Therefore 181 // they have to provide the proportion in which they were actually split back to partitioner and 182 // ensure theirs non-emptiness 183 test<Range1_2, provide_feedback, ensure_non_empty_range>(); 184 test<Range1_999, provide_feedback, ensure_non_empty_range>(); 185 test<Range999_1, provide_feedback, ensure_non_empty_range>(); 186 } 187 188 } // namespace various_range_implementations 189 190 namespace test_cancellation { 191 192 struct FunctorToCancel { 193 static std::atomic<bool> need_to_wait; 194 195 void operator()( std::size_t ) const { 196 ++g_CurExecuted; 197 if (need_to_wait) { 198 need_to_wait = Cancellator::WaitUntilReady(); 199 } 200 } 201 202 void operator()( const tbb::blocked_range<std::size_t>& ) const { 203 ++g_CurExecuted; 204 Cancellator::WaitUntilReady(); 205 } 206 207 static void reset() { need_to_wait = true; } 208 }; // struct FunctorToCancel 209 210 std::atomic<bool> FunctorToCancel::need_to_wait(true); 211 212 static constexpr std::size_t buffer_test_size = 1024; 213 static constexpr std::size_t maxParallelForRunnerMode = 14; 214 215 template <std::size_t Mode> 216 class ParallelForRunner { 217 tbb::task_group_context& my_ctx; 218 const std::size_t worker_task_step = 1; 219 220 static_assert(Mode >= 0 && Mode <= maxParallelForRunnerMode, "Incorrect mode for ParallelForRunner"); 221 222 template <typename Partitioner, typename... Args> 223 void run_parallel_for( Args&&... args ) const { 224 Partitioner part; 225 tbb::parallel_for(std::forward<Args>(args)..., part, my_ctx); 226 } 227 228 template <typename... Args> 229 void run_overload( Args&&... args ) const { 230 231 switch(Mode % 5) { 232 case 0 : { 233 tbb::parallel_for(std::forward<Args>(args)..., my_ctx); 234 break; 235 } 236 case 1 : { 237 run_parallel_for<tbb::simple_partitioner>(std::forward<Args>(args)...); 238 break; 239 } 240 case 2 : { 241 run_parallel_for<tbb::auto_partitioner>(std::forward<Args>(args)...); 242 break; 243 } 244 case 3 : { 245 run_parallel_for<tbb::static_partitioner>(std::forward<Args>(args)...); 246 break; 247 } 248 case 4 : { 249 run_parallel_for<tbb::affinity_partitioner>(std::forward<Args>(args)...); 250 break; 251 } 252 } 253 } 254 255 public: 256 ParallelForRunner( tbb::task_group_context& ctx ) 257 : my_ctx(ctx) {} 258 259 ~ParallelForRunner() { FunctorToCancel::reset(); } 260 261 void operator()() const { 262 if (Mode < 5) { 263 // Overload with blocked range 264 tbb::blocked_range<std::size_t> br(0, buffer_test_size); 265 run_overload(br, FunctorToCancel{}); 266 } else if (Mode < 10) { 267 // Overload with two indexes 268 run_overload(std::size_t(0), buffer_test_size, FunctorToCancel{}); 269 } else { 270 // Overload with two indexes and step 271 run_overload(std::size_t(0), buffer_test_size, worker_task_step, FunctorToCancel{}); 272 } 273 } 274 }; // class ParallelForRunner 275 276 template <std::size_t Mode> 277 void run_parallel_for_cancellation_test() { 278 // TODO: enable concurrency_range 279 if (utils::get_platform_max_threads() < 2) { 280 // The test requires at least one worker thread to request cancellation 281 return; 282 } 283 ResetEhGlobals(); 284 RunCancellationTest<ParallelForRunner<Mode>, Cancellator>(); 285 } 286 287 template <std::size_t Mode> 288 struct ParallelForTestRunner { 289 static void run() { 290 run_parallel_for_cancellation_test<Mode>(); 291 ParallelForTestRunner<Mode + 1>::run(); 292 } 293 }; // struct ParallelForTestRunner 294 295 template <> 296 struct ParallelForTestRunner<maxParallelForRunnerMode> { 297 static void run() { 298 run_parallel_for_cancellation_test<maxParallelForRunnerMode>(); 299 } 300 }; // struct ParallelForTestRunner<maxParallelForRunnerMode> 301 302 } // namespace test_cancellation 303 304 #if __TBB_CPP20_CONCEPTS_PRESENT 305 template <typename... Args> 306 concept can_call_parallel_for_basic = requires( Args&&... args ) { 307 tbb::parallel_for(std::forward<Args>(args)...); 308 }; 309 310 template <typename... Args> 311 concept can_call_parallel_for_helper = can_call_parallel_for_basic<Args...> && 312 can_call_parallel_for_basic<Args..., tbb::task_group_context&>; 313 314 template <typename... Args> 315 concept can_call_parallel_for_with_partitioner = can_call_parallel_for_helper<Args...> && 316 can_call_parallel_for_helper<Args..., const tbb::simple_partitioner&> && 317 can_call_parallel_for_helper<Args..., const tbb::auto_partitioner&> && 318 can_call_parallel_for_helper<Args..., const tbb::static_partitioner> && 319 can_call_parallel_for_helper<Args..., tbb::affinity_partitioner&>; 320 321 template <typename Range, typename Body> 322 concept can_call_range_pfor = can_call_parallel_for_with_partitioner<const Range&, const Body&>; 323 324 template <typename Index, typename Function> 325 concept can_call_index_pfor = can_call_parallel_for_with_partitioner<Index, Index, const Function&> && 326 can_call_parallel_for_with_partitioner<Index, Index, Index, const Function&>; 327 328 329 template <typename Range> 330 using CorrectBody = test_concepts::parallel_for_body::Correct<Range>; 331 template <typename Index> 332 using CorrectFunc = test_concepts::parallel_for_function::Correct<Index>; 333 334 void test_pfor_range_constraints() { 335 using namespace test_concepts::range; 336 337 static_assert(can_call_range_pfor<Correct, CorrectBody<Correct>>); 338 static_assert(!can_call_range_pfor<NonCopyable, CorrectBody<NonCopyable>>); 339 static_assert(!can_call_range_pfor<NonSplittable, CorrectBody<NonSplittable>>); 340 static_assert(!can_call_range_pfor<NonDestructible, CorrectBody<NonDestructible>>); 341 static_assert(!can_call_range_pfor<NoEmpty, CorrectBody<NoEmpty>>); 342 static_assert(!can_call_range_pfor<EmptyNonConst, CorrectBody<EmptyNonConst>>); 343 static_assert(!can_call_range_pfor<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>); 344 static_assert(!can_call_range_pfor<NoIsDivisible, CorrectBody<NoIsDivisible>>); 345 static_assert(!can_call_range_pfor<IsDivisibleNonConst, CorrectBody<IsDivisibleNonConst>>); 346 static_assert(!can_call_range_pfor<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>); 347 } 348 349 void test_pfor_body_constraints() { 350 using namespace test_concepts::parallel_for_body; 351 using CorrectRange = test_concepts::range::Correct; 352 353 static_assert(can_call_range_pfor<CorrectRange, Correct<CorrectRange>>); 354 static_assert(!can_call_range_pfor<CorrectRange, NonCopyable<CorrectRange>>); 355 static_assert(!can_call_range_pfor<CorrectRange, NonDestructible<CorrectRange>>); 356 static_assert(!can_call_range_pfor<CorrectRange, NoOperatorRoundBrackets<CorrectRange>>); 357 static_assert(!can_call_range_pfor<CorrectRange, OperatorRoundBracketsNonConst<CorrectRange>>); 358 static_assert(!can_call_range_pfor<CorrectRange, WrongInputOperatorRoundBrackets<CorrectRange>>); 359 } 360 361 void test_pfor_func_constraints() { 362 using namespace test_concepts::parallel_for_function; 363 using CorrectIndex = test_concepts::parallel_for_index::Correct; 364 365 static_assert(can_call_index_pfor<CorrectIndex, Correct<CorrectIndex>>); 366 static_assert(!can_call_index_pfor<CorrectIndex, NoOperatorRoundBrackets<CorrectIndex>>); 367 static_assert(!can_call_index_pfor<CorrectIndex, OperatorRoundBracketsNonConst<CorrectIndex>>); 368 static_assert(!can_call_index_pfor<CorrectIndex, WrongInputOperatorRoundBrackets<CorrectIndex>>); 369 } 370 371 void test_pfor_index_constraints() { 372 using namespace test_concepts::parallel_for_index; 373 static_assert(can_call_index_pfor<Correct, CorrectFunc<Correct>>); 374 static_assert(!can_call_index_pfor<NoIntCtor, CorrectFunc<NoIntCtor>>); 375 static_assert(!can_call_index_pfor<NonCopyable, CorrectFunc<NonCopyable>>); 376 static_assert(!can_call_index_pfor<NonCopyAssignable, CorrectFunc<NonCopyAssignable>>); 377 static_assert(!can_call_index_pfor<NonDestructible, CorrectFunc<NonDestructible>>); 378 static_assert(!can_call_index_pfor<NoOperatorLess, CorrectFunc<NoOperatorLess>>); 379 static_assert(!can_call_index_pfor<OperatorLessNonConst, CorrectFunc<OperatorLessNonConst>>); 380 static_assert(!can_call_index_pfor<WrongInputOperatorLess, CorrectFunc<WrongInputOperatorLess>>); 381 static_assert(!can_call_index_pfor<WrongReturnOperatorLess, CorrectFunc<WrongReturnOperatorLess>>); 382 static_assert(!can_call_index_pfor<NoOperatorMinus, CorrectFunc<NoOperatorMinus>>); 383 static_assert(!can_call_index_pfor<OperatorMinusNonConst, CorrectFunc<OperatorMinusNonConst>>); 384 static_assert(!can_call_index_pfor<WrongInputOperatorMinus, CorrectFunc<WrongInputOperatorMinus>>); 385 static_assert(!can_call_index_pfor<WrongReturnOperatorMinus, CorrectFunc<WrongReturnOperatorMinus>>); 386 static_assert(!can_call_index_pfor<NoOperatorPlus, CorrectFunc<NoOperatorPlus>>); 387 static_assert(!can_call_index_pfor<OperatorPlusNonConst, CorrectFunc<OperatorPlusNonConst>>); 388 static_assert(!can_call_index_pfor<WrongInputOperatorPlus, CorrectFunc<WrongInputOperatorPlus>>); 389 static_assert(!can_call_index_pfor<WrongReturnOperatorPlus, CorrectFunc<WrongReturnOperatorPlus>>); 390 } 391 #endif // __TBB_CPP20_CONCEPTS_PRESENT 392 393 #if TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN && TBB_REVAMP_TODO 394 #include "tbb/global_control.h" 395 //! Testing exceptions 396 //! \brief \ref requirement 397 TEST_CASE("Exceptions support") { 398 for ( int p = MinThread; p <= MaxThread; ++p ) { 399 if ( p > 0 ) { 400 tbb::global_control control(tbb::global_control::max_allowed_parallelism, p); 401 TestExceptionsSupport(); 402 } 403 } 404 } 405 #endif /* TBB_USE_EXCEPTIONS && !__TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN */ 406 407 //! Testing cancellation 408 //! \brief \ref error_guessing 409 TEST_CASE("Vector types") { 410 #if HAVE_m128 411 TestVectorTypes<ClassWithSSE>(); 412 #endif 413 #if HAVE_m256 414 if (have_AVX()) TestVectorTypes<ClassWithAVX>(); 415 #endif 416 } 417 418 //! Testing workers going to sleep 419 //! \brief \ref resource_usage 420 TEST_CASE("That all workers sleep when no work") { 421 const std::size_t N = 100000; 422 std::atomic<int> counter{}; 423 424 tbb::parallel_for(std::size_t(0), N, [&](std::size_t) { 425 for (int i = 0; i < 1000; ++i) { 426 ++counter; 427 } 428 }, tbb::simple_partitioner()); 429 TestCPUUserTime(utils::get_platform_max_threads()); 430 } 431 432 //! Testing simple partitioner stability 433 //! \brief \ref error_guessing 434 TEST_CASE("Simple partitioner stability") { 435 TestSimplePartitionerStability(); 436 } 437 438 //! Testing various range implementations 439 //! \brief \ref requirement 440 TEST_CASE("Various range implementations") { 441 various_range_implementations::test(); 442 } 443 444 //! Testing parallel_for with explicit task_group_context 445 //! \brief \ref interface \ref error_guessing 446 TEST_CASE("Сancellation test for tbb::parallel_for") { 447 test_cancellation::ParallelForTestRunner</*FirstMode = */0>::run(); 448 } 449 450 #if __TBB_CPP20_CONCEPTS_PRESENT 451 //! \brief \ref error_guessing 452 TEST_CASE("parallel_for constraints") { 453 test_pfor_range_constraints(); 454 test_pfor_body_constraints(); 455 test_pfor_func_constraints(); 456 test_pfor_index_constraints(); 457 } 458 #endif // __TBB_CPP20_CONCEPTS_PRESENT 459 460 #if _MSC_VER 461 #pragma warning (pop) 462 #endif 463