1 /* 2 Copyright (c) 2005-2023 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #include "common/parallel_reduce_common.h" 18 #include "common/concurrency_tracker.h" 19 #include "common/test_invoke.h" 20 21 #include "../tbb/test_partitioner.h" 22 23 //! \file conformance_parallel_reduce.cpp 24 //! \brief Test for [algorithms.parallel_reduce algorithms.parallel_deterministic_reduce] specification 25 26 class RotOp { 27 public: 28 using Type = int; 29 int operator() ( int x, int i ) const { 30 return ( x<<1 ) ^ i; 31 } 32 int join( int x, int y ) const { 33 return operator()( x, y ); 34 } 35 }; 36 37 template <class Op> 38 struct ReduceBody { 39 using result_type = typename Op::Type; 40 result_type my_value; 41 42 ReduceBody() : my_value() {} 43 ReduceBody( ReduceBody &, oneapi::tbb::split ) : my_value() {} 44 45 void operator() ( const oneapi::tbb::blocked_range<int>& r ) { 46 utils::ConcurrencyTracker ct; 47 for ( int i = r.begin(); i != r.end(); ++i ) { 48 Op op; 49 my_value = op(my_value, i); 50 } 51 } 52 53 void join( const ReduceBody& y ) { 54 Op op; 55 my_value = op.join(my_value, y.my_value); 56 } 57 }; 58 59 template <class Partitioner> 60 void TestDeterministicReductionFor() { 61 const int N = 1000; 62 const oneapi::tbb::blocked_range<int> range(0, N); 63 using BodyType = ReduceBody<RotOp>; 64 using Type = RotOp::Type; 65 66 BodyType benchmark_body; 67 deterministic_reduce_invoker(range, benchmark_body, Partitioner()); 68 for ( int i=0; i<100; ++i ) { 69 BodyType measurement_body; 70 deterministic_reduce_invoker(range, measurement_body, Partitioner()); 71 REQUIRE_MESSAGE( benchmark_body.my_value == measurement_body.my_value, 72 "parallel_deterministic_reduce behaves differently from run to run" ); 73 74 Type lambda_measurement_result = deterministic_reduce_invoker<Type>( range, 75 [](const oneapi::tbb::blocked_range<int>& br, Type value) -> Type { 76 utils::ConcurrencyTracker ct; 77 for ( int ii = br.begin(); ii != br.end(); ++ii ) { 78 RotOp op; 79 value = op(value, ii); 80 } 81 return value; 82 }, 83 [](const Type& v1, const Type& v2) -> Type { 84 RotOp op; 85 return op.join(v1,v2); 86 }, 87 Partitioner() 88 ); 89 REQUIRE_MESSAGE( benchmark_body.my_value == lambda_measurement_result, 90 "lambda-based parallel_deterministic_reduce behaves differently from run to run" ); 91 } 92 } 93 94 //! Test that deterministic reduction returns the same result during several measurements 95 //! \brief \ref requirement \ref interface 96 TEST_CASE("Test deterministic reduce correctness") { 97 for ( auto concurrency_level : utils::concurrency_range() ) { 98 oneapi::tbb::global_control control(oneapi::tbb::global_control::max_allowed_parallelism, concurrency_level); 99 TestDeterministicReductionFor<oneapi::tbb::simple_partitioner>(); 100 TestDeterministicReductionFor<oneapi::tbb::static_partitioner>(); 101 TestDeterministicReductionFor<utils_default_partitioner>(); 102 } 103 } 104 105 //! Test partitioners interaction with various ranges 106 //! \brief \ref requirement \ref interface 107 TEST_CASE("Test partitioners interaction with various ranges") { 108 using namespace test_partitioner_utils::interaction_with_range_and_partitioner; 109 for ( auto concurrency_level : utils::concurrency_range() ) { 110 oneapi::tbb::global_control control(oneapi::tbb::global_control::max_allowed_parallelism, concurrency_level); 111 112 test_partitioner_utils::SimpleReduceBody body; 113 oneapi::tbb::affinity_partitioner ap; 114 115 parallel_reduce(Range1(/*assert_in_split*/ true, /*assert_in_proportional_split*/ false), body, ap); 116 parallel_reduce(Range6(false, true), body, ap); 117 118 parallel_reduce(Range1(/*assert_in_split*/ true, /*assert_in_proportional_split*/ false), body, oneapi::tbb::static_partitioner()); 119 parallel_reduce(Range6(false, true), body, oneapi::tbb::static_partitioner()); 120 121 parallel_reduce(Range1(/*assert_in_split*/ false, /*assert_in_proportional_split*/ true), body, oneapi::tbb::simple_partitioner()); 122 parallel_reduce(Range6(false, true), body, oneapi::tbb::simple_partitioner()); 123 124 parallel_reduce(Range1(/*assert_in_split*/ false, /*assert_in_proportional_split*/ true), body, oneapi::tbb::auto_partitioner()); 125 parallel_reduce(Range6(false, true), body, oneapi::tbb::auto_partitioner()); 126 127 parallel_deterministic_reduce(Range1(/*assert_in_split*/true, /*assert_in_proportional_split*/ false), body, oneapi::tbb::static_partitioner()); 128 parallel_deterministic_reduce(Range6(false, true), body, oneapi::tbb::static_partitioner()); 129 130 parallel_deterministic_reduce(Range1(/*assert_in_split*/false, /*assert_in_proportional_split*/ true), body, oneapi::tbb::simple_partitioner()); 131 parallel_deterministic_reduce(Range6(false, true), body, oneapi::tbb::simple_partitioner()); 132 } 133 } 134 135 #if __TBB_CPP17_INVOKE_PRESENT 136 137 template <typename Body, typename Reduction> 138 void test_preduce_invoke_basic(const Body& body, const Reduction& reduction) { 139 const std::size_t iterations = 100000; 140 const std::size_t result = iterations * (iterations - 1) / 2; 141 142 test_invoke::SmartRange<test_invoke::SmartValue> range(0, iterations); 143 test_invoke::SmartValue identity(0); 144 145 CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction).get()); 146 CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction, oneapi::tbb::simple_partitioner()).get()); 147 CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction, oneapi::tbb::auto_partitioner()).get()); 148 CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction, oneapi::tbb::static_partitioner()).get()); 149 oneapi::tbb::affinity_partitioner aff; 150 CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction, aff).get()); 151 152 CHECK(result == oneapi::tbb::parallel_deterministic_reduce(range, identity, body, reduction).get()); 153 CHECK(result == oneapi::tbb::parallel_deterministic_reduce(range, identity, body, reduction, oneapi::tbb::simple_partitioner()).get()); 154 CHECK(result == oneapi::tbb::parallel_deterministic_reduce(range, identity, body, reduction, oneapi::tbb::static_partitioner()).get()); 155 } 156 157 //! Test that parallel_reduce uses std::invoke to run the body 158 //! \brief \ref interface \ref requirement 159 TEST_CASE("parallel_[deterministic_]reduce and std::invoke") { 160 auto regular_reduce = [](const test_invoke::SmartRange<test_invoke::SmartValue>& range, const test_invoke::SmartValue& idx) { 161 test_invoke::SmartValue result = idx; 162 for (auto i = range.begin(); i.get() != range.end().get(); ++i) { 163 result = result + i; 164 } 165 return result; 166 }; 167 auto regular_join = [](const test_invoke::SmartValue& lhs, const test_invoke::SmartValue& rhs) { 168 return lhs + rhs; 169 }; 170 171 test_preduce_invoke_basic(&test_invoke::SmartRange<test_invoke::SmartValue>::reduction, &test_invoke::SmartValue::operator+); 172 test_preduce_invoke_basic(&test_invoke::SmartRange<test_invoke::SmartValue>::reduction, regular_join); 173 test_preduce_invoke_basic(regular_reduce, &test_invoke::SmartValue::operator+); 174 } 175 176 #endif 177