/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/
1651c0b2f7Stbbdev 
1751c0b2f7Stbbdev #include "common/parallel_reduce_common.h"
1851c0b2f7Stbbdev #include "common/concurrency_tracker.h"
19*a088cfa0SKonstantin Boyarinov #include "common/test_invoke.h"
2051c0b2f7Stbbdev 
2151c0b2f7Stbbdev #include "../tbb/test_partitioner.h"
2251c0b2f7Stbbdev 
//! \file conformance_parallel_reduce.cpp
//! \brief Test for [algorithms.parallel_reduce algorithms.parallel_deterministic_reduce] specification
2551c0b2f7Stbbdev 
//! Order-sensitive binary operation on \c int: op(x, i) = (x << 1) ^ i.
//! The operation is neither associative nor commutative, so the value of a
//! reduction built from it depends on how the range is split and joined.
class RotOp {
public:
    using Type = int;

    //! Folds \c i into the accumulator \c x and returns the new accumulator.
    int operator() ( int x, int i ) const {
        // Shift on the unsigned representation: left-shifting a negative or
        // overflowing signed int is undefined behaviour before C++20, and the
        // accumulator does overflow when folding long ranges. Converting back
        // to int is implementation-defined (modular on two's-complement
        // targets), which yields the same bits the signed shift produced in practice.
        const unsigned shifted = static_cast<unsigned>(x) << 1;
        return static_cast<int>(shifted) ^ i;
    }

    //! Combines two partial results using the same operation as operator().
    int join( int x, int y ) const {
        return operator()( x, y );
    }
};
3651c0b2f7Stbbdev 
3751c0b2f7Stbbdev template <class Op>
3851c0b2f7Stbbdev struct ReduceBody {
3951c0b2f7Stbbdev     using result_type = typename Op::Type;
4051c0b2f7Stbbdev     result_type my_value;
4151c0b2f7Stbbdev 
ReduceBodyReduceBody4251c0b2f7Stbbdev     ReduceBody() : my_value() {}
ReduceBodyReduceBody4349e08aacStbbdev     ReduceBody( ReduceBody &, oneapi::tbb::split ) : my_value() {}
4451c0b2f7Stbbdev 
operator ()ReduceBody4549e08aacStbbdev     void operator() ( const oneapi::tbb::blocked_range<int>& r ) {
4651c0b2f7Stbbdev         utils::ConcurrencyTracker ct;
4751c0b2f7Stbbdev         for ( int i = r.begin(); i != r.end(); ++i ) {
4851c0b2f7Stbbdev             Op op;
4951c0b2f7Stbbdev             my_value = op(my_value, i);
5051c0b2f7Stbbdev         }
5151c0b2f7Stbbdev     }
5251c0b2f7Stbbdev 
joinReduceBody5351c0b2f7Stbbdev     void join( const ReduceBody& y ) {
5451c0b2f7Stbbdev         Op op;
5551c0b2f7Stbbdev         my_value = op.join(my_value, y.my_value);
5651c0b2f7Stbbdev     }
5751c0b2f7Stbbdev };
5851c0b2f7Stbbdev 
5951c0b2f7Stbbdev template <class Partitioner>
TestDeterministicReductionFor()6051c0b2f7Stbbdev void TestDeterministicReductionFor() {
6151c0b2f7Stbbdev     const int N = 1000;
6249e08aacStbbdev     const oneapi::tbb::blocked_range<int> range(0, N);
6351c0b2f7Stbbdev     using BodyType = ReduceBody<RotOp>;
6451c0b2f7Stbbdev     using Type = RotOp::Type;
6551c0b2f7Stbbdev 
6651c0b2f7Stbbdev     BodyType benchmark_body;
6751c0b2f7Stbbdev     deterministic_reduce_invoker(range, benchmark_body, Partitioner());
6851c0b2f7Stbbdev     for ( int i=0; i<100; ++i ) {
6951c0b2f7Stbbdev         BodyType measurement_body;
7051c0b2f7Stbbdev         deterministic_reduce_invoker(range, measurement_body, Partitioner());
7151c0b2f7Stbbdev         REQUIRE_MESSAGE( benchmark_body.my_value == measurement_body.my_value,
7251c0b2f7Stbbdev         "parallel_deterministic_reduce behaves differently from run to run" );
7351c0b2f7Stbbdev 
7451c0b2f7Stbbdev         Type lambda_measurement_result = deterministic_reduce_invoker<Type>( range,
7549e08aacStbbdev             [](const oneapi::tbb::blocked_range<int>& br, Type value) -> Type {
7651c0b2f7Stbbdev                 utils::ConcurrencyTracker ct;
7751c0b2f7Stbbdev                 for ( int ii = br.begin(); ii != br.end(); ++ii ) {
7851c0b2f7Stbbdev                     RotOp op;
7951c0b2f7Stbbdev                     value = op(value, ii);
8051c0b2f7Stbbdev                 }
8151c0b2f7Stbbdev                 return value;
8251c0b2f7Stbbdev             },
8351c0b2f7Stbbdev             [](const Type& v1, const Type& v2) -> Type {
8451c0b2f7Stbbdev                 RotOp op;
8551c0b2f7Stbbdev                 return op.join(v1,v2);
8651c0b2f7Stbbdev             },
8751c0b2f7Stbbdev             Partitioner()
8851c0b2f7Stbbdev         );
8951c0b2f7Stbbdev         REQUIRE_MESSAGE( benchmark_body.my_value == lambda_measurement_result,
9051c0b2f7Stbbdev             "lambda-based parallel_deterministic_reduce behaves differently from run to run" );
9151c0b2f7Stbbdev     }
9251c0b2f7Stbbdev }
9351c0b2f7Stbbdev 
9451c0b2f7Stbbdev //! Test that deterministic reduction returns the same result during several measurements
9551c0b2f7Stbbdev //! \brief \ref requirement \ref interface
9651c0b2f7Stbbdev TEST_CASE("Test deterministic reduce correctness") {
9751c0b2f7Stbbdev     for ( auto concurrency_level : utils::concurrency_range() ) {
9849e08aacStbbdev         oneapi::tbb::global_control control(oneapi::tbb::global_control::max_allowed_parallelism, concurrency_level);
9949e08aacStbbdev         TestDeterministicReductionFor<oneapi::tbb::simple_partitioner>();
10049e08aacStbbdev         TestDeterministicReductionFor<oneapi::tbb::static_partitioner>();
10151c0b2f7Stbbdev         TestDeterministicReductionFor<utils_default_partitioner>();
10251c0b2f7Stbbdev     }
10351c0b2f7Stbbdev }
10451c0b2f7Stbbdev 
10551c0b2f7Stbbdev //! Test partitioners interaction with various ranges
10651c0b2f7Stbbdev //! \brief \ref requirement \ref interface
10751c0b2f7Stbbdev TEST_CASE("Test partitioners interaction with various ranges") {
10851c0b2f7Stbbdev     using namespace test_partitioner_utils::interaction_with_range_and_partitioner;
10951c0b2f7Stbbdev     for ( auto concurrency_level : utils::concurrency_range() ) {
11049e08aacStbbdev         oneapi::tbb::global_control control(oneapi::tbb::global_control::max_allowed_parallelism, concurrency_level);
11151c0b2f7Stbbdev 
11251c0b2f7Stbbdev         test_partitioner_utils::SimpleReduceBody body;
11349e08aacStbbdev         oneapi::tbb::affinity_partitioner ap;
11451c0b2f7Stbbdev 
11551c0b2f7Stbbdev         parallel_reduce(Range1(/*assert_in_split*/ true, /*assert_in_proportional_split*/ false), body, ap);
11651c0b2f7Stbbdev         parallel_reduce(Range6(false, true), body, ap);
11751c0b2f7Stbbdev 
11849e08aacStbbdev         parallel_reduce(Range1(/*assert_in_split*/ true, /*assert_in_proportional_split*/ false), body, oneapi::tbb::static_partitioner());
11949e08aacStbbdev         parallel_reduce(Range6(false, true), body, oneapi::tbb::static_partitioner());
12051c0b2f7Stbbdev 
12149e08aacStbbdev         parallel_reduce(Range1(/*assert_in_split*/ false, /*assert_in_proportional_split*/ true), body, oneapi::tbb::simple_partitioner());
12249e08aacStbbdev         parallel_reduce(Range6(false, true), body, oneapi::tbb::simple_partitioner());
12351c0b2f7Stbbdev 
12449e08aacStbbdev         parallel_reduce(Range1(/*assert_in_split*/ false, /*assert_in_proportional_split*/ true), body, oneapi::tbb::auto_partitioner());
12549e08aacStbbdev         parallel_reduce(Range6(false, true), body, oneapi::tbb::auto_partitioner());
12651c0b2f7Stbbdev 
12749e08aacStbbdev         parallel_deterministic_reduce(Range1(/*assert_in_split*/true, /*assert_in_proportional_split*/ false), body, oneapi::tbb::static_partitioner());
12849e08aacStbbdev         parallel_deterministic_reduce(Range6(false, true), body, oneapi::tbb::static_partitioner());
12951c0b2f7Stbbdev 
13049e08aacStbbdev         parallel_deterministic_reduce(Range1(/*assert_in_split*/false, /*assert_in_proportional_split*/ true), body, oneapi::tbb::simple_partitioner());
13149e08aacStbbdev         parallel_deterministic_reduce(Range6(false, true), body, oneapi::tbb::simple_partitioner());
13251c0b2f7Stbbdev     }
13351c0b2f7Stbbdev }
134*a088cfa0SKonstantin Boyarinov 
135*a088cfa0SKonstantin Boyarinov #if __TBB_CPP17_INVOKE_PRESENT
136*a088cfa0SKonstantin Boyarinov 
137*a088cfa0SKonstantin Boyarinov template <typename Body, typename Reduction>
test_preduce_invoke_basic(const Body & body,const Reduction & reduction)138*a088cfa0SKonstantin Boyarinov void test_preduce_invoke_basic(const Body& body, const Reduction& reduction) {
139*a088cfa0SKonstantin Boyarinov     const std::size_t iterations = 100000;
140*a088cfa0SKonstantin Boyarinov     const std::size_t result = iterations * (iterations - 1) / 2;
141*a088cfa0SKonstantin Boyarinov 
142*a088cfa0SKonstantin Boyarinov     test_invoke::SmartRange<test_invoke::SmartValue> range(0, iterations);
143*a088cfa0SKonstantin Boyarinov     test_invoke::SmartValue identity(0);
144*a088cfa0SKonstantin Boyarinov 
145*a088cfa0SKonstantin Boyarinov     CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction).get());
146*a088cfa0SKonstantin Boyarinov     CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction, oneapi::tbb::simple_partitioner()).get());
147*a088cfa0SKonstantin Boyarinov     CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction, oneapi::tbb::auto_partitioner()).get());
148*a088cfa0SKonstantin Boyarinov     CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction, oneapi::tbb::static_partitioner()).get());
149*a088cfa0SKonstantin Boyarinov     oneapi::tbb::affinity_partitioner aff;
150*a088cfa0SKonstantin Boyarinov     CHECK(result == oneapi::tbb::parallel_reduce(range, identity, body, reduction, aff).get());
151*a088cfa0SKonstantin Boyarinov 
152*a088cfa0SKonstantin Boyarinov     CHECK(result == oneapi::tbb::parallel_deterministic_reduce(range, identity, body, reduction).get());
153*a088cfa0SKonstantin Boyarinov     CHECK(result == oneapi::tbb::parallel_deterministic_reduce(range, identity, body, reduction, oneapi::tbb::simple_partitioner()).get());
154*a088cfa0SKonstantin Boyarinov     CHECK(result == oneapi::tbb::parallel_deterministic_reduce(range, identity, body, reduction, oneapi::tbb::static_partitioner()).get());
155*a088cfa0SKonstantin Boyarinov }
156*a088cfa0SKonstantin Boyarinov 
157*a088cfa0SKonstantin Boyarinov //! Test that parallel_reduce uses std::invoke to run the body
158*a088cfa0SKonstantin Boyarinov //! \brief \ref interface \ref requirement
159*a088cfa0SKonstantin Boyarinov TEST_CASE("parallel_[deterministic_]reduce and std::invoke") {
__anon105f294c0302(const test_invoke::SmartRange<test_invoke::SmartValue>& range, const test_invoke::SmartValue& idx) 160*a088cfa0SKonstantin Boyarinov     auto regular_reduce = [](const test_invoke::SmartRange<test_invoke::SmartValue>& range, const test_invoke::SmartValue& idx) {
161*a088cfa0SKonstantin Boyarinov         test_invoke::SmartValue result = idx;
162*a088cfa0SKonstantin Boyarinov         for (auto i = range.begin(); i.get() != range.end().get(); ++i) {
163*a088cfa0SKonstantin Boyarinov             result = result + i;
164*a088cfa0SKonstantin Boyarinov         }
165*a088cfa0SKonstantin Boyarinov         return result;
166*a088cfa0SKonstantin Boyarinov     };
__anon105f294c0402(const test_invoke::SmartValue& lhs, const test_invoke::SmartValue& rhs) 167*a088cfa0SKonstantin Boyarinov     auto regular_join = [](const test_invoke::SmartValue& lhs, const test_invoke::SmartValue& rhs) {
168*a088cfa0SKonstantin Boyarinov         return lhs + rhs;
169*a088cfa0SKonstantin Boyarinov     };
170*a088cfa0SKonstantin Boyarinov 
171*a088cfa0SKonstantin Boyarinov     test_preduce_invoke_basic(&test_invoke::SmartRange<test_invoke::SmartValue>::reduction, &test_invoke::SmartValue::operator+);
172*a088cfa0SKonstantin Boyarinov     test_preduce_invoke_basic(&test_invoke::SmartRange<test_invoke::SmartValue>::reduction, regular_join);
173*a088cfa0SKonstantin Boyarinov     test_preduce_invoke_basic(regular_reduce, &test_invoke::SmartValue::operator+);
174*a088cfa0SKonstantin Boyarinov }
175*a088cfa0SKonstantin Boyarinov 
176*a088cfa0SKonstantin Boyarinov #endif
177