151c0b2f7Stbbdev /*
2*a088cfa0SKonstantin Boyarinov Copyright (c) 2020-2023 Intel Corporation
351c0b2f7Stbbdev
451c0b2f7Stbbdev Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev you may not use this file except in compliance with the License.
651c0b2f7Stbbdev You may obtain a copy of the License at
751c0b2f7Stbbdev
851c0b2f7Stbbdev http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev
1051c0b2f7Stbbdev Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev See the License for the specific language governing permissions and
1451c0b2f7Stbbdev limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev
17478de5b1Stbbdev #if __INTEL_COMPILER && _MSC_VER
18478de5b1Stbbdev #pragma warning(disable : 2586) // decorated name length exceeded, name was truncated
19478de5b1Stbbdev #endif
20478de5b1Stbbdev
2151c0b2f7Stbbdev #define DOCTEST_CONFIG_SUPER_FAST_ASSERTS
2251c0b2f7Stbbdev #include "common/test.h"
23*a088cfa0SKonstantin Boyarinov #include "common/test_invoke.h"
2451c0b2f7Stbbdev
2549e08aacStbbdev #include "oneapi/tbb/parallel_scan.h"
2651c0b2f7Stbbdev
2751c0b2f7Stbbdev #include <vector>
2851c0b2f7Stbbdev
2951c0b2f7Stbbdev //! \file conformance_parallel_scan.cpp
3051c0b2f7Stbbdev //! \brief Test for [algorithms.parallel_scan] specification
3151c0b2f7Stbbdev
3251c0b2f7Stbbdev constexpr std::size_t size = 1000;
3351c0b2f7Stbbdev
3451c0b2f7Stbbdev template<typename T, typename Op>
3551c0b2f7Stbbdev class Body {
3651c0b2f7Stbbdev const T identity;
3751c0b2f7Stbbdev T sum;
3851c0b2f7Stbbdev std::vector<T>& y;
3951c0b2f7Stbbdev const std::vector<T>& z;
4051c0b2f7Stbbdev public:
Body(const std::vector<T> & z_,std::vector<T> & y_,T id)4151c0b2f7Stbbdev Body( const std::vector<T>& z_, std::vector<T>& y_, T id ) : identity(id), sum(id), y(y_), z(z_) {}
get_sum() const4251c0b2f7Stbbdev T get_sum() const { return sum; }
4351c0b2f7Stbbdev
4451c0b2f7Stbbdev template<typename Tag>
operator ()(const oneapi::tbb::blocked_range<std::size_t> & r,Tag)4549e08aacStbbdev void operator()( const oneapi::tbb::blocked_range<std::size_t>& r, Tag ) {
4651c0b2f7Stbbdev T temp = sum;
4751c0b2f7Stbbdev for(std::size_t i=r.begin(); i<r.end(); ++i ) {
4851c0b2f7Stbbdev temp = Op()(temp, z[i]);
4951c0b2f7Stbbdev if( Tag::is_final_scan() )
5051c0b2f7Stbbdev y[i] = temp;
5151c0b2f7Stbbdev }
5251c0b2f7Stbbdev sum = temp;
5351c0b2f7Stbbdev }
Body(Body & b,oneapi::tbb::split)5449e08aacStbbdev Body( Body& b, oneapi::tbb::split ): identity(b.identity), sum(b.identity), y(b.y), z(b.z) {}
reverse_join(Body & a)5551c0b2f7Stbbdev void reverse_join( Body& a ) { sum = Op()(a.sum, sum); }
assign(Body & b)5651c0b2f7Stbbdev void assign( Body& b ) { sum = b.sum; }
5751c0b2f7Stbbdev };
5851c0b2f7Stbbdev
5951c0b2f7Stbbdev class default_partitioner_tag{};
6051c0b2f7Stbbdev
6151c0b2f7Stbbdev template<typename Partitioner>
6251c0b2f7Stbbdev struct parallel_scan_wrapper{
6351c0b2f7Stbbdev template<typename... Args>
operator ()parallel_scan_wrapper6451c0b2f7Stbbdev void operator()(Args&&... args) {
6549e08aacStbbdev oneapi::tbb::parallel_scan(std::forward<Args>(args)..., Partitioner());
6651c0b2f7Stbbdev }
6751c0b2f7Stbbdev };
6851c0b2f7Stbbdev
6951c0b2f7Stbbdev template<>
7051c0b2f7Stbbdev struct parallel_scan_wrapper<default_partitioner_tag>{
7151c0b2f7Stbbdev template<typename... Args>
operator ()parallel_scan_wrapper7251c0b2f7Stbbdev void operator()(Args&&... args) {
7349e08aacStbbdev oneapi::tbb::parallel_scan(std::forward<Args>(args)...);
7451c0b2f7Stbbdev }
7551c0b2f7Stbbdev };
7651c0b2f7Stbbdev
7751c0b2f7Stbbdev // Test scan tag
7851c0b2f7Stbbdev //! \brief \ref interface
7951c0b2f7Stbbdev TEST_CASE("scan tags testing") {
8049e08aacStbbdev CHECK(oneapi::tbb::pre_scan_tag::is_final_scan()==false);
8149e08aacStbbdev CHECK(oneapi::tbb::final_scan_tag::is_final_scan()==true);
8249e08aacStbbdev CHECK((bool)oneapi::tbb::pre_scan_tag()==false);
8349e08aacStbbdev CHECK((bool)oneapi::tbb::final_scan_tag()==true);
8451c0b2f7Stbbdev }
8551c0b2f7Stbbdev
8651c0b2f7Stbbdev //! Test parallel prefix sum calculation for body-based interface
8751c0b2f7Stbbdev //! \brief \ref requirement \ref interface
8849e08aacStbbdev TEST_CASE_TEMPLATE("Test parallel scan with body", Partitioner, default_partitioner_tag, oneapi::tbb::simple_partitioner, oneapi::tbb::auto_partitioner) {
8951c0b2f7Stbbdev std::vector<int> input(size);
9051c0b2f7Stbbdev std::vector<int> output(size);
9151c0b2f7Stbbdev std::vector<int> control(size);
9251c0b2f7Stbbdev
9351c0b2f7Stbbdev for(size_t i = 0; i < size; ++i) {
9451c0b2f7Stbbdev input[i] = int(i / 2);
9551c0b2f7Stbbdev if(i)
9651c0b2f7Stbbdev control[i] = control[i-1] + input[i];
9751c0b2f7Stbbdev else
9851c0b2f7Stbbdev control[i] = input[i];
9951c0b2f7Stbbdev }
10051c0b2f7Stbbdev Body<int, std::plus<int>> body(input, output, 0);
10149e08aacStbbdev parallel_scan_wrapper<Partitioner>()(oneapi::tbb::blocked_range<std::size_t>(0U, size, 1U), body);
10251c0b2f7Stbbdev CHECK((control == output));
10351c0b2f7Stbbdev }
10451c0b2f7Stbbdev
10551c0b2f7Stbbdev
10651c0b2f7Stbbdev //! Test parallel prefix sum calculation for scan-based interface
10751c0b2f7Stbbdev //! \brief \ref requirement \ref interface
10849e08aacStbbdev TEST_CASE_TEMPLATE("Test parallel scan with body", Partitioner, default_partitioner_tag, oneapi::tbb::simple_partitioner, oneapi::tbb::auto_partitioner) {
109478de5b1Stbbdev std::vector<std::size_t> input(size);
110478de5b1Stbbdev std::vector<std::size_t> output(size);
111478de5b1Stbbdev std::vector<std::size_t> control(size);
11251c0b2f7Stbbdev
113478de5b1Stbbdev for (std::size_t i = 0; i<size; ++i) {
114478de5b1Stbbdev input[i] = i;
11551c0b2f7Stbbdev if (i)
11651c0b2f7Stbbdev control[i] = control[i-1]+input[i];
11751c0b2f7Stbbdev else
11851c0b2f7Stbbdev control[i] = input[i];
11951c0b2f7Stbbdev }
120478de5b1Stbbdev parallel_scan_wrapper<Partitioner>()(oneapi::tbb::blocked_range<std::size_t>(0U, size, 1U), std::size_t(0),
121478de5b1Stbbdev [&](const oneapi::tbb::blocked_range<std::size_t>& r, std::size_t sum, bool is_final) -> std::size_t
__anonb56fb5590102(const oneapi::tbb::blocked_range<std::size_t>& r, std::size_t sum, bool is_final) 12251c0b2f7Stbbdev {
123478de5b1Stbbdev std::size_t temp = sum;
12451c0b2f7Stbbdev for (std::size_t i = r.begin(); i<r.end(); ++i) {
12551c0b2f7Stbbdev temp = temp + input[i];
12651c0b2f7Stbbdev if (is_final)
12751c0b2f7Stbbdev output[i] = temp;
12851c0b2f7Stbbdev }
12951c0b2f7Stbbdev return temp;
13051c0b2f7Stbbdev },
131478de5b1Stbbdev [](std::size_t a, std::size_t b) -> std::size_t
__anonb56fb5590202(std::size_t a, std::size_t b) 13251c0b2f7Stbbdev {
13351c0b2f7Stbbdev return a + b;
13451c0b2f7Stbbdev });
13551c0b2f7Stbbdev
13651c0b2f7Stbbdev CHECK((control==output));
13751c0b2f7Stbbdev }
138*a088cfa0SKonstantin Boyarinov
139*a088cfa0SKonstantin Boyarinov #if __TBB_CPP17_INVOKE_PRESENT
140*a088cfa0SKonstantin Boyarinov template <typename... Args>
test_pscan_invoke(const std::vector<std::size_t> & desired_vector,std::vector<std::size_t> & result_vector,Args &&...args)141*a088cfa0SKonstantin Boyarinov void test_pscan_invoke(const std::vector<std::size_t>& desired_vector,
142*a088cfa0SKonstantin Boyarinov std::vector<std::size_t>& result_vector,
143*a088cfa0SKonstantin Boyarinov Args&&... args) {
144*a088cfa0SKonstantin Boyarinov auto result = oneapi::tbb::parallel_scan(std::forward<Args>(args)...);
145*a088cfa0SKonstantin Boyarinov CHECK(desired_vector == result_vector);
146*a088cfa0SKonstantin Boyarinov CHECK(result.get() == result_vector.back());
147*a088cfa0SKonstantin Boyarinov
148*a088cfa0SKonstantin Boyarinov for (std::size_t& item : result_vector) item = 0;
149*a088cfa0SKonstantin Boyarinov }
150*a088cfa0SKonstantin Boyarinov
151*a088cfa0SKonstantin Boyarinov //! Test that parallel_scan uses std::invoke to run the body
152*a088cfa0SKonstantin Boyarinov //! \brief \ref requirement
153*a088cfa0SKonstantin Boyarinov TEST_CASE("parallel_scan and std::invoke") {
154*a088cfa0SKonstantin Boyarinov const std::size_t iterations = 1000000;
155*a088cfa0SKonstantin Boyarinov std::vector<std::size_t> desired_vector(iterations);
156*a088cfa0SKonstantin Boyarinov
157*a088cfa0SKonstantin Boyarinov for (std::size_t i = 1; i < iterations; ++i) {
158*a088cfa0SKonstantin Boyarinov desired_vector[i] = desired_vector[i - 1] + i;
159*a088cfa0SKonstantin Boyarinov }
160*a088cfa0SKonstantin Boyarinov
161*a088cfa0SKonstantin Boyarinov std::vector<std::size_t> change_vector(iterations, 0);
162*a088cfa0SKonstantin Boyarinov test_invoke::SmartRange<test_invoke::SmartValue> range(0, iterations, change_vector);
163*a088cfa0SKonstantin Boyarinov test_invoke::SmartValue identity(0);
164*a088cfa0SKonstantin Boyarinov
165*a088cfa0SKonstantin Boyarinov auto scan = &test_invoke::SmartRange<test_invoke::SmartValue>::scan;
166*a088cfa0SKonstantin Boyarinov auto combine = &test_invoke::SmartValue::operator+;
167*a088cfa0SKonstantin Boyarinov
168*a088cfa0SKonstantin Boyarinov test_pscan_invoke(desired_vector, change_vector, range, identity, scan, combine);
169*a088cfa0SKonstantin Boyarinov test_pscan_invoke(desired_vector, change_vector, range, identity, scan, combine, oneapi::tbb::auto_partitioner());
170*a088cfa0SKonstantin Boyarinov test_pscan_invoke(desired_vector, change_vector, range, identity, scan, combine, oneapi::tbb::simple_partitioner());
171*a088cfa0SKonstantin Boyarinov }
172*a088cfa0SKonstantin Boyarinov
173*a088cfa0SKonstantin Boyarinov #endif // __TBB_CPP17_INVOKE_PRESENT
174