151c0b2f7Stbbdev /*
2*a088cfa0SKonstantin Boyarinov     Copyright (c) 2020-2023 Intel Corporation
351c0b2f7Stbbdev 
451c0b2f7Stbbdev     Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev     you may not use this file except in compliance with the License.
651c0b2f7Stbbdev     You may obtain a copy of the License at
751c0b2f7Stbbdev 
851c0b2f7Stbbdev         http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev 
1051c0b2f7Stbbdev     Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev     distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev     See the License for the specific language governing permissions and
1451c0b2f7Stbbdev     limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev 
17478de5b1Stbbdev #if __INTEL_COMPILER && _MSC_VER
18478de5b1Stbbdev #pragma warning(disable : 2586) // decorated name length exceeded, name was truncated
19478de5b1Stbbdev #endif
20478de5b1Stbbdev 
2151c0b2f7Stbbdev #define DOCTEST_CONFIG_SUPER_FAST_ASSERTS
2251c0b2f7Stbbdev #include "common/test.h"
23*a088cfa0SKonstantin Boyarinov #include "common/test_invoke.h"
2451c0b2f7Stbbdev 
2549e08aacStbbdev #include "oneapi/tbb/parallel_scan.h"
2651c0b2f7Stbbdev 
2751c0b2f7Stbbdev #include <vector>
2851c0b2f7Stbbdev 
2951c0b2f7Stbbdev //! \file conformance_parallel_scan.cpp
3051c0b2f7Stbbdev //! \brief Test for [algorithms.parallel_scan] specification
3151c0b2f7Stbbdev 
3251c0b2f7Stbbdev constexpr std::size_t size = 1000;
3351c0b2f7Stbbdev 
3451c0b2f7Stbbdev template<typename T, typename Op>
3551c0b2f7Stbbdev class Body {
3651c0b2f7Stbbdev     const T identity;
3751c0b2f7Stbbdev     T sum;
3851c0b2f7Stbbdev     std::vector<T>& y;
3951c0b2f7Stbbdev     const std::vector<T>& z;
4051c0b2f7Stbbdev public:
Body(const std::vector<T> & z_,std::vector<T> & y_,T id)4151c0b2f7Stbbdev     Body( const std::vector<T>& z_, std::vector<T>& y_, T id ) : identity(id), sum(id), y(y_), z(z_) {}
get_sum() const4251c0b2f7Stbbdev     T get_sum() const { return sum; }
4351c0b2f7Stbbdev 
4451c0b2f7Stbbdev     template<typename Tag>
operator ()(const oneapi::tbb::blocked_range<std::size_t> & r,Tag)4549e08aacStbbdev     void operator()( const oneapi::tbb::blocked_range<std::size_t>& r, Tag ) {
4651c0b2f7Stbbdev         T temp = sum;
4751c0b2f7Stbbdev         for(std::size_t i=r.begin(); i<r.end(); ++i ) {
4851c0b2f7Stbbdev             temp = Op()(temp, z[i]);
4951c0b2f7Stbbdev             if( Tag::is_final_scan() )
5051c0b2f7Stbbdev                 y[i] = temp;
5151c0b2f7Stbbdev         }
5251c0b2f7Stbbdev         sum = temp;
5351c0b2f7Stbbdev     }
Body(Body & b,oneapi::tbb::split)5449e08aacStbbdev     Body( Body& b, oneapi::tbb::split ): identity(b.identity), sum(b.identity), y(b.y), z(b.z) {}
reverse_join(Body & a)5551c0b2f7Stbbdev     void reverse_join( Body& a ) { sum = Op()(a.sum, sum); }
assign(Body & b)5651c0b2f7Stbbdev     void assign( Body& b ) { sum = b.sum; }
5751c0b2f7Stbbdev };
5851c0b2f7Stbbdev 
5951c0b2f7Stbbdev class default_partitioner_tag{};
6051c0b2f7Stbbdev 
6151c0b2f7Stbbdev template<typename Partitioner>
6251c0b2f7Stbbdev struct parallel_scan_wrapper{
6351c0b2f7Stbbdev     template<typename... Args>
operator ()parallel_scan_wrapper6451c0b2f7Stbbdev     void operator()(Args&&... args) {
6549e08aacStbbdev     oneapi::tbb::parallel_scan(std::forward<Args>(args)..., Partitioner());
6651c0b2f7Stbbdev     }
6751c0b2f7Stbbdev };
6851c0b2f7Stbbdev 
6951c0b2f7Stbbdev template<>
7051c0b2f7Stbbdev struct parallel_scan_wrapper<default_partitioner_tag>{
7151c0b2f7Stbbdev     template<typename... Args>
operator ()parallel_scan_wrapper7251c0b2f7Stbbdev     void operator()(Args&&... args) {
7349e08aacStbbdev     oneapi::tbb::parallel_scan(std::forward<Args>(args)...);
7451c0b2f7Stbbdev     }
7551c0b2f7Stbbdev };
7651c0b2f7Stbbdev 
7751c0b2f7Stbbdev // Test scan tag
7851c0b2f7Stbbdev //! \brief \ref interface
7951c0b2f7Stbbdev TEST_CASE("scan tags testing") {
8049e08aacStbbdev     CHECK(oneapi::tbb::pre_scan_tag::is_final_scan()==false);
8149e08aacStbbdev     CHECK(oneapi::tbb::final_scan_tag::is_final_scan()==true);
8249e08aacStbbdev     CHECK((bool)oneapi::tbb::pre_scan_tag()==false);
8349e08aacStbbdev     CHECK((bool)oneapi::tbb::final_scan_tag()==true);
8451c0b2f7Stbbdev }
8551c0b2f7Stbbdev 
8651c0b2f7Stbbdev //! Test parallel prefix sum calculation for body-based interface
8751c0b2f7Stbbdev //! \brief \ref requirement \ref interface
8849e08aacStbbdev TEST_CASE_TEMPLATE("Test parallel scan with body", Partitioner, default_partitioner_tag, oneapi::tbb::simple_partitioner, oneapi::tbb::auto_partitioner) {
8951c0b2f7Stbbdev     std::vector<int> input(size);
9051c0b2f7Stbbdev     std::vector<int> output(size);
9151c0b2f7Stbbdev     std::vector<int> control(size);
9251c0b2f7Stbbdev 
9351c0b2f7Stbbdev     for(size_t i = 0; i < size; ++i) {
9451c0b2f7Stbbdev         input[i] = int(i / 2);
9551c0b2f7Stbbdev         if(i)
9651c0b2f7Stbbdev             control[i] = control[i-1] + input[i];
9751c0b2f7Stbbdev         else
9851c0b2f7Stbbdev             control[i] = input[i];
9951c0b2f7Stbbdev     }
10051c0b2f7Stbbdev     Body<int, std::plus<int>> body(input, output, 0);
10149e08aacStbbdev     parallel_scan_wrapper<Partitioner>()(oneapi::tbb::blocked_range<std::size_t>(0U, size, 1U), body);
10251c0b2f7Stbbdev     CHECK((control == output));
10351c0b2f7Stbbdev }
10451c0b2f7Stbbdev 
10551c0b2f7Stbbdev 
10651c0b2f7Stbbdev //! Test parallel prefix sum calculation for scan-based interface
10751c0b2f7Stbbdev //! \brief \ref requirement \ref interface
10849e08aacStbbdev TEST_CASE_TEMPLATE("Test parallel scan with body", Partitioner, default_partitioner_tag, oneapi::tbb::simple_partitioner, oneapi::tbb::auto_partitioner) {
109478de5b1Stbbdev     std::vector<std::size_t> input(size);
110478de5b1Stbbdev     std::vector<std::size_t> output(size);
111478de5b1Stbbdev     std::vector<std::size_t> control(size);
11251c0b2f7Stbbdev 
113478de5b1Stbbdev     for (std::size_t i = 0; i<size; ++i) {
114478de5b1Stbbdev         input[i] = i;
11551c0b2f7Stbbdev         if (i)
11651c0b2f7Stbbdev             control[i] = control[i-1]+input[i];
11751c0b2f7Stbbdev         else
11851c0b2f7Stbbdev             control[i] = input[i];
11951c0b2f7Stbbdev     }
120478de5b1Stbbdev     parallel_scan_wrapper<Partitioner>()(oneapi::tbb::blocked_range<std::size_t>(0U, size, 1U), std::size_t(0),
121478de5b1Stbbdev         [&](const oneapi::tbb::blocked_range<std::size_t>& r, std::size_t sum, bool is_final) -> std::size_t
__anonb56fb5590102(const oneapi::tbb::blocked_range<std::size_t>& r, std::size_t sum, bool is_final) 12251c0b2f7Stbbdev         {
123478de5b1Stbbdev             std::size_t temp = sum;
12451c0b2f7Stbbdev             for (std::size_t i = r.begin(); i<r.end(); ++i) {
12551c0b2f7Stbbdev                 temp = temp + input[i];
12651c0b2f7Stbbdev                 if (is_final)
12751c0b2f7Stbbdev                     output[i] = temp;
12851c0b2f7Stbbdev             }
12951c0b2f7Stbbdev             return temp;
13051c0b2f7Stbbdev         },
131478de5b1Stbbdev         [](std::size_t a, std::size_t b) -> std::size_t
__anonb56fb5590202(std::size_t a, std::size_t b) 13251c0b2f7Stbbdev         {
13351c0b2f7Stbbdev             return a + b;
13451c0b2f7Stbbdev         });
13551c0b2f7Stbbdev 
13651c0b2f7Stbbdev     CHECK((control==output));
13751c0b2f7Stbbdev }
138*a088cfa0SKonstantin Boyarinov 
139*a088cfa0SKonstantin Boyarinov #if __TBB_CPP17_INVOKE_PRESENT
140*a088cfa0SKonstantin Boyarinov template <typename... Args>
test_pscan_invoke(const std::vector<std::size_t> & desired_vector,std::vector<std::size_t> & result_vector,Args &&...args)141*a088cfa0SKonstantin Boyarinov void test_pscan_invoke(const std::vector<std::size_t>& desired_vector,
142*a088cfa0SKonstantin Boyarinov                        std::vector<std::size_t>& result_vector,
143*a088cfa0SKonstantin Boyarinov                        Args&&... args) {
144*a088cfa0SKonstantin Boyarinov     auto result = oneapi::tbb::parallel_scan(std::forward<Args>(args)...);
145*a088cfa0SKonstantin Boyarinov     CHECK(desired_vector == result_vector);
146*a088cfa0SKonstantin Boyarinov     CHECK(result.get() == result_vector.back());
147*a088cfa0SKonstantin Boyarinov 
148*a088cfa0SKonstantin Boyarinov     for (std::size_t& item : result_vector) item = 0;
149*a088cfa0SKonstantin Boyarinov }
150*a088cfa0SKonstantin Boyarinov 
151*a088cfa0SKonstantin Boyarinov //! Test that parallel_scan uses std::invoke to run the body
152*a088cfa0SKonstantin Boyarinov //! \brief \ref requirement
153*a088cfa0SKonstantin Boyarinov TEST_CASE("parallel_scan and std::invoke") {
154*a088cfa0SKonstantin Boyarinov     const std::size_t iterations = 1000000;
155*a088cfa0SKonstantin Boyarinov     std::vector<std::size_t> desired_vector(iterations);
156*a088cfa0SKonstantin Boyarinov 
157*a088cfa0SKonstantin Boyarinov     for (std::size_t i = 1; i < iterations; ++i) {
158*a088cfa0SKonstantin Boyarinov         desired_vector[i] = desired_vector[i - 1] + i;
159*a088cfa0SKonstantin Boyarinov     }
160*a088cfa0SKonstantin Boyarinov 
161*a088cfa0SKonstantin Boyarinov     std::vector<std::size_t> change_vector(iterations, 0);
162*a088cfa0SKonstantin Boyarinov     test_invoke::SmartRange<test_invoke::SmartValue> range(0, iterations, change_vector);
163*a088cfa0SKonstantin Boyarinov     test_invoke::SmartValue identity(0);
164*a088cfa0SKonstantin Boyarinov 
165*a088cfa0SKonstantin Boyarinov     auto scan = &test_invoke::SmartRange<test_invoke::SmartValue>::scan;
166*a088cfa0SKonstantin Boyarinov     auto combine = &test_invoke::SmartValue::operator+;
167*a088cfa0SKonstantin Boyarinov 
168*a088cfa0SKonstantin Boyarinov     test_pscan_invoke(desired_vector, change_vector, range, identity, scan, combine);
169*a088cfa0SKonstantin Boyarinov     test_pscan_invoke(desired_vector, change_vector, range, identity, scan, combine, oneapi::tbb::auto_partitioner());
170*a088cfa0SKonstantin Boyarinov     test_pscan_invoke(desired_vector, change_vector, range, identity, scan, combine, oneapi::tbb::simple_partitioner());
171*a088cfa0SKonstantin Boyarinov }
172*a088cfa0SKonstantin Boyarinov 
173*a088cfa0SKonstantin Boyarinov #endif // __TBB_CPP17_INVOKE_PRESENT
174