xref: /oneTBB/test/tbb/test_parallel_scan.cpp (revision c21e688a)
151c0b2f7Stbbdev /*
2*c21e688aSSergey Zheltov     Copyright (c) 2005-2022 Intel Corporation
351c0b2f7Stbbdev 
451c0b2f7Stbbdev     Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev     you may not use this file except in compliance with the License.
651c0b2f7Stbbdev     You may obtain a copy of the License at
751c0b2f7Stbbdev 
851c0b2f7Stbbdev         http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev 
1051c0b2f7Stbbdev     Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev     distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev     See the License for the specific language governing permissions and
1451c0b2f7Stbbdev     limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev 
17478de5b1Stbbdev #if __INTEL_COMPILER && _MSC_VER
18478de5b1Stbbdev #pragma warning(disable : 2586) // decorated name length exceeded, name was truncated
19478de5b1Stbbdev #endif
20478de5b1Stbbdev 
2151c0b2f7Stbbdev #include "common/test.h"
2251c0b2f7Stbbdev #include "common/config.h"
2351c0b2f7Stbbdev #include "common/utils_concurrency_limit.h"
2451c0b2f7Stbbdev #include "common/cpu_usertime.h"
25478de5b1Stbbdev #include "common/concepts_common.h"
2651c0b2f7Stbbdev 
2751c0b2f7Stbbdev #include "tbb/global_control.h"
2851c0b2f7Stbbdev #include "tbb/parallel_scan.h"
2951c0b2f7Stbbdev #include "tbb/blocked_range.h"
3051c0b2f7Stbbdev #include "tbb/tick_count.h"
3151c0b2f7Stbbdev #include <vector>
3251c0b2f7Stbbdev #include <atomic>
3351c0b2f7Stbbdev 
3451c0b2f7Stbbdev //! \file test_parallel_scan.cpp
3551c0b2f7Stbbdev //! \brief Test for [algorithms.parallel_scan] specification
3651c0b2f7Stbbdev 
3751c0b2f7Stbbdev using Range = tbb::blocked_range<long>;
3851c0b2f7Stbbdev 
3951c0b2f7Stbbdev static volatile bool ScanIsRunning = false;
4051c0b2f7Stbbdev 
4151c0b2f7Stbbdev //! Sum of 0..i with wrap around on overflow.
TriangularSum(int i)4251c0b2f7Stbbdev inline int TriangularSum( int i ) {
4351c0b2f7Stbbdev     return i&1 ? ((i>>1)+1)*i : (i>>1)*(i+1);
4451c0b2f7Stbbdev }
4551c0b2f7Stbbdev 
4651c0b2f7Stbbdev //! Verify that sum is init plus sum of integers in closed interval [0..finish_index].
4751c0b2f7Stbbdev /** line should be the source line of the caller */
VerifySum(int init,long finish_index,int sum,int line)4851c0b2f7Stbbdev void VerifySum( int init, long finish_index, int sum, int line ) {
4951c0b2f7Stbbdev     int expected = init + TriangularSum(finish_index);
5051c0b2f7Stbbdev     CHECK_MESSAGE(expected == sum, "line " << line << ": sum[0.." << finish_index << "] should be = " << expected << ", but was computed as " << sum << "\n");
5151c0b2f7Stbbdev }
5251c0b2f7Stbbdev 
5351c0b2f7Stbbdev const int MAXN = 20000;
5451c0b2f7Stbbdev 
5551c0b2f7Stbbdev enum AddendFlag {
5651c0b2f7Stbbdev     UNUSED=0,
5751c0b2f7Stbbdev     USED_NONFINAL=1,
5851c0b2f7Stbbdev     USED_FINAL=2
5951c0b2f7Stbbdev };
6051c0b2f7Stbbdev 
6151c0b2f7Stbbdev //! Array recording how each addend was used.
6251c0b2f7Stbbdev /** 'unsigned char' instead of AddendFlag for sake of compactness. */
6351c0b2f7Stbbdev static unsigned char AddendHistory[MAXN];
6451c0b2f7Stbbdev 
6551c0b2f7Stbbdev std::atomic<long> NumberOfLiveStorage;
6651c0b2f7Stbbdev 
6751c0b2f7Stbbdev template<typename T>
6851c0b2f7Stbbdev struct Storage {
6951c0b2f7Stbbdev     T my_total;
7051c0b2f7Stbbdev     Range my_range;
StorageStorage7151c0b2f7Stbbdev     Storage(T init) :
7251c0b2f7Stbbdev         my_total(init), my_range(-1, -1, 1) {
7351c0b2f7Stbbdev         ++NumberOfLiveStorage;
7451c0b2f7Stbbdev     }
~StorageStorage7551c0b2f7Stbbdev     ~Storage() {
7651c0b2f7Stbbdev         --NumberOfLiveStorage;
7751c0b2f7Stbbdev     }
StorageStorage7851c0b2f7Stbbdev     Storage(const Storage& strg) :
7951c0b2f7Stbbdev         my_total(strg.my_total), my_range(strg.my_range) {
8051c0b2f7Stbbdev         ++NumberOfLiveStorage;
8151c0b2f7Stbbdev     }
operator =Storage8251c0b2f7Stbbdev     Storage & operator=(const Storage& strg) {
8351c0b2f7Stbbdev         my_total = strg.my_total;
8451c0b2f7Stbbdev         my_range = strg.my_range;
8551c0b2f7Stbbdev         return *this;
8651c0b2f7Stbbdev     }
8751c0b2f7Stbbdev };
8851c0b2f7Stbbdev 
8951c0b2f7Stbbdev template<typename T>
JoinStorages(const Storage<T> & left,const Storage<T> & right)90478de5b1Stbbdev Storage<T> JoinStorages(const Storage<T>& left, const Storage<T>& right) {
91478de5b1Stbbdev     Storage<T> result = right;
9251c0b2f7Stbbdev     CHECK(ScanIsRunning);
9351c0b2f7Stbbdev     CHECK(left.my_range.end() == right.my_range.begin());
94478de5b1Stbbdev     result.my_total += left.my_total;
95478de5b1Stbbdev     result.my_range = Range(left.my_range.begin(), right.my_range.end(), 1);
9651c0b2f7Stbbdev     CHECK(ScanIsRunning);
97478de5b1Stbbdev     return result;
9851c0b2f7Stbbdev }
9951c0b2f7Stbbdev 
10051c0b2f7Stbbdev template<typename T>
Scan(const Range & r,bool is_final,Storage<T> & storage,std::vector<T> & sum,const std::vector<T> & addend)10151c0b2f7Stbbdev void Scan(const Range & r, bool is_final, Storage<T> & storage, std::vector<T> & sum, const std::vector<T> & addend) {
10251c0b2f7Stbbdev     CHECK((!is_final || (storage.my_range.begin() == 0 && storage.my_range.end() == r.begin()) || (storage.my_range.empty() && r.begin() == 0)));
10351c0b2f7Stbbdev     for (long i = r.begin(); i < r.end(); ++i) {
10451c0b2f7Stbbdev         storage.my_total += addend[i];
10551c0b2f7Stbbdev         if (is_final) {
10651c0b2f7Stbbdev             CHECK_MESSAGE(AddendHistory[i] < USED_FINAL, "addend used 'finally' twice?");
10751c0b2f7Stbbdev             AddendHistory[i] |= USED_FINAL;
10851c0b2f7Stbbdev             sum[i] = storage.my_total;
10951c0b2f7Stbbdev             VerifySum(42, i, int(sum[i]), __LINE__);
11051c0b2f7Stbbdev         }
11151c0b2f7Stbbdev         else {
11251c0b2f7Stbbdev             CHECK_MESSAGE(AddendHistory[i] == UNUSED, "addend used too many times");
11351c0b2f7Stbbdev             AddendHistory[i] |= USED_NONFINAL;
11451c0b2f7Stbbdev         }
11551c0b2f7Stbbdev     }
11651c0b2f7Stbbdev     if (storage.my_range.empty())
11751c0b2f7Stbbdev         storage.my_range = r;
11851c0b2f7Stbbdev     else
11951c0b2f7Stbbdev         storage.my_range = Range(storage.my_range.begin(), r.end(), 1);
12051c0b2f7Stbbdev }
12151c0b2f7Stbbdev 
12251c0b2f7Stbbdev template<typename T>
ScanWithInit(const Range & r,T init,bool is_final,Storage<T> & storage,std::vector<T> & sum,const std::vector<T> & addend)12351c0b2f7Stbbdev Storage<T> ScanWithInit(const Range & r, T init, bool is_final, Storage<T> & storage, std::vector<T> & sum, const std::vector<T> & addend) {
12451c0b2f7Stbbdev     if (r.begin() == 0)
12551c0b2f7Stbbdev         storage.my_total = init;
12651c0b2f7Stbbdev     Scan(r, is_final, storage, sum, addend);
12751c0b2f7Stbbdev     return storage;
12851c0b2f7Stbbdev }
12951c0b2f7Stbbdev 
13051c0b2f7Stbbdev template<typename T>
13151c0b2f7Stbbdev class Accumulator {
13251c0b2f7Stbbdev     const  std::vector<T> &my_array;
13351c0b2f7Stbbdev     std::vector<T> & my_sum;
13451c0b2f7Stbbdev     Storage<T> storage;
13551c0b2f7Stbbdev     enum state_type {
13651c0b2f7Stbbdev         full,       // Accumulator has sufficient information for final scan,
13751c0b2f7Stbbdev                     // i.e. has seen all iterations to its left.
13851c0b2f7Stbbdev                     // It's either the original Accumulator provided by the user
13951c0b2f7Stbbdev                     // or a Accumulator constructed by a splitting constructor *and* subsequently
14051c0b2f7Stbbdev                     // subjected to a reverse_join with a full accumulator.
14151c0b2f7Stbbdev 
14251c0b2f7Stbbdev         partial,    // Accumulator has only enough information for pre_scan.
14351c0b2f7Stbbdev                     // i.e. has not seen all iterations to its left.
14451c0b2f7Stbbdev                     // It's an Accumulator created by a splitting constructor that
14551c0b2f7Stbbdev                     // has not yet been subjected to a reverse_join with a full accumulator.
14651c0b2f7Stbbdev 
14751c0b2f7Stbbdev         summary,    // Accumulator has summary of iterations processed, but not necessarily
14851c0b2f7Stbbdev                     // the information required for a final_scan or pre_scan.
14951c0b2f7Stbbdev                     // It's the result of "assign".
15051c0b2f7Stbbdev 
15151c0b2f7Stbbdev         trash       // Accumulator with possibly no useful information.
15251c0b2f7Stbbdev                     // It was the source for "assign".
15351c0b2f7Stbbdev 
15451c0b2f7Stbbdev     };
15551c0b2f7Stbbdev     mutable state_type my_state;
15657f524caSIlya Isaev     //! Equals this while object is fully constructed, nullptr otherwise.
15751c0b2f7Stbbdev     /** Used to detect premature destruction and accidental bitwise copy. */
15851c0b2f7Stbbdev     Accumulator* self;
15951c0b2f7Stbbdev     Accumulator& operator= (const Accumulator& other);
16051c0b2f7Stbbdev public:
Accumulator(T init,const std::vector<T> & array,std::vector<T> & sum)16151c0b2f7Stbbdev     Accumulator( T init, const std::vector<T> & array, std::vector<T> & sum ) :
16251c0b2f7Stbbdev         my_array(array), my_sum(sum), storage(init), my_state(full)
16351c0b2f7Stbbdev     {
16451c0b2f7Stbbdev         // Set self as last action of constructor, to indicate that object is fully constructed.
16551c0b2f7Stbbdev         self = this;
16651c0b2f7Stbbdev     }
~Accumulator()16751c0b2f7Stbbdev     ~Accumulator() {
16851c0b2f7Stbbdev         // Clear self as first action of destructor, to indicate that object is not fully constructed.
16957f524caSIlya Isaev         self = nullptr;
17051c0b2f7Stbbdev     }
Accumulator(Accumulator & a,tbb::split)17151c0b2f7Stbbdev     Accumulator( Accumulator& a, tbb::split ) :
17251c0b2f7Stbbdev         my_array(a.my_array), my_sum(a.my_sum), storage(0), my_state(partial)
17351c0b2f7Stbbdev     {
17451c0b2f7Stbbdev         if (!(a.my_state == partial))
17551c0b2f7Stbbdev             CHECK(a.my_state == full);
17651c0b2f7Stbbdev         if (!(a.my_state == full))
17751c0b2f7Stbbdev             CHECK(a.my_state == partial);
17851c0b2f7Stbbdev         CHECK(ScanIsRunning);
17951c0b2f7Stbbdev         // Set self as last action of constructor, to indicate that object is fully constructed.
18051c0b2f7Stbbdev         self = this;
18151c0b2f7Stbbdev     }
18251c0b2f7Stbbdev     template<typename Tag>
operator ()(const Range & r,Tag)18351c0b2f7Stbbdev     void operator()( const Range& r, Tag /*tag*/ ) {
18451c0b2f7Stbbdev         if(Tag::is_final_scan())
18551c0b2f7Stbbdev             CHECK(my_state == full);
18651c0b2f7Stbbdev         else
18751c0b2f7Stbbdev             CHECK(my_state == partial);
18851c0b2f7Stbbdev         Scan(r, Tag::is_final_scan(), storage, my_sum, my_array);
18951c0b2f7Stbbdev         CHECK_MESSAGE(self==this, "this Accumulator corrupted or prematurely destroyed");
19051c0b2f7Stbbdev     }
reverse_join(const Accumulator & left_body)19151c0b2f7Stbbdev     void reverse_join( const Accumulator& left_body) {
19251c0b2f7Stbbdev         const Storage<T> & left = left_body.storage;
19351c0b2f7Stbbdev         Storage<T> & right = storage;
19451c0b2f7Stbbdev         CHECK(my_state == partial);
19551c0b2f7Stbbdev         CHECK( ((left_body.my_state == full) || (left_body.my_state==partial)) );
19651c0b2f7Stbbdev 
197478de5b1Stbbdev         right = JoinStorages(left, right);
19851c0b2f7Stbbdev 
19951c0b2f7Stbbdev         CHECK(left_body.self == &left_body);
20051c0b2f7Stbbdev         my_state = left_body.my_state;
20151c0b2f7Stbbdev     }
assign(const Accumulator & other)20251c0b2f7Stbbdev     void assign( const Accumulator& other ) {
20351c0b2f7Stbbdev         CHECK(other.my_state == full);
20451c0b2f7Stbbdev         CHECK(my_state == full);
20551c0b2f7Stbbdev         storage.my_total = other.storage.my_total;
20651c0b2f7Stbbdev         storage.my_range = other.storage.my_range;
20751c0b2f7Stbbdev         CHECK(self == this);
20851c0b2f7Stbbdev         CHECK_MESSAGE(other.self==&other, "other Accumulator corrupted or prematurely destroyed");
20951c0b2f7Stbbdev         my_state = summary;
21051c0b2f7Stbbdev         other.my_state = trash;
21151c0b2f7Stbbdev     }
get_total()21251c0b2f7Stbbdev     T get_total() {
21351c0b2f7Stbbdev         return storage.my_total;
21451c0b2f7Stbbdev     }
21551c0b2f7Stbbdev };
21651c0b2f7Stbbdev 
21751c0b2f7Stbbdev 
21851c0b2f7Stbbdev template<typename T, typename Scan, typename ReverseJoin>
ParallelScanFunctionalInvoker(const Range & range,T idx,const Scan & scan,const ReverseJoin & reverse_join,int mode)21951c0b2f7Stbbdev T ParallelScanFunctionalInvoker(const Range& range, T idx, const Scan& scan, const ReverseJoin& reverse_join, int mode) {
22051c0b2f7Stbbdev     switch (mode%3) {
22151c0b2f7Stbbdev     case 0:
22251c0b2f7Stbbdev         return tbb::parallel_scan(range, idx, scan, reverse_join);
22351c0b2f7Stbbdev         break;
22451c0b2f7Stbbdev     case 1:
22551c0b2f7Stbbdev         return tbb::parallel_scan(range, idx, scan, reverse_join, tbb::simple_partitioner());
22651c0b2f7Stbbdev         break;
22751c0b2f7Stbbdev     default:
22851c0b2f7Stbbdev         return tbb::parallel_scan(range, idx, scan, reverse_join, tbb::auto_partitioner());
22951c0b2f7Stbbdev     }
23051c0b2f7Stbbdev }
23151c0b2f7Stbbdev 
23251c0b2f7Stbbdev template<typename T>
23351c0b2f7Stbbdev class ScanBody {
23451c0b2f7Stbbdev     const std::vector<T> &my_addend;
23551c0b2f7Stbbdev     std::vector<T> &my_sum;
23651c0b2f7Stbbdev     const T my_init;
23751c0b2f7Stbbdev     ScanBody& operator= (const ScanBody&);
23851c0b2f7Stbbdev public:
ScanBody(T init,const std::vector<T> & addend,std::vector<T> & sum)23951c0b2f7Stbbdev     ScanBody(T init, const std::vector<T> &addend, std::vector<T> &sum) :my_addend(addend), my_sum(sum), my_init(init) {}
24051c0b2f7Stbbdev     template<typename S, typename Tag>
operator ()(const Range & r,Storage<S> storage,Tag) const24151c0b2f7Stbbdev     Storage<S> operator()(const Range& r, Storage<S> storage, Tag) const {
24251c0b2f7Stbbdev         return ScanWithInit(r, my_init, Tag::is_final_scan(), storage, my_sum, my_addend);
24351c0b2f7Stbbdev     }
24451c0b2f7Stbbdev };
24551c0b2f7Stbbdev 
24651c0b2f7Stbbdev class JoinBody {
24751c0b2f7Stbbdev public:
24851c0b2f7Stbbdev     template<typename T>
operator ()(const Storage<T> & left,const Storage<T> & right) const249478de5b1Stbbdev     Storage<T> operator()(const Storage<T>& left, const Storage<T>& right) const {
250478de5b1Stbbdev         return JoinStorages(left, right);
25151c0b2f7Stbbdev     }
25251c0b2f7Stbbdev };
25351c0b2f7Stbbdev 
25451c0b2f7Stbbdev struct ParallelScanTemplateFunctor {
25551c0b2f7Stbbdev     template<typename T>
operator ()ParallelScanTemplateFunctor25651c0b2f7Stbbdev     T operator()(Range range, T init, const std::vector<T> &addend, std::vector<T> &sum, int mode) {
25751c0b2f7Stbbdev         for (long i = 0; i<MAXN; ++i) {
25851c0b2f7Stbbdev             AddendHistory[i] = UNUSED;
25951c0b2f7Stbbdev         }
26051c0b2f7Stbbdev         ScanIsRunning = true;
26151c0b2f7Stbbdev         ScanBody<T> sb(init, addend, sum);
26251c0b2f7Stbbdev         JoinBody jb;
26351c0b2f7Stbbdev         Storage<T> res = ParallelScanFunctionalInvoker(range, Storage<T>(0), sb, jb, mode);
26451c0b2f7Stbbdev         ScanIsRunning = false;
26551c0b2f7Stbbdev         if (range.empty())
26651c0b2f7Stbbdev             res.my_total = init;
26751c0b2f7Stbbdev         return res.my_total;
26851c0b2f7Stbbdev     }
26951c0b2f7Stbbdev };
27051c0b2f7Stbbdev 
27151c0b2f7Stbbdev struct ParallelScanLambda {
27251c0b2f7Stbbdev     template<typename T>
operator ()ParallelScanLambda27351c0b2f7Stbbdev     T operator()(Range range, T init, const std::vector<T> &addend, std::vector<T> &sum, int mode) {
27451c0b2f7Stbbdev         for (long i = 0; i<MAXN; ++i) {
27551c0b2f7Stbbdev             AddendHistory[i] = UNUSED;
27651c0b2f7Stbbdev         }
27751c0b2f7Stbbdev         ScanIsRunning = true;
27851c0b2f7Stbbdev         Storage<T> res = ParallelScanFunctionalInvoker(range, Storage<T>(0),
27951c0b2f7Stbbdev             [&addend, &sum, init](const Range& r, Storage<T> storage, bool is_final_scan /*tag*/) -> Storage<T> {
28051c0b2f7Stbbdev                 return ScanWithInit(r, init, is_final_scan, storage, sum, addend);
28151c0b2f7Stbbdev             },
282478de5b1Stbbdev             [](const Storage<T>& left, const Storage<T>& right) -> Storage<T> {
283478de5b1Stbbdev                 return JoinStorages(left, right);
28451c0b2f7Stbbdev             },
28551c0b2f7Stbbdev             mode);
28651c0b2f7Stbbdev         ScanIsRunning = false;
28751c0b2f7Stbbdev         if (range.empty())
28851c0b2f7Stbbdev             res.my_total = init;
28951c0b2f7Stbbdev         return res.my_total;
29051c0b2f7Stbbdev     }
29151c0b2f7Stbbdev };
29251c0b2f7Stbbdev 
TestAccumulator(int mode)29351c0b2f7Stbbdev void TestAccumulator( int mode ) {
29451c0b2f7Stbbdev     typedef int T;
29551c0b2f7Stbbdev     std::vector<T> addend(MAXN);
29651c0b2f7Stbbdev     std::vector<T> sum(MAXN);
29751c0b2f7Stbbdev     std::vector<T> control_sum(MAXN);
29851c0b2f7Stbbdev     T control_total;
29951c0b2f7Stbbdev     for( int n=0; n<=MAXN; n = n <=128? n+1: n*3) {
30051c0b2f7Stbbdev         for( int gs : {1, 2, 100, 511, 12345, n/ 111, n/17, n-1, n}) {
30151c0b2f7Stbbdev             if(gs<=0 || gs > n)
30251c0b2f7Stbbdev                 continue;
30351c0b2f7Stbbdev             control_total = 42;
30451c0b2f7Stbbdev             for( long i=0; i<MAXN; ++i ) {
30551c0b2f7Stbbdev                 addend[i] = -1;
30651c0b2f7Stbbdev                 sum[i] = -2;
30751c0b2f7Stbbdev                 control_sum[i] = -2;
30851c0b2f7Stbbdev                 AddendHistory[i] = UNUSED;
30951c0b2f7Stbbdev             }
31051c0b2f7Stbbdev             for (long i = 0; i<n; ++i) {
31151c0b2f7Stbbdev                 addend[i] = i;
31251c0b2f7Stbbdev                 control_total += addend[i];
31351c0b2f7Stbbdev                 control_sum[i] = control_total;
31451c0b2f7Stbbdev             }
31551c0b2f7Stbbdev 
31651c0b2f7Stbbdev             Accumulator<T> acc( 42, addend, sum);
31751c0b2f7Stbbdev             ScanIsRunning = true;
31851c0b2f7Stbbdev 
31951c0b2f7Stbbdev             switch (mode) {
32051c0b2f7Stbbdev                 case 0:
32151c0b2f7Stbbdev                     tbb::parallel_scan( Range( 0, n,  gs ), acc );
32251c0b2f7Stbbdev                 break;
32351c0b2f7Stbbdev                 case 1:
32451c0b2f7Stbbdev                     tbb::parallel_scan( Range( 0, n, gs ), acc, tbb::simple_partitioner() );
32551c0b2f7Stbbdev                 break;
32651c0b2f7Stbbdev                 case 2:
32751c0b2f7Stbbdev                     tbb::parallel_scan( Range( 0, n, gs ), acc, tbb::auto_partitioner() );
32851c0b2f7Stbbdev                 break;
32951c0b2f7Stbbdev             }
33051c0b2f7Stbbdev 
33151c0b2f7Stbbdev             ScanIsRunning = false;
33251c0b2f7Stbbdev 
33351c0b2f7Stbbdev             for( long i=0; i<n; ++i )
33451c0b2f7Stbbdev                 CHECK_MESSAGE((AddendHistory[i]&USED_FINAL), "failed to use addend[" << i << "] " << (AddendHistory[i] & USED_NONFINAL ? "(but used nonfinal)\n" : "\n"));
33551c0b2f7Stbbdev             for( long i=0; i<n; ++i ) {
33651c0b2f7Stbbdev                 VerifySum( 42, i, sum[i], __LINE__ );
33751c0b2f7Stbbdev             }
33851c0b2f7Stbbdev             if( n )
33951c0b2f7Stbbdev                 CHECK(acc.get_total()==sum[n-1]);
34051c0b2f7Stbbdev             else
34151c0b2f7Stbbdev                 CHECK(acc.get_total()==42);
34251c0b2f7Stbbdev             CHECK(control_total ==acc.get_total());
34351c0b2f7Stbbdev             CHECK(control_sum==sum);
34451c0b2f7Stbbdev         }
34551c0b2f7Stbbdev     }
34651c0b2f7Stbbdev }
34751c0b2f7Stbbdev 
34851c0b2f7Stbbdev template<typename ParallelScanWrapper>
TestInterface(int mode,ParallelScanWrapper parallel_scan_wrapper)34951c0b2f7Stbbdev void TestInterface( int mode, ParallelScanWrapper parallel_scan_wrapper ) {
35051c0b2f7Stbbdev     using T = int;
35151c0b2f7Stbbdev     std::vector<T> addend(MAXN);
35251c0b2f7Stbbdev     std::vector<T> control_sum(MAXN);
35351c0b2f7Stbbdev     T control_total(42);
35451c0b2f7Stbbdev     for( long i=0; i<MAXN; ++i ) {
35551c0b2f7Stbbdev         addend[i] = i;
35651c0b2f7Stbbdev         control_total += addend[i];
35751c0b2f7Stbbdev         control_sum[i] = control_total;
35851c0b2f7Stbbdev         AddendHistory[i] = UNUSED;
35951c0b2f7Stbbdev     }
36051c0b2f7Stbbdev 
36151c0b2f7Stbbdev     std::vector<T> sum(MAXN);
36251c0b2f7Stbbdev     for (long i = 0; i<MAXN; ++i)
36351c0b2f7Stbbdev         sum[i] = -2;
36451c0b2f7Stbbdev     ScanIsRunning = true;
36551c0b2f7Stbbdev     T total = parallel_scan_wrapper(Range(0, MAXN, 1), 42, addend, sum, mode);
36651c0b2f7Stbbdev     ScanIsRunning = false;
36751c0b2f7Stbbdev 
36851c0b2f7Stbbdev     CHECK_MESSAGE(control_total==total, "Parallel prefix sum is not equal to serial");
36951c0b2f7Stbbdev     CHECK_MESSAGE(control_sum==sum, "Parallel prefix vector is not equal to serial");
37051c0b2f7Stbbdev }
37151c0b2f7Stbbdev 
37251c0b2f7Stbbdev 
37351c0b2f7Stbbdev #if __TBB_CPP14_GENERIC_LAMBDAS_PRESENT
37451c0b2f7Stbbdev struct ParallelScanGenericLambda {
37551c0b2f7Stbbdev     template<typename T>
operator ()ParallelScanGenericLambda37651c0b2f7Stbbdev     T operator()(Range range, T init, const std::vector<T> &addend, std::vector<T> &sum, int mode) {
37751c0b2f7Stbbdev         for (long i = 0; i<MAXN; ++i) {
37851c0b2f7Stbbdev             AddendHistory[i] = UNUSED;
37951c0b2f7Stbbdev         }
38051c0b2f7Stbbdev         ScanIsRunning = true;
38151c0b2f7Stbbdev         Storage<T> res = ParallelScanFunctionalInvoker(range, Storage<T>(0),
382478de5b1Stbbdev             [&addend, &sum, init](const auto& rng, auto storage, bool is_final_scan) {
383478de5b1Stbbdev                 return ScanWithInit(rng, init, is_final_scan, storage, sum, addend);
38451c0b2f7Stbbdev             },
385478de5b1Stbbdev             [](const auto& left, const auto& right) {
386478de5b1Stbbdev                 return JoinStorages(left, right);
38751c0b2f7Stbbdev             },
38851c0b2f7Stbbdev             mode);
38951c0b2f7Stbbdev         ScanIsRunning = false;
39051c0b2f7Stbbdev         if (range.empty())
39151c0b2f7Stbbdev             res.my_total = init;
39251c0b2f7Stbbdev         return res.my_total;
39351c0b2f7Stbbdev     }
39451c0b2f7Stbbdev };
39551c0b2f7Stbbdev #endif /* __TBB_CPP14_GENERIC_LAMBDAS_PRESENT */
39651c0b2f7Stbbdev 
397478de5b1Stbbdev #if __TBB_CPP20_CONCEPTS_PRESENT
398478de5b1Stbbdev template <typename... Args>
399478de5b1Stbbdev concept can_call_parallel_scan_basic = requires( Args&&... args ) {
400478de5b1Stbbdev     tbb::parallel_scan(std::forward<Args>(args)...);
401478de5b1Stbbdev };
40251c0b2f7Stbbdev 
403478de5b1Stbbdev template <typename Range, typename Body>
404478de5b1Stbbdev concept can_call_imperative_pscan = can_call_parallel_scan_basic<const Range&, Body&> &&
405478de5b1Stbbdev                                     can_call_parallel_scan_basic<const Range&, Body&, const tbb::simple_partitioner&> &&
406478de5b1Stbbdev                                     can_call_parallel_scan_basic<const Range&, Body&, const tbb::auto_partitioner&>;
407478de5b1Stbbdev 
408478de5b1Stbbdev template <typename Range, typename Value, typename Func, typename Combine>
409478de5b1Stbbdev concept can_call_functional_pscan = can_call_parallel_scan_basic<const Range&, const Value&, const Func&, const Combine&> &&
410478de5b1Stbbdev                                     can_call_parallel_scan_basic<const Range&, const Value&, const Func&, const Combine&, const tbb::simple_partitioner&> &&
411478de5b1Stbbdev                                     can_call_parallel_scan_basic<const Range&, const Value&, const Func&, const Combine&, const tbb::auto_partitioner&>;
412478de5b1Stbbdev 
413478de5b1Stbbdev using CorrectRange = test_concepts::range::Correct;
414478de5b1Stbbdev 
415478de5b1Stbbdev template <typename Range>
416478de5b1Stbbdev using CorrectBody = test_concepts::parallel_scan_body::Correct<Range>;
417478de5b1Stbbdev 
418478de5b1Stbbdev template <typename Range, typename T>
419478de5b1Stbbdev using CorrectFunc = test_concepts::parallel_scan_function::Correct<Range, T>;
420478de5b1Stbbdev 
421478de5b1Stbbdev template <typename T>
422478de5b1Stbbdev using CorrectCombine = test_concepts::parallel_scan_combine::Correct<T>;
423478de5b1Stbbdev 
test_pscan_range_constraints()424478de5b1Stbbdev void test_pscan_range_constraints() {
425478de5b1Stbbdev     using namespace test_concepts::range;
426478de5b1Stbbdev     static_assert(can_call_imperative_pscan<Correct, CorrectBody<Correct>>);
427478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<NonCopyable, CorrectBody<NonCopyable>>);
428478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<NonDestructible, CorrectBody<NonDestructible>>);
429478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<NonSplittable, CorrectBody<NonSplittable>>);
430478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<NoEmpty, CorrectBody<NoEmpty>>);
431478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<EmptyNonConst, CorrectBody<EmptyNonConst>>);
432478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>);
433478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<NoIsDivisible, CorrectBody<NoIsDivisible>>);
434478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<IsDivisibleNonConst, CorrectBody<IsDivisibleNonConst>>);
435478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>);
436478de5b1Stbbdev 
437478de5b1Stbbdev     static_assert(can_call_functional_pscan<Correct, int, CorrectFunc<Correct, int>, CorrectCombine<int>>);
438478de5b1Stbbdev     static_assert(!can_call_functional_pscan<NonCopyable, int, CorrectFunc<NonCopyable, int>, CorrectCombine<int>>);
439478de5b1Stbbdev     static_assert(!can_call_functional_pscan<NonDestructible, int, CorrectFunc<NonDestructible, int>, CorrectCombine<int>>);
440478de5b1Stbbdev     static_assert(!can_call_functional_pscan<NonSplittable, int, CorrectFunc<NonSplittable, int>, CorrectCombine<int>>);
441478de5b1Stbbdev     static_assert(!can_call_functional_pscan<NoEmpty, int, CorrectFunc<NoEmpty, int>, CorrectCombine<int>>);
442478de5b1Stbbdev     static_assert(!can_call_functional_pscan<EmptyNonConst, int, CorrectFunc<EmptyNonConst, int>, CorrectCombine<int>>);
443478de5b1Stbbdev     static_assert(!can_call_functional_pscan<WrongReturnEmpty, int, CorrectFunc<WrongReturnEmpty, int>, CorrectCombine<int>>);
444478de5b1Stbbdev     static_assert(!can_call_functional_pscan<NoIsDivisible, int, CorrectFunc<NoIsDivisible, int>, CorrectCombine<int>>);
445478de5b1Stbbdev     static_assert(!can_call_functional_pscan<IsDivisibleNonConst, int, CorrectFunc<IsDivisibleNonConst, int>, CorrectCombine<int>>);
446478de5b1Stbbdev     static_assert(!can_call_functional_pscan<WrongReturnIsDivisible, int, CorrectFunc<WrongReturnIsDivisible, int>, CorrectCombine<int>>);
447478de5b1Stbbdev }
448478de5b1Stbbdev 
test_pscan_body_constraints()449478de5b1Stbbdev void test_pscan_body_constraints() {
450478de5b1Stbbdev     using namespace test_concepts::parallel_scan_body;
451478de5b1Stbbdev     static_assert(can_call_imperative_pscan<CorrectRange, Correct<CorrectRange>>);
452478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, NonSplittable<CorrectRange>>);
453478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, NoPreScanOperatorRoundBrackets<CorrectRange>>);
454478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, WrongFirstInputPreScanOperatorRoundBrackets<CorrectRange>>);
455478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, WrongSecondInputPreScanOperatorRoundBrackets<CorrectRange>>);
456478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, NoFinalScanOperatorRoundBrackets<CorrectRange>>);
457478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, WrongFirstInputFinalScanOperatorRoundBrackets<CorrectRange>>);
458478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, WrongSecondInputFinalScanOperatorRoundBrackets<CorrectRange>>);
459478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, NoReverseJoin<CorrectRange>>);
460478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, WrongInputReverseJoin<CorrectRange>>);
461478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, NoAssign<CorrectRange>>);
462478de5b1Stbbdev     static_assert(!can_call_imperative_pscan<CorrectRange, WrongInputAssign<CorrectRange>>);
463478de5b1Stbbdev }
464478de5b1Stbbdev 
test_pscan_func_constraints()465478de5b1Stbbdev void test_pscan_func_constraints() {
466478de5b1Stbbdev     using namespace test_concepts::parallel_scan_function;
467478de5b1Stbbdev     static_assert(can_call_functional_pscan<CorrectRange, int, Correct<CorrectRange, int>, CorrectCombine<int>>);
468478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, NoOperatorRoundBrackets<CorrectRange, int>, CorrectCombine<int>>);
469478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, OperatorRoundBracketsNonConst<CorrectRange, int>, CorrectCombine<int>>);
470478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, WrongFirstInputOperatorRoundBrackets<CorrectRange, int>, CorrectCombine<int>>);
471478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, WrongSecondInputOperatorRoundBrackets<CorrectRange, int>, CorrectCombine<int>>);
472478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, WrongReturnOperatorRoundBrackets<CorrectRange, int>, CorrectCombine<int>>);
473478de5b1Stbbdev }
474478de5b1Stbbdev 
test_pscan_combine_constraints()475478de5b1Stbbdev void test_pscan_combine_constraints() {
476478de5b1Stbbdev     using namespace test_concepts::parallel_scan_combine;
477478de5b1Stbbdev     static_assert(can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, Correct<int>>);
478478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, NoOperatorRoundBrackets<int>>);
479478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, OperatorRoundBracketsNonConst<int>>);
480478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, WrongFirstInputOperatorRoundBrackets<int>>);
481478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, WrongSecondInputOperatorRoundBrackets<int>>);
482478de5b1Stbbdev     static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, WrongReturnOperatorRoundBrackets<int>>);
483478de5b1Stbbdev }
484478de5b1Stbbdev 
485478de5b1Stbbdev #endif // __TBB_CPP20_CONCEPTS_PRESENT
48651c0b2f7Stbbdev 
48751c0b2f7Stbbdev // Test for parallel_scan with with different partitioners
48851c0b2f7Stbbdev //! \brief \ref error_guessing \ref resource_usage
48951c0b2f7Stbbdev TEST_CASE("parallel_scan testing with different partitioners") {
49051c0b2f7Stbbdev     for (auto concurrency_level : utils::concurrency_range()) {
49151c0b2f7Stbbdev         tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level);
49251c0b2f7Stbbdev         for (int mode = 0; mode < 3; mode++) {
49351c0b2f7Stbbdev             NumberOfLiveStorage = 0;
49451c0b2f7Stbbdev             TestAccumulator(mode);
49551c0b2f7Stbbdev             // Test that all workers sleep when no work
49651c0b2f7Stbbdev             TestCPUUserTime(concurrency_level);
49751c0b2f7Stbbdev 
49851c0b2f7Stbbdev             // Checking has to be done late, because when parallel_scan makes copies of
49951c0b2f7Stbbdev             // the user's "Body", the copies might be destroyed slightly after parallel_scan
50051c0b2f7Stbbdev             // returns.
50151c0b2f7Stbbdev             CHECK(NumberOfLiveStorage == 0);
50251c0b2f7Stbbdev         }
50351c0b2f7Stbbdev     }
50451c0b2f7Stbbdev }
50551c0b2f7Stbbdev 
50651c0b2f7Stbbdev // Test for parallel_scan with template functors
50751c0b2f7Stbbdev //! \brief \ref error_guessing \ref interface \ref resource_usage
50851c0b2f7Stbbdev TEST_CASE("parallel_scan testing with template functor") {
50951c0b2f7Stbbdev     for (auto concurrency_level : utils::concurrency_range()) {
51051c0b2f7Stbbdev         tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level);
51151c0b2f7Stbbdev         for (int mode = 0; mode < 3; mode++) {
51251c0b2f7Stbbdev             NumberOfLiveStorage = 0;
51351c0b2f7Stbbdev             TestInterface(mode,  ParallelScanTemplateFunctor());
51451c0b2f7Stbbdev             // Test that all workers sleep when no work
51551c0b2f7Stbbdev             TestCPUUserTime(concurrency_level);
51651c0b2f7Stbbdev 
51751c0b2f7Stbbdev             // Checking has to be done late, because when parallel_scan makes copies of
51851c0b2f7Stbbdev             // the user's "Body", the copies might be destroyed slightly after parallel_scan
51951c0b2f7Stbbdev             // returns.
52051c0b2f7Stbbdev             CHECK(NumberOfLiveStorage == 0);
52151c0b2f7Stbbdev         }
52251c0b2f7Stbbdev     }
52351c0b2f7Stbbdev }
52451c0b2f7Stbbdev 
52551c0b2f7Stbbdev // Test for parallel_scan with lambdas
52651c0b2f7Stbbdev //! \brief \ref error_guessing \ref interface \ref resource_usage
52751c0b2f7Stbbdev TEST_CASE("parallel_scan testing with lambdas") {
52851c0b2f7Stbbdev     for (auto concurrency_level : utils::concurrency_range()) {
52951c0b2f7Stbbdev         tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level);
53051c0b2f7Stbbdev         for (int mode = 0; mode < 3; mode++) {
53151c0b2f7Stbbdev             NumberOfLiveStorage = 0;
53251c0b2f7Stbbdev             TestInterface(mode,  ParallelScanLambda());
53351c0b2f7Stbbdev 
53451c0b2f7Stbbdev             // Test that all workers sleep when no work
53551c0b2f7Stbbdev             TestCPUUserTime(concurrency_level);
53651c0b2f7Stbbdev 
53751c0b2f7Stbbdev             // Checking has to be done late, because when parallel_scan makes copies of
53851c0b2f7Stbbdev             // the user's "Body", the copies might be destroyed slightly after parallel_scan
53951c0b2f7Stbbdev             // returns.
54051c0b2f7Stbbdev             CHECK(NumberOfLiveStorage == 0);
54151c0b2f7Stbbdev         }
54251c0b2f7Stbbdev     }
54351c0b2f7Stbbdev }
54451c0b2f7Stbbdev 
54551c0b2f7Stbbdev #if __TBB_CPP14_GENERIC_LAMBDAS_PRESENT
54651c0b2f7Stbbdev // Test for parallel_scan with genetic lambdas
54751c0b2f7Stbbdev //! \brief \ref error_guessing \ref interface \ref resource_usage
54851c0b2f7Stbbdev TEST_CASE("parallel_scan testing with generic lambdas") {
54951c0b2f7Stbbdev     for (auto concurrency_level : utils::concurrency_range()) {
55051c0b2f7Stbbdev         tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level);
55151c0b2f7Stbbdev         for (int mode = 0; mode < 3; mode++) {
55251c0b2f7Stbbdev             NumberOfLiveStorage = 0;
55351c0b2f7Stbbdev             TestInterface(mode,  ParallelScanGenericLambda());
55451c0b2f7Stbbdev             // Test that all workers sleep when no work
55551c0b2f7Stbbdev             TestCPUUserTime(concurrency_level);
55651c0b2f7Stbbdev 
55751c0b2f7Stbbdev             // Checking has to be done late, because when parallel_scan makes copies of
55851c0b2f7Stbbdev             // the user's "Body", the copies might be destroyed slightly after parallel_scan
55951c0b2f7Stbbdev             // returns.
56051c0b2f7Stbbdev             CHECK(NumberOfLiveStorage == 0);
56151c0b2f7Stbbdev         }
56251c0b2f7Stbbdev     }
56351c0b2f7Stbbdev }
56451c0b2f7Stbbdev #endif /* __TBB_CPP14_GENERIC_LAMBDAS_PRESENT */
565478de5b1Stbbdev 
566478de5b1Stbbdev #if __TBB_CPP20_CONCEPTS_PRESENT
567478de5b1Stbbdev //! \brief \ref error_guessing
568478de5b1Stbbdev TEST_CASE("parallel_scan constraints") {
569478de5b1Stbbdev     test_pscan_range_constraints();
570478de5b1Stbbdev     test_pscan_body_constraints();
571478de5b1Stbbdev     test_pscan_func_constraints();
572478de5b1Stbbdev     test_pscan_combine_constraints();
573478de5b1Stbbdev }
574478de5b1Stbbdev #endif // __TBB_CPP20_CONCEPTS_PRESENT
575