151c0b2f7Stbbdev /*
2*c21e688aSSergey Zheltov Copyright (c) 2005-2022 Intel Corporation
351c0b2f7Stbbdev
451c0b2f7Stbbdev Licensed under the Apache License, Version 2.0 (the "License");
551c0b2f7Stbbdev you may not use this file except in compliance with the License.
651c0b2f7Stbbdev You may obtain a copy of the License at
751c0b2f7Stbbdev
851c0b2f7Stbbdev http://www.apache.org/licenses/LICENSE-2.0
951c0b2f7Stbbdev
1051c0b2f7Stbbdev Unless required by applicable law or agreed to in writing, software
1151c0b2f7Stbbdev distributed under the License is distributed on an "AS IS" BASIS,
1251c0b2f7Stbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1351c0b2f7Stbbdev See the License for the specific language governing permissions and
1451c0b2f7Stbbdev limitations under the License.
1551c0b2f7Stbbdev */
1651c0b2f7Stbbdev
17478de5b1Stbbdev #if __INTEL_COMPILER && _MSC_VER
18478de5b1Stbbdev #pragma warning(disable : 2586) // decorated name length exceeded, name was truncated
19478de5b1Stbbdev #endif
20478de5b1Stbbdev
2151c0b2f7Stbbdev #include "common/test.h"
2251c0b2f7Stbbdev #include "common/config.h"
2351c0b2f7Stbbdev #include "common/utils_concurrency_limit.h"
2451c0b2f7Stbbdev #include "common/cpu_usertime.h"
25478de5b1Stbbdev #include "common/concepts_common.h"
2651c0b2f7Stbbdev
2751c0b2f7Stbbdev #include "tbb/global_control.h"
2851c0b2f7Stbbdev #include "tbb/parallel_scan.h"
2951c0b2f7Stbbdev #include "tbb/blocked_range.h"
3051c0b2f7Stbbdev #include "tbb/tick_count.h"
3151c0b2f7Stbbdev #include <vector>
3251c0b2f7Stbbdev #include <atomic>
3351c0b2f7Stbbdev
3451c0b2f7Stbbdev //! \file test_parallel_scan.cpp
3551c0b2f7Stbbdev //! \brief Test for [algorithms.parallel_scan] specification
3651c0b2f7Stbbdev
3751c0b2f7Stbbdev using Range = tbb::blocked_range<long>;
3851c0b2f7Stbbdev
3951c0b2f7Stbbdev static volatile bool ScanIsRunning = false;
4051c0b2f7Stbbdev
4151c0b2f7Stbbdev //! Sum of 0..i with wrap around on overflow.
TriangularSum(int i)4251c0b2f7Stbbdev inline int TriangularSum( int i ) {
4351c0b2f7Stbbdev return i&1 ? ((i>>1)+1)*i : (i>>1)*(i+1);
4451c0b2f7Stbbdev }
4551c0b2f7Stbbdev
4651c0b2f7Stbbdev //! Verify that sum is init plus sum of integers in closed interval [0..finish_index].
4751c0b2f7Stbbdev /** line should be the source line of the caller */
VerifySum(int init,long finish_index,int sum,int line)4851c0b2f7Stbbdev void VerifySum( int init, long finish_index, int sum, int line ) {
4951c0b2f7Stbbdev int expected = init + TriangularSum(finish_index);
5051c0b2f7Stbbdev CHECK_MESSAGE(expected == sum, "line " << line << ": sum[0.." << finish_index << "] should be = " << expected << ", but was computed as " << sum << "\n");
5151c0b2f7Stbbdev }
5251c0b2f7Stbbdev
5351c0b2f7Stbbdev const int MAXN = 20000;
5451c0b2f7Stbbdev
5551c0b2f7Stbbdev enum AddendFlag {
5651c0b2f7Stbbdev UNUSED=0,
5751c0b2f7Stbbdev USED_NONFINAL=1,
5851c0b2f7Stbbdev USED_FINAL=2
5951c0b2f7Stbbdev };
6051c0b2f7Stbbdev
6151c0b2f7Stbbdev //! Array recording how each addend was used.
6251c0b2f7Stbbdev /** 'unsigned char' instead of AddendFlag for sake of compactness. */
6351c0b2f7Stbbdev static unsigned char AddendHistory[MAXN];
6451c0b2f7Stbbdev
6551c0b2f7Stbbdev std::atomic<long> NumberOfLiveStorage;
6651c0b2f7Stbbdev
6751c0b2f7Stbbdev template<typename T>
6851c0b2f7Stbbdev struct Storage {
6951c0b2f7Stbbdev T my_total;
7051c0b2f7Stbbdev Range my_range;
StorageStorage7151c0b2f7Stbbdev Storage(T init) :
7251c0b2f7Stbbdev my_total(init), my_range(-1, -1, 1) {
7351c0b2f7Stbbdev ++NumberOfLiveStorage;
7451c0b2f7Stbbdev }
~StorageStorage7551c0b2f7Stbbdev ~Storage() {
7651c0b2f7Stbbdev --NumberOfLiveStorage;
7751c0b2f7Stbbdev }
StorageStorage7851c0b2f7Stbbdev Storage(const Storage& strg) :
7951c0b2f7Stbbdev my_total(strg.my_total), my_range(strg.my_range) {
8051c0b2f7Stbbdev ++NumberOfLiveStorage;
8151c0b2f7Stbbdev }
operator =Storage8251c0b2f7Stbbdev Storage & operator=(const Storage& strg) {
8351c0b2f7Stbbdev my_total = strg.my_total;
8451c0b2f7Stbbdev my_range = strg.my_range;
8551c0b2f7Stbbdev return *this;
8651c0b2f7Stbbdev }
8751c0b2f7Stbbdev };
8851c0b2f7Stbbdev
8951c0b2f7Stbbdev template<typename T>
JoinStorages(const Storage<T> & left,const Storage<T> & right)90478de5b1Stbbdev Storage<T> JoinStorages(const Storage<T>& left, const Storage<T>& right) {
91478de5b1Stbbdev Storage<T> result = right;
9251c0b2f7Stbbdev CHECK(ScanIsRunning);
9351c0b2f7Stbbdev CHECK(left.my_range.end() == right.my_range.begin());
94478de5b1Stbbdev result.my_total += left.my_total;
95478de5b1Stbbdev result.my_range = Range(left.my_range.begin(), right.my_range.end(), 1);
9651c0b2f7Stbbdev CHECK(ScanIsRunning);
97478de5b1Stbbdev return result;
9851c0b2f7Stbbdev }
9951c0b2f7Stbbdev
10051c0b2f7Stbbdev template<typename T>
Scan(const Range & r,bool is_final,Storage<T> & storage,std::vector<T> & sum,const std::vector<T> & addend)10151c0b2f7Stbbdev void Scan(const Range & r, bool is_final, Storage<T> & storage, std::vector<T> & sum, const std::vector<T> & addend) {
10251c0b2f7Stbbdev CHECK((!is_final || (storage.my_range.begin() == 0 && storage.my_range.end() == r.begin()) || (storage.my_range.empty() && r.begin() == 0)));
10351c0b2f7Stbbdev for (long i = r.begin(); i < r.end(); ++i) {
10451c0b2f7Stbbdev storage.my_total += addend[i];
10551c0b2f7Stbbdev if (is_final) {
10651c0b2f7Stbbdev CHECK_MESSAGE(AddendHistory[i] < USED_FINAL, "addend used 'finally' twice?");
10751c0b2f7Stbbdev AddendHistory[i] |= USED_FINAL;
10851c0b2f7Stbbdev sum[i] = storage.my_total;
10951c0b2f7Stbbdev VerifySum(42, i, int(sum[i]), __LINE__);
11051c0b2f7Stbbdev }
11151c0b2f7Stbbdev else {
11251c0b2f7Stbbdev CHECK_MESSAGE(AddendHistory[i] == UNUSED, "addend used too many times");
11351c0b2f7Stbbdev AddendHistory[i] |= USED_NONFINAL;
11451c0b2f7Stbbdev }
11551c0b2f7Stbbdev }
11651c0b2f7Stbbdev if (storage.my_range.empty())
11751c0b2f7Stbbdev storage.my_range = r;
11851c0b2f7Stbbdev else
11951c0b2f7Stbbdev storage.my_range = Range(storage.my_range.begin(), r.end(), 1);
12051c0b2f7Stbbdev }
12151c0b2f7Stbbdev
12251c0b2f7Stbbdev template<typename T>
ScanWithInit(const Range & r,T init,bool is_final,Storage<T> & storage,std::vector<T> & sum,const std::vector<T> & addend)12351c0b2f7Stbbdev Storage<T> ScanWithInit(const Range & r, T init, bool is_final, Storage<T> & storage, std::vector<T> & sum, const std::vector<T> & addend) {
12451c0b2f7Stbbdev if (r.begin() == 0)
12551c0b2f7Stbbdev storage.my_total = init;
12651c0b2f7Stbbdev Scan(r, is_final, storage, sum, addend);
12751c0b2f7Stbbdev return storage;
12851c0b2f7Stbbdev }
12951c0b2f7Stbbdev
13051c0b2f7Stbbdev template<typename T>
13151c0b2f7Stbbdev class Accumulator {
13251c0b2f7Stbbdev const std::vector<T> &my_array;
13351c0b2f7Stbbdev std::vector<T> & my_sum;
13451c0b2f7Stbbdev Storage<T> storage;
13551c0b2f7Stbbdev enum state_type {
13651c0b2f7Stbbdev full, // Accumulator has sufficient information for final scan,
13751c0b2f7Stbbdev // i.e. has seen all iterations to its left.
13851c0b2f7Stbbdev // It's either the original Accumulator provided by the user
13951c0b2f7Stbbdev // or a Accumulator constructed by a splitting constructor *and* subsequently
14051c0b2f7Stbbdev // subjected to a reverse_join with a full accumulator.
14151c0b2f7Stbbdev
14251c0b2f7Stbbdev partial, // Accumulator has only enough information for pre_scan.
14351c0b2f7Stbbdev // i.e. has not seen all iterations to its left.
14451c0b2f7Stbbdev // It's an Accumulator created by a splitting constructor that
14551c0b2f7Stbbdev // has not yet been subjected to a reverse_join with a full accumulator.
14651c0b2f7Stbbdev
14751c0b2f7Stbbdev summary, // Accumulator has summary of iterations processed, but not necessarily
14851c0b2f7Stbbdev // the information required for a final_scan or pre_scan.
14951c0b2f7Stbbdev // It's the result of "assign".
15051c0b2f7Stbbdev
15151c0b2f7Stbbdev trash // Accumulator with possibly no useful information.
15251c0b2f7Stbbdev // It was the source for "assign".
15351c0b2f7Stbbdev
15451c0b2f7Stbbdev };
15551c0b2f7Stbbdev mutable state_type my_state;
15657f524caSIlya Isaev //! Equals this while object is fully constructed, nullptr otherwise.
15751c0b2f7Stbbdev /** Used to detect premature destruction and accidental bitwise copy. */
15851c0b2f7Stbbdev Accumulator* self;
15951c0b2f7Stbbdev Accumulator& operator= (const Accumulator& other);
16051c0b2f7Stbbdev public:
Accumulator(T init,const std::vector<T> & array,std::vector<T> & sum)16151c0b2f7Stbbdev Accumulator( T init, const std::vector<T> & array, std::vector<T> & sum ) :
16251c0b2f7Stbbdev my_array(array), my_sum(sum), storage(init), my_state(full)
16351c0b2f7Stbbdev {
16451c0b2f7Stbbdev // Set self as last action of constructor, to indicate that object is fully constructed.
16551c0b2f7Stbbdev self = this;
16651c0b2f7Stbbdev }
~Accumulator()16751c0b2f7Stbbdev ~Accumulator() {
16851c0b2f7Stbbdev // Clear self as first action of destructor, to indicate that object is not fully constructed.
16957f524caSIlya Isaev self = nullptr;
17051c0b2f7Stbbdev }
Accumulator(Accumulator & a,tbb::split)17151c0b2f7Stbbdev Accumulator( Accumulator& a, tbb::split ) :
17251c0b2f7Stbbdev my_array(a.my_array), my_sum(a.my_sum), storage(0), my_state(partial)
17351c0b2f7Stbbdev {
17451c0b2f7Stbbdev if (!(a.my_state == partial))
17551c0b2f7Stbbdev CHECK(a.my_state == full);
17651c0b2f7Stbbdev if (!(a.my_state == full))
17751c0b2f7Stbbdev CHECK(a.my_state == partial);
17851c0b2f7Stbbdev CHECK(ScanIsRunning);
17951c0b2f7Stbbdev // Set self as last action of constructor, to indicate that object is fully constructed.
18051c0b2f7Stbbdev self = this;
18151c0b2f7Stbbdev }
18251c0b2f7Stbbdev template<typename Tag>
operator ()(const Range & r,Tag)18351c0b2f7Stbbdev void operator()( const Range& r, Tag /*tag*/ ) {
18451c0b2f7Stbbdev if(Tag::is_final_scan())
18551c0b2f7Stbbdev CHECK(my_state == full);
18651c0b2f7Stbbdev else
18751c0b2f7Stbbdev CHECK(my_state == partial);
18851c0b2f7Stbbdev Scan(r, Tag::is_final_scan(), storage, my_sum, my_array);
18951c0b2f7Stbbdev CHECK_MESSAGE(self==this, "this Accumulator corrupted or prematurely destroyed");
19051c0b2f7Stbbdev }
reverse_join(const Accumulator & left_body)19151c0b2f7Stbbdev void reverse_join( const Accumulator& left_body) {
19251c0b2f7Stbbdev const Storage<T> & left = left_body.storage;
19351c0b2f7Stbbdev Storage<T> & right = storage;
19451c0b2f7Stbbdev CHECK(my_state == partial);
19551c0b2f7Stbbdev CHECK( ((left_body.my_state == full) || (left_body.my_state==partial)) );
19651c0b2f7Stbbdev
197478de5b1Stbbdev right = JoinStorages(left, right);
19851c0b2f7Stbbdev
19951c0b2f7Stbbdev CHECK(left_body.self == &left_body);
20051c0b2f7Stbbdev my_state = left_body.my_state;
20151c0b2f7Stbbdev }
assign(const Accumulator & other)20251c0b2f7Stbbdev void assign( const Accumulator& other ) {
20351c0b2f7Stbbdev CHECK(other.my_state == full);
20451c0b2f7Stbbdev CHECK(my_state == full);
20551c0b2f7Stbbdev storage.my_total = other.storage.my_total;
20651c0b2f7Stbbdev storage.my_range = other.storage.my_range;
20751c0b2f7Stbbdev CHECK(self == this);
20851c0b2f7Stbbdev CHECK_MESSAGE(other.self==&other, "other Accumulator corrupted or prematurely destroyed");
20951c0b2f7Stbbdev my_state = summary;
21051c0b2f7Stbbdev other.my_state = trash;
21151c0b2f7Stbbdev }
get_total()21251c0b2f7Stbbdev T get_total() {
21351c0b2f7Stbbdev return storage.my_total;
21451c0b2f7Stbbdev }
21551c0b2f7Stbbdev };
21651c0b2f7Stbbdev
21751c0b2f7Stbbdev
21851c0b2f7Stbbdev template<typename T, typename Scan, typename ReverseJoin>
ParallelScanFunctionalInvoker(const Range & range,T idx,const Scan & scan,const ReverseJoin & reverse_join,int mode)21951c0b2f7Stbbdev T ParallelScanFunctionalInvoker(const Range& range, T idx, const Scan& scan, const ReverseJoin& reverse_join, int mode) {
22051c0b2f7Stbbdev switch (mode%3) {
22151c0b2f7Stbbdev case 0:
22251c0b2f7Stbbdev return tbb::parallel_scan(range, idx, scan, reverse_join);
22351c0b2f7Stbbdev break;
22451c0b2f7Stbbdev case 1:
22551c0b2f7Stbbdev return tbb::parallel_scan(range, idx, scan, reverse_join, tbb::simple_partitioner());
22651c0b2f7Stbbdev break;
22751c0b2f7Stbbdev default:
22851c0b2f7Stbbdev return tbb::parallel_scan(range, idx, scan, reverse_join, tbb::auto_partitioner());
22951c0b2f7Stbbdev }
23051c0b2f7Stbbdev }
23151c0b2f7Stbbdev
23251c0b2f7Stbbdev template<typename T>
23351c0b2f7Stbbdev class ScanBody {
23451c0b2f7Stbbdev const std::vector<T> &my_addend;
23551c0b2f7Stbbdev std::vector<T> &my_sum;
23651c0b2f7Stbbdev const T my_init;
23751c0b2f7Stbbdev ScanBody& operator= (const ScanBody&);
23851c0b2f7Stbbdev public:
ScanBody(T init,const std::vector<T> & addend,std::vector<T> & sum)23951c0b2f7Stbbdev ScanBody(T init, const std::vector<T> &addend, std::vector<T> &sum) :my_addend(addend), my_sum(sum), my_init(init) {}
24051c0b2f7Stbbdev template<typename S, typename Tag>
operator ()(const Range & r,Storage<S> storage,Tag) const24151c0b2f7Stbbdev Storage<S> operator()(const Range& r, Storage<S> storage, Tag) const {
24251c0b2f7Stbbdev return ScanWithInit(r, my_init, Tag::is_final_scan(), storage, my_sum, my_addend);
24351c0b2f7Stbbdev }
24451c0b2f7Stbbdev };
24551c0b2f7Stbbdev
24651c0b2f7Stbbdev class JoinBody {
24751c0b2f7Stbbdev public:
24851c0b2f7Stbbdev template<typename T>
operator ()(const Storage<T> & left,const Storage<T> & right) const249478de5b1Stbbdev Storage<T> operator()(const Storage<T>& left, const Storage<T>& right) const {
250478de5b1Stbbdev return JoinStorages(left, right);
25151c0b2f7Stbbdev }
25251c0b2f7Stbbdev };
25351c0b2f7Stbbdev
25451c0b2f7Stbbdev struct ParallelScanTemplateFunctor {
25551c0b2f7Stbbdev template<typename T>
operator ()ParallelScanTemplateFunctor25651c0b2f7Stbbdev T operator()(Range range, T init, const std::vector<T> &addend, std::vector<T> &sum, int mode) {
25751c0b2f7Stbbdev for (long i = 0; i<MAXN; ++i) {
25851c0b2f7Stbbdev AddendHistory[i] = UNUSED;
25951c0b2f7Stbbdev }
26051c0b2f7Stbbdev ScanIsRunning = true;
26151c0b2f7Stbbdev ScanBody<T> sb(init, addend, sum);
26251c0b2f7Stbbdev JoinBody jb;
26351c0b2f7Stbbdev Storage<T> res = ParallelScanFunctionalInvoker(range, Storage<T>(0), sb, jb, mode);
26451c0b2f7Stbbdev ScanIsRunning = false;
26551c0b2f7Stbbdev if (range.empty())
26651c0b2f7Stbbdev res.my_total = init;
26751c0b2f7Stbbdev return res.my_total;
26851c0b2f7Stbbdev }
26951c0b2f7Stbbdev };
27051c0b2f7Stbbdev
27151c0b2f7Stbbdev struct ParallelScanLambda {
27251c0b2f7Stbbdev template<typename T>
operator ()ParallelScanLambda27351c0b2f7Stbbdev T operator()(Range range, T init, const std::vector<T> &addend, std::vector<T> &sum, int mode) {
27451c0b2f7Stbbdev for (long i = 0; i<MAXN; ++i) {
27551c0b2f7Stbbdev AddendHistory[i] = UNUSED;
27651c0b2f7Stbbdev }
27751c0b2f7Stbbdev ScanIsRunning = true;
27851c0b2f7Stbbdev Storage<T> res = ParallelScanFunctionalInvoker(range, Storage<T>(0),
27951c0b2f7Stbbdev [&addend, &sum, init](const Range& r, Storage<T> storage, bool is_final_scan /*tag*/) -> Storage<T> {
28051c0b2f7Stbbdev return ScanWithInit(r, init, is_final_scan, storage, sum, addend);
28151c0b2f7Stbbdev },
282478de5b1Stbbdev [](const Storage<T>& left, const Storage<T>& right) -> Storage<T> {
283478de5b1Stbbdev return JoinStorages(left, right);
28451c0b2f7Stbbdev },
28551c0b2f7Stbbdev mode);
28651c0b2f7Stbbdev ScanIsRunning = false;
28751c0b2f7Stbbdev if (range.empty())
28851c0b2f7Stbbdev res.my_total = init;
28951c0b2f7Stbbdev return res.my_total;
29051c0b2f7Stbbdev }
29151c0b2f7Stbbdev };
29251c0b2f7Stbbdev
TestAccumulator(int mode)29351c0b2f7Stbbdev void TestAccumulator( int mode ) {
29451c0b2f7Stbbdev typedef int T;
29551c0b2f7Stbbdev std::vector<T> addend(MAXN);
29651c0b2f7Stbbdev std::vector<T> sum(MAXN);
29751c0b2f7Stbbdev std::vector<T> control_sum(MAXN);
29851c0b2f7Stbbdev T control_total;
29951c0b2f7Stbbdev for( int n=0; n<=MAXN; n = n <=128? n+1: n*3) {
30051c0b2f7Stbbdev for( int gs : {1, 2, 100, 511, 12345, n/ 111, n/17, n-1, n}) {
30151c0b2f7Stbbdev if(gs<=0 || gs > n)
30251c0b2f7Stbbdev continue;
30351c0b2f7Stbbdev control_total = 42;
30451c0b2f7Stbbdev for( long i=0; i<MAXN; ++i ) {
30551c0b2f7Stbbdev addend[i] = -1;
30651c0b2f7Stbbdev sum[i] = -2;
30751c0b2f7Stbbdev control_sum[i] = -2;
30851c0b2f7Stbbdev AddendHistory[i] = UNUSED;
30951c0b2f7Stbbdev }
31051c0b2f7Stbbdev for (long i = 0; i<n; ++i) {
31151c0b2f7Stbbdev addend[i] = i;
31251c0b2f7Stbbdev control_total += addend[i];
31351c0b2f7Stbbdev control_sum[i] = control_total;
31451c0b2f7Stbbdev }
31551c0b2f7Stbbdev
31651c0b2f7Stbbdev Accumulator<T> acc( 42, addend, sum);
31751c0b2f7Stbbdev ScanIsRunning = true;
31851c0b2f7Stbbdev
31951c0b2f7Stbbdev switch (mode) {
32051c0b2f7Stbbdev case 0:
32151c0b2f7Stbbdev tbb::parallel_scan( Range( 0, n, gs ), acc );
32251c0b2f7Stbbdev break;
32351c0b2f7Stbbdev case 1:
32451c0b2f7Stbbdev tbb::parallel_scan( Range( 0, n, gs ), acc, tbb::simple_partitioner() );
32551c0b2f7Stbbdev break;
32651c0b2f7Stbbdev case 2:
32751c0b2f7Stbbdev tbb::parallel_scan( Range( 0, n, gs ), acc, tbb::auto_partitioner() );
32851c0b2f7Stbbdev break;
32951c0b2f7Stbbdev }
33051c0b2f7Stbbdev
33151c0b2f7Stbbdev ScanIsRunning = false;
33251c0b2f7Stbbdev
33351c0b2f7Stbbdev for( long i=0; i<n; ++i )
33451c0b2f7Stbbdev CHECK_MESSAGE((AddendHistory[i]&USED_FINAL), "failed to use addend[" << i << "] " << (AddendHistory[i] & USED_NONFINAL ? "(but used nonfinal)\n" : "\n"));
33551c0b2f7Stbbdev for( long i=0; i<n; ++i ) {
33651c0b2f7Stbbdev VerifySum( 42, i, sum[i], __LINE__ );
33751c0b2f7Stbbdev }
33851c0b2f7Stbbdev if( n )
33951c0b2f7Stbbdev CHECK(acc.get_total()==sum[n-1]);
34051c0b2f7Stbbdev else
34151c0b2f7Stbbdev CHECK(acc.get_total()==42);
34251c0b2f7Stbbdev CHECK(control_total ==acc.get_total());
34351c0b2f7Stbbdev CHECK(control_sum==sum);
34451c0b2f7Stbbdev }
34551c0b2f7Stbbdev }
34651c0b2f7Stbbdev }
34751c0b2f7Stbbdev
34851c0b2f7Stbbdev template<typename ParallelScanWrapper>
TestInterface(int mode,ParallelScanWrapper parallel_scan_wrapper)34951c0b2f7Stbbdev void TestInterface( int mode, ParallelScanWrapper parallel_scan_wrapper ) {
35051c0b2f7Stbbdev using T = int;
35151c0b2f7Stbbdev std::vector<T> addend(MAXN);
35251c0b2f7Stbbdev std::vector<T> control_sum(MAXN);
35351c0b2f7Stbbdev T control_total(42);
35451c0b2f7Stbbdev for( long i=0; i<MAXN; ++i ) {
35551c0b2f7Stbbdev addend[i] = i;
35651c0b2f7Stbbdev control_total += addend[i];
35751c0b2f7Stbbdev control_sum[i] = control_total;
35851c0b2f7Stbbdev AddendHistory[i] = UNUSED;
35951c0b2f7Stbbdev }
36051c0b2f7Stbbdev
36151c0b2f7Stbbdev std::vector<T> sum(MAXN);
36251c0b2f7Stbbdev for (long i = 0; i<MAXN; ++i)
36351c0b2f7Stbbdev sum[i] = -2;
36451c0b2f7Stbbdev ScanIsRunning = true;
36551c0b2f7Stbbdev T total = parallel_scan_wrapper(Range(0, MAXN, 1), 42, addend, sum, mode);
36651c0b2f7Stbbdev ScanIsRunning = false;
36751c0b2f7Stbbdev
36851c0b2f7Stbbdev CHECK_MESSAGE(control_total==total, "Parallel prefix sum is not equal to serial");
36951c0b2f7Stbbdev CHECK_MESSAGE(control_sum==sum, "Parallel prefix vector is not equal to serial");
37051c0b2f7Stbbdev }
37151c0b2f7Stbbdev
37251c0b2f7Stbbdev
37351c0b2f7Stbbdev #if __TBB_CPP14_GENERIC_LAMBDAS_PRESENT
37451c0b2f7Stbbdev struct ParallelScanGenericLambda {
37551c0b2f7Stbbdev template<typename T>
operator ()ParallelScanGenericLambda37651c0b2f7Stbbdev T operator()(Range range, T init, const std::vector<T> &addend, std::vector<T> &sum, int mode) {
37751c0b2f7Stbbdev for (long i = 0; i<MAXN; ++i) {
37851c0b2f7Stbbdev AddendHistory[i] = UNUSED;
37951c0b2f7Stbbdev }
38051c0b2f7Stbbdev ScanIsRunning = true;
38151c0b2f7Stbbdev Storage<T> res = ParallelScanFunctionalInvoker(range, Storage<T>(0),
382478de5b1Stbbdev [&addend, &sum, init](const auto& rng, auto storage, bool is_final_scan) {
383478de5b1Stbbdev return ScanWithInit(rng, init, is_final_scan, storage, sum, addend);
38451c0b2f7Stbbdev },
385478de5b1Stbbdev [](const auto& left, const auto& right) {
386478de5b1Stbbdev return JoinStorages(left, right);
38751c0b2f7Stbbdev },
38851c0b2f7Stbbdev mode);
38951c0b2f7Stbbdev ScanIsRunning = false;
39051c0b2f7Stbbdev if (range.empty())
39151c0b2f7Stbbdev res.my_total = init;
39251c0b2f7Stbbdev return res.my_total;
39351c0b2f7Stbbdev }
39451c0b2f7Stbbdev };
39551c0b2f7Stbbdev #endif /* __TBB_CPP14_GENERIC_LAMBDAS_PRESENT */
39651c0b2f7Stbbdev
397478de5b1Stbbdev #if __TBB_CPP20_CONCEPTS_PRESENT
398478de5b1Stbbdev template <typename... Args>
399478de5b1Stbbdev concept can_call_parallel_scan_basic = requires( Args&&... args ) {
400478de5b1Stbbdev tbb::parallel_scan(std::forward<Args>(args)...);
401478de5b1Stbbdev };
40251c0b2f7Stbbdev
403478de5b1Stbbdev template <typename Range, typename Body>
404478de5b1Stbbdev concept can_call_imperative_pscan = can_call_parallel_scan_basic<const Range&, Body&> &&
405478de5b1Stbbdev can_call_parallel_scan_basic<const Range&, Body&, const tbb::simple_partitioner&> &&
406478de5b1Stbbdev can_call_parallel_scan_basic<const Range&, Body&, const tbb::auto_partitioner&>;
407478de5b1Stbbdev
408478de5b1Stbbdev template <typename Range, typename Value, typename Func, typename Combine>
409478de5b1Stbbdev concept can_call_functional_pscan = can_call_parallel_scan_basic<const Range&, const Value&, const Func&, const Combine&> &&
410478de5b1Stbbdev can_call_parallel_scan_basic<const Range&, const Value&, const Func&, const Combine&, const tbb::simple_partitioner&> &&
411478de5b1Stbbdev can_call_parallel_scan_basic<const Range&, const Value&, const Func&, const Combine&, const tbb::auto_partitioner&>;
412478de5b1Stbbdev
413478de5b1Stbbdev using CorrectRange = test_concepts::range::Correct;
414478de5b1Stbbdev
415478de5b1Stbbdev template <typename Range>
416478de5b1Stbbdev using CorrectBody = test_concepts::parallel_scan_body::Correct<Range>;
417478de5b1Stbbdev
418478de5b1Stbbdev template <typename Range, typename T>
419478de5b1Stbbdev using CorrectFunc = test_concepts::parallel_scan_function::Correct<Range, T>;
420478de5b1Stbbdev
421478de5b1Stbbdev template <typename T>
422478de5b1Stbbdev using CorrectCombine = test_concepts::parallel_scan_combine::Correct<T>;
423478de5b1Stbbdev
test_pscan_range_constraints()424478de5b1Stbbdev void test_pscan_range_constraints() {
425478de5b1Stbbdev using namespace test_concepts::range;
426478de5b1Stbbdev static_assert(can_call_imperative_pscan<Correct, CorrectBody<Correct>>);
427478de5b1Stbbdev static_assert(!can_call_imperative_pscan<NonCopyable, CorrectBody<NonCopyable>>);
428478de5b1Stbbdev static_assert(!can_call_imperative_pscan<NonDestructible, CorrectBody<NonDestructible>>);
429478de5b1Stbbdev static_assert(!can_call_imperative_pscan<NonSplittable, CorrectBody<NonSplittable>>);
430478de5b1Stbbdev static_assert(!can_call_imperative_pscan<NoEmpty, CorrectBody<NoEmpty>>);
431478de5b1Stbbdev static_assert(!can_call_imperative_pscan<EmptyNonConst, CorrectBody<EmptyNonConst>>);
432478de5b1Stbbdev static_assert(!can_call_imperative_pscan<WrongReturnEmpty, CorrectBody<WrongReturnEmpty>>);
433478de5b1Stbbdev static_assert(!can_call_imperative_pscan<NoIsDivisible, CorrectBody<NoIsDivisible>>);
434478de5b1Stbbdev static_assert(!can_call_imperative_pscan<IsDivisibleNonConst, CorrectBody<IsDivisibleNonConst>>);
435478de5b1Stbbdev static_assert(!can_call_imperative_pscan<WrongReturnIsDivisible, CorrectBody<WrongReturnIsDivisible>>);
436478de5b1Stbbdev
437478de5b1Stbbdev static_assert(can_call_functional_pscan<Correct, int, CorrectFunc<Correct, int>, CorrectCombine<int>>);
438478de5b1Stbbdev static_assert(!can_call_functional_pscan<NonCopyable, int, CorrectFunc<NonCopyable, int>, CorrectCombine<int>>);
439478de5b1Stbbdev static_assert(!can_call_functional_pscan<NonDestructible, int, CorrectFunc<NonDestructible, int>, CorrectCombine<int>>);
440478de5b1Stbbdev static_assert(!can_call_functional_pscan<NonSplittable, int, CorrectFunc<NonSplittable, int>, CorrectCombine<int>>);
441478de5b1Stbbdev static_assert(!can_call_functional_pscan<NoEmpty, int, CorrectFunc<NoEmpty, int>, CorrectCombine<int>>);
442478de5b1Stbbdev static_assert(!can_call_functional_pscan<EmptyNonConst, int, CorrectFunc<EmptyNonConst, int>, CorrectCombine<int>>);
443478de5b1Stbbdev static_assert(!can_call_functional_pscan<WrongReturnEmpty, int, CorrectFunc<WrongReturnEmpty, int>, CorrectCombine<int>>);
444478de5b1Stbbdev static_assert(!can_call_functional_pscan<NoIsDivisible, int, CorrectFunc<NoIsDivisible, int>, CorrectCombine<int>>);
445478de5b1Stbbdev static_assert(!can_call_functional_pscan<IsDivisibleNonConst, int, CorrectFunc<IsDivisibleNonConst, int>, CorrectCombine<int>>);
446478de5b1Stbbdev static_assert(!can_call_functional_pscan<WrongReturnIsDivisible, int, CorrectFunc<WrongReturnIsDivisible, int>, CorrectCombine<int>>);
447478de5b1Stbbdev }
448478de5b1Stbbdev
test_pscan_body_constraints()449478de5b1Stbbdev void test_pscan_body_constraints() {
450478de5b1Stbbdev using namespace test_concepts::parallel_scan_body;
451478de5b1Stbbdev static_assert(can_call_imperative_pscan<CorrectRange, Correct<CorrectRange>>);
452478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, NonSplittable<CorrectRange>>);
453478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, NoPreScanOperatorRoundBrackets<CorrectRange>>);
454478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, WrongFirstInputPreScanOperatorRoundBrackets<CorrectRange>>);
455478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, WrongSecondInputPreScanOperatorRoundBrackets<CorrectRange>>);
456478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, NoFinalScanOperatorRoundBrackets<CorrectRange>>);
457478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, WrongFirstInputFinalScanOperatorRoundBrackets<CorrectRange>>);
458478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, WrongSecondInputFinalScanOperatorRoundBrackets<CorrectRange>>);
459478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, NoReverseJoin<CorrectRange>>);
460478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, WrongInputReverseJoin<CorrectRange>>);
461478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, NoAssign<CorrectRange>>);
462478de5b1Stbbdev static_assert(!can_call_imperative_pscan<CorrectRange, WrongInputAssign<CorrectRange>>);
463478de5b1Stbbdev }
464478de5b1Stbbdev
test_pscan_func_constraints()465478de5b1Stbbdev void test_pscan_func_constraints() {
466478de5b1Stbbdev using namespace test_concepts::parallel_scan_function;
467478de5b1Stbbdev static_assert(can_call_functional_pscan<CorrectRange, int, Correct<CorrectRange, int>, CorrectCombine<int>>);
468478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, NoOperatorRoundBrackets<CorrectRange, int>, CorrectCombine<int>>);
469478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, OperatorRoundBracketsNonConst<CorrectRange, int>, CorrectCombine<int>>);
470478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, WrongFirstInputOperatorRoundBrackets<CorrectRange, int>, CorrectCombine<int>>);
471478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, WrongSecondInputOperatorRoundBrackets<CorrectRange, int>, CorrectCombine<int>>);
472478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, WrongReturnOperatorRoundBrackets<CorrectRange, int>, CorrectCombine<int>>);
473478de5b1Stbbdev }
474478de5b1Stbbdev
test_pscan_combine_constraints()475478de5b1Stbbdev void test_pscan_combine_constraints() {
476478de5b1Stbbdev using namespace test_concepts::parallel_scan_combine;
477478de5b1Stbbdev static_assert(can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, Correct<int>>);
478478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, NoOperatorRoundBrackets<int>>);
479478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, OperatorRoundBracketsNonConst<int>>);
480478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, WrongFirstInputOperatorRoundBrackets<int>>);
481478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, WrongSecondInputOperatorRoundBrackets<int>>);
482478de5b1Stbbdev static_assert(!can_call_functional_pscan<CorrectRange, int, CorrectFunc<CorrectRange, int>, WrongReturnOperatorRoundBrackets<int>>);
483478de5b1Stbbdev }
484478de5b1Stbbdev
485478de5b1Stbbdev #endif // __TBB_CPP20_CONCEPTS_PRESENT
48651c0b2f7Stbbdev
48751c0b2f7Stbbdev // Test for parallel_scan with with different partitioners
48851c0b2f7Stbbdev //! \brief \ref error_guessing \ref resource_usage
48951c0b2f7Stbbdev TEST_CASE("parallel_scan testing with different partitioners") {
49051c0b2f7Stbbdev for (auto concurrency_level : utils::concurrency_range()) {
49151c0b2f7Stbbdev tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level);
49251c0b2f7Stbbdev for (int mode = 0; mode < 3; mode++) {
49351c0b2f7Stbbdev NumberOfLiveStorage = 0;
49451c0b2f7Stbbdev TestAccumulator(mode);
49551c0b2f7Stbbdev // Test that all workers sleep when no work
49651c0b2f7Stbbdev TestCPUUserTime(concurrency_level);
49751c0b2f7Stbbdev
49851c0b2f7Stbbdev // Checking has to be done late, because when parallel_scan makes copies of
49951c0b2f7Stbbdev // the user's "Body", the copies might be destroyed slightly after parallel_scan
50051c0b2f7Stbbdev // returns.
50151c0b2f7Stbbdev CHECK(NumberOfLiveStorage == 0);
50251c0b2f7Stbbdev }
50351c0b2f7Stbbdev }
50451c0b2f7Stbbdev }
50551c0b2f7Stbbdev
50651c0b2f7Stbbdev // Test for parallel_scan with template functors
50751c0b2f7Stbbdev //! \brief \ref error_guessing \ref interface \ref resource_usage
50851c0b2f7Stbbdev TEST_CASE("parallel_scan testing with template functor") {
50951c0b2f7Stbbdev for (auto concurrency_level : utils::concurrency_range()) {
51051c0b2f7Stbbdev tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level);
51151c0b2f7Stbbdev for (int mode = 0; mode < 3; mode++) {
51251c0b2f7Stbbdev NumberOfLiveStorage = 0;
51351c0b2f7Stbbdev TestInterface(mode, ParallelScanTemplateFunctor());
51451c0b2f7Stbbdev // Test that all workers sleep when no work
51551c0b2f7Stbbdev TestCPUUserTime(concurrency_level);
51651c0b2f7Stbbdev
51751c0b2f7Stbbdev // Checking has to be done late, because when parallel_scan makes copies of
51851c0b2f7Stbbdev // the user's "Body", the copies might be destroyed slightly after parallel_scan
51951c0b2f7Stbbdev // returns.
52051c0b2f7Stbbdev CHECK(NumberOfLiveStorage == 0);
52151c0b2f7Stbbdev }
52251c0b2f7Stbbdev }
52351c0b2f7Stbbdev }
52451c0b2f7Stbbdev
52551c0b2f7Stbbdev // Test for parallel_scan with lambdas
52651c0b2f7Stbbdev //! \brief \ref error_guessing \ref interface \ref resource_usage
52751c0b2f7Stbbdev TEST_CASE("parallel_scan testing with lambdas") {
52851c0b2f7Stbbdev for (auto concurrency_level : utils::concurrency_range()) {
52951c0b2f7Stbbdev tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level);
53051c0b2f7Stbbdev for (int mode = 0; mode < 3; mode++) {
53151c0b2f7Stbbdev NumberOfLiveStorage = 0;
53251c0b2f7Stbbdev TestInterface(mode, ParallelScanLambda());
53351c0b2f7Stbbdev
53451c0b2f7Stbbdev // Test that all workers sleep when no work
53551c0b2f7Stbbdev TestCPUUserTime(concurrency_level);
53651c0b2f7Stbbdev
53751c0b2f7Stbbdev // Checking has to be done late, because when parallel_scan makes copies of
53851c0b2f7Stbbdev // the user's "Body", the copies might be destroyed slightly after parallel_scan
53951c0b2f7Stbbdev // returns.
54051c0b2f7Stbbdev CHECK(NumberOfLiveStorage == 0);
54151c0b2f7Stbbdev }
54251c0b2f7Stbbdev }
54351c0b2f7Stbbdev }
54451c0b2f7Stbbdev
54551c0b2f7Stbbdev #if __TBB_CPP14_GENERIC_LAMBDAS_PRESENT
54651c0b2f7Stbbdev // Test for parallel_scan with genetic lambdas
54751c0b2f7Stbbdev //! \brief \ref error_guessing \ref interface \ref resource_usage
54851c0b2f7Stbbdev TEST_CASE("parallel_scan testing with generic lambdas") {
54951c0b2f7Stbbdev for (auto concurrency_level : utils::concurrency_range()) {
55051c0b2f7Stbbdev tbb::global_control control(tbb::global_control::max_allowed_parallelism, concurrency_level);
55151c0b2f7Stbbdev for (int mode = 0; mode < 3; mode++) {
55251c0b2f7Stbbdev NumberOfLiveStorage = 0;
55351c0b2f7Stbbdev TestInterface(mode, ParallelScanGenericLambda());
55451c0b2f7Stbbdev // Test that all workers sleep when no work
55551c0b2f7Stbbdev TestCPUUserTime(concurrency_level);
55651c0b2f7Stbbdev
55751c0b2f7Stbbdev // Checking has to be done late, because when parallel_scan makes copies of
55851c0b2f7Stbbdev // the user's "Body", the copies might be destroyed slightly after parallel_scan
55951c0b2f7Stbbdev // returns.
56051c0b2f7Stbbdev CHECK(NumberOfLiveStorage == 0);
56151c0b2f7Stbbdev }
56251c0b2f7Stbbdev }
56351c0b2f7Stbbdev }
56451c0b2f7Stbbdev #endif /* __TBB_CPP14_GENERIC_LAMBDAS_PRESENT */
565478de5b1Stbbdev
566478de5b1Stbbdev #if __TBB_CPP20_CONCEPTS_PRESENT
567478de5b1Stbbdev //! \brief \ref error_guessing
568478de5b1Stbbdev TEST_CASE("parallel_scan constraints") {
569478de5b1Stbbdev test_pscan_range_constraints();
570478de5b1Stbbdev test_pscan_body_constraints();
571478de5b1Stbbdev test_pscan_func_constraints();
572478de5b1Stbbdev test_pscan_combine_constraints();
573478de5b1Stbbdev }
574478de5b1Stbbdev #endif // __TBB_CPP20_CONCEPTS_PRESENT
575