1 /*
2     Copyright (c) 2005-2020 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #define DOCTEST_CONFIG_SUPER_FAST_ASSERTS
18 #include "common/test.h"
19 #include "common/utils.h"
20 #include "common/utils_report.h"
21 
22 #include "oneapi/tbb/parallel_for.h"
23 #include "oneapi/tbb/tick_count.h"
24 
25 #include "../tbb/test_partitioner.h"
26 
#include <atomic>
#include <cstring>
28 
29 //! \file conformance_parallel_for.cpp
30 //! \brief Test for [algorithms.parallel_for algorithms.auto_partitioner algorithms.simple_partitioner algorithms.static_partitioner algorithms.affinity_partitioner] specification
31 
// Number of elements in Array; Flog tries every range size in [0, N).
static const int N = 500;
// Per-index counters: Flog zeroes them before each parallel_for call and FooBody
// increments one entry per processed index.
static std::atomic<int> Array[N];

// Flavor tag selecting the Invoker/InvokerStep specializations that call oneapi::tbb::parallel_for.
struct parallel_tag {};
// Partitioner tag selecting the parallel_for overloads that take no partitioner argument.
struct empty_partitioner_tag {};
37 
// Support data for the parallel_for tests that take an explicit step.
// Number of leading buffer elements that the step tests iterate over.
const std::size_t PFOR_BUFFER_TEST_SIZE = 1024;
// pfor_buffer has some extra items beyond the tested right bound, so that writes
// past PFOR_BUFFER_TEST_SIZE land inside the array and can be detected.
const std::size_t PFOR_BUFFER_ACTUAL_SIZE = PFOR_BUFFER_TEST_SIZE + 1024;
// Per-index hit counters incremented by TestFunctor.
std::size_t pfor_buffer[PFOR_BUFFER_ACTUAL_SIZE];

// Body for the index-based parallel_for overloads: counts one visit of the given index.
// The increment is not atomic; the tests rely on each index being visited by one call only.
template<typename T>
class TestFunctor {
public:
    // Adds one to the pfor_buffer slot selected by position.
    void operator ()(T position) const {
        ++pfor_buffer[position];
    }
};
51 
// Balance of FooBody objects: incremented by FooBody's copy constructor and decremented by
// its destructor. Flog sets it to 1 before each parallel_for call and expects 1 afterwards.
static std::atomic<int> FooBodyCount;
53 
54 // A range object whose only public members are those required by the Range concept.
55 template<size_t Pad>
56 class FooRange {
57     // Start of range
58     int start;
59 
60     // Size of range
61     int size;
62     FooRange( int start_, int size_ ) : start(start_), size(size_) {
63         utils::zero_fill<char>(pad, Pad);
64         pad[Pad-1] = 'x';
65     }
66     template<typename Flavor_, std::size_t Pad_> friend void Flog( );
67     template<size_t Pad_> friend class FooBody;
68     void operator&();
69 
70     char pad[Pad];
71 public:
72     bool empty() const {return size==0;}
73     bool is_divisible() const {return size>1;}
74     FooRange( FooRange& original, oneapi::tbb::split ) : size(original.size/2) {
75         original.size -= size;
76         start = original.start+original.size;
77         CHECK( original.pad[Pad-1]=='x');
78         pad[Pad-1] = 'x';
79     }
80 };
81 
82 // A range object whose only public members are those required by the parallel_for.h body concept.
83 template<size_t Pad>
84 class FooBody {
85 public:
86     ~FooBody() {
87         --FooBodyCount;
88         for( std::size_t i=0; i<sizeof(*this); ++i )
89             reinterpret_cast<char*>(this)[i] = -1;
90     }
91     // Copy constructor
92     FooBody( const FooBody& other ) : array(other.array), state(other.state) {
93         ++FooBodyCount;
94         CHECK(state == LIVE);
95     }
96     void operator()( FooRange<Pad>& r ) const {
97         for( int k=0; k<r.size; ++k ) {
98             const int i = array[r.start+k]++;
99             CHECK( i==0 );
100         }
101     }
102 private:
103     const int LIVE = 0x1234;
104     std::atomic<int>* array;
105     int state;
106     friend class FooRange<Pad>;
107     template<typename Flavor_, std::size_t Pad_> friend void Flog( );
108     FooBody( std::atomic<int>* array_ ) : array(array_), state(LIVE) {}
109 };
110 
111 template <typename Flavor, typename Partitioner, typename Range, typename Body>
112 struct Invoker;
113 
114 template <typename Range, typename Body>
115 struct Invoker<parallel_tag, empty_partitioner_tag, Range, Body> {
116     void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) {
117         oneapi::tbb::parallel_for( r, body );
118     }
119 };
120 
121 template <typename Partitioner, typename Range, typename Body>
122 struct Invoker<parallel_tag, Partitioner, Range, Body> {
123     void operator()( const Range& r, const Body& body, Partitioner& p ) {
124         oneapi::tbb::parallel_for( r, body, p );
125     }
126 };
127 
128 template <typename Flavor, typename Partitioner, typename T, typename Body>
129 struct InvokerStep;
130 
131 template <typename T, typename Body>
132 struct InvokerStep<parallel_tag, empty_partitioner_tag, T, Body> {
133     void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) {
134         oneapi::tbb::parallel_for( first, last, f );
135     }
136     void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) {
137         oneapi::tbb::parallel_for( first, last, step, f );
138     }
139 };
140 
141 template <typename Partitioner, typename T, typename Body>
142 struct InvokerStep<parallel_tag, Partitioner, T, Body> {
143     void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) {
144         oneapi::tbb::parallel_for( first, last, f, p );
145     }
146     void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) {
147         oneapi::tbb::parallel_for( first, last, step, f, p );
148     }
149 };
150 
151 template<typename Flavor, std::size_t Pad>
152 void Flog() {
153     for ( int i=0; i<N; ++i ) {
154         for ( int mode = 0; mode < 4; ++mode) {
155             FooRange<Pad> r( 0, i );
156             const FooRange<Pad> rc = r;
157             FooBody<Pad> f( Array );
158             const FooBody<Pad> fc = f;
159             for (int a_i = 0; a_i < N; a_i++) {
160                 Array[a_i].store(0, std::memory_order_relaxed);
161             }
162             FooBodyCount = 1;
163             switch (mode) {
164             case 0: {
165                 empty_partitioner_tag p;
166                 Invoker< Flavor, empty_partitioner_tag, FooRange<Pad>, FooBody<Pad> > invoke_for;
167                 invoke_for( rc, fc, p );
168             }
169                 break;
170             case 1: {
171                 Invoker< Flavor, const oneapi::tbb::simple_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
172                 invoke_for( rc, fc, oneapi::tbb::simple_partitioner() );
173             }
174                 break;
175             case 2: {
176                 Invoker< Flavor, const oneapi::tbb::auto_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
177                 invoke_for( rc, fc, oneapi::tbb::auto_partitioner() );
178             }
179                 break;
180             case 3: {
181                 static oneapi::tbb::affinity_partitioner affinity;
182                 Invoker< Flavor, oneapi::tbb::affinity_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
183                 invoke_for( rc, fc, affinity );
184             }
185                 break;
186             }
187             for( int j=0; j<i; ++j )
188                 CHECK( Array[j]==1);
189             for( int j=i; j<N; ++j )
190                 CHECK( Array[j]==0);
191             CHECK( FooBodyCount==1);
192         }
193     }
194 }
195 
196 #include <stdexcept> // std::invalid_argument
197 
198 template <typename Flavor, typename T, typename Partitioner>
199 void TestParallelForWithStepSupportHelper(Partitioner& p) {
200     const T pfor_buffer_test_size = static_cast<T>(PFOR_BUFFER_TEST_SIZE);
201     const T pfor_buffer_actual_size = static_cast<T>(PFOR_BUFFER_ACTUAL_SIZE);
202     // Testing parallel_for with different step values
203     InvokerStep< Flavor, Partitioner, T, TestFunctor<T> > invoke_for;
204     for (T begin = 0; begin < pfor_buffer_test_size - 1; begin += pfor_buffer_test_size / 10 + 1) {
205         T step;
206         for (step = 1; step < pfor_buffer_test_size; step++) {
207             std::memset(pfor_buffer, 0, pfor_buffer_actual_size * sizeof(std::size_t));
208             if (step == 1){
209                 invoke_for(begin, pfor_buffer_test_size, TestFunctor<T>(), p);
210             } else {
211                 invoke_for(begin, pfor_buffer_test_size, step, TestFunctor<T>(), p);
212             }
213             // Verifying that parallel_for processed all items it should
214             for (T i = begin; i < pfor_buffer_test_size; i = i + step) {
215                 if (pfor_buffer[i] != 1) {
216                     CHECK_MESSAGE(false, "parallel_for didn't process all required elements");
217                 }
218                 pfor_buffer[i] = 0;
219             }
220             // Verifying that no extra items were processed and right bound of array wasn't crossed
221             for (T i = 0; i < pfor_buffer_actual_size; i++) {
222                 if (pfor_buffer[i] != 0) {
223                     CHECK_MESSAGE(false, "parallel_for processed an extra element");
224                 }
225             }
226         }
227     }
228 }
229 
230 template <typename Flavor, typename T>
231 void TestParallelForWithStepSupport() {
232     static oneapi::tbb::affinity_partitioner affinity_p;
233     oneapi::tbb::auto_partitioner auto_p;
234     oneapi::tbb::simple_partitioner simple_p;
235     oneapi::tbb::static_partitioner static_p;
236     empty_partitioner_tag p;
237 
238     // Try out all partitioner combinations
239     TestParallelForWithStepSupportHelper< Flavor,T,empty_partitioner_tag >(p);
240     TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::auto_partitioner >(auto_p);
241     TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::simple_partitioner >(simple_p);
242     TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::affinity_partitioner >(affinity_p);
243     TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::static_partitioner >(static_p);
244 
245     // Testing some corner cases
246     oneapi::tbb::parallel_for(static_cast<T>(2), static_cast<T>(1), static_cast<T>(1), TestFunctor<T>());
247 }
248 
249 //! Test simple parallel_for with different partitioners
250 //! \brief \ref interface \ref requirement
251 TEST_CASE("Basic parallel_for") {
252     std::atomic<unsigned long> counter{};
253     const std::size_t number_of_partitioners = 5;
254     const std::size_t iterations = 100000;
255 
256     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
257         counter++;
258     });
259 
260     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
261         counter++;
262     }, oneapi::tbb::simple_partitioner());
263 
264     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
265         counter++;
266     }, oneapi::tbb::auto_partitioner());
267 
268     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
269         counter++;
270     }, oneapi::tbb::static_partitioner());
271 
272     oneapi::tbb::affinity_partitioner aff;
273     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
274         counter++;
275     }, aff);
276 
277     CHECK_EQ(counter.load(std::memory_order_relaxed), iterations * number_of_partitioners);
278 }
279 
//! Testing parallel for with different partitioners and ranges
281 //! \brief \ref interface \ref requirement \ref stress
TEST_CASE("Flog test") {
    // The second template argument is Pad, the size in bytes of the filler array inside
    // each FooRange; Flog is run for several sizes from 1 up to 10000.
    Flog<parallel_tag, 1>();
    Flog<parallel_tag, 10>();
    Flog<parallel_tag, 100>();
    Flog<parallel_tag, 1000>();
    Flog<parallel_tag, 10000>();
}
289 
290 //! Testing parallel for with different types and step
291 //! \brief \ref interface \ref requirement
TEST_CASE_TEMPLATE("parallel_for with step support", T, short, unsigned short, int, unsigned int,
                                    long, unsigned long, long long, unsigned long long, std::size_t) {
    // Testing with different integer types
    // T is the index type passed to the index-based parallel_for overloads; each listed
    // type runs the full set of step and partitioner combinations.
    TestParallelForWithStepSupport<parallel_tag, T>();
}
297 
298 //! Testing with different types of ranges and partitioners
299 //! \brief \ref interface \ref requirement
TEST_CASE("Testing parallel_for with partitioners") {
    // Range1 and Range6 come from this namespace (declared in test_partitioner.h).
    using namespace test_partitioner_utils::interaction_with_range_and_partitioner;

    test_partitioner_utils::SimpleBody b;
    // affinity_partitioner is passed as a named object; the other partitioners are temporaries.
    oneapi::tbb::affinity_partitioner ap;

    // NOTE(review): the two bool constructor arguments of Range1/Range6 are defined in
    // test_partitioner.h; they presumably select which splitting behavior the range
    // expects from the partitioner - confirm there before changing any of them.
    parallel_for(Range1(true, false), b, ap);
    parallel_for(Range6(false, true), b, ap);

    parallel_for(Range1(false, true), b, oneapi::tbb::simple_partitioner());
    parallel_for(Range6(false, true), b, oneapi::tbb::simple_partitioner());

    parallel_for(Range1(false, true), b, oneapi::tbb::auto_partitioner());
    parallel_for(Range6(false, true), b, oneapi::tbb::auto_partitioner());

    parallel_for(Range1(true, false), b, oneapi::tbb::static_partitioner());
    parallel_for(Range6(false, true), b, oneapi::tbb::static_partitioner());
}
318