1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #define DOCTEST_CONFIG_SUPER_FAST_ASSERTS
18 #include "common/test.h"
19 #include "common/utils.h"
20 #include "common/utils_report.h"
21 
22 #include "oneapi/tbb/parallel_for.h"
23 #include "oneapi/tbb/tick_count.h"
24 
25 #include "../tbb/test_partitioner.h"
26 
27 #include <atomic>
28 
29 //! \file conformance_parallel_for.cpp
30 //! \brief Test for [algorithms.parallel_for algorithms.auto_partitioner algorithms.simple_partitioner algorithms.static_partitioner algorithms.affinity_partitioner] specification
31 
32 static const int N = 500;
33 static std::atomic<int> Array[N];
34 
35 struct parallel_tag {};
36 struct empty_partitioner_tag {};
37 
38 // Testing parallel_for with step support
39 const std::size_t PFOR_BUFFER_TEST_SIZE = 1024;
40 // test_buffer has some extra items beyond its right bound
41 const std::size_t PFOR_BUFFER_ACTUAL_SIZE = PFOR_BUFFER_TEST_SIZE + 1024;
42 size_t pfor_buffer[PFOR_BUFFER_ACTUAL_SIZE];
43 
44 template<typename T>
45 class TestFunctor{
46 public:
47     void operator ()(T index) const {
48         pfor_buffer[index]++;
49     }
50 };
51 
52 static std::atomic<int> FooBodyCount;
53 
54 // A range object whose only public members are those required by the Range concept.
55 template<size_t Pad>
56 class FooRange {
57     // Start of range
58     int start;
59 
60     // Size of range
61     int size;
62     FooRange( int start_, int size_ ) : start(start_), size(size_) {
63         utils::zero_fill<char>(pad, Pad);
64         pad[Pad-1] = 'x';
65     }
66     template<typename Flavor_, std::size_t Pad_> friend void Flog( );
67     template<size_t Pad_> friend class FooBody;
68     void operator&();
69 
70     char pad[Pad];
71 public:
72     bool empty() const {return size==0;}
73     bool is_divisible() const {return size>1;}
74     FooRange( FooRange& original, oneapi::tbb::split ) : size(original.size/2) {
75         original.size -= size;
76         start = original.start+original.size;
77         CHECK_FAST( original.pad[Pad-1]=='x');
78         pad[Pad-1] = 'x';
79     }
80 };
81 
82 // A range object whose only public members are those required by the parallel_for.h body concept.
83 template<size_t Pad>
84 class FooBody {
85 public:
86     ~FooBody() {
87         --FooBodyCount;
88         for( std::size_t i=0; i<sizeof(*this); ++i )
89             reinterpret_cast<char*>(this)[i] = -1;
90     }
91     // Copy constructor
92     FooBody( const FooBody& other ) : array(other.array), state(other.state) {
93         ++FooBodyCount;
94         CHECK_FAST(state == LIVE);
95     }
96     void operator()( FooRange<Pad>& r ) const {
97         for (int k = r.start; k < r.start + r.size; ++k) {
98             CHECK_FAST(array[k].load(std::memory_order_relaxed) == 0);
99             array[k].store(1, std::memory_order_relaxed);
100         }
101     }
102 private:
103     const int LIVE = 0x1234;
104     std::atomic<int>* array;
105     int state;
106     friend class FooRange<Pad>;
107     template<typename Flavor_, std::size_t Pad_> friend void Flog( );
108     FooBody( std::atomic<int>* array_ ) : array(array_), state(LIVE) {}
109 };
110 
111 template <typename Flavor, typename Partitioner, typename Range, typename Body>
112 struct Invoker;
113 
114 template <typename Range, typename Body>
115 struct Invoker<parallel_tag, empty_partitioner_tag, Range, Body> {
116     void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) {
117         oneapi::tbb::parallel_for( r, body );
118     }
119 };
120 
121 template <typename Partitioner, typename Range, typename Body>
122 struct Invoker<parallel_tag, Partitioner, Range, Body> {
123     void operator()( const Range& r, const Body& body, Partitioner& p ) {
124         oneapi::tbb::parallel_for( r, body, p );
125     }
126 };
127 
128 template <typename Flavor, typename Partitioner, typename T, typename Body>
129 struct InvokerStep;
130 
131 template <typename T, typename Body>
132 struct InvokerStep<parallel_tag, empty_partitioner_tag, T, Body> {
133     void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) {
134         oneapi::tbb::parallel_for( first, last, f );
135     }
136     void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) {
137         oneapi::tbb::parallel_for( first, last, step, f );
138     }
139 };
140 
141 template <typename Partitioner, typename T, typename Body>
142 struct InvokerStep<parallel_tag, Partitioner, T, Body> {
143     void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) {
144         oneapi::tbb::parallel_for( first, last, f, p );
145     }
146     void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) {
147         oneapi::tbb::parallel_for( first, last, step, f, p );
148     }
149 };
150 
151 template<typename Flavor, std::size_t Pad>
152 void Flog() {
153     for ( int i=0; i<N; ++i ) {
154         for ( int mode = 0; mode < 4; ++mode) {
155             FooRange<Pad> r( 0, i );
156             const FooRange<Pad> rc = r;
157             FooBody<Pad> f( Array );
158             const FooBody<Pad> fc = f;
159             for (int a_i = 0; a_i < N; a_i++) {
160                 Array[a_i].store(0, std::memory_order_relaxed);
161             }
162             FooBodyCount = 1;
163             switch (mode) {
164             case 0: {
165                 empty_partitioner_tag p;
166                 Invoker< Flavor, empty_partitioner_tag, FooRange<Pad>, FooBody<Pad> > invoke_for;
167                 invoke_for( rc, fc, p );
168             }
169                 break;
170             case 1: {
171                 Invoker< Flavor, const oneapi::tbb::simple_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
172                 invoke_for( rc, fc, oneapi::tbb::simple_partitioner() );
173             }
174                 break;
175             case 2: {
176                 Invoker< Flavor, const oneapi::tbb::auto_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
177                 invoke_for( rc, fc, oneapi::tbb::auto_partitioner() );
178             }
179                 break;
180             case 3: {
181                 static oneapi::tbb::affinity_partitioner affinity;
182                 Invoker< Flavor, oneapi::tbb::affinity_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
183                 invoke_for( rc, fc, affinity );
184             }
185                 break;
186             }
187             CHECK(std::find_if_not(Array, Array + i, [](const std::atomic<int>& v) { return v.load(std::memory_order_relaxed) == 1; }) == Array + i);
188             CHECK(std::find_if_not(Array + i, Array + N, [](const std::atomic<int>& v) { return v.load(std::memory_order_relaxed) == 0; }) == Array + N);
189             CHECK(FooBodyCount == 1);
190         }
191     }
192 }
193 
194 #include <stdexcept> // std::invalid_argument
195 
196 template <typename Flavor, typename T, typename Partitioner>
197 void TestParallelForWithStepSupportHelper(Partitioner& p) {
198     const T pfor_buffer_test_size = static_cast<T>(PFOR_BUFFER_TEST_SIZE);
199     const T pfor_buffer_actual_size = static_cast<T>(PFOR_BUFFER_ACTUAL_SIZE);
200     // Testing parallel_for with different step values
201     InvokerStep< Flavor, Partitioner, T, TestFunctor<T> > invoke_for;
202     for (T begin = 0; begin < pfor_buffer_test_size - 1; begin += pfor_buffer_test_size / 10 + 1) {
203         T step;
204         for (step = 1; step < pfor_buffer_test_size; step++) {
205             std::memset(pfor_buffer, 0, pfor_buffer_actual_size * sizeof(std::size_t));
206             if (step == 1){
207                 invoke_for(begin, pfor_buffer_test_size, TestFunctor<T>(), p);
208             } else {
209                 invoke_for(begin, pfor_buffer_test_size, step, TestFunctor<T>(), p);
210             }
211             // Verifying that parallel_for processed all items it should
212             for (T i = begin; i < pfor_buffer_test_size; i = i + step) {
213                 if (pfor_buffer[i] != 1) {
214                     CHECK_MESSAGE(false, "parallel_for didn't process all required elements");
215                 }
216                 pfor_buffer[i] = 0;
217             }
218             // Verifying that no extra items were processed and right bound of array wasn't crossed
219             for (T i = 0; i < pfor_buffer_actual_size; i++) {
220                 if (pfor_buffer[i] != 0) {
221                     CHECK_MESSAGE(false, "parallel_for processed an extra element");
222                 }
223             }
224         }
225     }
226 }
227 
228 template <typename Flavor, typename T>
229 void TestParallelForWithStepSupport() {
230     static oneapi::tbb::affinity_partitioner affinity_p;
231     oneapi::tbb::auto_partitioner auto_p;
232     oneapi::tbb::simple_partitioner simple_p;
233     oneapi::tbb::static_partitioner static_p;
234     empty_partitioner_tag p;
235 
236     // Try out all partitioner combinations
237     TestParallelForWithStepSupportHelper< Flavor,T,empty_partitioner_tag >(p);
238     TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::auto_partitioner >(auto_p);
239     TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::simple_partitioner >(simple_p);
240     TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::affinity_partitioner >(affinity_p);
241     TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::static_partitioner >(static_p);
242 
243     // Testing some corner cases
244     oneapi::tbb::parallel_for(static_cast<T>(2), static_cast<T>(1), static_cast<T>(1), TestFunctor<T>());
245 }
246 
247 //! Test simple parallel_for with different partitioners
248 //! \brief \ref interface \ref requirement
249 TEST_CASE("Basic parallel_for") {
250     std::atomic<unsigned long> counter{};
251     const std::size_t number_of_partitioners = 5;
252     const std::size_t iterations = 100000;
253 
254     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
255         counter++;
256     });
257 
258     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
259         counter++;
260     }, oneapi::tbb::simple_partitioner());
261 
262     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
263         counter++;
264     }, oneapi::tbb::auto_partitioner());
265 
266     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
267         counter++;
268     }, oneapi::tbb::static_partitioner());
269 
270     oneapi::tbb::affinity_partitioner aff;
271     oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
272         counter++;
273     }, aff);
274 
275     CHECK_EQ(counter.load(std::memory_order_relaxed), iterations * number_of_partitioners);
276 }
277 
278 //! Testing parallel for with different partitioners and ranges ranges
279 //! \brief \ref interface \ref requirement \ref stress
280 TEST_CASE("Flog test") {
281     Flog<parallel_tag, 1>();
282     Flog<parallel_tag, 10>();
283     Flog<parallel_tag, 100>();
284     Flog<parallel_tag, 1000>();
285     Flog<parallel_tag, 10000>();
286 }
287 
288 //! Testing parallel for with different types and step
289 //! \brief \ref interface \ref requirement
290 TEST_CASE_TEMPLATE("parallel_for with step support", T, short, unsigned short, int, unsigned int,
291                                     long, unsigned long, long long, unsigned long long, std::size_t) {
292     // Testing with different integer types
293     TestParallelForWithStepSupport<parallel_tag, T>();
294 }
295 
296 //! Testing with different types of ranges and partitioners
297 //! \brief \ref interface \ref requirement
298 TEST_CASE("Testing parallel_for with partitioners") {
299     using namespace test_partitioner_utils::interaction_with_range_and_partitioner;
300 
301     test_partitioner_utils::SimpleBody b;
302     oneapi::tbb::affinity_partitioner ap;
303 
304     parallel_for(Range1(true, false), b, ap);
305     parallel_for(Range6(false, true), b, ap);
306 
307     parallel_for(Range1(false, true), b, oneapi::tbb::simple_partitioner());
308     parallel_for(Range6(false, true), b, oneapi::tbb::simple_partitioner());
309 
310     parallel_for(Range1(false, true), b, oneapi::tbb::auto_partitioner());
311     parallel_for(Range6(false, true), b, oneapi::tbb::auto_partitioner());
312 
313     parallel_for(Range1(true, false), b, oneapi::tbb::static_partitioner());
314     parallel_for(Range6(false, true), b, oneapi::tbb::static_partitioner());
315 }
316