1 /*
2 Copyright (c) 2005-2023 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 #define DOCTEST_CONFIG_SUPER_FAST_ASSERTS
18 #include "common/test.h"
19 #include "common/utils.h"
20 #include "common/utils_report.h"
21 #include "common/test_invoke.h"
22
23 #include "oneapi/tbb/parallel_for.h"
24 #include "oneapi/tbb/tick_count.h"
25
26 #include "../tbb/test_partitioner.h"
27
28 #include <atomic>
29
30 //! \file conformance_parallel_for.cpp
31 //! \brief Test for [algorithms.parallel_for algorithms.auto_partitioner algorithms.simple_partitioner algorithms.static_partitioner algorithms.affinity_partitioner] specification
32
33 static const int N = 500;
34 static std::atomic<int> Array[N];
35
36 struct parallel_tag {};
37 struct empty_partitioner_tag {};
38
39 // Testing parallel_for with step support
40 const std::size_t PFOR_BUFFER_TEST_SIZE = 1024;
41 // test_buffer has some extra items beyond its right bound
42 const std::size_t PFOR_BUFFER_ACTUAL_SIZE = PFOR_BUFFER_TEST_SIZE + 1024;
43 size_t pfor_buffer[PFOR_BUFFER_ACTUAL_SIZE];
44
45 template<typename T>
46 class TestFunctor{
47 public:
operator ()(T index) const48 void operator ()(T index) const {
49 pfor_buffer[index]++;
50 }
51 };
52
53 static std::atomic<int> FooBodyCount;
54
55 // A range object whose only public members are those required by the Range concept.
56 template<size_t Pad>
57 class FooRange {
58 // Start of range
59 int start;
60
61 // Size of range
62 int size;
FooRange(int start_,int size_)63 FooRange( int start_, int size_ ) : start(start_), size(size_) {
64 utils::zero_fill<char>(pad, Pad);
65 pad[Pad-1] = 'x';
66 }
67 template<typename Flavor_, std::size_t Pad_> friend void Flog( );
68 template<size_t Pad_> friend class FooBody;
69 void operator&();
70
71 char pad[Pad];
72 public:
empty() const73 bool empty() const {return size==0;}
is_divisible() const74 bool is_divisible() const {return size>1;}
FooRange(FooRange & original,oneapi::tbb::split)75 FooRange( FooRange& original, oneapi::tbb::split ) : size(original.size/2) {
76 original.size -= size;
77 start = original.start+original.size;
78 CHECK_FAST( original.pad[Pad-1]=='x');
79 pad[Pad-1] = 'x';
80 }
81 };
82
83 // A range object whose only public members are those required by the parallel_for.h body concept.
84 template<size_t Pad>
85 class FooBody {
86 public:
~FooBody()87 ~FooBody() {
88 --FooBodyCount;
89 for( std::size_t i=0; i<sizeof(*this); ++i )
90 reinterpret_cast<char*>(this)[i] = -1;
91 }
92 // Copy constructor
FooBody(const FooBody & other)93 FooBody( const FooBody& other ) : array(other.array), state(other.state) {
94 ++FooBodyCount;
95 CHECK_FAST(state == LIVE);
96 }
operator ()(FooRange<Pad> & r) const97 void operator()( FooRange<Pad>& r ) const {
98 for (int k = r.start; k < r.start + r.size; ++k) {
99 CHECK_FAST(array[k].load(std::memory_order_relaxed) == 0);
100 array[k].store(1, std::memory_order_relaxed);
101 }
102 }
103 private:
104 const int LIVE = 0x1234;
105 std::atomic<int>* array;
106 int state;
107 friend class FooRange<Pad>;
108 template<typename Flavor_, std::size_t Pad_> friend void Flog( );
FooBody(std::atomic<int> * array_)109 FooBody( std::atomic<int>* array_ ) : array(array_), state(LIVE) {}
110 };
111
112 template <typename Flavor, typename Partitioner, typename Range, typename Body>
113 struct Invoker;
114
115 template <typename Range, typename Body>
116 struct Invoker<parallel_tag, empty_partitioner_tag, Range, Body> {
operator ()Invoker117 void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) {
118 oneapi::tbb::parallel_for( r, body );
119 }
120 };
121
122 template <typename Partitioner, typename Range, typename Body>
123 struct Invoker<parallel_tag, Partitioner, Range, Body> {
operator ()Invoker124 void operator()( const Range& r, const Body& body, Partitioner& p ) {
125 oneapi::tbb::parallel_for( r, body, p );
126 }
127 };
128
129 template <typename Flavor, typename Partitioner, typename T, typename Body>
130 struct InvokerStep;
131
132 template <typename T, typename Body>
133 struct InvokerStep<parallel_tag, empty_partitioner_tag, T, Body> {
operator ()InvokerStep134 void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) {
135 oneapi::tbb::parallel_for( first, last, f );
136 }
operator ()InvokerStep137 void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) {
138 oneapi::tbb::parallel_for( first, last, step, f );
139 }
140 };
141
142 template <typename Partitioner, typename T, typename Body>
143 struct InvokerStep<parallel_tag, Partitioner, T, Body> {
operator ()InvokerStep144 void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) {
145 oneapi::tbb::parallel_for( first, last, f, p );
146 }
operator ()InvokerStep147 void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) {
148 oneapi::tbb::parallel_for( first, last, step, f, p );
149 }
150 };
151
152 template<typename Flavor, std::size_t Pad>
Flog()153 void Flog() {
154 for ( int i=0; i<N; ++i ) {
155 for ( int mode = 0; mode < 4; ++mode) {
156 FooRange<Pad> r( 0, i );
157 const FooRange<Pad> rc = r;
158 FooBody<Pad> f( Array );
159 const FooBody<Pad> fc = f;
160 for (int a_i = 0; a_i < N; a_i++) {
161 Array[a_i].store(0, std::memory_order_relaxed);
162 }
163 FooBodyCount = 1;
164 switch (mode) {
165 case 0: {
166 empty_partitioner_tag p;
167 Invoker< Flavor, empty_partitioner_tag, FooRange<Pad>, FooBody<Pad> > invoke_for;
168 invoke_for( rc, fc, p );
169 }
170 break;
171 case 1: {
172 Invoker< Flavor, const oneapi::tbb::simple_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
173 invoke_for( rc, fc, oneapi::tbb::simple_partitioner() );
174 }
175 break;
176 case 2: {
177 Invoker< Flavor, const oneapi::tbb::auto_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
178 invoke_for( rc, fc, oneapi::tbb::auto_partitioner() );
179 }
180 break;
181 case 3: {
182 static oneapi::tbb::affinity_partitioner affinity;
183 Invoker< Flavor, oneapi::tbb::affinity_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for;
184 invoke_for( rc, fc, affinity );
185 }
186 break;
187 }
188 CHECK(std::find_if_not(Array, Array + i, [](const std::atomic<int>& v) { return v.load(std::memory_order_relaxed) == 1; }) == Array + i);
189 CHECK(std::find_if_not(Array + i, Array + N, [](const std::atomic<int>& v) { return v.load(std::memory_order_relaxed) == 0; }) == Array + N);
190 CHECK(FooBodyCount == 1);
191 }
192 }
193 }
194
195 #include <stdexcept> // std::invalid_argument
196
197 template <typename Flavor, typename T, typename Partitioner>
TestParallelForWithStepSupportHelper(Partitioner & p)198 void TestParallelForWithStepSupportHelper(Partitioner& p) {
199 const T pfor_buffer_test_size = static_cast<T>(PFOR_BUFFER_TEST_SIZE);
200 const T pfor_buffer_actual_size = static_cast<T>(PFOR_BUFFER_ACTUAL_SIZE);
201 // Testing parallel_for with different step values
202 InvokerStep< Flavor, Partitioner, T, TestFunctor<T> > invoke_for;
203 for (T begin = 0; begin < pfor_buffer_test_size - 1; begin += pfor_buffer_test_size / 10 + 1) {
204 T step;
205 for (step = 1; step < pfor_buffer_test_size; step++) {
206 std::memset(pfor_buffer, 0, pfor_buffer_actual_size * sizeof(std::size_t));
207 if (step == 1){
208 invoke_for(begin, pfor_buffer_test_size, TestFunctor<T>(), p);
209 } else {
210 invoke_for(begin, pfor_buffer_test_size, step, TestFunctor<T>(), p);
211 }
212 // Verifying that parallel_for processed all items it should
213 for (T i = begin; i < pfor_buffer_test_size; i = i + step) {
214 if (pfor_buffer[i] != 1) {
215 CHECK_MESSAGE(false, "parallel_for didn't process all required elements");
216 }
217 pfor_buffer[i] = 0;
218 }
219 // Verifying that no extra items were processed and right bound of array wasn't crossed
220 for (T i = 0; i < pfor_buffer_actual_size; i++) {
221 if (pfor_buffer[i] != 0) {
222 CHECK_MESSAGE(false, "parallel_for processed an extra element");
223 }
224 }
225 }
226 }
227 }
228
229 template <typename Flavor, typename T>
TestParallelForWithStepSupport()230 void TestParallelForWithStepSupport() {
231 static oneapi::tbb::affinity_partitioner affinity_p;
232 oneapi::tbb::auto_partitioner auto_p;
233 oneapi::tbb::simple_partitioner simple_p;
234 oneapi::tbb::static_partitioner static_p;
235 empty_partitioner_tag p;
236
237 // Try out all partitioner combinations
238 TestParallelForWithStepSupportHelper< Flavor,T,empty_partitioner_tag >(p);
239 TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::auto_partitioner >(auto_p);
240 TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::simple_partitioner >(simple_p);
241 TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::affinity_partitioner >(affinity_p);
242 TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::static_partitioner >(static_p);
243
244 // Testing some corner cases
245 oneapi::tbb::parallel_for(static_cast<T>(2), static_cast<T>(1), static_cast<T>(1), TestFunctor<T>());
246 }
247
248 #if __TBB_CPP17_INVOKE_PRESENT
249 class SmartIndex {
250 public:
SmartIndex(std::size_t ri)251 SmartIndex(std::size_t ri) : real_index(ri), change_vector(nullptr) {}
SmartIndex(std::size_t ri,std::vector<std::size_t> & cv)252 SmartIndex(std::size_t ri, std::vector<std::size_t>& cv)
253 : real_index(ri), change_vector(&cv) {}
SmartIndex(const SmartIndex & other)254 SmartIndex(const SmartIndex& other) : real_index(other.real_index),
255 change_vector(other.change_vector) {}
256 ~SmartIndex() = default;
257
operator =(const SmartIndex & other)258 SmartIndex& operator=(const SmartIndex& other) {
259 real_index = other.real_index;
260 change_vector = other.change_vector;
261 return *this;
262 }
263
operator <(const SmartIndex & other) const264 bool operator<(const SmartIndex& other) const {
265 return real_index < other.real_index;
266 }
267
operator <=(const SmartIndex & other) const268 bool operator<=(const SmartIndex& other) const {
269 return real_index <= other.real_index;
270 }
271
operator /(const SmartIndex & other) const272 SmartIndex operator/(const SmartIndex& other) const {
273 return {real_index / other.real_index, *change_vector};
274 }
275
operator *(const SmartIndex & other) const276 SmartIndex operator*(const SmartIndex& other) const {
277 return {real_index * other.real_index, *change_vector};
278 }
279
operator +(const SmartIndex & other) const280 SmartIndex operator+(const SmartIndex& other) const {
281 return {real_index + other.real_index, *change_vector};
282 }
283
operator +=(const SmartIndex & other)284 SmartIndex& operator+=(const SmartIndex& other) {
285 real_index += other.real_index;
286 return *this;
287 }
288
operator ++()289 SmartIndex& operator++() { ++real_index; return *this; }
290
operator -(const SmartIndex & other) const291 std::size_t operator-(const SmartIndex& other) const {
292 return real_index - other.real_index;
293 }
294
operator +(std::size_t k)295 SmartIndex operator+(std::size_t k) {
296 return {real_index + k, *change_vector};
297 }
298
increase() const299 void increase() const {
300 CHECK(change_vector);
301 ++(*change_vector)[real_index];
302 }
303 private:
304 std::size_t real_index;
305 std::vector<std::size_t>* change_vector;
306 };
307
test_pfor_body_invoke()308 void test_pfor_body_invoke() {
309 const std::size_t number_of_overloads = 5;
310 const std::size_t iterations = 100000;
311
312 using range_type = test_invoke::SmartRange<std::size_t>;
313 std::vector<std::size_t> change_vector(iterations, 0);
314 range_type range{0, iterations, change_vector};
315
316 oneapi::tbb::parallel_for(range, &range_type::increase);
317 oneapi::tbb::parallel_for(range, &range_type::increase, oneapi::tbb::simple_partitioner());
318 oneapi::tbb::parallel_for(range, &range_type::increase, oneapi::tbb::auto_partitioner());
319 oneapi::tbb::parallel_for(range, &range_type::increase, oneapi::tbb::static_partitioner());
320 oneapi::tbb::affinity_partitioner aff;
321 oneapi::tbb::parallel_for(range, &range_type::increase, aff);
322
323 for (std::size_t item : change_vector) {
324 CHECK(item == number_of_overloads);
325 }
326 }
327
328
test_pfor_func_invoke()329 void test_pfor_func_invoke() {
330 const std::size_t number_of_overloads = 5;
331 const std::size_t iterations = 100000;
332
333 std::vector<std::size_t> change_vector(iterations, 0);
334 SmartIndex first{0, change_vector};
335 SmartIndex last{iterations, change_vector};
336 SmartIndex stride{2};
337
338 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase);
339 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase, oneapi::tbb::simple_partitioner());
340 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase, oneapi::tbb::auto_partitioner());
341 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase, oneapi::tbb::static_partitioner());
342 oneapi::tbb::affinity_partitioner aff;
343 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase, aff);
344
345 for (std::size_t& item : change_vector) {
346 CHECK(item == number_of_overloads);
347 item = 0;
348 }
349
350 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase);
351 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase, oneapi::tbb::simple_partitioner());
352 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase, oneapi::tbb::auto_partitioner());
353 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase, oneapi::tbb::static_partitioner());
354 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase, aff);
355
356 CHECK(change_vector[0] == number_of_overloads);
357 for (std::size_t i = 1; i < iterations; ++i) {
358 std::size_t expected = change_vector[i - 1] == 0 ? number_of_overloads : 0;
359 CHECK(change_vector[i] == expected);
360 }
361 }
362 #endif // __TBB_CPP17_INVOKE_PRESENT
363
364 //! Test simple parallel_for with different partitioners
365 //! \brief \ref interface \ref requirement
366 TEST_CASE("Basic parallel_for") {
367 std::atomic<unsigned long> counter{};
368 const std::size_t number_of_partitioners = 5;
369 const std::size_t iterations = 100000;
370
__anondcc278fb0302(std::size_t) 371 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
372 counter++;
373 });
374
__anondcc278fb0402(std::size_t) 375 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
376 counter++;
377 }, oneapi::tbb::simple_partitioner());
378
__anondcc278fb0502(std::size_t) 379 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
380 counter++;
381 }, oneapi::tbb::auto_partitioner());
382
__anondcc278fb0602(std::size_t) 383 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
384 counter++;
385 }, oneapi::tbb::static_partitioner());
386
387 oneapi::tbb::affinity_partitioner aff;
__anondcc278fb0702(std::size_t) 388 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) {
389 counter++;
390 }, aff);
391
392 CHECK_EQ(counter.load(std::memory_order_relaxed), iterations * number_of_partitioners);
393 }
394
395 //! Testing parallel for with different partitioners and ranges ranges
396 //! \brief \ref interface \ref requirement \ref stress
397 TEST_CASE("Flog test") {
398 Flog<parallel_tag, 1>();
399 Flog<parallel_tag, 10>();
400 Flog<parallel_tag, 100>();
401 Flog<parallel_tag, 1000>();
402 Flog<parallel_tag, 10000>();
403 }
404
405 //! Testing parallel for with different types and step
406 //! \brief \ref interface \ref requirement
407 TEST_CASE_TEMPLATE("parallel_for with step support", T, short, unsigned short, int, unsigned int,
408 long, unsigned long, long long, unsigned long long, std::size_t) {
409 // Testing with different integer types
410 TestParallelForWithStepSupport<parallel_tag, T>();
411 }
412
413 //! Testing with different types of ranges and partitioners
414 //! \brief \ref interface \ref requirement
415 TEST_CASE("Testing parallel_for with partitioners") {
416 using namespace test_partitioner_utils::interaction_with_range_and_partitioner;
417
418 test_partitioner_utils::SimpleBody b;
419 oneapi::tbb::affinity_partitioner ap;
420
421 parallel_for(Range1(true, false), b, ap);
422 parallel_for(Range6(false, true), b, ap);
423
424 parallel_for(Range1(false, true), b, oneapi::tbb::simple_partitioner());
425 parallel_for(Range6(false, true), b, oneapi::tbb::simple_partitioner());
426
427 parallel_for(Range1(false, true), b, oneapi::tbb::auto_partitioner());
428 parallel_for(Range6(false, true), b, oneapi::tbb::auto_partitioner());
429
430 parallel_for(Range1(true, false), b, oneapi::tbb::static_partitioner());
431 parallel_for(Range6(false, true), b, oneapi::tbb::static_partitioner());
432 }
433
434 #if __TBB_CPP17_INVOKE_PRESENT
435 //! Test that parallel_for uses std::invoke to run body and function
436 //! \brief \ref interface \ref requirement
437 TEST_CASE("parallel_for and std::invoke") {
438 test_pfor_body_invoke();
439 test_pfor_func_invoke();
440 }
441 #endif
442