1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #if _MSC_VER
18 #if __INTEL_COMPILER
19     #pragma warning(disable : 2586) // decorated name length exceeded, name was truncated
20 #else
21     // Workaround for vs2015 and warning name was longer than the compiler limit (4096).
22     #pragma warning (disable: 4503)
23 #endif
24 #endif
25 
26 #include "common/test.h"
27 #include "common/utils.h"
28 #include "common/utils_report.h"
29 #include "common/utils_concurrency_limit.h"
30 #include "common/spin_barrier.h"
31 #include "common/checktype.h"
32 #include "common/test_comparisons.h"
33 
34 #include "oneapi/tbb/detail/_utils.h"
35 #include "oneapi/tbb/enumerable_thread_specific.h"
36 #include "oneapi/tbb/parallel_for.h"
37 #include "oneapi/tbb/parallel_reduce.h"
38 #include "oneapi/tbb/parallel_invoke.h"
39 #include "oneapi/tbb/blocked_range.h"
40 #include "oneapi/tbb/tbb_allocator.h"
41 #include "oneapi/tbb/global_control.h"
42 #include "oneapi/tbb/cache_aligned_allocator.h"
43 
44 #include <cstring>
45 #include <cstdio>
46 #include <vector>
47 #include <numeric>
48 #include <utility>
49 #include <atomic>
50 
51 //! \file conformance_enumerable_thread_specific.cpp
52 //! \brief Test for [tls.enumerable_thread_specific tls.flattened2d] specification
53 
54 //------------------------------------------------------------------------------------------------------
55 // Utility types/classes/functions
56 //------------------------------------------------------------------------------------------------------
57 
//! Lower bound for the thread counts the tests iterate over
static int MinThread{1};

//! Upper bound for the thread counts the tests iterate over
static int MaxThread{4};

//! Global tallies of minimalNComparable constructions and destructions
static std::atomic<int> construction_counter;
static std::atomic<int> destruction_counter;

//! Workload sizing shared by all tests in this file
constexpr int REPETITIONS = 5;      // loops run for t in [-1, REPETITIONS), i.e. REPETITIONS + 1 passes
constexpr int N = 25000;            // iterations contributed per thread-limit unit
constexpr int RANGE_MIN = 5000;     // grain size handed to blocked_range
//! Total accumulated by a single-unit run: one increment per iteration, over all passes
constexpr double EXPECTED_SUM = static_cast<double>((REPETITIONS + 1) * N);
71 
72 //! A minimal class that occupies N bytes.
73 /** Defines default and copy constructor, and allows implicit operator&. Hides operator=. */
74 template<size_t N = oneapi::tbb::detail::max_nfs_size>
75 class minimalNComparable: utils::NoAssign {
76 private:
77     int my_value;
78     bool is_constructed;
79     char pad[N-sizeof(int) - sizeof(bool)];
80 public:
81     minimalNComparable() : utils::NoAssign(), my_value(0) { ++construction_counter; is_constructed = true; }
82     minimalNComparable( const minimalNComparable &m ) : utils::NoAssign(), my_value(m.my_value) { ++construction_counter; is_constructed = true; }
83     ~minimalNComparable() { ++destruction_counter; CHECK_FAST(is_constructed); is_constructed = false; }
84     void set_value( const int i ) { CHECK_FAST(is_constructed); my_value = i; }
85     int value( ) const { CHECK_FAST(is_constructed); return my_value; }
86 
87     bool operator==( const minimalNComparable& other ) const { return my_value == other.my_value; }
88 };
89 
90 static size_t AlignMask = 0;  // set to cache-line-size - 1
91 
92 template<typename T>
93 T& check_alignment(T& t, const char *aname) {
94     if( !oneapi::tbb::detail::is_aligned(&t, AlignMask)) {
95         // TBB_REVAMP_TODO: previously was REPORT_ONCE
96         REPORT("alignment error with %s allocator (%x)\n", aname, (int)size_t(&t) & (AlignMask-1));
97     }
98     return t;
99 }
100 
101 template<typename T>
102 const T& check_alignment(const T& t, const char *aname) {
103     if( !oneapi::tbb::detail::is_aligned(&t, AlignMask)) {
104         // TBB_REVAMP_TODO: previously was REPORT_ONCE
105         REPORT("alignment error with %s allocator (%x)\n", aname, (int)size_t(&t) & (AlignMask-1));
106     }
107     return t;
108 }
109 
110 // Test constructors which throw.  If an ETS constructor throws before completion,
111 // the already-built objects are un-constructed.  Do not call the destructor if
112 // this occurs.
113 
//! Number of Thrower constructions observed since the counter was last reset
static std::atomic<int> gThrowValue;
//! The construction (counted from 1) on which Thrower throws
static int targetThrowValue{3};

//! Helper whose default constructor throws std::bad_alloc on one chosen construction.
/** When TBB_USE_EXCEPTIONS is enabled, each construction increments gThrowValue and
    the one that brings it to targetThrowValue throws.  Otherwise the constructor
    does nothing. */
class Thrower {
public:
    Thrower() {
#if TBB_USE_EXCEPTIONS
        const int constructions_so_far = ++gThrowValue;
        if (constructions_so_far == targetThrowValue) {
            throw std::bad_alloc();
        }
#endif
    }
};
127 
128 // MyThrower field of ThrowingConstructor will throw after a certain number of
129 // construction calls.  The constructor unwinder wshould unconstruct the instance
130 // of check_type<int> that was constructed just before.
131 class ThrowingConstructor {
132     CheckType<int> m_checktype;
133     Thrower m_throwing_field;
134 public:
135     int m_cnt;
136     ThrowingConstructor() : m_checktype(), m_throwing_field() { m_cnt = 0;}
137 
138     bool operator==( const ThrowingConstructor& other ) const { return m_cnt == other.m_cnt; }
139 private:
140 };
141 
//
// Uniform access layer used by the tests: minimalNComparable does not define
// = or + operators, so all arithmetic on test values goes through test_helper.
// This primary template serves types that support =, += and static_cast.
//

template< typename T >
struct test_helper {
   //! Resets the value to zero.
   static inline void init(T &target) {
       target = static_cast<T>(0);
   }

   //! Adds an integer amount to the value.
   static inline void sum(T &target, const int amount ) {
       target += static_cast<T>(amount);
   }

   //! Adds a floating-point amount to the value.
   static inline void sum(T &target, const double amount ) {
       target += static_cast<T>(amount);
   }

   //! Overwrites the value.
   static inline void set(T &target, const int new_value ) {
       target = static_cast<T>(new_value);
   }

   //! Reads the value as a double.
   static inline double get(const T &source ) {
       return static_cast<double>(source);
   }
};
155 
156 template<size_t N>
157 struct test_helper<minimalNComparable<N> > {
158    static inline void init(minimalNComparable<N> &sum) { sum.set_value( 0 ); }
159    static inline void sum(minimalNComparable<N> &sum, const int addend ) { sum.set_value( sum.value() + addend); }
160    static inline void sum(minimalNComparable<N> &sum, const double addend ) { sum.set_value( sum.value() + static_cast<int>(addend)); }
161    static inline void sum(minimalNComparable<N> &sum, const minimalNComparable<N> &addend ) { sum.set_value( sum.value() + addend.value()); }
162    static inline void set(minimalNComparable<N> &v, const int value ) { v.set_value( static_cast<int>(value) ); }
163    static inline double get(const minimalNComparable<N> &sum ) { return static_cast<double>(sum.value()); }
164 };
165 
166 template<>
167 struct test_helper<ThrowingConstructor> {
168    static inline void init(ThrowingConstructor &sum) { sum.m_cnt = 0; }
169    static inline void sum(ThrowingConstructor &sum, const int addend ) { sum.m_cnt += addend; }
170    static inline void sum(ThrowingConstructor &sum, const double addend ) { sum.m_cnt += static_cast<int>(addend); }
171    static inline void sum(ThrowingConstructor &sum, const ThrowingConstructor &addend ) { sum.m_cnt += addend.m_cnt; }
172    static inline void set(ThrowingConstructor &v, const int value ) { v.m_cnt = static_cast<int>(value); }
173    static inline double get(const ThrowingConstructor &sum ) { return static_cast<double>(sum.m_cnt); }
174 };
175 
//! Tag class used to make certain constructors hard to invoke accidentally.
struct SecretTagType {};
//! The single tag object passed to those constructors.
SecretTagType SecretTag;

//// functors and routines for initialization and combine

//! Counts live instances of FunctorFinit (incremented on construction, decremented on destruction)
static std::atomic<int> FinitCounter;

//! Initialization functor that produces Value converted to T.
/** Not default-constructible: it can only be created from the secret tag or by copy,
    and every live instance is reflected in FinitCounter. */
template <typename T, int Value>
struct FunctorFinit {
    FunctorFinit( SecretTagType ) {
        ++FinitCounter;
    }
    FunctorFinit( const FunctorFinit& ) {
        ++FinitCounter;
    }
    ~FunctorFinit() {
        --FinitCounter;
    }

    //! Returns the compile-time Value as a T.
    T operator()() {
        return Value;
    }
};
191 
192 template <int Value>
193 struct FunctorFinit<ThrowingConstructor,Value> {
194     FunctorFinit( const FunctorFinit& ) {++FinitCounter;}
195     FunctorFinit( SecretTagType ) {++FinitCounter;}
196     ~FunctorFinit() {--FinitCounter;}
197     ThrowingConstructor operator()() { ThrowingConstructor temp; temp.m_cnt = Value; return temp; }
198 };
199 
200 template <size_t N, int Value>
201 struct FunctorFinit<minimalNComparable<N>,Value> {
202     FunctorFinit( const FunctorFinit& ) {++FinitCounter;}
203     FunctorFinit( SecretTagType ) {++FinitCounter;}
204     ~FunctorFinit() {--FinitCounter;}
205     minimalNComparable<N> operator()() {
206         minimalNComparable<N> result;
207         result.set_value( Value );
208         return result;
209     }
210 };
211 
// Addition

//! Binary functor that adds two values taken by const reference.
/** The primary template relies on T providing operator+. */
template <typename T>
struct FunctorAddCombineRef {
    T operator()(const T& lhs, const T& rhs) const {
        return lhs + rhs;
    }
};
220 
221 template <size_t N>
222 struct FunctorAddCombineRef<minimalNComparable<N> > {
223     minimalNComparable<N> operator()(const minimalNComparable<N>& left, const minimalNComparable<N>& right) const {
224         minimalNComparable<N> result;
225         result.set_value( left.value() + right.value() );
226         return result;
227     }
228 };
229 
230 template <>
231 struct FunctorAddCombineRef<ThrowingConstructor> {
232     ThrowingConstructor operator()(const ThrowingConstructor& left, const ThrowingConstructor& right) const {
233         ThrowingConstructor result;
234         result.m_cnt = ( left.m_cnt + right.m_cnt );
235         return result;
236     }
237 };
238 
239 template <typename T>
240 struct FunctorAddCombine {
241     T operator()(T left, T right ) const {
242         return FunctorAddCombineRef<T>()( left, right );
243     }
244 };
245 
246 template <typename T>
247 T FunctionAddByRef( const T &left, const T &right) {
248     return FunctorAddCombineRef<T>()( left, right );
249 }
250 
251 template <typename T>
252 T FunctionAdd( T left, T right) { return FunctionAddByRef(left,right); }
253 
254 template <typename T>
255 class Accumulator {
256 public:
257     Accumulator(T& result) : my_result(result) {}
258     Accumulator(const Accumulator& other) : my_result(other.my_result) {}
259     Accumulator& operator=(const Accumulator& other) {
260         test_helper<T>::set(my_result, test_helper<T>::get(other));
261         return *this;
262     }
263     void operator()(const T& new_bit) { test_helper<T>::sum(my_result, new_bit); }
264 private:
265     T& my_result;
266 };
267 
268 template <typename T>
269 class ClearingAccumulator {
270 public:
271     ClearingAccumulator(T& result) : my_result(result) {}
272     ClearingAccumulator(const ClearingAccumulator& other) : my_result(other.my_result) {}
273     ClearingAccumulator& operator=(const ClearingAccumulator& other) {
274         test_helper<T>::set(my_result, test_helper<T>::get(other));
275         return *this;
276     }
277     void operator()(T& new_bit) {
278         test_helper<T>::sum(my_result, new_bit);
279         test_helper<T>::init(new_bit);
280     }
281     static void AssertClean(const T& thread_local_value) {
282         T zero;
283         test_helper<T>::init(zero);
284         REQUIRE_MESSAGE(test_helper<T>::get(thread_local_value)==test_helper<T>::get(zero),
285                "combine_each does not allow to modify thread local values?");
286     }
287 private:
288     T& my_result;
289 };
290 
291 //// end functors and routines
292 
293 //------------------------------------------------------------------------------------------------------
// Tests for test cases
295 //------------------------------------------------------------------------------------------------------
296 
297 template <typename T, template<class> class Allocator>
298 class parallel_scalar_body: utils::NoAssign {
299     typedef oneapi::tbb::enumerable_thread_specific<T, Allocator<T> > ets_type;
300     ets_type &sums;
301     const char* allocator_name;
302 
303 public:
304 
305     parallel_scalar_body ( ets_type &_sums, const char *alloc_name ) : sums(_sums), allocator_name(alloc_name) { }
306 
307     void operator()( const oneapi::tbb::blocked_range<int> &r ) const {
308         for (int i = r.begin(); i != r.end(); ++i)
309             test_helper<T>::sum( check_alignment(sums.local(),allocator_name), 1 );
310     }
311 
312 };
313 
//! Exercises enumerable_thread_specific<T, Allocator<T>> without using combine().
/** For every thread limit p from max(MinThread, 2) to MaxThread, and for REPETITIONS + 1
    passes, three containers (exemplar-initialized, functor-initialized and one built
    from T()) are filled by parallel_for with one increment per iteration over [0, N*p).
    The elements are then summed through iterators, const iterators, ranges, const
    ranges, copies and assignments (including ets_key_per_instance variants), and each
    accumulated total is required to equal EXPECTED_SUM*p.
    \param allocator_name  name quoted in alignment diagnostics */
template< typename T, template<class> class Allocator>
void run_parallel_scalar_tests_nocombine(const char* /* test_name */, const char *allocator_name) {

    typedef oneapi::tbb::enumerable_thread_specific<T, Allocator<T> > ets_type;

    Checker<T> my_check;

    // Reset the Thrower construction count so that building default_value below
    // starts counting from zero.
    gThrowValue = 0;
    // Fails the test if this scope is left before dismiss is set, i.e. if the
    // construction of default_value throws.
    struct fail_on_exception_guard {
        bool dismiss = false;
        ~fail_on_exception_guard() {
            if (!dismiss) {
                FAIL("The exception is not expected");
            }
        }
    } guard;
    T default_value{};
    guard.dismiss = true;

    // Start the construction count afresh for the test proper.
    gThrowValue = 0;
    {
        // We assume that static_sums zero-initialized or has a default constructor that zeros it.
        ets_type static_sums = ets_type( T() );

        T exemplar;
        test_helper<T>::init(exemplar);

        for (int p = std::max(MinThread, 2); p <= MaxThread; ++p) {
            oneapi::tbb::global_control gc(oneapi::tbb::global_control::max_allowed_parallelism, p);

            // One accumulator per access path under test; each collects over all passes.
            T iterator_sum;
            test_helper<T>::init(iterator_sum);

            T finit_ets_sum;
            test_helper<T>::init(finit_ets_sum);

            T const_iterator_sum;
            test_helper<T>::init(const_iterator_sum);

            T range_sum;
            test_helper<T>::init(range_sum);

            T const_range_sum;
            test_helper<T>::init(const_range_sum);

            T cconst_sum;
            test_helper<T>::init(cconst_sum);

            T assign_sum;
            test_helper<T>::init(assign_sum);

            T cassgn_sum;
            test_helper<T>::init(cassgn_sum);
            T non_cassgn_sum;
            test_helper<T>::init(non_cassgn_sum);

            T static_sum;
            test_helper<T>::init(static_sum);

            // t starts at -1, so the body runs REPETITIONS + 1 times.
            for (int t = -1; t < REPETITIONS; ++t) {
                static_sums.clear();

                ets_type sums(exemplar);
                FunctorFinit<T,0> my_finit(SecretTag);
                ets_type finit_ets(my_finit);

                // Each container must be empty before and non-empty after the parallel fill.
                REQUIRE( sums.empty());
                oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>( 0, N*p, RANGE_MIN ), parallel_scalar_body<T,Allocator>( sums, allocator_name ) );
                REQUIRE( !sums.empty());

                REQUIRE( finit_ets.empty());
                oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>( 0, N*p, RANGE_MIN ), parallel_scalar_body<T,Allocator>( finit_ets, allocator_name ) );
                REQUIRE( !finit_ets.empty());

                REQUIRE(static_sums.empty());
                oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>( 0, N*p, RANGE_MIN ), parallel_scalar_body<T,Allocator>( static_sums, allocator_name ) );
                REQUIRE( !static_sums.empty());

                // use iterator
                // Also counts the elements to cross-check size().
                typename ets_type::size_type size = 0;
                for ( typename ets_type::iterator i = sums.begin(); i != sums.end(); ++i ) {
                     ++size;
                     test_helper<T>::sum(iterator_sum, *i);
                }
                REQUIRE( sums.size() == size);

                // use const_iterator
                for ( typename ets_type::const_iterator i = sums.begin(); i != sums.end(); ++i ) {
                     test_helper<T>::sum(const_iterator_sum, *i);
                }

                // use range_type
                typename ets_type::range_type r = sums.range();
                for ( typename ets_type::range_type::const_iterator i = r.begin(); i != r.end(); ++i ) {
                     test_helper<T>::sum(range_sum, *i);
                }

                // use const_range_type
                const ets_type& csums = sums;
                typename ets_type::const_range_type cr = csums.range();
                for ( typename ets_type::const_range_type::iterator i = cr.begin(); i != cr.end(); ++i ) {
                     test_helper<T>::sum(const_range_sum, *i);
                }

                // test copy constructor, with TLS-cached locals
                typedef typename oneapi::tbb::enumerable_thread_specific<T, Allocator<T>, oneapi::tbb::ets_key_per_instance> cached_ets_type;

                cached_ets_type cconst(sums);
                // Touch the copy from the worker threads: a local either already existed
                // or must compare equal to a value-initialized T.
                oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>(0, N*p, RANGE_MIN), [&]( const oneapi::tbb::blocked_range<int>& ) {
                    bool exists = false;
                    T& ref = cconst.local(exists);
                    CHECK( (exists || ref == default_value) );
                } );
                // Copy-construct, then move-assign; the result must have as many
                // elements as the container it was copied from.
                cached_ets_type cconst_to_assign1 = cconst;
                cached_ets_type cconst_to_assign2;
                cconst_to_assign2 = std::move(cconst_to_assign1);
                REQUIRE(cconst_to_assign2.size() == cconst.size());

                for ( typename cached_ets_type::const_iterator i = cconst.begin(); i != cconst.end(); ++i ) {
                     test_helper<T>::sum(cconst_sum, *i);
                }

                // test assignment
                ets_type assigned;
                assigned = sums;

                for ( typename ets_type::const_iterator i = assigned.begin(); i != assigned.end(); ++i ) {
                     test_helper<T>::sum(assign_sum, *i);
                }

                // test assign to and from cached locals
                cached_ets_type cassgn;
                cassgn = sums;
                for ( typename cached_ets_type::const_iterator i = cassgn.begin(); i != cassgn.end(); ++i ) {
                     test_helper<T>::sum(cassgn_sum, *i);
                }

                ets_type non_cassgn;
                non_cassgn = cassgn;
                for ( typename ets_type::const_iterator i = non_cassgn.begin(); i != non_cassgn.end(); ++i ) {
                     test_helper<T>::sum(non_cassgn_sum, *i);
                }

                // test finit-initialized ets
                for(typename ets_type::const_iterator i = finit_ets.begin(); i != finit_ets.end(); ++i) {
                    test_helper<T>::sum(finit_ets_sum, *i);
                }

                // test static ets
                for(typename ets_type::const_iterator i = static_sums.begin(); i != static_sums.end(); ++i) {
                    test_helper<T>::sum(static_sum, *i);
                }

            }

            // Every pass contributes N*p increments, over REPETITIONS + 1 passes.
            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(iterator_sum));
            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(const_iterator_sum));
            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(range_sum));
            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(const_range_sum));

            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(cconst_sum));
            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(assign_sum));
            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(cassgn_sum));
            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(non_cassgn_sum));
            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(finit_ets_sum));
            REQUIRE(EXPECTED_SUM*p == test_helper<T>::get(static_sum));
        }
    }  // Checker block
}
483 
//! Exercises combine() and combine_each() on enumerable_thread_specific<T, Allocator<T>>.
/** Each attempt first runs run_parallel_scalar_tests_nocombine, then, for every thread
    limit p from max(MinThread, 2) to MaxThread, fills two containers with parallel_for
    and reduces them with combine (by-value and by-reference functions) and combine_each
    (accumulating, clearing, and clean-checking functors).  The attempt is repeated with
    a larger targetThrowValue while an exception was caught and test_throw_count is
    below 200; at least one attempt must complete without an exception.
    \param test_name       forwarded to run_parallel_scalar_tests_nocombine
    \param allocator_name  name quoted in alignment diagnostics */
template< typename T, template<class> class Allocator>
void run_parallel_scalar_tests(const char* test_name, const char* allocator_name) {

    typedef oneapi::tbb::enumerable_thread_specific<T, Allocator<T> > ets_type;
    bool exception_caught = false;

    // We assume that static_sums zero-initialized or has a default constructor that zeros it.
    ets_type static_sums = ets_type( T() );

    T exemplar;
    test_helper<T>::init(exemplar);

    int test_throw_count = 10;
    // the test will be performed repeatedly until it does not throw.  For non-throwing types
    // this means once; for the throwing type test it may loop two or three times.  The
    // value of targetThrowValue will determine when and if the test will throw.
    do {
        targetThrowValue = test_throw_count;  // keep testing until we get no exception
        exception_caught = false;
#if TBB_USE_EXCEPTIONS
        try {
#endif
            run_parallel_scalar_tests_nocombine<T,Allocator>(test_name, allocator_name);
#if TBB_USE_EXCEPTIONS
        }
        // Any exception from the no-combine run is discarded here and does not set
        // exception_caught.
        // NOTE(review): presumably deliberate, since a throwing T may throw there — confirm.
        catch(...) {}
#endif
        for (int p = std::max(MinThread, 2); p <= MaxThread; ++p) {
            oneapi::tbb::global_control gc(oneapi::tbb::global_control::max_allowed_parallelism, p);

            // Restart the Thrower construction count for this thread limit.
            gThrowValue = 0;

            // One accumulator per reduction path under test.
            T combine_sum;
            test_helper<T>::init(combine_sum);

            T combine_ref_sum;
            test_helper<T>::init(combine_ref_sum);

            T accumulator_sum;
            test_helper<T>::init(accumulator_sum);

            T static_sum;
            test_helper<T>::init(static_sum);

            T clearing_accumulator_sum;
            test_helper<T>::init(clearing_accumulator_sum);

            {
                Checker<T> my_check;
#if TBB_USE_EXCEPTIONS
                try
#endif
                {
                    // t starts at -1, so the body runs REPETITIONS + 1 times.
                    for (int t = -1; t < REPETITIONS; ++t) {
                        static_sums.clear();

                        ets_type sums(exemplar);

                        REQUIRE(sums.empty());
                        oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<int>(0, N * p, RANGE_MIN),
                            parallel_scalar_body<T, Allocator>(sums, allocator_name));
                        REQUIRE(!sums.empty());

                        REQUIRE(static_sums.empty());
                        oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<int>(0, N * p, RANGE_MIN),
                            parallel_scalar_body<T, Allocator>(static_sums, allocator_name));
                        REQUIRE(!static_sums.empty());

                        // Use combine
                        test_helper<T>::sum(combine_sum, sums.combine(FunctionAdd<T>));
                        test_helper<T>::sum(combine_ref_sum, sums.combine(FunctionAddByRef<T>));
                        test_helper<T>::sum(static_sum, static_sums.combine(FunctionAdd<T>));

                        // Accumulate with combine_each
                        sums.combine_each(Accumulator<T>(accumulator_sum));
                        // Accumulate and clear thread-local values
                        sums.combine_each(ClearingAccumulator<T>(clearing_accumulator_sum));
                        // Check that the values were cleared
                        sums.combine_each(ClearingAccumulator<T>::AssertClean);
                    }
                }
#if TBB_USE_EXCEPTIONS
                catch (...) {
                    exception_caught = true;
                }
#endif
            }

            // exception_caught is only reset at the top of the do-loop, so once it is
            // set for one p the sum checks are skipped for the remaining p as well.
            if (!exception_caught) {
                REQUIRE(EXPECTED_SUM * p == test_helper<T>::get(combine_sum));
                REQUIRE(EXPECTED_SUM * p == test_helper<T>::get(combine_ref_sum));
                REQUIRE(EXPECTED_SUM * p == test_helper<T>::get(static_sum));
                REQUIRE(EXPECTED_SUM * p == test_helper<T>::get(accumulator_sum));
                REQUIRE(EXPECTED_SUM * p == test_helper<T>::get(clearing_accumulator_sum));
            }

        }  // MinThread .. MaxThread
        test_throw_count += 10;  // keep testing until we don't get an exception
    } while (exception_caught && test_throw_count < 200);
    REQUIRE_MESSAGE(!exception_caught, "No non-exception test completed");
}
585 
586 template <typename T, template<class> class Allocator>
587 class parallel_vector_for_body: utils::NoAssign {
588     typedef std::vector<T, oneapi::tbb::tbb_allocator<T> > container_type;
589     typedef oneapi::tbb::enumerable_thread_specific< container_type, Allocator<container_type> > ets_type;
590     ets_type &locals;
591     const char *allocator_name;
592 
593 public:
594 
595     parallel_vector_for_body ( ets_type &_locals, const char *aname ) : locals(_locals), allocator_name(aname) { }
596 
597     void operator()( const oneapi::tbb::blocked_range<int> &r ) const {
598         T one;
599         test_helper<T>::set(one, 1);
600 
601         for (int i = r.begin(); i < r.end(); ++i) {
602             check_alignment(locals.local(),allocator_name).push_back( one );
603         }
604     }
605 
606 };
607 
608 template <typename R, typename T>
609 struct parallel_vector_reduce_body {
610 
611     T sum;
612     size_t count;
613     typedef std::vector<T, oneapi::tbb::tbb_allocator<T> > container_type;
614 
615     parallel_vector_reduce_body ( ) : count(0) { test_helper<T>::init(sum); }
616     parallel_vector_reduce_body ( parallel_vector_reduce_body<R, T> &, oneapi::tbb::split ) : count(0) {  test_helper<T>::init(sum); }
617 
618     void operator()( const R &r ) {
619         for (typename R::iterator ri = r.begin(); ri != r.end(); ++ri) {
620             const container_type &v = *ri;
621             ++count;
622             for (typename container_type::const_iterator vi = v.begin(); vi != v.end(); ++vi) {
623                 test_helper<T>::sum(sum, *vi);
624             }
625         }
626     }
627 
628     void join( const parallel_vector_reduce_body &b ) {
629         test_helper<T>::sum(sum,b.sum);
630         count += b.count;
631     }
632 
633 };
634 
//! Exercises enumerable_thread_specific holding std::vector<T> together with flattened2d.
/** For every thread limit p from max(MinThread, 2) to MaxThread, and for REPETITIONS + 1
    passes, thread-local vectors are filled by parallel_for with N*p elements equal to 1,
    copied and assigned, reduced with parallel_reduce, and traversed through flattened2d.
    A second container is built with the multi-argument constructor so that each local
    vector starts with 25 elements equal to -1.
    \param allocator_name  name quoted in alignment diagnostics */
template< typename T, template<class> class Allocator>
void run_parallel_vector_tests(const char* /* test_name */, const char *allocator_name) {
    typedef std::vector<T, oneapi::tbb::tbb_allocator<T> > container_type;
    typedef oneapi::tbb::enumerable_thread_specific< container_type, Allocator<container_type> > ets_type;

    for (int p = std::max(MinThread, 2); p <= MaxThread; ++p) {
        oneapi::tbb::global_control gc(oneapi::tbb::global_control::max_allowed_parallelism, p);

        // Collects the reduced totals of all passes for this thread limit.
        T sum;
        test_helper<T>::init(sum);

        // t starts at -1, so the body runs REPETITIONS + 1 times.
        for (int t = -1; t < REPETITIONS; ++t) {
            ets_type vs;

            REQUIRE( vs.empty() );
            oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int> (0, N*p, RANGE_MIN),
                               parallel_vector_for_body<T,Allocator>( vs, allocator_name ) );
            REQUIRE( !vs.empty() );

            // copy construct
            // NOTE(review): the trailing remark below looks historical; the size check
            // on vs2 further down relies on this copy succeeding — confirm and drop.
            ets_type vs2(vs); // this causes an assertion failure, related to allocators...

            // assign
            ets_type vs3;
            vs3 = vs;

            // Reduce over the local vectors: pvrb.sum totals the elements,
            // pvrb.count counts the vectors.
            parallel_vector_reduce_body< typename ets_type::const_range_type, T > pvrb;
            oneapi::tbb::parallel_reduce ( vs.range(1), pvrb );

            test_helper<T>::sum(sum, pvrb.sum);

            // The original, the copy and the assigned container hold the same number of vectors.
            REQUIRE( vs.size() == pvrb.count );
            REQUIRE( vs2.size() == pvrb.count );
            REQUIRE( vs3.size() == pvrb.count );

            // The flattened view must expose exactly one element per parallel_for iteration.
            oneapi::tbb::flattened2d<ets_type> fvs = flatten2d(vs);
            size_t ccount = fvs.size();
            REQUIRE( ccount == size_t(N*p) );
            size_t elem_cnt = 0;
            // Iterator checks: it2 is a copy of the default-constructed it, taken before
            // it is assigned begin(), so the two must differ afterwards.
            typename oneapi::tbb::flattened2d<ets_type>::iterator it;
            auto it2(it);
            it = fvs.begin();
            REQUIRE(it != it2);
            // Assign an iterator from a const_iterator, then compare them and check
            // that operator-> agrees with operator*.
            typename oneapi::tbb::flattened2d<ets_type>::iterator it3;
            typename oneapi::tbb::flattened2d<ets_type>::const_iterator cit = fvs.begin();
            it3 = cit;
            REQUIRE(it3 == cit);
            REQUIRE(it3.operator->() == &(*it3));

            // Walking with const_iterator (pre-increment) must visit size() elements.
            for(typename oneapi::tbb::flattened2d<ets_type>::const_iterator i = fvs.begin(); i != fvs.end(); ++i) {
                ++elem_cnt;
            };
            REQUIRE( ccount == elem_cnt );

            // Same walk with iterator and post-increment.
            elem_cnt = 0;
            for(typename oneapi::tbb::flattened2d<ets_type>::iterator i = fvs.begin(); i != fvs.end(); i++) {
                ++elem_cnt;
            };
            REQUIRE( ccount == elem_cnt );

            // Test the ETS constructor with multiple args
            T minus_one;
            test_helper<T>::set(minus_one, -1);
            // Set ETS to construct "local" vectors pre-occupied with 25 "minus_one"s
            // Cast 25 to size_type to prevent Intel Compiler SFINAE compilation issues with gcc 5.
            ets_type vvs( typename container_type::size_type(25), minus_one, oneapi::tbb::tbb_allocator<T>() );
            REQUIRE( vvs.empty() );
            oneapi::tbb::parallel_for ( oneapi::tbb::blocked_range<int> (0, N*p, RANGE_MIN), parallel_vector_for_body<T,Allocator>( vvs, allocator_name ) );
            REQUIRE( !vvs.empty() );

            parallel_vector_reduce_body< typename ets_type::const_range_type, T > pvrb2;
            oneapi::tbb::parallel_reduce ( vvs.range(1), pvrb2 );
            REQUIRE( pvrb2.count == vvs.size() );
            // N*p appended ones, minus 25 for each local vector's initial content.
            REQUIRE( test_helper<T>::get(pvrb2.sum) == N*p-pvrb2.count*25 );

            // Element count: N*p appended elements plus 25 initial ones per local vector.
            oneapi::tbb::flattened2d<ets_type> fvvs = flatten2d(vvs);
            ccount = fvvs.size();
            REQUIRE( ccount == N*p+pvrb2.count*25 );
        }

        // Each pass contributes N*p, over REPETITIONS + 1 passes.
        double result_value = test_helper<T>::get(sum);
        REQUIRE( EXPECTED_SUM*p == result_value);
    }
}
719 
//! Exercises copy construction and assignment between enumerable_thread_specific
//! instantiations that differ in their key usage type (ets_no_key vs ets_key_per_instance).
/** For every thread limit p from max(MinThread, 2) to MaxThread, and for REPETITIONS + 1
    passes, an ets_no_key container of vectors is filled by parallel_for, copied into an
    ets_key_per_instance container, and assigned back into an ets_no_key container, which
    is then reduced and traversed through flattened2d.  Alignment diagnostics use the
    fixed allocator name "default". */
template<typename T, template<class> class Allocator>
void run_cross_type_vector_tests(const char* /* test_name */) {
    const char* allocator_name = "default";
    typedef std::vector<T, oneapi::tbb::tbb_allocator<T> > container_type;

    for (int p = std::max(MinThread, 2); p <= MaxThread; ++p) {
        oneapi::tbb::global_control gc(oneapi::tbb::global_control::max_allowed_parallelism, p);

        // Collects the reduced totals of all passes for this thread limit.
        T sum;
        test_helper<T>::init(sum);

        // t starts at -1, so the body runs REPETITIONS + 1 times.
        for (int t = -1; t < REPETITIONS; ++t) {
            typedef typename oneapi::tbb::enumerable_thread_specific< container_type, Allocator<container_type>, oneapi::tbb::ets_no_key > ets_nokey_type;
            typedef typename oneapi::tbb::enumerable_thread_specific< container_type, Allocator<container_type>, oneapi::tbb::ets_key_per_instance > ets_tlskey_type;
            ets_nokey_type vs;

            REQUIRE( vs.empty());
            oneapi::tbb::parallel_for ( oneapi::tbb::blocked_range<int> (0, N*p, RANGE_MIN), parallel_vector_for_body<T, Allocator>( vs, allocator_name ) );
            REQUIRE( !vs.empty());

            // copy construct
            // (from the ets_no_key container into an ets_key_per_instance one)
            ets_tlskey_type vs2(vs);

            // assign
            // (from the ets_key_per_instance container back into an ets_no_key one)
            ets_nokey_type vs3;
            vs3 = vs2;

            // Reduce the container that went through both conversions.
            parallel_vector_reduce_body< typename ets_nokey_type::const_range_type, T > pvrb;
            oneapi::tbb::parallel_reduce ( vs3.range(1), pvrb );

            test_helper<T>::sum(sum, pvrb.sum);

            REQUIRE( vs3.size() == pvrb.count);

            // Both iterator flavours of the flattened view must visit size() elements.
            oneapi::tbb::flattened2d<ets_nokey_type> fvs = flatten2d(vs3);
            size_t ccount = fvs.size();
            size_t elem_cnt = 0;
            for(typename oneapi::tbb::flattened2d<ets_nokey_type>::const_iterator i = fvs.begin(); i != fvs.end(); ++i) {
                ++elem_cnt;
            };
            REQUIRE(ccount == elem_cnt);

            elem_cnt = 0;
            for(typename oneapi::tbb::flattened2d<ets_nokey_type>::iterator i = fvs.begin(); i != fvs.end(); ++i) {
                ++elem_cnt;
            };
            REQUIRE(ccount == elem_cnt);

            // Flatten only the first local vector: the view must span exactly that
            // vector's elements, through both the non-const and the const view.
            oneapi::tbb::flattened2d<ets_nokey_type> fvs2 = flatten2d(vs3, vs3.begin(), std::next(vs3.begin()));
            REQUIRE(std::distance(fvs2.begin(), fvs2.end()) == vs3.begin()->size());
            const oneapi::tbb::flattened2d<ets_nokey_type>& cfvs2(fvs2);
            REQUIRE(std::distance(cfvs2.begin(), cfvs2.end()) == vs3.begin()->size());
        }

        // Each pass contributes N*p, over REPETITIONS + 1 passes.
        double result_value = test_helper<T>::get(sum);
        REQUIRE( EXPECTED_SUM*p == result_value);
    }
}
778 
779 template< typename T >
780 void run_serial_scalar_tests(const char* /* test_name */) {
781     T sum;
782     test_helper<T>::init(sum);
783 
784     for (int t = -1; t < REPETITIONS; ++t) {
785         for (int i = 0; i < N; ++i) {
786             test_helper<T>::sum(sum,1);
787         }
788     }
789 
790     double result_value = test_helper<T>::get(sum);
791     REQUIRE( EXPECTED_SUM == result_value);
792 }
793 
794 template< typename T >
795 void run_serial_vector_tests(const char* /* test_name */) {
796     T sum;
797     test_helper<T>::init(sum);
798     T one;
799     test_helper<T>::set(one, 1);
800 
801     for (int t = -1; t < REPETITIONS; ++t) {
802         std::vector<T, oneapi::tbb::tbb_allocator<T> > v;
803         for (int i = 0; i < N; ++i) {
804             v.push_back( one );
805         }
806         for (typename std::vector<T, oneapi::tbb::tbb_allocator<T> >::const_iterator i = v.begin(); i != v.end(); ++i)
807             test_helper<T>::sum(sum, *i);
808     }
809 
810     double result_value = test_helper<T>::get(sum);
811     REQUIRE( EXPECTED_SUM == result_value);
812 }
813 
// Size taken from oneTBB internals; run_assignment_and_copy_constructor_tests instantiates
// minimalNComparable with line_size-1, line_size and line_size+1 to check padding calculations.
const size_t line_size = oneapi::tbb::detail::max_nfs_size;
815 
// Runs the serial reference computations for the scalar and vector element types.
// The string arguments only name the tested type; the callees leave that parameter unnamed.
void run_reference_check() {
    run_serial_scalar_tests<int>("int");
    run_serial_scalar_tests<double>("double");
    run_serial_scalar_tests<minimalNComparable<> >("minimalNComparable<>");
    run_serial_vector_tests<int>("std::vector<int, oneapi::tbb::tbb_allocator<int> >");
    run_serial_vector_tests<double>("std::vector<double, oneapi::tbb::tbb_allocator<double> >");
}
823 
// Runs the parallel scalar and vector tests with the given allocator template.
// allocator_name is forwarded to every test together with the name of the element type.
template<template<class>class Allocator>
void run_parallel_tests(const char *allocator_name) {
    run_parallel_scalar_tests<int, Allocator>("int",allocator_name);
    run_parallel_scalar_tests<double, Allocator>("double",allocator_name);
    // minimalNComparable<> goes through the "nocombine" variant of the scalar tests
    run_parallel_scalar_tests_nocombine<minimalNComparable<>,Allocator>("minimalNComparable<>",allocator_name);
    run_parallel_scalar_tests<ThrowingConstructor, Allocator>("ThrowingConstructor", allocator_name);
    run_parallel_vector_tests<int, Allocator>("std::vector<int, oneapi::tbb::tbb_allocator<int> >",allocator_name);
    run_parallel_vector_tests<double, Allocator>("std::vector<double, oneapi::tbb::tbb_allocator<double> >",allocator_name);
}
833 
// Runs the cross-type (different ETS key type) vector tests for int and double elements,
// using oneapi::tbb::tbb_allocator for the ETS itself.
void run_cross_type_tests() {
    // cross-type scalar tests are part of run_parallel_scalar_tests_nocombine
    run_cross_type_vector_tests<int, oneapi::tbb::tbb_allocator>("std::vector<int, oneapi::tbb::tbb_allocator<int> >");
    run_cross_type_vector_tests<double, oneapi::tbb::tbb_allocator>("std::vector<double, oneapi::tbb::tbb_allocator<double> >");
}
839 
840 template<typename T, template<class> class Allocator, typename Init>
841 oneapi::tbb::enumerable_thread_specific<T,Allocator<T> > MakeETS( Init init ) {
842     return oneapi::tbb::enumerable_thread_specific<T,Allocator<T> >(init);
843 }
// Function object that constructs an ETS from a list of arguments received as rvalue
// references to their decayed types; each argument is moved into the ETS constructor.
// A named functor is used instead of a lambda because, in some GCC versions,
// parameter packs in lambdas might cause compile errors.
template<typename ETS, typename... P>
struct MakeETS_Functor {
    ETS operator()( typename std::decay<P>::type&&... params ) {
        return ETS(std::move(params)...);
    }
};
851 template<typename T, template<class> class Allocator, typename... P>
852 oneapi::tbb::enumerable_thread_specific<T,Allocator<T> > MakeETS( oneapi::tbb::detail::stored_pack<P...> pack ) {
853     typedef oneapi::tbb::enumerable_thread_specific<T,Allocator<T> > result_type;
854     return oneapi::tbb::detail::call_and_return< result_type >(
855         MakeETS_Functor<result_type,P...>(), std::move(pack)
856     );
857 }
858 
859 template<typename T, template<class> class Allocator, typename InitSrc, typename InitDst, typename Validator>
860 void ets_copy_assign_test( InitSrc init1, InitDst init2, Validator check, const char *allocator_name ) {
861     typedef oneapi::tbb::enumerable_thread_specific<T, Allocator<T> > ets_type;
862 
863     // Create the source instance
864     const ets_type& cref_binder = MakeETS<T, Allocator>(init1);
865     ets_type& source = const_cast<ets_type&>(cref_binder);
866     check(check_alignment(source.local(),allocator_name));
867 
868     // Test copy construction
869     bool existed = false;
870     ets_type copy(source);
871     check(check_alignment(copy.local(existed),allocator_name));
872     REQUIRE_MESSAGE(existed, "Local data not created by ETS copy constructor");
873     copy.clear();
874     check(check_alignment(copy.local(),allocator_name));
875 
876     // Test assignment
877     existed = false;
878     ets_type assign(init2);
879     assign = source;
880     check(check_alignment(assign.local(existed),allocator_name));
881     REQUIRE_MESSAGE(existed, "Local data not created by ETS assignment");
882     assign.clear();
883     check(check_alignment(assign.local(),allocator_name));
884 
885     // Create the source instance
886     ets_type&& rvref_binder = MakeETS<T, Allocator>(init1);
887     check(check_alignment(rvref_binder.local(),allocator_name));
888 
889     // Test move construction
890     existed = false;
891     ets_type moved(rvref_binder);
892     check(check_alignment(moved.local(existed),allocator_name));
893     REQUIRE_MESSAGE(existed, "Local data not created by ETS move constructor");
894     moved.clear();
895     check(check_alignment(moved.local(),allocator_name));
896 
897     // Test assignment
898     existed = false;
899     ets_type move_assign(init2);
900     move_assign = std::move(moved);
901     check(check_alignment(move_assign.local(existed),allocator_name));
902     REQUIRE_MESSAGE(existed, "Local data not created by ETS move assignment");
903     move_assign.clear();
904     check(check_alignment(move_assign.local(),allocator_name));
905 }
906 
907 template<typename T, int Expected>
908 struct Validator {
909     void operator()( const T& value ) {
910         REQUIRE(test_helper<T>::get(value) == Expected);
911     }
912     void operator()( const std::pair<int,T>& value ) {
913         REQUIRE(value.first > 0);
914         REQUIRE(test_helper<T>::get(value.second) == Expected*value.first);
915     }
916 };
917 
918 template <typename T, template<class> class Allocator>
919 void run_assign_and_copy_constructor_test(const char* /* test_name */, const char *allocator_name) {
920     #define EXPECTED 3142
921 
922     // test with exemplar initializer
923     T src_init;
924     test_helper<T>::set(src_init,EXPECTED);
925     T other_init;
926     test_helper<T>::init(other_init);
927     ets_copy_assign_test<T, Allocator>(src_init, other_init, Validator<T,EXPECTED>(), allocator_name);
928 
929     // test with function initializer
930     FunctorFinit<T,EXPECTED> src_finit(SecretTag);
931     FunctorFinit<T,0> other_finit(SecretTag);
932     ets_copy_assign_test<T, Allocator>(src_finit, other_finit, Validator<T,EXPECTED>(), allocator_name);
933 
934     // test with multi-argument "emplace" initializer
935     // The arguments are wrapped into oneapi::tbb::internal::stored_pack to avoid variadic templates in ets_copy_assign_test.
936     test_helper<T>::set(src_init,EXPECTED*17);
937     ets_copy_assign_test< std::pair<int,T>, Allocator>(oneapi::tbb::detail::save_pack(17,src_init), std::make_pair(-1,T()), Validator<T,EXPECTED>(), allocator_name);
938     #undef EXPECTED
939 }
940 
// Runs the copy/move construction and assignment tests with the given allocator template
// for int, double and three sizes of minimalNComparable around line_size.
template< template<class> class Allocator>
void run_assignment_and_copy_constructor_tests(const char* allocator_name) {
    run_assign_and_copy_constructor_test<int, Allocator>("int", allocator_name);
    run_assign_and_copy_constructor_test<double, Allocator>("double", allocator_name);
    // Try class sizes that are close to a cache line in size, in order to check padding calculations.
    run_assign_and_copy_constructor_test<minimalNComparable<line_size-1>, Allocator >("minimalNComparable<line_size-1>", allocator_name);
    run_assign_and_copy_constructor_test<minimalNComparable<line_size>, Allocator >("minimalNComparable<line_size>", allocator_name);
    run_assign_and_copy_constructor_test<minimalNComparable<line_size+1>, Allocator >("minimalNComparable<line_size+1>", allocator_name);
    // NOTE(review): FinitCounter is defined elsewhere in the file; presumably it tracks
    // outstanding FunctorFinit instances and must be back to zero here - confirm.
    REQUIRE(FinitCounter==0);
}
951 
952 // Class with no default constructor
953 class HasNoDefaultConstructor {
954     HasNoDefaultConstructor();
955 public:
956     HasNoDefaultConstructor( SecretTagType ) {}
957 };
// Initialization functor for HasNoDefaultConstructor:
// every call returns a new instance constructed from SecretTag.
struct HasNoDefaultConstructorFinit {
    HasNoDefaultConstructor operator()() {
        return HasNoDefaultConstructor(SecretTag);
    }
};
// Combine functor for HasNoDefaultConstructor:
// both arguments are ignored and a new instance constructed from SecretTag is returned.
struct HasNoDefaultConstructorCombine {
    HasNoDefaultConstructor operator()( HasNoDefaultConstructor, HasNoDefaultConstructor ) {
        return HasNoDefaultConstructor(SecretTag);
    }
};
970 
971 // Class that only has a constructor with multiple parameters and a move constructor
972 class HasSpecialAndMoveCtor : utils::NoCopy {
973     HasSpecialAndMoveCtor();
974 public:
975     HasSpecialAndMoveCtor( SecretTagType, size_t = size_t(0), const char* = "" ) {}
976     HasSpecialAndMoveCtor( HasSpecialAndMoveCtor&& ) {}
977 };
978 
// No-op combine-each functor: accepts a value of type V by const reference and does nothing.
// Passed to combine_each in TestInstantiation.
template<typename V>
struct EmptyCombineEach {
    void operator()( const V& ) { }
};
984 
985 //! Test situations where only default constructor or copy constructor is required.
986 template<template<class> class Allocator>
987 void TestInstantiation(const char* /* allocator_name */) {
988     // Test instantiation is possible when copy constructor is not required.
989     oneapi::tbb::enumerable_thread_specific<utils::NoCopy, Allocator<utils::NoCopy> > ets1;
990     ets1.local();
991     ets1.combine_each(EmptyCombineEach<utils::NoCopy>());
992 
993     // Test instantiation when default constructor is not required, because exemplar is provided.
994     HasNoDefaultConstructor x(SecretTag);
995     oneapi::tbb::enumerable_thread_specific<HasNoDefaultConstructor, Allocator<HasNoDefaultConstructor> > ets2(x);
996     ets2.local();
997     ets2.combine(HasNoDefaultConstructorCombine());
998 
999     // Test instantiation when default constructor is not required, because init function is provided.
1000     HasNoDefaultConstructorFinit f;
1001     oneapi::tbb::enumerable_thread_specific<HasNoDefaultConstructor, Allocator<HasNoDefaultConstructor> > ets3(f);
1002     ets3.local();
1003     ets3.combine(HasNoDefaultConstructorCombine());
1004 
1005     // Test instantiation with multiple arguments
1006     oneapi::tbb::enumerable_thread_specific<HasSpecialAndMoveCtor, Allocator<HasSpecialAndMoveCtor> > ets4(SecretTag, 0x42, "meaningless");
1007     ets4.local();
1008     ets4.combine_each(EmptyCombineEach<HasSpecialAndMoveCtor>());
1009     // Test instantiation with one argument that should however use the variadic constructor
1010     oneapi::tbb::enumerable_thread_specific<HasSpecialAndMoveCtor, Allocator<HasSpecialAndMoveCtor> > ets5(SecretTag);
1011     ets5.local();
1012     ets5.combine_each(EmptyCombineEach<HasSpecialAndMoveCtor>());
1013     // Test that move operations do not impose extra requirements
1014     // Default allocator is used. If it does not match Allocator, there will be elementwise move
1015     oneapi::tbb::enumerable_thread_specific<HasSpecialAndMoveCtor> ets6( std::move(ets4) );
1016     ets6.combine_each(EmptyCombineEach<HasSpecialAndMoveCtor>());
1017     ets6 = std::move(ets5);
1018 }
1019 
1020 void TestMemberTypes() {
1021     using default_container_type = oneapi::tbb::enumerable_thread_specific<int>;
1022     static_assert(std::is_same<typename default_container_type::allocator_type, oneapi::tbb::cache_aligned_allocator<int>>::value,
1023             "Incorrect default template allocator");
1024 
1025     using test_allocator_type = std::allocator<int>;
1026     using ets_container_type = oneapi::tbb::enumerable_thread_specific<int, test_allocator_type>;
1027 
1028     static_assert(std::is_same<typename ets_container_type::allocator_type, test_allocator_type>::value,
1029                   "Incorrect container allocator_type member type");
1030 
1031     using value_type = typename ets_container_type::value_type;
1032 
1033     static_assert(std::is_same<typename ets_container_type::value_type, int>::value,
1034                   "Incorrect container value_type member type");
1035     static_assert(std::is_same<typename ets_container_type::reference, value_type&>::value,
1036                   "Incorrect container reference member type");
1037     static_assert(std::is_same<typename ets_container_type::const_reference, const value_type&>::value,
1038                   "Incorrect container const_reference member type");
1039 
1040     using allocator_type = typename ets_container_type::allocator_type;
1041     static_assert(std::is_same<typename ets_container_type::pointer, typename std::allocator_traits<allocator_type>::pointer>::value,
1042                   "Incorrect container pointer member type");
1043     static_assert(std::is_same<typename ets_container_type::const_pointer, typename std::allocator_traits<allocator_type>::const_pointer>::value,
1044                   "Incorrect container const_pointer member type");
1045 
1046     static_assert(std::is_unsigned<typename ets_container_type::size_type>::value,
1047                   "Incorrect container size_type member type");
1048     static_assert(std::is_signed<typename ets_container_type::difference_type>::value,
1049                   "Incorrect container difference_type member type");
1050 
1051     static_assert(utils::is_random_access_iterator<typename ets_container_type::iterator>::value,
1052                   "Incorrect container iterator member type");
1053     static_assert(!std::is_const<typename ets_container_type::iterator::value_type>::value,
1054                   "Incorrect container iterator member type");
1055     static_assert(utils::is_random_access_iterator<typename ets_container_type::const_iterator>::value,
1056                   "Incorrect container const_iterator member type");
1057     static_assert(std::is_const<typename ets_container_type::const_iterator::value_type>::value,
1058                   "Incorrect container iterator member type");
1059 }
1060 
1061 size_t init_tbb_alloc_mask() {
1062     // TODO: use __TBB_alignof(T) to check for local() results instead of using internal knowledges of ets element padding
1063     if(oneapi::tbb::tbb_allocator<int>::allocator_type() == oneapi::tbb::tbb_allocator<int>::standard) {
1064         // scalable allocator is not available.
1065         return 1;
1066     }
1067     else {
1068         // this value is for large objects, but will be correct for small.
1069         return 64; // TBB_REVAMP_TODO: enable as estimatedCacheLineSize when tbbmalloc is available;
1070     }
1071 }
1072 
// TODO: rework the test not to depend on oneTBB internals
// Mask assigned to AlignMask before running tests with oneapi::tbb::cache_aligned_allocator.
static const size_t cache_allocator_mask = oneapi::tbb::detail::r1::cache_line_size();
// Mask assigned to AlignMask before running tests with oneapi::tbb::tbb_allocator.
static const size_t tbb_allocator_mask = init_tbb_alloc_mask();
1076 
1077 void TestETSIterator() {
1078     using ets_type = oneapi::tbb::enumerable_thread_specific<int>;
1079     if (utils::get_platform_max_threads() == 1) {
1080         ets_type ets;
1081         ets.local() = 1;
1082         REQUIRE_MESSAGE(std::next(ets.begin()) == ets.end(), "Incorrect begin or end of the ETS");
1083         REQUIRE_MESSAGE(std::prev(ets.end()) == ets.begin(), "Incorrect begin or end of the ETS");
1084     } else {
1085         std::atomic<std::size_t> sync_counter(0);
1086 
1087         const std::size_t expected_ets_size = 2;
1088         ets_type ets;
1089         const ets_type& cets(ets);
1090 
1091         auto fill_ets_body = [&](){
1092             ets.local() = 42;
1093             ++sync_counter;
1094             while(sync_counter != expected_ets_size)
1095                 utils::yield();
1096         };
1097 
1098         oneapi::tbb::parallel_invoke(fill_ets_body, fill_ets_body);
1099         REQUIRE_MESSAGE(ets.size() == expected_ets_size, "Incorrect ETS size");
1100 
1101         std::size_t counter = 0;
1102         auto iter = ets.begin();
1103         while(iter != ets.end()) {
1104             ++counter % 2 == 0 ? ++iter : iter++;
1105         }
1106         REQUIRE(counter == expected_ets_size);
1107         while(iter != ets.begin()) {
1108             --counter % 2 == 0 ? --iter : iter--;
1109         }
1110         REQUIRE(counter == 0);
1111         auto citer = cets.begin();
1112         while(citer != cets.end()) {
1113             ++counter % 2 == 0 ? ++citer : citer++;
1114         }
1115         REQUIRE(counter == expected_ets_size);
1116         while(citer != cets.begin()) {
1117             --counter % 2 == 0 ? --citer : citer--;
1118         }
1119         REQUIRE(counter == 0);
1120         REQUIRE(ets.begin() + expected_ets_size == ets.end());
1121         REQUIRE(expected_ets_size + ets.begin() == ets.end());
1122         REQUIRE(ets.end() - expected_ets_size == ets.begin());
1123 
1124         typename ets_type::iterator it;
1125         it = ets.begin();
1126 
1127         auto it_bkp = it;
1128         auto it2 = it++;
1129         REQUIRE(it2 == it_bkp);
1130 
1131         it = ets.begin();
1132         it += expected_ets_size;
1133         REQUIRE(it == ets.end());
1134         it -= expected_ets_size;
1135         REQUIRE(it == ets.begin());
1136 
1137         for (int i = 0; i < int(expected_ets_size - 1); ++i) {
1138             REQUIRE(ets.begin()[i] == 42);
1139             REQUIRE(std::prev(ets.end())[-i] == 42);
1140         }
1141 
1142         auto iter1 = ets.begin();
1143         auto iter2 = ets.end();
1144         REQUIRE(iter1 < iter2);
1145         REQUIRE(iter1 <= iter2);
1146         REQUIRE(!(iter1 > iter2));
1147         REQUIRE(!(iter1 >= iter2));
1148     }
1149 }
1150 
// Runs the equality and two-way comparison checks from comparisons_testing on a pair of
// iterators. ExpectEqual and ExpectLess state the expected outcome for lhs versus rhs.
template <bool ExpectEqual, bool ExpectLess, typename Iterator>
void DoETSIteratorComparisons( const Iterator& lhs, const Iterator& rhs ) {
    // TODO: replace with testEqualityAndLessComparisons after adding <=> operator for ETS iterator
    using namespace comparisons_testing;
    testEqualityComparisons<ExpectEqual>(lhs, rhs);
    testTwoWayComparisons<ExpectEqual, ExpectLess>(lhs, rhs);
}
1158 
1159 template <typename Iterator, typename ETS>
1160 void TestETSIteratorComparisonsBasic( ETS& ets ) {
1161     REQUIRE_MESSAGE(!ets.empty(), "Incorrect test setup");
1162     Iterator it1, it2;
1163     DoETSIteratorComparisons</*ExpectEqual = */true, /*ExpectLess = */false>(it1, it2);
1164     it1 = ets.begin();
1165     it2 = ets.begin();
1166     DoETSIteratorComparisons</*ExpectEqual = */true, /*ExpectLess = */false>(it1, it2);
1167     it2 = std::prev(ets.end());
1168     DoETSIteratorComparisons</*ExpectEqual = */false, /*ExpectLess = */true>(it1, it2);
1169 }
1170 
1171 void TestETSIteratorComparisons() {
1172     using ets_type = oneapi::tbb::enumerable_thread_specific<int>;
1173     ets_type ets;
1174 
1175     // Fill the ets
1176     const std::size_t expected_ets_size = 2;
1177     std::atomic<std::size_t> sync_counter(0);
1178     auto fill_ets_body = [&](int){
1179             ets.local() = 42;
1180             ++sync_counter;
1181             while(sync_counter != expected_ets_size)
1182                 std::this_thread::yield();
1183         };
1184 
1185     utils::NativeParallelFor(2, fill_ets_body);
1186 
1187     TestETSIteratorComparisonsBasic<typename ets_type::iterator>(ets);
1188     const ets_type& cets = ets;
1189     TestETSIteratorComparisonsBasic<typename ets_type::const_iterator>(cets);
1190 }
1191 
//! Test container instantiation
//! \brief \ref interface \ref requirement
TEST_CASE("Instantiation") {
    // AlignMask is switched to the mask that corresponds to the allocator under test
    AlignMask = cache_allocator_mask;
    TestInstantiation<oneapi::tbb::cache_aligned_allocator>("oneapi::tbb::cache_aligned_allocator");
    AlignMask = tbb_allocator_mask;
    TestInstantiation<oneapi::tbb::tbb_allocator>("oneapi::tbb::tbb_allocator");
}
1200 
//! Test assignment and copy constructor
//! \brief \ref interface \ref requirement
TEST_CASE("Assignment and copy constructor") {
    // AlignMask is switched to the mask that corresponds to the allocator under test
    AlignMask = cache_allocator_mask;
    run_assignment_and_copy_constructor_tests<oneapi::tbb::cache_aligned_allocator>("oneapi::tbb::cache_aligned_allocator");
    AlignMask = tbb_allocator_mask;
    run_assignment_and_copy_constructor_tests<oneapi::tbb::tbb_allocator>("oneapi::tbb::tbb_allocator");
}
1209 
1210 //! Test for basic ETS functionality and requirements
1211 //! \brief \ref interface \ref requirement
1212 TEST_CASE("Basic ETS functionality") {
1213     const int LOCALS = 10;
1214 
1215     oneapi::tbb::enumerable_thread_specific<int> ets;
1216     ets.local() = 42;
1217 
1218     utils::SpinBarrier barrier(LOCALS);
1219     utils::NativeParallelFor(LOCALS, [&](int i) {
1220         barrier.wait();
1221         ets.local() = i;
1222         CHECK(ets.local() == i);
1223     });
1224     CHECK(ets.local() == 42);
1225 
1226     int ref_combined{0};
1227     std::vector<int> sequence(LOCALS);
1228     std::iota(sequence.begin(), sequence.end(), 0);
1229     for (int i : sequence) {
1230         ref_combined += i;
1231     }
1232     ref_combined += 42;
1233     int ets_combined = ets.combine([](int x, int y) {
1234         return x + y;
1235     });
1236     CHECK(ref_combined == ets_combined);
1237 }
1238 
//! Test ETS usage in parallel algorithms.
//! Also tests flattened2d and flatten2d
//! \brief \ref interface \ref requirement \ref stress
TEST_CASE("Parallel test") {
    // Serial reference computations run first
    run_reference_check();
    // AlignMask is switched to the mask that corresponds to the allocator under test
    AlignMask = cache_allocator_mask;
    run_parallel_tests<oneapi::tbb::cache_aligned_allocator>("oneapi::tbb::cache_aligned_allocator");
    AlignMask = tbb_allocator_mask;
    run_parallel_tests<oneapi::tbb::tbb_allocator>("oneapi::tbb::tbb_allocator");
    run_cross_type_tests();
}
1250 
//! Test the member types of enumerable_thread_specific (compile-time checks)
//! \brief \ref interface \ref requirement
TEST_CASE("Member types") {
    TestMemberTypes();
}
1255 
//! Test increment, decrement, arithmetic and subscripting of the ETS iterators
//! \brief \ref interface \ref requirement
TEST_CASE("enumerable_thread_specific iterator") {
    TestETSIterator();
}
1260 
//! Test equality and relational comparisons of the ETS iterators
//! \brief \ref interface \ref requirement
TEST_CASE("enumerable_thread_specific iterator comparisons") {
    TestETSIteratorComparisons();
}
1265