1 /*
2     Copyright (c) 2005-2020 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #if _MSC_VER && !defined(__INTEL_COMPILER)
18     // Workaround for vs2015 and warning name was longer than the compiler limit (4096).
19     #pragma warning (push)
20     #pragma warning (disable: 4503)
21 #endif
22 
23 #include "common/test.h"
24 #include "common/utils.h"
25 #include "common/utils_report.h"
26 #include "common/utils_concurrency_limit.h"
27 #include "common/spin_barrier.h"
28 #include "common/checktype.h"
29 
30 #include "oneapi/tbb/detail/_utils.h"
31 #include "oneapi/tbb/enumerable_thread_specific.h"
32 #include "oneapi/tbb/parallel_for.h"
33 #include "oneapi/tbb/parallel_reduce.h"
34 #include "oneapi/tbb/parallel_invoke.h"
35 #include "oneapi/tbb/blocked_range.h"
36 #include "oneapi/tbb/tbb_allocator.h"
37 #include "oneapi/tbb/global_control.h"
38 #include "oneapi/tbb/cache_aligned_allocator.h"
39 // INFO: #include "oneapi/tbb/tick_count.h"
40 
41 #include <cstring>
42 #include <cstdio>
43 #include <vector>
44 #include <numeric>
45 #include <utility>
46 #include <atomic>
47 
48 //! \file conformance_enumerable_thread_specific.cpp
49 //! \brief Test for [tls.enumerable_thread_specific tls.flattened2d] specification
50 
51 //------------------------------------------------------------------------------------------------------
52 // Utility types/classes/functions
53 //------------------------------------------------------------------------------------------------------
54 
//! Minimum number of threads
//! (lower bound of the max_allowed_parallelism sweep done by the test drivers in this file).
static int MinThread = 1;

//! Maximum number of threads
//! (upper bound, inclusive, of the same sweep).
static int MaxThread = 4;

// Incremented by every minimal<N> constructor (default and copy).
static std::atomic<int> construction_counter;
// Incremented by every minimal<N> destructor.
static std::atomic<int> destruction_counter;

// Workload sizing.
//   REPETITIONS - the test loops run t = -1 .. REPETITIONS-1, i.e. REPETITIONS + 1 passes.
//   N           - length of the index range [0, N) handed to parallel_for.
//   RANGE_MIN   - third (grain size) argument of the blocked_range built over [0, N).
#if TBB_USE_DEBUG
const int REPETITIONS = 4;
const int N = 10000;
const int RANGE_MIN=1000;
#else
const int REPETITIONS = 10;
const int N = 100000;
const int RANGE_MIN=10000;
#endif
// Each pass adds 1 per index, so a fully accumulated sum over all passes is (REPETITIONS + 1) * N.
const double EXPECTED_SUM = (REPETITIONS + 1) * N;
74 
75 //! A minimal class that occupies N bytes.
76 /** Defines default and copy constructor, and allows implicit operator&. Hides operator=. */
77 template<size_t N = oneapi::tbb::detail::max_nfs_size>
78 class minimal: utils::NoAssign {
79 private:
80     int my_value;
81     bool is_constructed;
82     char pad[N-sizeof(int) - sizeof(bool)];
83 public:
84     minimal() : utils::NoAssign(), my_value(0) { ++construction_counter; is_constructed = true; }
85     minimal( const minimal &m ) : utils::NoAssign(), my_value(m.my_value) { ++construction_counter; is_constructed = true; }
86     ~minimal() { ++destruction_counter; REQUIRE(is_constructed); is_constructed = false; }
87     void set_value( const int i ) { REQUIRE(is_constructed); my_value = i; }
88     int value( ) const { REQUIRE(is_constructed); return my_value; }
89 
90     bool operator==( const minimal& other ) { return my_value == other.my_value; }
91 };
92 
93 static size_t AlignMask = 0;  // set to cache-line-size - 1
94 
95 template<typename T>
96 T& check_alignment(T& t, const char *aname) {
97     if( !oneapi::tbb::detail::is_aligned(&t, AlignMask)) {
98         // TBB_REVAMP_TODO: previously was REPORT_ONCE
99         REPORT("alignment error with %s allocator (%x)\n", aname, (int)size_t(&t) & (AlignMask-1));
100     }
101     return t;
102 }
103 
104 template<typename T>
105 const T& check_alignment(const T& t, const char *aname) {
106     if( !oneapi::tbb::detail::is_aligned(&t, AlignMask)) {
107         // TBB_REVAMP_TODO: previously was REPORT_ONCE
108         REPORT("alignment error with %s allocator (%x)\n", aname, (int)size_t(&t) & (AlignMask-1));
109     }
110     return t;
111 }
112 
113 // Test constructors which throw.  If an ETS constructor throws before completion,
114 // the already-built objects are un-constructed.  Do not call the destructor if
115 // this occurs.
116 
//! Running count of Thrower constructions (only advanced when exceptions are enabled).
static std::atomic<int> gThrowValue;
//! Construction ordinal at which Thrower's constructor throws.
static int targetThrowValue = 3;

//! Helper whose constructor throws std::bad_alloc on the targetThrowValue-th construction.
/** When TBB_USE_EXCEPTIONS is off the constructor does nothing. */
class Thrower {
public:
    Thrower() {
#if TBB_USE_EXCEPTIONS
        const int construction_ordinal = ++gThrowValue;
        if (construction_ordinal == targetThrowValue)
            throw std::bad_alloc();
#endif
    }
};
130 
// The m_throwing_field member of ThrowingConstructor throws after a certain number of
// construction calls.  The constructor unwinder should then destroy the CheckType<int>
// member (m_checktype) that was constructed just before it.
134 class ThrowingConstructor {
135     CheckType<int> m_checktype;
136     Thrower m_throwing_field;
137 public:
138     int m_cnt;
139     ThrowingConstructor() : m_checktype(), m_throwing_field() { m_cnt = 0;}
140 
141     bool operator==( const ThrowingConstructor& other ) { return m_cnt == other.m_cnt; }
142 private:
143 };
144 
145 //
146 // A helper class that simplifies writing the tests since minimal does not
147 // define = or + operators.
148 //
149 
//! Uniform arithmetic facade used by the tests; this primary template serves built-in-like types.
/** Specializations provide the same operations for types lacking = and += . */
template< typename T >
struct test_helper {
    //! Resets target to zero.
    static void init(T &target) {
        target = static_cast<T>(0);
    }
    //! Adds an integer amount to target.
    static void sum(T &target, const int amount) {
        target += static_cast<T>(amount);
    }
    //! Adds a floating-point amount to target (converted to T first).
    static void sum(T &target, const double amount) {
        target += static_cast<T>(amount);
    }
    //! Overwrites target with new_value.
    static void set(T &target, const int new_value) {
        target = static_cast<T>(new_value);
    }
    //! Reads source as a double.
    static double get(const T &source) {
        return static_cast<double>(source);
    }
};
158 
159 template<size_t N>
160 struct test_helper<minimal<N> > {
161    static inline void init(minimal<N> &sum) { sum.set_value( 0 ); }
162    static inline void sum(minimal<N> &sum, const int addend ) { sum.set_value( sum.value() + addend); }
163    static inline void sum(minimal<N> &sum, const double addend ) { sum.set_value( sum.value() + static_cast<int>(addend)); }
164    static inline void sum(minimal<N> &sum, const minimal<N> &addend ) { sum.set_value( sum.value() + addend.value()); }
165    static inline void set(minimal<N> &v, const int value ) { v.set_value( static_cast<int>(value) ); }
166    static inline double get(const minimal<N> &sum ) { return static_cast<double>(sum.value()); }
167 };
168 
169 template<>
170 struct test_helper<ThrowingConstructor> {
171    static inline void init(ThrowingConstructor &sum) { sum.m_cnt = 0; }
172    static inline void sum(ThrowingConstructor &sum, const int addend ) { sum.m_cnt += addend; }
173    static inline void sum(ThrowingConstructor &sum, const double addend ) { sum.m_cnt += static_cast<int>(addend); }
174    static inline void sum(ThrowingConstructor &sum, const ThrowingConstructor &addend ) { sum.m_cnt += addend.m_cnt; }
175    static inline void set(ThrowingConstructor &v, const int value ) { v.m_cnt = static_cast<int>(value); }
176    static inline double get(const ThrowingConstructor &sum ) { return static_cast<double>(sum.m_cnt); }
177 };
178 
//! Tag class used to make certain constructors hard to invoke accidentally.
struct SecretTagType {};
//! The single tag value passed to those constructors.
SecretTagType SecretTag;

//// functors and routines for initialization and combine

//! Counts instances of FunctorFinit
static std::atomic<int> FinitCounter;

//! Initializer functor that yields Value converted to T.
/** Has no default constructor: it can only be created from the tag or by copying.
    Each live instance is reflected in FinitCounter. */
template <typename T, int Value>
struct FunctorFinit {
    FunctorFinit( SecretTagType ) { FinitCounter.fetch_add(1); }
    FunctorFinit( const FunctorFinit& ) { FinitCounter.fetch_add(1); }
    ~FunctorFinit() { FinitCounter.fetch_sub(1); }

    T operator()() {
        return Value;
    }
};
194 
195 template <int Value>
196 struct FunctorFinit<ThrowingConstructor,Value> {
197     FunctorFinit( const FunctorFinit& ) {++FinitCounter;}
198     FunctorFinit( SecretTagType ) {++FinitCounter;}
199     ~FunctorFinit() {--FinitCounter;}
200     ThrowingConstructor operator()() { ThrowingConstructor temp; temp.m_cnt = Value; return temp; }
201 };
202 
203 template <size_t N, int Value>
204 struct FunctorFinit<minimal<N>,Value> {
205     FunctorFinit( const FunctorFinit& ) {++FinitCounter;}
206     FunctorFinit( SecretTagType ) {++FinitCounter;}
207     ~FunctorFinit() {--FinitCounter;}
208     minimal<N> operator()() {
209         minimal<N> result;
210         result.set_value( Value );
211         return result;
212     }
213 };
214 
215 // Addition
216 
//! Binary combiner taking both operands by const reference; returns their sum via operator+.
template <typename T>
struct FunctorAddCombineRef {
    T operator()(const T& lhs, const T& rhs) const
    {
        return lhs + rhs;
    }
};
223 
224 template <size_t N>
225 struct FunctorAddCombineRef<minimal<N> > {
226     minimal<N> operator()(const minimal<N>& left, const minimal<N>& right) const {
227         minimal<N> result;
228         result.set_value( left.value() + right.value() );
229         return result;
230     }
231 };
232 
233 template <>
234 struct FunctorAddCombineRef<ThrowingConstructor> {
235     ThrowingConstructor operator()(const ThrowingConstructor& left, const ThrowingConstructor& right) const {
236         ThrowingConstructor result;
237         result.m_cnt = ( left.m_cnt + right.m_cnt );
238         return result;
239     }
240 };
241 
242 template <typename T>
243 struct FunctorAddCombine {
244     T operator()(T left, T right ) const {
245         return FunctorAddCombineRef<T>()( left, right );
246     }
247 };
248 
249 template <typename T>
250 T FunctionAddByRef( const T &left, const T &right) {
251     return FunctorAddCombineRef<T>()( left, right );
252 }
253 
254 template <typename T>
255 T FunctionAdd( T left, T right) { return FunctionAddByRef(left,right); }
256 
257 template <typename T>
258 class Accumulator {
259 public:
260     Accumulator(T& result) : my_result(result) {}
261     Accumulator(const Accumulator& other) : my_result(other.my_result) {}
262     Accumulator& operator=(const Accumulator& other) {
263         test_helper<T>::set(my_result, test_helper<T>::get(other));
264         return *this;
265     }
266     void operator()(const T& new_bit) { test_helper<T>::sum(my_result, new_bit); }
267 private:
268     T& my_result;
269 };
270 
271 template <typename T>
272 class ClearingAccumulator {
273 public:
274     ClearingAccumulator(T& result) : my_result(result) {}
275     ClearingAccumulator(const ClearingAccumulator& other) : my_result(other.my_result) {}
276     ClearingAccumulator& operator=(const ClearingAccumulator& other) {
277         test_helper<T>::set(my_result, test_helper<T>::get(other));
278         return *this;
279     }
280     void operator()(T& new_bit) {
281         test_helper<T>::sum(my_result, new_bit);
282         test_helper<T>::init(new_bit);
283     }
284     static void AssertClean(const T& thread_local_value) {
285         T zero;
286         test_helper<T>::init(zero);
287         REQUIRE_MESSAGE(test_helper<T>::get(thread_local_value)==test_helper<T>::get(zero),
288                "combine_each does not allow to modify thread local values?");
289     }
290 private:
291     T& my_result;
292 };
293 
294 //// end functors and routines
295 
296 //------------------------------------------------------------------------------------------------------
// Test bodies and drivers used by the test cases
298 //------------------------------------------------------------------------------------------------------
299 
300 template <typename T, template<class> class Allocator>
301 class parallel_scalar_body: utils::NoAssign {
302     typedef oneapi::tbb::enumerable_thread_specific<T, Allocator<T> > ets_type;
303     ets_type &sums;
304     const char* allocator_name;
305 
306 public:
307 
308     parallel_scalar_body ( ets_type &_sums, const char *alloc_name ) : sums(_sums), allocator_name(alloc_name) { }
309 
310     void operator()( const oneapi::tbb::blocked_range<int> &r ) const {
311         for (int i = r.begin(); i != r.end(); ++i)
312             test_helper<T>::sum( check_alignment(sums.local(),allocator_name), 1 );
313     }
314 
315 };
316 
//! Exercises the non-combine part of enumerable_thread_specific<T, Allocator<T>>.
/** For every thread limit p in [MinThread, MaxThread] the function runs REPETITIONS + 1
    passes. Each pass fills three containers (one built from an exemplar, one from a
    FunctorFinit, one long-lived "static" one) by adding 1 per index of [0, N), then sums
    the exemplar-built container through every access path under test: iterator,
    const_iterator, range, const range, copy construction, and assignment between the
    default and the ets_key_per_instance flavours. After the passes every running sum
    must equal EXPECTED_SUM.

    There is no try/catch here: an exception thrown while constructing a T propagates to
    the caller. The number and order of T constructions therefore matters for throwing
    element types.

    \param allocator_name  name used in alignment diagnostics
    (the first, unnamed parameter is the test name and is currently unused) */
template< typename T, template<class> class Allocator>
void run_parallel_scalar_tests_nocombine(const char* /* test_name */, const char *allocator_name) {

    typedef oneapi::tbb::enumerable_thread_specific<T, Allocator<T> > ets_type;

    // NOTE(review): Checker comes from common/checktype.h; presumably it validates
    // construction/destruction bookkeeping for T when it goes out of scope - confirm.
    Checker<T> my_check;
    // Restart the Thrower construction count for this run.
    gThrowValue = 0;
    {
        // We assume that static_sums zero-initialized or has a default constructor that zeros it.
        ets_type static_sums = ets_type( T() );

        T exemplar;
        test_helper<T>::init(exemplar);

        for (int p = MinThread; p <= MaxThread; ++p) {
            // INFO("Testing parallel %s with allocator %s on %d thread(s)... ", test_name, allocator_name, p);
            oneapi::tbb::global_control gc(oneapi::tbb::global_control::max_allowed_parallelism, p);

            // INFO: oneapi::tbb::tick_count t0;

            // One running total per access path; all are zeroed here and checked after the passes.
            T iterator_sum;
            test_helper<T>::init(iterator_sum);

            T finit_ets_sum;
            test_helper<T>::init(finit_ets_sum);

            T const_iterator_sum;
            test_helper<T>::init(const_iterator_sum);

            T range_sum;
            test_helper<T>::init(range_sum);

            T const_range_sum;
            test_helper<T>::init(const_range_sum);

            T cconst_sum;
            test_helper<T>::init(cconst_sum);

            T assign_sum;
            test_helper<T>::init(assign_sum);

            T cassgn_sum;
            test_helper<T>::init(cassgn_sum);
            T non_cassgn_sum;
            test_helper<T>::init(non_cassgn_sum);

            T static_sum;
            test_helper<T>::init(static_sum);

            // t starts at -1, so the body runs REPETITIONS + 1 times.
            for (int t = -1; t < REPETITIONS; ++t) {
                // INFO: if (Verbose && t == 0) t0 = oneapi::tbb::tick_count::now();

                // The long-lived container is reused across passes, so empty it first.
                static_sums.clear();

                ets_type sums(exemplar);
                FunctorFinit<T,0> my_finit(SecretTag);
                ets_type finit_ets(my_finit);

                REQUIRE( sums.empty());
                oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>( 0, N, RANGE_MIN ), parallel_scalar_body<T,Allocator>( sums, allocator_name ) );
                REQUIRE( !sums.empty());

                REQUIRE( finit_ets.empty());
                oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>( 0, N, RANGE_MIN ), parallel_scalar_body<T,Allocator>( finit_ets, allocator_name ) );
                REQUIRE( !finit_ets.empty());

                REQUIRE(static_sums.empty());
                oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>( 0, N, RANGE_MIN ), parallel_scalar_body<T,Allocator>( static_sums, allocator_name ) );
                REQUIRE( !static_sums.empty());

                // use iterator
                // (also counts the elements to cross-check size())
                typename ets_type::size_type size = 0;
                for ( typename ets_type::iterator i = sums.begin(); i != sums.end(); ++i ) {
                     ++size;
                     test_helper<T>::sum(iterator_sum, *i);
                }
                REQUIRE( sums.size() == size);

                // use const_iterator
                for ( typename ets_type::const_iterator i = sums.begin(); i != sums.end(); ++i ) {
                     test_helper<T>::sum(const_iterator_sum, *i);
                }

                // use range_type
                typename ets_type::range_type r = sums.range();
                for ( typename ets_type::range_type::const_iterator i = r.begin(); i != r.end(); ++i ) {
                     test_helper<T>::sum(range_sum, *i);
                }

                // use const_range_type
                const ets_type& csums = sums;
                typename ets_type::const_range_type cr = csums.range();
                for ( typename ets_type::const_range_type::iterator i = cr.begin(); i != cr.end(); ++i ) {
                     test_helper<T>::sum(const_range_sum, *i);
                }

                // test copy constructor, with TLS-cached locals
                typedef typename oneapi::tbb::enumerable_thread_specific<T, Allocator<T>, oneapi::tbb::ets_key_per_instance> cached_ets_type;

                cached_ets_type cconst(sums);
                // Touch the copy's local element from worker threads: it either already
                // exists (copied from sums) or compares equal to a default-constructed T.
                oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>(0, N, RANGE_MIN), [&]( const oneapi::tbb::blocked_range<int>& ) {
                    bool exists = false;
                    T& ref = cconst.local(exists);
                    CHECK((exists || ref == T()));
                } );
                // Copy-construct, then move-assign; the moved-to container must report the same size.
                cached_ets_type cconst_to_assign1 = cconst;
                cached_ets_type cconst_to_assign2;
                cconst_to_assign2 = std::move(cconst_to_assign1);
                REQUIRE(cconst_to_assign2.size() == cconst.size());

                for ( typename cached_ets_type::const_iterator i = cconst.begin(); i != cconst.end(); ++i ) {
                     test_helper<T>::sum(cconst_sum, *i);
                }

                // test assignment
                ets_type assigned;
                assigned = sums;

                for ( typename ets_type::const_iterator i = assigned.begin(); i != assigned.end(); ++i ) {
                     test_helper<T>::sum(assign_sum, *i);
                }

                // test assign to and from cached locals
                cached_ets_type cassgn;
                cassgn = sums;
                for ( typename cached_ets_type::const_iterator i = cassgn.begin(); i != cassgn.end(); ++i ) {
                     test_helper<T>::sum(cassgn_sum, *i);
                }

                ets_type non_cassgn;
                non_cassgn = cassgn;
                for ( typename ets_type::const_iterator i = non_cassgn.begin(); i != non_cassgn.end(); ++i ) {
                     test_helper<T>::sum(non_cassgn_sum, *i);
                }

                // test finit-initialized ets
                for(typename ets_type::const_iterator i = finit_ets.begin(); i != finit_ets.end(); ++i) {
                    test_helper<T>::sum(finit_ets_sum, *i);
                }

                // test static ets
                for(typename ets_type::const_iterator i = static_sums.begin(); i != static_sums.end(); ++i) {
                    test_helper<T>::sum(static_sum, *i);
                }

            }

            // Every access path must have seen N increments in each of the REPETITIONS + 1 passes.
            REQUIRE(EXPECTED_SUM == test_helper<T>::get(iterator_sum));
            REQUIRE(EXPECTED_SUM == test_helper<T>::get(const_iterator_sum));
            REQUIRE(EXPECTED_SUM == test_helper<T>::get(range_sum));
            REQUIRE(EXPECTED_SUM == test_helper<T>::get(const_range_sum));

            REQUIRE(EXPECTED_SUM == test_helper<T>::get(cconst_sum));
            REQUIRE(EXPECTED_SUM == test_helper<T>::get(assign_sum));
            REQUIRE(EXPECTED_SUM == test_helper<T>::get(cassgn_sum));
            REQUIRE(EXPECTED_SUM == test_helper<T>::get(non_cassgn_sum));
            REQUIRE(EXPECTED_SUM == test_helper<T>::get(finit_ets_sum));
            REQUIRE(EXPECTED_SUM == test_helper<T>::get(static_sum));

            // INFO("done\nparallel %s, %d, %g, %g\n", test_name, p, test_helper<T>::get(iterator_sum), ( oneapi::tbb::tick_count::now() - t0).seconds());
        }
    }  // Checker block
}
480 
//! Driver for the scalar tests: runs the non-combine part, then the combine/combine_each part.
/** The whole sequence is repeated with a growing targetThrowValue (10, 20, ...) until a
    round completes without the combine part catching an exception, or until the throw
    threshold reaches 200. The final REQUIRE_MESSAGE fails if the last round still caught
    an exception.

    Within a round, for each thread limit p in [MinThread, MaxThread], REPETITIONS + 1
    passes fill two containers and accumulate them through combine (by-value and
    by-reference functions) and combine_each (Accumulator, ClearingAccumulator). The sums
    must equal EXPECTED_SUM unless an exception was caught in this round.

    \param test_name       forwarded to run_parallel_scalar_tests_nocombine
    \param allocator_name  name used in alignment diagnostics */
template< typename T, template<class> class Allocator>
void run_parallel_scalar_tests(const char* test_name, const char* allocator_name) {

    typedef oneapi::tbb::enumerable_thread_specific<T, Allocator<T> > ets_type;
    bool exception_caught = false;

    // We assume that static_sums zero-initialized or has a default constructor that zeros it.
    ets_type static_sums = ets_type( T() );

    T exemplar;
    test_helper<T>::init(exemplar);

    int test_throw_count = 10;
    // the test will be performed repeatedly until it does not throw.  For non-throwing types
    // this means once; for the throwing type test it may loop two or three times.  The
    // value of targetThrowValue will determine when and if the test will throw.
    do {
        targetThrowValue = test_throw_count;  // keep testing until we get no exception
        exception_caught = false;
#if TBB_USE_EXCEPTIONS
        try {
#endif
            run_parallel_scalar_tests_nocombine<T,Allocator>(test_name, allocator_name);
#if TBB_USE_EXCEPTIONS
        }
        catch(...) {
            // An exception from the non-combine part is swallowed here and does NOT set
            // exception_caught; only the combine part below controls the retry loop.
            // INFO("Exception caught %d\n", targetThrowValue);
        }
#endif
        for (int p = MinThread; p <= MaxThread; ++p) {
            // INFO("Testing parallel %s with allocator %s on %d thread(s)... ", test_name, allocator_name, p);
            oneapi::tbb::global_control gc(oneapi::tbb::global_control::max_allowed_parallelism, p);

            // INFO: oneapi::tbb::tick_count t0;

            // Restart the Thrower construction count for each thread limit.
            gThrowValue = 0;

            T combine_sum;
            test_helper<T>::init(combine_sum);

            T combine_ref_sum;
            test_helper<T>::init(combine_ref_sum);

            T accumulator_sum;
            test_helper<T>::init(accumulator_sum);

            T static_sum;
            test_helper<T>::init(static_sum);

            T clearing_accumulator_sum;
            test_helper<T>::init(clearing_accumulator_sum);

            {
                // NOTE(review): Checker comes from common/checktype.h; presumably it validates
                // construction/destruction bookkeeping for T at scope exit - confirm.
                Checker<T> my_check;
#if TBB_USE_EXCEPTIONS
                try
#endif
                {
                    // t starts at -1, so the body runs REPETITIONS + 1 times.
                    for (int t = -1; t < REPETITIONS; ++t) {
                        // INFO: if (Verbose && t == 0) t0 = oneapi::tbb::tick_count::now();

                        static_sums.clear();

                        ets_type sums(exemplar);

                        REQUIRE( sums.empty());
                        oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>( 0, N, RANGE_MIN ),
                                parallel_scalar_body<T,Allocator>( sums, allocator_name ) );
                        REQUIRE( !sums.empty());

                        REQUIRE(static_sums.empty());
                        oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int>( 0, N, RANGE_MIN ),
                                parallel_scalar_body<T,Allocator>( static_sums, allocator_name ) );
                        REQUIRE( !static_sums.empty());

                        // Use combine
                        test_helper<T>::sum(combine_sum, sums.combine(FunctionAdd<T>));
                        test_helper<T>::sum(combine_ref_sum, sums.combine(FunctionAddByRef<T>));
                        test_helper<T>::sum(static_sum, static_sums.combine(FunctionAdd<T>));

                        // Accumulate with combine_each
                        sums.combine_each(Accumulator<T>(accumulator_sum));
                        // Accumulate and clear thread-local values
                        sums.combine_each(ClearingAccumulator<T>(clearing_accumulator_sum));
                        // Check that the values were cleared
                        sums.combine_each(ClearingAccumulator<T>::AssertClean);
                    }
                }
#if TBB_USE_EXCEPTIONS
                catch(...) {
                    // INFO("Exception caught %d\n", targetThrowValue);
                    exception_caught = true;
                }
#endif
            }

            // exception_caught is only reset at the top of the do-loop, so once it is set
            // the checks below are relaxed for all remaining thread limits of this round.
            REQUIRE((EXPECTED_SUM == test_helper<T>::get(combine_sum) || exception_caught));
            REQUIRE((EXPECTED_SUM == test_helper<T>::get(combine_ref_sum) || exception_caught));
            REQUIRE((EXPECTED_SUM == test_helper<T>::get(static_sum) || exception_caught));
            REQUIRE((EXPECTED_SUM == test_helper<T>::get(accumulator_sum) || exception_caught));
            REQUIRE((EXPECTED_SUM == test_helper<T>::get(clearing_accumulator_sum) || exception_caught));

            // INFO("done\nparallel combine %s, %d, %g, %g\n", test_name, p, test_helper<T>::get(combine_sum), ( oneapi::tbb::tick_count::now() - t0).seconds());
        }  // MinThread .. MaxThread
        test_throw_count += 10;  // keep testing until we don't get an exception
    } while (exception_caught && test_throw_count < 200);
    REQUIRE_MESSAGE(!exception_caught, "No non-exception test completed");
}
589 
590 template <typename T, template<class> class Allocator>
591 class parallel_vector_for_body: utils::NoAssign {
592     typedef std::vector<T, oneapi::tbb::tbb_allocator<T> > container_type;
593     typedef oneapi::tbb::enumerable_thread_specific< container_type, Allocator<container_type> > ets_type;
594     ets_type &locals;
595     const char *allocator_name;
596 
597 public:
598 
599     parallel_vector_for_body ( ets_type &_locals, const char *aname ) : locals(_locals), allocator_name(aname) { }
600 
601     void operator()( const oneapi::tbb::blocked_range<int> &r ) const {
602         T one;
603         test_helper<T>::set(one, 1);
604 
605         for (int i = r.begin(); i < r.end(); ++i) {
606             check_alignment(locals.local(),allocator_name).push_back( one );
607         }
608     }
609 
610 };
611 
612 template <typename R, typename T>
613 struct parallel_vector_reduce_body {
614 
615     T sum;
616     size_t count;
617     typedef std::vector<T, oneapi::tbb::tbb_allocator<T> > container_type;
618 
619     parallel_vector_reduce_body ( ) : count(0) { test_helper<T>::init(sum); }
620     parallel_vector_reduce_body ( parallel_vector_reduce_body<R, T> &, oneapi::tbb::split ) : count(0) {  test_helper<T>::init(sum); }
621 
622     void operator()( const R &r ) {
623         for (typename R::iterator ri = r.begin(); ri != r.end(); ++ri) {
624             const container_type &v = *ri;
625             ++count;
626             for (typename container_type::const_iterator vi = v.begin(); vi != v.end(); ++vi) {
627                 test_helper<T>::sum(sum, *vi);
628             }
629         }
630     }
631 
632     void join( const parallel_vector_reduce_body &b ) {
633         test_helper<T>::sum(sum,b.sum);
634         count += b.count;
635     }
636 
637 };
638 
//! Exercises enumerable_thread_specific whose elements are vectors of T, plus flattened2d.
/** For every thread limit p in [MinThread, MaxThread] the function runs REPETITIONS + 1
    passes. Each pass:
      - fills a container of per-thread vectors with N elements equal to 1,
      - copy-constructs and assigns it and checks that the copies have the same size,
      - totals the elements with parallel_reduce over range(1),
      - walks a flattened2d view with const and non-const iterators (pre- and
        post-increment) and checks the element count,
      - repeats the fill with a container built through the multi-argument constructor
        (each local vector starts with 25 elements equal to -1).
    After the passes the accumulated total must equal EXPECTED_SUM.

    \param allocator_name  name used in alignment diagnostics
    (the first, unnamed parameter is the test name and is currently unused) */
template< typename T, template<class> class Allocator>
void run_parallel_vector_tests(const char* /* test_name */, const char *allocator_name) {
    // INFO: oneapi::tbb::tick_count t0;
    typedef std::vector<T, oneapi::tbb::tbb_allocator<T> > container_type;
    typedef oneapi::tbb::enumerable_thread_specific< container_type, Allocator<container_type> > ets_type;

    for (int p = MinThread; p <= MaxThread; ++p) {
        // INFO("Testing parallel %s with allocator %s on %d thread(s)... ", test_name, allocator_name, p);
        oneapi::tbb::global_control gc(oneapi::tbb::global_control::max_allowed_parallelism, p);

        T sum;
        test_helper<T>::init(sum);

        // t starts at -1, so the body runs REPETITIONS + 1 times.
        for (int t = -1; t < REPETITIONS; ++t) {
            // INFO: if (Verbose && t == 0) t0 = oneapi::tbb::tick_count::now();
            ets_type vs;

            REQUIRE( vs.empty() );
            oneapi::tbb::parallel_for( oneapi::tbb::blocked_range<int> (0, N, RANGE_MIN),
                               parallel_vector_for_body<T,Allocator>( vs, allocator_name ) );
            REQUIRE( !vs.empty() );

            // copy construct
            // NOTE(review): the trailing remark below looks stale, since the statement is
            // live and its result is checked further down - confirm and remove if so.
            ets_type vs2(vs); // this causes an assertion failure, related to allocators...

            // assign
            ets_type vs3;
            vs3 = vs;

            parallel_vector_reduce_body< typename ets_type::const_range_type, T > pvrb;
            oneapi::tbb::parallel_reduce ( vs.range(1), pvrb );

            test_helper<T>::sum(sum, pvrb.sum);

            // pvrb.count is the number of local vectors visited by the reduction.
            REQUIRE( vs.size() == pvrb.count );
            REQUIRE( vs2.size() == pvrb.count );
            REQUIRE( vs3.size() == pvrb.count );

            oneapi::tbb::flattened2d<ets_type> fvs = flatten2d(vs);
            size_t ccount = fvs.size();
            REQUIRE( ccount == size_t(N) );
            size_t elem_cnt = 0;
            // Iterator checks: a default-constructed iterator (and its copy) must differ
            // from begin(); an iterator assigned from a const_iterator must compare equal
            // to it; operator-> must agree with the address of operator*.
            typename oneapi::tbb::flattened2d<ets_type>::iterator it;
            auto it2(it);
            it = fvs.begin();
            REQUIRE(it != it2);
            typename oneapi::tbb::flattened2d<ets_type>::iterator it3;
            typename oneapi::tbb::flattened2d<ets_type>::const_iterator cit = fvs.begin();
            it3 = cit;
            REQUIRE(it3 == cit);
            REQUIRE(it3.operator->() == &(*it3));

            // Walk with const_iterator and pre-increment.
            for(typename oneapi::tbb::flattened2d<ets_type>::const_iterator i = fvs.begin(); i != fvs.end(); ++i) {
                ++elem_cnt;
            };
            REQUIRE( ccount == elem_cnt );

            // Walk with iterator and post-increment.
            elem_cnt = 0;
            for(typename oneapi::tbb::flattened2d<ets_type>::iterator i = fvs.begin(); i != fvs.end(); i++) {
                ++elem_cnt;
            };
            REQUIRE( ccount == elem_cnt );

            // Test the ETS constructor with multiple args
            T minus_one;
            test_helper<T>::set(minus_one, -1);
            // Set ETS to construct "local" vectors pre-occupied with 25 "minus_one"s
            // Cast 25 to size_type to prevent Intel Compiler SFINAE compilation issues with gcc 5.
            ets_type vvs( typename container_type::size_type(25), minus_one, oneapi::tbb::tbb_allocator<T>() );
            REQUIRE( vvs.empty() );
            oneapi::tbb::parallel_for ( oneapi::tbb::blocked_range<int> (0, N, RANGE_MIN), parallel_vector_for_body<T,Allocator>( vvs, allocator_name ) );
            REQUIRE( !vvs.empty() );

            parallel_vector_reduce_body< typename ets_type::const_range_type, T > pvrb2;
            oneapi::tbb::parallel_reduce ( vvs.range(1), pvrb2 );
            REQUIRE( pvrb2.count == vvs.size() );
            // N ones were appended and every local vector contributed 25 minus-ones.
            REQUIRE( test_helper<T>::get(pvrb2.sum) == N-pvrb2.count*25 );

            oneapi::tbb::flattened2d<ets_type> fvvs = flatten2d(vvs);
            ccount = fvvs.size();
            // Element count: N appended elements plus the 25 initial ones per local vector.
            REQUIRE( ccount == N+pvrb2.count*25 );
        }

        double result_value = test_helper<T>::get(sum);
        REQUIRE( EXPECTED_SUM == result_value);
        // INFO("done\nparallel %s, %d, %g, %g\n", test_name, p, result_value, ( oneapi::tbb::tick_count::now() - t0).seconds());
    }
}
727 
728 template<typename T, template<class> class Allocator>
729 void run_cross_type_vector_tests(const char* /* test_name */) {
730     // INFO: oneapi::tbb::tick_count t0;
731     const char* allocator_name = "default";
732     typedef std::vector<T, oneapi::tbb::tbb_allocator<T> > container_type;
733 
734     for (int p = MinThread; p <= MaxThread; ++p) {
735         // INFO("Testing parallel %s on %d thread(s)... ", test_name, p);
736         oneapi::tbb::global_control gc(oneapi::tbb::global_control::max_allowed_parallelism, p);
737 
738         T sum;
739         test_helper<T>::init(sum);
740 
741         for (int t = -1; t < REPETITIONS; ++t) {
742             // INFO: if (Verbose && t == 0) t0 = oneapi::tbb::tick_count::now();
743             typedef typename oneapi::tbb::enumerable_thread_specific< container_type, Allocator<container_type>, oneapi::tbb::ets_no_key > ets_nokey_type;
744             typedef typename oneapi::tbb::enumerable_thread_specific< container_type, Allocator<container_type>, oneapi::tbb::ets_key_per_instance > ets_tlskey_type;
745             ets_nokey_type vs;
746 
747             REQUIRE( vs.empty());
748             oneapi::tbb::parallel_for ( oneapi::tbb::blocked_range<int> (0, N, RANGE_MIN), parallel_vector_for_body<T, Allocator>( vs, allocator_name ) );
749             REQUIRE( !vs.empty());
750 
751             // copy construct
752             ets_tlskey_type vs2(vs);
753 
754             // assign
755             ets_nokey_type vs3;
756             vs3 = vs2;
757 
758             parallel_vector_reduce_body< typename ets_nokey_type::const_range_type, T > pvrb;
759             oneapi::tbb::parallel_reduce ( vs3.range(1), pvrb );
760 
761             test_helper<T>::sum(sum, pvrb.sum);
762 
763             REQUIRE( vs3.size() == pvrb.count);
764 
765             oneapi::tbb::flattened2d<ets_nokey_type> fvs = flatten2d(vs3);
766             size_t ccount = fvs.size();
767             size_t elem_cnt = 0;
768             for(typename oneapi::tbb::flattened2d<ets_nokey_type>::const_iterator i = fvs.begin(); i != fvs.end(); ++i) {
769                 ++elem_cnt;
770             };
771             REQUIRE(ccount == elem_cnt);
772 
773             elem_cnt = 0;
774             for(typename oneapi::tbb::flattened2d<ets_nokey_type>::iterator i = fvs.begin(); i != fvs.end(); ++i) {
775                 ++elem_cnt;
776             };
777             REQUIRE(ccount == elem_cnt);
778 
779             oneapi::tbb::flattened2d<ets_nokey_type> fvs2 = flatten2d(vs3, vs3.begin(), std::next(vs3.begin()));
780             REQUIRE(std::distance(fvs2.begin(), fvs2.end()) == vs3.begin()->size());
781             const oneapi::tbb::flattened2d<ets_nokey_type>& cfvs2(fvs2);
782             REQUIRE(std::distance(cfvs2.begin(), cfvs2.end()) == vs3.begin()->size());
783         }
784 
785         double result_value = test_helper<T>::get(sum);
786         REQUIRE( EXPECTED_SUM == result_value);
787         // INFO("done\nparallel %s, %d, %g, %g\n", test_name, p, result_value, ( oneapi::tbb::tick_count::now() - t0).seconds());
788     }
789 }
790 
791 template< typename T >
792 void run_serial_scalar_tests(const char* /* test_name */) {
793     // INFO: oneapi::tbb::tick_count t0;
794     T sum;
795     test_helper<T>::init(sum);
796 
797     // INFO("Testing serial %s... ", test_name);
798     for (int t = -1; t < REPETITIONS; ++t) {
799         // INFO: if (Verbose && t == 0) t0 = oneapi::tbb::tick_count::now();
800         for (int i = 0; i < N; ++i) {
801             test_helper<T>::sum(sum,1);
802         }
803     }
804 
805     double result_value = test_helper<T>::get(sum);
806     REQUIRE( EXPECTED_SUM == result_value);
807     // INFO("done\nserial %s, 0, %g, %g\n", test_name, result_value, ( oneapi::tbb::tick_count::now() - t0).seconds());
808 }
809 
810 template< typename T >
811 void run_serial_vector_tests(const char* /* test_name */) {
812     // INFO: oneapi::tbb::tick_count t0;
813     T sum;
814     test_helper<T>::init(sum);
815     T one;
816     test_helper<T>::set(one, 1);
817 
818     // INFO("Testing serial %s... ", test_name);
819     for (int t = -1; t < REPETITIONS; ++t) {
820         // INFO: if (Verbose && t == 0) t0 = oneapi::tbb::tick_count::now();
821         std::vector<T, oneapi::tbb::tbb_allocator<T> > v;
822         for (int i = 0; i < N; ++i) {
823             v.push_back( one );
824         }
825         for (typename std::vector<T, oneapi::tbb::tbb_allocator<T> >::const_iterator i = v.begin(); i != v.end(); ++i)
826             test_helper<T>::sum(sum, *i);
827     }
828 
829     double result_value = test_helper<T>::get(sum);
830     REQUIRE( EXPECTED_SUM == result_value);
831     // INFO("done\nserial %s, 0, %g, %g\n", test_name, result_value, ( oneapi::tbb::tick_count::now() - t0).seconds());
832 }
833 
// Size constant taken from oneTBB's internal max_nfs_size. It parameterizes the
// minimal<line_size-1>, minimal<line_size> and minimal<line_size+1> instantiations used by
// run_assignment_and_copy_constructor_tests to probe padding around a cache line.
const size_t line_size = oneapi::tbb::detail::max_nfs_size;
835 
// Runs the serial reference computations: scalar accumulation for int, double and minimal<>,
// and vector accumulation for int and double. The string arguments are test names only;
// the callees declare them as unused parameters.
void run_reference_check() {
    run_serial_scalar_tests<int>("int");
    run_serial_scalar_tests<double>("double");
    run_serial_scalar_tests<minimal<> >("minimal<>");
    run_serial_vector_tests<int>("std::vector<int, oneapi::tbb::tbb_allocator<int> >");
    run_serial_vector_tests<double>("std::vector<double, oneapi::tbb::tbb_allocator<double> >");
}
843 
// Runs the parallel scalar and vector test suites with the given ETS allocator template.
// Scalars: int, double, ThrowingConstructor; minimal<> goes through the "nocombine" variant.
// Vectors: int and double elements.
// allocator_name is a human-readable allocator name forwarded to every suite.
template<template<class>class Allocator>
void run_parallel_tests(const char *allocator_name) {
    run_parallel_scalar_tests<int, Allocator>("int",allocator_name);
    run_parallel_scalar_tests<double, Allocator>("double",allocator_name);
    run_parallel_scalar_tests_nocombine<minimal<>,Allocator>("minimal<>",allocator_name);
    run_parallel_scalar_tests<ThrowingConstructor, Allocator>("ThrowingConstructor", allocator_name);
    run_parallel_vector_tests<int, Allocator>("std::vector<int, oneapi::tbb::tbb_allocator<int> >",allocator_name);
    run_parallel_vector_tests<double, Allocator>("std::vector<double, oneapi::tbb::tbb_allocator<double> >",allocator_name);
}
853 
// Runs the cross-key-type vector tests (copy/assign between ETS instances with different
// key types) for int and double elements, using tbb_allocator as the ETS allocator.
void run_cross_type_tests() {
    // cross-type scalar tests are part of run_parallel_scalar_tests_nocombine
    run_cross_type_vector_tests<int, oneapi::tbb::tbb_allocator>("std::vector<int, oneapi::tbb::tbb_allocator<int> >");
    run_cross_type_vector_tests<double, oneapi::tbb::tbb_allocator>("std::vector<double, oneapi::tbb::tbb_allocator<double> >");
}
859 
860 template<typename T, template<class> class Allocator, typename Init>
861 oneapi::tbb::enumerable_thread_specific<T,Allocator<T> > MakeETS( Init init ) {
862     return oneapi::tbb::enumerable_thread_specific<T,Allocator<T> >(init);
863 }
// In some GCC versions, parameter packs in lambdas might cause compile errors,
// so a named functor is used instead of a lambda.
// The call operator takes each argument as an rvalue reference to the decayed type P
// and moves all of them into the ETS constructor, returning the new ETS by value.
template<typename ETS, typename... P>
struct MakeETS_Functor {
    ETS operator()( typename std::decay<P>::type&&... params ) {
        return ETS(std::move(params)...);
    }
};
// Overload of MakeETS for arguments saved in a oneapi::tbb::detail::stored_pack.
// The pack is moved into call_and_return together with a MakeETS_Functor.
// NOTE(review): call_and_return presumably invokes the functor with the unpacked arguments
// and returns its result as result_type — confirm against the oneTBB internals.
template<typename T, template<class> class Allocator, typename... P>
oneapi::tbb::enumerable_thread_specific<T,Allocator<T> > MakeETS( oneapi::tbb::detail::stored_pack<P...> pack ) {
    typedef oneapi::tbb::enumerable_thread_specific<T,Allocator<T> > result_type;
    return oneapi::tbb::detail::call_and_return< result_type >(
        MakeETS_Functor<result_type,P...>(), std::move(pack)
    );
}
878 
879 template<typename T, template<class> class Allocator, typename InitSrc, typename InitDst, typename Validator>
880 void ets_copy_assign_test( InitSrc init1, InitDst init2, Validator check, const char *allocator_name ) {
881     typedef oneapi::tbb::enumerable_thread_specific<T, Allocator<T> > ets_type;
882 
883     // Create the source instance
884     const ets_type& cref_binder = MakeETS<T, Allocator>(init1);
885     ets_type& source = const_cast<ets_type&>(cref_binder);
886     check(check_alignment(source.local(),allocator_name));
887 
888     // Test copy construction
889     bool existed = false;
890     ets_type copy(source);
891     check(check_alignment(copy.local(existed),allocator_name));
892     REQUIRE_MESSAGE(existed, "Local data not created by ETS copy constructor");
893     copy.clear();
894     check(check_alignment(copy.local(),allocator_name));
895 
896     // Test assignment
897     existed = false;
898     ets_type assign(init2);
899     assign = source;
900     check(check_alignment(assign.local(existed),allocator_name));
901     REQUIRE_MESSAGE(existed, "Local data not created by ETS assignment");
902     assign.clear();
903     check(check_alignment(assign.local(),allocator_name));
904 
905     // Create the source instance
906     ets_type&& rvref_binder = MakeETS<T, Allocator>(init1);
907     check(check_alignment(rvref_binder.local(),allocator_name));
908 
909     // Test move construction
910     existed = false;
911     ets_type moved(rvref_binder);
912     check(check_alignment(moved.local(existed),allocator_name));
913     REQUIRE_MESSAGE(existed, "Local data not created by ETS move constructor");
914     moved.clear();
915     check(check_alignment(moved.local(),allocator_name));
916 
917     // Test assignment
918     existed = false;
919     ets_type move_assign(init2);
920     move_assign = std::move(moved);
921     check(check_alignment(move_assign.local(existed),allocator_name));
922     REQUIRE_MESSAGE(existed, "Local data not created by ETS move assignment");
923     move_assign.clear();
924     check(check_alignment(move_assign.local(),allocator_name));
925 }
926 
927 template<typename T, int Expected>
928 struct Validator {
929     void operator()( const T& value ) {
930         REQUIRE(test_helper<T>::get(value) == Expected);
931     }
932     void operator()( const std::pair<int,T>& value ) {
933         REQUIRE(value.first > 0);
934         REQUIRE(test_helper<T>::get(value.second) == Expected*value.first);
935     }
936 };
937 
938 template <typename T, template<class> class Allocator>
939 void run_assign_and_copy_constructor_test(const char* /* test_name */, const char *allocator_name) {
940     // INFO("Testing assignment and copy construction for %s with allocator %s\n", test_name, allocator_name);
941     #define EXPECTED 3142
942 
943     // test with exemplar initializer
944     T src_init;
945     test_helper<T>::set(src_init,EXPECTED);
946     T other_init;
947     test_helper<T>::init(other_init);
948     ets_copy_assign_test<T, Allocator>(src_init, other_init, Validator<T,EXPECTED>(), allocator_name);
949 
950     // test with function initializer
951     FunctorFinit<T,EXPECTED> src_finit(SecretTag);
952     FunctorFinit<T,0> other_finit(SecretTag);
953     ets_copy_assign_test<T, Allocator>(src_finit, other_finit, Validator<T,EXPECTED>(), allocator_name);
954 
955     // test with multi-argument "emplace" initializer
956     // The arguments are wrapped into oneapi::tbb::internal::stored_pack to avoid variadic templates in ets_copy_assign_test.
957     test_helper<T>::set(src_init,EXPECTED*17);
958     ets_copy_assign_test< std::pair<int,T>, Allocator>(oneapi::tbb::detail::save_pack(17,src_init), std::make_pair(-1,T()), Validator<T,EXPECTED>(), allocator_name);
959     #undef EXPECTED
960 }
961 
// Runs the assignment / copy-constructor test for int, double and three minimal<> sizes
// around line_size, all with the given ETS allocator template, then requires that
// FinitCounter is back to zero.
// NOTE(review): FinitCounter is defined outside this view; presumably it tracks live
// FunctorFinit objects — confirm.
template< template<class> class Allocator>
void run_assignment_and_copy_constructor_tests(const char* allocator_name) {
    // INFO("Running assignment and copy constructor tests\n");
    run_assign_and_copy_constructor_test<int, Allocator>("int", allocator_name);
    run_assign_and_copy_constructor_test<double, Allocator>("double", allocator_name);
    // Try class sizes that are close to a cache line in size, in order to check padding calculations.
    run_assign_and_copy_constructor_test<minimal<line_size-1>, Allocator >("minimal<line_size-1>", allocator_name);
    run_assign_and_copy_constructor_test<minimal<line_size>, Allocator >("minimal<line_size>", allocator_name);
    run_assign_and_copy_constructor_test<minimal<line_size+1>, Allocator >("minimal<line_size+1>", allocator_name);
    REQUIRE(FinitCounter==0);
}
973 
// Class with no usable default constructor: the default constructor is declared private
// and is not defined here, so instances can only be created from a SecretTagType value
// (or by copying an existing instance).
class HasNoDefaultConstructor {
    HasNoDefaultConstructor();
public:
    HasNoDefaultConstructor( SecretTagType ) {}
};
// Initialization functor for HasNoDefaultConstructor: each call returns a new instance
// built from SecretTag. Used as the ETS "finit" initializer in TestInstantiation.
struct HasNoDefaultConstructorFinit {
    HasNoDefaultConstructor operator()() {
        return HasNoDefaultConstructor(SecretTag);
    }
};
// Combine functor for HasNoDefaultConstructor: ignores both operands (taken by value) and
// returns a fresh instance built from SecretTag. Passed to ETS combine() in TestInstantiation.
struct HasNoDefaultConstructorCombine {
    HasNoDefaultConstructor operator()( HasNoDefaultConstructor, HasNoDefaultConstructor ) {
        return HasNoDefaultConstructor(SecretTag);
    }
};
992 
// Class that only has a constructor with multiple parameters and a move constructor.
// The default constructor is declared private and not defined here. The tagged constructor
// has defaults for its second and third parameters, so it can also be called with just
// SecretTag. The class derives (privately) from utils::NoCopy.
// NOTE(review): utils::NoCopy presumably disables copying — defined outside this view.
class HasSpecialAndMoveCtor : utils::NoCopy {
    HasSpecialAndMoveCtor();
public:
    HasSpecialAndMoveCtor( SecretTagType, size_t = size_t(0), const char* = "" ) {}
    HasSpecialAndMoveCtor( HasSpecialAndMoveCtor&& ) {}
};
1000 
// No-op combine-each functor: accepts a const reference to V and does nothing.
// Lets combine_each() be instantiated for types without requiring any operation on them.
template<typename V>
struct EmptyCombineEach {
    void operator()( const V& ) { }
};
1006 
1007 //! Test situations where only default constructor or copy constructor is required.
1008 template<template<class> class Allocator>
1009 void TestInstantiation(const char* /* allocator_name */) {
1010     // INFO("TestInstantiation<%s>\n", allocator_name);
1011     // Test instantiation is possible when copy constructor is not required.
1012     oneapi::tbb::enumerable_thread_specific<utils::NoCopy, Allocator<utils::NoCopy> > ets1;
1013     ets1.local();
1014     ets1.combine_each(EmptyCombineEach<utils::NoCopy>());
1015 
1016     // Test instantiation when default constructor is not required, because exemplar is provided.
1017     HasNoDefaultConstructor x(SecretTag);
1018     oneapi::tbb::enumerable_thread_specific<HasNoDefaultConstructor, Allocator<HasNoDefaultConstructor> > ets2(x);
1019     ets2.local();
1020     ets2.combine(HasNoDefaultConstructorCombine());
1021 
1022     // Test instantiation when default constructor is not required, because init function is provided.
1023     HasNoDefaultConstructorFinit f;
1024     oneapi::tbb::enumerable_thread_specific<HasNoDefaultConstructor, Allocator<HasNoDefaultConstructor> > ets3(f);
1025     ets3.local();
1026     ets3.combine(HasNoDefaultConstructorCombine());
1027 
1028     // Test instantiation with multiple arguments
1029     oneapi::tbb::enumerable_thread_specific<HasSpecialAndMoveCtor, Allocator<HasSpecialAndMoveCtor> > ets4(SecretTag, 0x42, "meaningless");
1030     ets4.local();
1031     ets4.combine_each(EmptyCombineEach<HasSpecialAndMoveCtor>());
1032     // Test instantiation with one argument that should however use the variadic constructor
1033     oneapi::tbb::enumerable_thread_specific<HasSpecialAndMoveCtor, Allocator<HasSpecialAndMoveCtor> > ets5(SecretTag);
1034     ets5.local();
1035     ets5.combine_each(EmptyCombineEach<HasSpecialAndMoveCtor>());
1036     // Test that move operations do not impose extra requirements
1037     // Default allocator is used. If it does not match Allocator, there will be elementwise move
1038     oneapi::tbb::enumerable_thread_specific<HasSpecialAndMoveCtor> ets6( std::move(ets4) );
1039     ets6.combine_each(EmptyCombineEach<HasSpecialAndMoveCtor>());
1040     ets6 = std::move(ets5);
1041 }
1042 
1043 void TestMemberTypes() {
1044     using default_container_type = oneapi::tbb::enumerable_thread_specific<int>;
1045     static_assert(std::is_same<typename default_container_type::allocator_type, oneapi::tbb::cache_aligned_allocator<int>>::value,
1046             "Incorrect default template allocator");
1047 
1048     using test_allocator_type = std::allocator<int>;
1049     using ets_container_type = oneapi::tbb::enumerable_thread_specific<int, test_allocator_type>;
1050 
1051     static_assert(std::is_same<typename ets_container_type::allocator_type, test_allocator_type>::value,
1052                   "Incorrect container allocator_type member type");
1053 
1054     using value_type = typename ets_container_type::value_type;
1055 
1056     static_assert(std::is_same<typename ets_container_type::value_type, int>::value,
1057                   "Incorrect container value_type member type");
1058     static_assert(std::is_same<typename ets_container_type::reference, value_type&>::value,
1059                   "Incorrect container reference member type");
1060     static_assert(std::is_same<typename ets_container_type::const_reference, const value_type&>::value,
1061                   "Incorrect container const_reference member type");
1062 
1063     using allocator_type = typename ets_container_type::allocator_type;
1064     static_assert(std::is_same<typename ets_container_type::pointer, typename std::allocator_traits<allocator_type>::pointer>::value,
1065                   "Incorrect container pointer member type");
1066     static_assert(std::is_same<typename ets_container_type::const_pointer, typename std::allocator_traits<allocator_type>::const_pointer>::value,
1067                   "Incorrect container const_pointer member type");
1068 
1069     static_assert(std::is_unsigned<typename ets_container_type::size_type>::value,
1070                   "Incorrect container size_type member type");
1071     static_assert(std::is_signed<typename ets_container_type::difference_type>::value,
1072                   "Incorrect container difference_type member type");
1073 
1074     static_assert(utils::is_random_access_iterator<typename ets_container_type::iterator>::value,
1075                   "Incorrect container iterator member type");
1076     static_assert(!std::is_const<typename ets_container_type::iterator::value_type>::value,
1077                   "Incorrect container iterator member type");
1078     static_assert(utils::is_random_access_iterator<typename ets_container_type::const_iterator>::value,
1079                   "Incorrect container const_iterator member type");
1080     static_assert(std::is_const<typename ets_container_type::const_iterator::value_type>::value,
1081                   "Incorrect container iterator member type");
1082 }
1083 
1084 size_t init_tbb_alloc_mask() {
1085     // INFO("estimatedCacheLineSize == %d, NFS_GetLineSize() returns %d\n", (int)estimatedCacheLineSize, (int)oneapi::tbb::detail::d1::cache_line_size());
1086     // TODO: use __TBB_alignof(T) to check for local() results instead of using internal knowledges of ets element padding
1087     if(oneapi::tbb::tbb_allocator<int>::allocator_type() == oneapi::tbb::tbb_allocator<int>::standard) {
1088         // scalable allocator is not available.
1089         // INFO("oneapi::tbb::tbb_allocator is not available\n");
1090         return 1;
1091     }
1092     else {
1093         // this value is for large objects, but will be correct for small.
1094         return 64; // TBB_REVAMP_TODO: enable as estimatedCacheLineSize when tbbmalloc is available;
1095     }
1096 }
1097 
// TODO: rework the test not to depend on oneTBB internals
// Values assigned to the global AlignMask by the test cases before running with the
// corresponding allocator: the runtime cache line size for cache_aligned_allocator, and
// the result of init_tbb_alloc_mask() for tbb_allocator.
// NOTE(review): how AlignMask is consumed (by check_alignment, presumably) is outside this view.
static const size_t cache_allocator_mask = oneapi::tbb::detail::r1::cache_line_size();
static const size_t tbb_allocator_mask = init_tbb_alloc_mask();
1101 
1102 void TestETSIterator() {
1103     using ets_type = oneapi::tbb::enumerable_thread_specific<int>;
1104     if (utils::get_platform_max_threads() == 1) {
1105         ets_type ets;
1106         ets.local() = 1;
1107         REQUIRE_MESSAGE(std::next(ets.begin()) == ets.end(), "Incorrect begin or end of the ETS");
1108         REQUIRE_MESSAGE(std::prev(ets.end()) == ets.begin(), "Incorrect begin or end of the ETS");
1109     } else {
1110         std::atomic<std::size_t> sync_counter(0);
1111 
1112         const std::size_t expected_ets_size = 2;
1113         ets_type ets;
1114         const ets_type& cets(ets);
1115 
1116         auto fill_ets_body = [&](){
1117             ets.local() = 42;
1118             ++sync_counter;
1119             while(sync_counter != expected_ets_size)
1120                 std::this_thread::yield();
1121         };
1122 
1123         oneapi::tbb::parallel_invoke(fill_ets_body, fill_ets_body);
1124         REQUIRE_MESSAGE(ets.size() == expected_ets_size, "Incorrect ETS size");
1125 
1126         std::size_t counter = 0;
1127         auto iter = ets.begin();
1128         while(iter != ets.end()) {
1129             ++counter % 2 == 0 ? ++iter : iter++;
1130         }
1131         REQUIRE(counter == expected_ets_size);
1132         while(iter != ets.begin()) {
1133             --counter % 2 == 0 ? --iter : iter--;
1134         }
1135         REQUIRE(counter == 0);
1136         auto citer = cets.begin();
1137         while(citer != cets.end()) {
1138             ++counter % 2 == 0 ? ++citer : citer++;
1139         }
1140         REQUIRE(counter == expected_ets_size);
1141         while(citer != cets.begin()) {
1142             --counter % 2 == 0 ? --citer : citer--;
1143         }
1144         REQUIRE(counter == 0);
1145         REQUIRE(ets.begin() + expected_ets_size == ets.end());
1146         REQUIRE(expected_ets_size + ets.begin() == ets.end());
1147         REQUIRE(ets.end() - expected_ets_size == ets.begin());
1148 
1149         typename ets_type::iterator it;
1150         it = ets.begin();
1151 
1152         auto it_bkp = it;
1153         auto it2 = it++;
1154         REQUIRE(it2 == it_bkp);
1155 
1156         it = ets.begin();
1157         it += expected_ets_size;
1158         REQUIRE(it == ets.end());
1159         it -= expected_ets_size;
1160         REQUIRE(it == ets.begin());
1161 
1162         for (int i = 0; i < int(expected_ets_size - 1); ++i) {
1163             REQUIRE(ets.begin()[i] == 42);
1164             REQUIRE(std::prev(ets.end())[-i] == 42);
1165         }
1166 
1167         auto iter1 = ets.begin();
1168         auto iter2 = ets.end();
1169         REQUIRE(iter1 < iter2);
1170         REQUIRE(iter1 <= iter2);
1171         REQUIRE(!(iter1 > iter2));
1172         REQUIRE(!(iter1 >= iter2));
1173     }
1174 }
1175 
//! Test container instantiation
//! Runs TestInstantiation once per allocator template, setting the global AlignMask to the
//! matching mask before each run.
//! \brief \ref interface \ref requirement
TEST_CASE("Instantiation") {
    AlignMask = cache_allocator_mask;
    TestInstantiation<oneapi::tbb::cache_aligned_allocator>("oneapi::tbb::cache_aligned_allocator");
    AlignMask = tbb_allocator_mask;
    TestInstantiation<oneapi::tbb::tbb_allocator>("oneapi::tbb::tbb_allocator");
}
1184 
//! Test assignment and copy constructor
//! Runs the assignment / copy-constructor suite once per allocator template, setting the
//! global AlignMask to the matching mask before each run.
//! \brief \ref interface \ref requirement
TEST_CASE("Assignment and copy constructor") {
    AlignMask = cache_allocator_mask;
    run_assignment_and_copy_constructor_tests<oneapi::tbb::cache_aligned_allocator>("oneapi::tbb::cache_aligned_allocator");
    AlignMask = tbb_allocator_mask;
    run_assignment_and_copy_constructor_tests<oneapi::tbb::tbb_allocator>("oneapi::tbb::tbb_allocator");
}
1193 
1194 //! Test for basic ETS functionality and requirements
1195 //! \brief \ref interface \ref requirement
1196 TEST_CASE("Basic ETS functionality") {
1197     const int LOCALS = 10;
1198 
1199     oneapi::tbb::enumerable_thread_specific<int> ets;
1200     ets.local() = 42;
1201 
1202     utils::SpinBarrier barrier(LOCALS);
1203     utils::NativeParallelFor(LOCALS, [&](int i) {
1204         barrier.wait();
1205         ets.local() = i;
1206         CHECK(ets.local() == i);
1207     });
1208     CHECK(ets.local() == 42);
1209 
1210     int ref_combined{0};
1211     std::vector<int> sequence(LOCALS);
1212     std::iota(sequence.begin(), sequence.end(), 0);
1213     for (int i : sequence) {
1214         ref_combined += i;
1215     }
1216     ref_combined += 42;
1217     int ets_combined = ets.combine([](int x, int y) {
1218         return x + y;
1219     });
1220     CHECK(ref_combined == ets_combined);
1221 }
1222 
//! Test ETS usage in parallel algorithms.
//! Also tests flattened2d and flatten2d
//! Runs the serial reference check first, then the parallel suites once per allocator
//! template (setting the global AlignMask before each), and finally the cross-type tests.
//! \brief \ref interface \ref requirement \ref stress
TEST_CASE("Parallel test") {
    run_reference_check();
    AlignMask = cache_allocator_mask;
    run_parallel_tests<oneapi::tbb::cache_aligned_allocator>("oneapi::tbb::cache_aligned_allocator");
    AlignMask = tbb_allocator_mask;
    run_parallel_tests<oneapi::tbb::tbb_allocator>("oneapi::tbb::tbb_allocator");
    run_cross_type_tests();
}
1234 
//! Test the member types of enumerable_thread_specific (compile-time checks in TestMemberTypes)
//! \brief \ref interface \ref requirement
TEST_CASE("Member types") {
    TestMemberTypes();
}
1239 
//! Test the iterator interface of enumerable_thread_specific (see TestETSIterator)
//! \brief \ref interface \ref requirement
TEST_CASE("enumerable_thread_specific iterator") {
    TestETSIterator();
}
1244 
1245 #if _MSC_VER && !defined(__INTEL_COMPILER)
1246     #pragma warning (pop)
1247 #endif // warning 4503 is back
1248